From bbb62f520a3b0d7f5ae99bea98429c5bb09c8fd9 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Tue, 1 Sep 2020 18:23:30 -0400 Subject: [PATCH 01/18] Link conan dependencies --- 3rdparty/miniz/CMakeLists.txt | 43 +++++++++++++++++++++++++++++------ 3rdparty/stb/CMakeLists.txt | 34 +++++++++++++++++++++++++-- conanfile.py | 6 ++++- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/3rdparty/miniz/CMakeLists.txt b/3rdparty/miniz/CMakeLists.txt index c3e420f..7cbac1a 100644 --- a/3rdparty/miniz/CMakeLists.txt +++ b/3rdparty/miniz/CMakeLists.txt @@ -1,12 +1,41 @@ cmake_minimum_required(VERSION 3.0) project(miniz) -set(BUILD_SHARED_LIBS OFF) +find_package(miniz QUIET) -add_library(miniz EXCLUDE_FROM_ALL miniz.c miniz.h) -target_include_directories(miniz PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +if(miniz_FOUND) -set_target_properties(miniz PROPERTIES - FOLDER "3rdparty" - POSITION_INDEPENDENT_CODE ON -) + if(NOT TARGET miniz) + if(TARGET miniz::miniz) + add_library(miniz ALIAS miniz::miniz) + else() + add_library(miniz INTERFACE) + if(miniz_INCLUDE_DIRS) + target_include_directories(miniz INTERFACE ${miniz_INCLUDE_DIRS}) + endif() + if(miniz_LIBRARIES) + target_link_libraries(miniz INTERFACE ${miniz_LIBRARIES}) + endif() + if(miniz_COMPILE_DEFINITIONS) + target_compile_definitions(miniz INTERFACE ${miniz_COMPILE_DEFINITIONS}) + endif() + if(miniz_COMPILE_OPTIONS_LIST) + target_compile_options(miniz INTERFACE ${miniz_COMPILE_OPTIONS_LIST}) + endif() + endif() + endif() + +else() + + message(STATUS "Miniz not found") + message(STATUS "Building miniz from 3rdparty sources") + + add_library(miniz STATIC EXCLUDE_FROM_ALL miniz.c miniz.h) + target_include_directories(miniz PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + set_target_properties(miniz PROPERTIES + FOLDER "3rdparty" + POSITION_INDEPENDENT_CODE ON + ) + +endif() diff --git a/3rdparty/stb/CMakeLists.txt b/3rdparty/stb/CMakeLists.txt index 04f8801..8278701 100644 --- a/3rdparty/stb/CMakeLists.txt +++ 
b/3rdparty/stb/CMakeLists.txt @@ -1,2 +1,32 @@ -add_library(stb INTERFACE) -target_include_directories(stb INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) +cmake_minimum_required(VERSION 3.0) +project(stb) + +find_package(stb QUIET) + +if(stb_FOUND) + + if(NOT TARGET stb) + add_library(stb INTERFACE) + if(stb_INCLUDE_DIRS) + target_include_directories(stb INTERFACE ${stb_INCLUDE_DIRS}) + endif() + if(stb_LIBRARIES) + target_link_libraries(stb INTERFACE ${stb_LIBRARIES}) + endif() + if(stb_COMPILE_DEFINITIONS) + target_compile_definitions(stb INTERFACE ${stb_COMPILE_DEFINITIONS}) + endif() + if(stb_COMPILE_OPTIONS_LIST) + target_compile_options(stb INTERFACE ${stb_COMPILE_OPTIONS_LIST}) + endif() + endif() + +else() + + message(STATUS "Stb not found") + message(STATUS "Building stb from 3rdparty sources") + + add_library(stb INTERFACE) + target_include_directories(stb INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) + +endif() diff --git a/conanfile.py b/conanfile.py index 504ef2e..ddcddde 100644 --- a/conanfile.py +++ b/conanfile.py @@ -10,7 +10,7 @@ class Crunch2Conan(ConanFile): topics = ("conan", "crunch", "texture", "compression", "decompression", "transcoding") settings = "os", "compiler", "arch", "build_type" exports_sources = ["CMakeLists.txt", "license.txt", "crnlib/*", "crunch/*", "inc/*", "3rdparty/*"] - generators = "cmake" + generators = "cmake", "cmake_find_package" options = { "fPIC": [True, False], "shared": [True, False], @@ -34,6 +34,10 @@ def configure(self): if self.options.shared: del self.options.fPIC + def requirements(self): + self.requires("miniz/2.1.0") + self.requires("stb/20200203") + def _configure_cmake(self): if self._cmake: return self._cmake From f69df5b5d06518b6c247299bfb8a2f6066ebb158 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 16:48:19 -0400 Subject: [PATCH 02/18] Fixed typo in command list --- crunch/crunch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crunch/crunch.cpp b/crunch/crunch.cpp index 
bdf691e..32ac164 100644 --- a/crunch/crunch.cpp +++ b/crunch/crunch.cpp @@ -125,7 +125,7 @@ class crunch console::message("\nImage rescaling (mutually exclusive options)"); console::printf("-rescale - Rescale image to specified resolution"); - console::printf("-relscale - Rescale image to specified relative resolution"); + console::printf("-relrescale - Rescale image to specified relative resolution"); console::printf("-rescalemode - Auto-rescale non-power of two images"); console::printf(" nearest - Use nearest power of 2, hi - Use next, lo - Use previous"); From 556ab1c4b8e5e86410ac15cba6767a6242f095b5 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 17:18:47 -0400 Subject: [PATCH 03/18] Avoid ambiguity error on GCC --- crnlib/crn_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crnlib/crn_vector.cpp b/crnlib/crn_vector.cpp index 532c5bf..c018e54 100644 --- a/crnlib/crn_vector.cpp +++ b/crnlib/crn_vector.cpp @@ -20,8 +20,8 @@ bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, return true; ptr_bits_t new_capacity = min_new_capacity; - if ((grow_hint) && (!math::is_power_of_2(new_capacity))) - new_capacity = math::next_pow2(new_capacity); + if ((grow_hint) && (!math::is_power_of_2((uint64)new_capacity))) + new_capacity = math::next_pow2((uint64)new_capacity); CRNLIB_ASSERT(new_capacity && (new_capacity > m_capacity)); From 915140f3bf76592201b75b47aec61cf89699abb4 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 17:20:42 -0400 Subject: [PATCH 04/18] add initialization of m_num_prev_results member on crn_dxt1 --- crnlib/crn_dxt1.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/crnlib/crn_dxt1.cpp b/crnlib/crn_dxt1.cpp index ad7ebb2..576cac7 100644 --- a/crnlib/crn_dxt1.cpp +++ b/crnlib/crn_dxt1.cpp @@ -57,6 +57,7 @@ dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() m_lo_cells.reserve(128); m_hi_cells.reserve(128); + m_num_prev_results = 0; } // All selectors are 
equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables. From bb83362b5f43d0f192737515aaafb0ecc8890d4c Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 17:23:37 -0400 Subject: [PATCH 05/18] Return NULL instead of false when the function expects a pointer --- inc/crn_decomp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/crn_decomp.h b/inc/crn_decomp.h index 340ce5b..fb31455 100644 --- a/inc/crn_decomp.h +++ b/inc/crn_decomp.h @@ -2344,11 +2344,11 @@ const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 leve uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) { if ((!pData) || (data_size < cCRNHeaderMinSize)) - return false; + return NULL; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) - return false; + return NULL; uint32 size = pHeader->m_header_size; From 19dacc2a81082fe6eb3156e31b3f795d97d64799 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 18:54:00 -0400 Subject: [PATCH 06/18] Format some files --- .editorconfig | 1 + crnlib/crn_arealist.cpp | 988 +++++++++++++++++++++------------------- crnlib/crn_arealist.h | 93 ++-- crnlib/crn_assert.cpp | 82 ++-- crnlib/crn_checksum.cpp | 110 ++--- crnlib/crn_checksum.h | 15 +- crnlib/crn_console.cpp | 413 +++++++++-------- crnlib/crn_console.h | 186 ++++---- crnlib/crn_rand.cpp | 669 ++++++++++++++------------- crnlib/crn_rand.h | 164 +++---- crnlib/crn_ray.h | 91 ++-- crnlib/crn_strutils.cpp | 983 ++++++++++++++++++++++----------------- crnlib/crn_timer.cpp | 273 ++++++----- crnlib/crn_timer.h | 151 +++--- crnlib/crn_traits.h | 3 +- crnlib/crn_utils.cpp | 130 +++--- crnlib/crn_utils.h | 746 ++++++++++++++++-------------- 17 files changed, 2833 insertions(+), 2265 deletions(-) diff --git a/.editorconfig b/.editorconfig index 29956bb..75cc784 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,3 +7,4 @@ 
insert_final_newline = true indent_style = space indent_size = 4 trim_trailing_whitespace=true +guidelines = 120 diff --git a/crnlib/crn_arealist.cpp b/crnlib/crn_arealist.cpp index df52dd6..6e124f5 100644 --- a/crnlib/crn_arealist.cpp +++ b/crnlib/crn_arealist.cpp @@ -6,620 +6,668 @@ #define RECT_DEBUG -namespace crnlib { +namespace crnlib +{ -static void area_fatal_error(const char*, const char* pMsg, ...) { - va_list args; - va_start(args, pMsg); + static void area_fatal_error(const char*, const char* pMsg, ...) + { + va_list args; + va_start(args, pMsg); - char buf[512]; -#ifdef _MSC_VER - _vsnprintf_s(buf, sizeof(buf), pMsg, args); + char buf[512]; +#if defined(CRN_CC_MSVC) + _vsnprintf_s(buf, sizeof(buf), pMsg, args); #else - vsnprintf(buf, sizeof(buf), pMsg, args); + vsnprintf(buf, sizeof(buf), pMsg, args); #endif - va_end(args); + va_end(args); - CRNLIB_FAIL(buf); -} + CRNLIB_FAIL(buf); + } -static Area* delete_area(Area_List* Plist, Area* Parea) { - Area *p, *q; + static Area* delete_area(Area_List* Plist, Area* Parea) + { + Area* p, * q; #ifdef RECT_DEBUG - if ((Parea == Plist->Phead) || (Parea == Plist->Ptail)) - area_fatal_error("delete_area", "tried to remove head or tail"); + if ((Parea == Plist->Phead) || (Parea == Plist->Ptail)) + { + area_fatal_error("delete_area", "tried to remove head or tail"); + } #endif - p = Parea->Pprev; - q = Parea->Pnext; - p->Pnext = q; - q->Pprev = p; - - Parea->Pnext = Plist->Pfree; - Parea->Pprev = NULL; - Plist->Pfree = Parea; - - return (q); -} + p = Parea->Pprev; + q = Parea->Pnext; + p->Pnext = q; + q->Pprev = p; -static Area* alloc_area(Area_List* Plist) { - Area* p = Plist->Pfree; + Parea->Pnext = Plist->Pfree; + Parea->Pprev = NULL; + Plist->Pfree = Parea; - if (p == NULL) { - if (Plist->next_free == Plist->total_areas) - area_fatal_error("alloc_area", "Out of areas!"); - - p = Plist->Phead + Plist->next_free; - Plist->next_free++; - } else - Plist->Pfree = p->Pnext; + return q; + } - return (p); -} + static 
Area* alloc_area(Area_List* Plist) + { + Area* p = Plist->Pfree; -static Area* insert_area_before(Area_List* Plist, Area* Parea, - int x1, int y1, int x2, int y2) { - Area *p, *Pnew_area = alloc_area(Plist); + if (p == NULL) + { + if (Plist->next_free == Plist->total_areas) + { + area_fatal_error("alloc_area", "Out of areas!"); + } + p = Plist->Phead + Plist->next_free; + Plist->next_free++; + } + else + { + Plist->Pfree = p->Pnext; + } + return p; + } - p = Parea->Pprev; + static Area* insert_area_before(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) + { + Area* p, * Pnew_area = alloc_area(Plist); - p->Pnext = Pnew_area; + p = Parea->Pprev; - Pnew_area->Pprev = p; - Pnew_area->Pnext = Parea; + p->Pnext = Pnew_area; - Parea->Pprev = Pnew_area; + Pnew_area->Pprev = p; + Pnew_area->Pnext = Parea; - Pnew_area->x1 = x1; - Pnew_area->y1 = y1; - Pnew_area->x2 = x2; - Pnew_area->y2 = y2; + Parea->Pprev = Pnew_area; - return (Pnew_area); -} + Pnew_area->x1 = x1; + Pnew_area->y1 = y1; + Pnew_area->x2 = x2; + Pnew_area->y2 = y2; -static Area* insert_area_after(Area_List* Plist, Area* Parea, - int x1, int y1, int x2, int y2) { - Area *p, *Pnew_area = alloc_area(Plist); + return Pnew_area; + } - p = Parea->Pnext; + static Area* insert_area_after(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) + { + Area* p, * Pnew_area = alloc_area(Plist); - p->Pprev = Pnew_area; + p = Parea->Pnext; - Pnew_area->Pnext = p; - Pnew_area->Pprev = Parea; + p->Pprev = Pnew_area; - Parea->Pnext = Pnew_area; + Pnew_area->Pnext = p; + Pnew_area->Pprev = Parea; - Pnew_area->x1 = x1; - Pnew_area->y1 = y1; - Pnew_area->x2 = x2; - Pnew_area->y2 = y2; + Parea->Pnext = Pnew_area; - return (Pnew_area); -} + Pnew_area->x1 = x1; + Pnew_area->y1 = y1; + Pnew_area->x2 = x2; + Pnew_area->y2 = y2; -void Area_List_deinit(Area_List* Pobj_base) { - Area_List* Plist = (Area_List*)Pobj_base; + return Pnew_area; + } - if (!Plist) - return; + void Area_List_deinit(Area_List* Pobj_base) + { 
+ Area_List* Plist = (Area_List*)Pobj_base; - if (Plist->Phead) { - crnlib_free(Plist->Phead); - Plist->Phead = NULL; - } + if (!Plist) + { + return; + } - crnlib_free(Plist); -} + if (Plist->Phead) + { + crnlib_free(Plist->Phead); + Plist->Phead = NULL; + } -Area_List* Area_List_init(int max_areas) { - Area_List* Plist = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); + crnlib_free(Plist); + } - Plist->total_areas = max_areas + 2; + Area_List* Area_List_init(int max_areas) + { + Area_List* Plist = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); - Plist->Phead = (Area*)crnlib_calloc(max_areas + 2, sizeof(Area)); - Plist->Ptail = Plist->Phead + 1; + Plist->total_areas = max_areas + 2; - Plist->Phead->Pprev = NULL; - Plist->Phead->Pnext = Plist->Ptail; + Plist->Phead = (Area*)crnlib_calloc(max_areas + 2, sizeof(Area)); + Plist->Ptail = Plist->Phead + 1; - Plist->Ptail->Pprev = Plist->Phead; - Plist->Ptail->Pnext = NULL; + Plist->Phead->Pprev = NULL; + Plist->Phead->Pnext = Plist->Ptail; - Plist->Pfree = NULL; - Plist->next_free = 2; + Plist->Ptail->Pprev = Plist->Phead; + Plist->Ptail->Pnext = NULL; - return (Plist); -} + Plist->Pfree = NULL; + Plist->next_free = 2; -void Area_List_print(Area_List* Plist) { - Area* Parea = Plist->Phead->Pnext; + return Plist; + } - while (Parea != Plist->Ptail) { - printf("%04i %04i : %04i %04i\n", Parea->x1, Parea->y1, Parea->x2, Parea->y2); + void Area_List_print(Area_List* Plist) + { + Area* Parea = Plist->Phead->Pnext; - Parea = Parea->Pnext; - } -} + while (Parea != Plist->Ptail) + { + printf("%04i %04i : %04i %04i\n", Parea->x1, Parea->y1, Parea->x2, Parea->y2); -Area_List* Area_List_dup_new(Area_List* Plist, - int x_ofs, int y_ofs) { - int i; - Area_List* Pnew_list = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); + Parea = Parea->Pnext; + } + } - Pnew_list->total_areas = Plist->total_areas; + Area_List* Area_List_dup_new(Area_List* Plist, int x_ofs, int y_ofs) + { + int i; + Area_List* Pnew_list = 
(Area_List*)crnlib_calloc(1, sizeof(Area_List)); - Pnew_list->Phead = (Area*)crnlib_malloc(sizeof(Area) * Plist->total_areas); - Pnew_list->Ptail = Pnew_list->Phead + 1; + Pnew_list->total_areas = Plist->total_areas; - Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : NULL; + Pnew_list->Phead = (Area*)crnlib_malloc(sizeof(Area) * Plist->total_areas); + Pnew_list->Ptail = Pnew_list->Phead + 1; - Pnew_list->next_free = Plist->next_free; + Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : NULL; - memcpy(Pnew_list->Phead, Plist->Phead, sizeof(Area) * Plist->total_areas); + Pnew_list->next_free = Plist->next_free; - for (i = 0; i < Plist->total_areas; i++) { - Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == NULL) ? NULL : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; - Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == NULL) ? NULL : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; + memcpy(Pnew_list->Phead, Plist->Phead, sizeof(Area) * Plist->total_areas); - Pnew_list->Phead[i].x1 += x_ofs; - Pnew_list->Phead[i].y1 += y_ofs; - Pnew_list->Phead[i].x2 += x_ofs; - Pnew_list->Phead[i].y2 += y_ofs; - } + for (i = 0; i < Plist->total_areas; i++) + { + Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == NULL) ? NULL : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == NULL) ? 
NULL : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; - return (Pnew_list); -} + Pnew_list->Phead[i].x1 += x_ofs; + Pnew_list->Phead[i].y1 += y_ofs; + Pnew_list->Phead[i].x2 += x_ofs; + Pnew_list->Phead[i].y2 += y_ofs; + } -uint Area_List_get_num(Area_List* Plist) { - uint num = 0; + return Pnew_list; + } - Area* Parea = Plist->Phead->Pnext; + uint Area_List_get_num(Area_List* Plist) + { + uint num = 0; - while (Parea != Plist->Ptail) { - num++; + Area* Parea = Plist->Phead->Pnext; - Parea = Parea->Pnext; - } + while (Parea != Plist->Ptail) + { + num++; - return num; -} + Parea = Parea->Pnext; + } -void Area_List_dup(Area_List* Psrc_list, Area_List* Pdst_list, - int x_ofs, int y_ofs) { - int i; + return num; + } - if (Psrc_list->total_areas != Pdst_list->total_areas) - area_fatal_error("Area_List_dup", "Src and Dst total_areas must be equal!"); + void Area_List_dup(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs) + { + int i; - Pdst_list->Pfree = (Psrc_list->Pfree) ? ((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : NULL; + if (Psrc_list->total_areas != Pdst_list->total_areas) + { + area_fatal_error("Area_List_dup", "Src and Dst total_areas must be equal!"); + } + Pdst_list->Pfree = (Psrc_list->Pfree) ? ((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : NULL; - Pdst_list->next_free = Psrc_list->next_free; + Pdst_list->next_free = Psrc_list->next_free; - memcpy(Pdst_list->Phead, Psrc_list->Phead, sizeof(Area) * Psrc_list->total_areas); + memcpy(Pdst_list->Phead, Psrc_list->Phead, sizeof(Area) * Psrc_list->total_areas); - if ((x_ofs) || (y_ofs)) { - for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? 
NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + if ((x_ofs) || (y_ofs)) + { + for (i = 0; i < Psrc_list->total_areas; i++) + { + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].x1 += x_ofs; - Pdst_list->Phead[i].y1 += y_ofs; - Pdst_list->Phead[i].x2 += x_ofs; - Pdst_list->Phead[i].y2 += y_ofs; - } - } else { - for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].x1 += x_ofs; + Pdst_list->Phead[i].y1 += y_ofs; + Pdst_list->Phead[i].x2 += x_ofs; + Pdst_list->Phead[i].y2 += y_ofs; + } + } + else + { + for (i = 0; i < Psrc_list->total_areas; i++) + { + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? 
NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + } + } } - } -} -void Area_List_copy( - Area_List* Psrc_list, Area_List* Pdst_list, - int x_ofs, int y_ofs) { - Area* Parea = Psrc_list->Phead->Pnext; + void Area_List_copy(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs) + { + Area* Parea = Psrc_list->Phead->Pnext; - Area_List_clear(Pdst_list); + Area_List_clear(Pdst_list); - if ((x_ofs) || (y_ofs)) { - Area* Pprev_area = Pdst_list->Phead; + if ((x_ofs) || (y_ofs)) + { + Area* Pprev_area = Pdst_list->Phead; - while (Parea != Psrc_list->Ptail) { - // Area *p, *Pnew_area; - Area* Pnew_area; + while (Parea != Psrc_list->Ptail) + { + // Area *p, *Pnew_area; + Area* Pnew_area; - if (Pdst_list->next_free == Pdst_list->total_areas) - area_fatal_error("Area_List_copy", "Out of areas!"); + if (Pdst_list->next_free == Pdst_list->total_areas) + { + area_fatal_error("Area_List_copy", "Out of areas!"); + } - Pnew_area = Pdst_list->Phead + Pdst_list->next_free; - Pdst_list->next_free++; + Pnew_area = Pdst_list->Phead + Pdst_list->next_free; + Pdst_list->next_free++; - Pnew_area->Pprev = Pprev_area; - Pprev_area->Pnext = Pnew_area; + Pnew_area->Pprev = Pprev_area; + Pprev_area->Pnext = Pnew_area; - Pnew_area->x1 = Parea->x1 + x_ofs; - Pnew_area->y1 = Parea->y1 + y_ofs; - Pnew_area->x2 = Parea->x2 + x_ofs; - Pnew_area->y2 = Parea->y2 + y_ofs; + Pnew_area->x1 = Parea->x1 + x_ofs; + Pnew_area->y1 = Parea->y1 + y_ofs; + Pnew_area->x2 = Parea->x2 + x_ofs; + Pnew_area->y2 = Parea->y2 + y_ofs; - Pprev_area = Pnew_area; + Pprev_area = Pnew_area; - Parea = Parea->Pnext; - } + Parea = Parea->Pnext; + } - Pprev_area->Pnext = Pdst_list->Ptail; - } else { + Pprev_area->Pnext = Pdst_list->Ptail; + } + else + { #if 0 - while (Parea != Psrc_list->Ptail) - { - insert_area_after(Pdst_list, Pdst_list->Phead, - Parea->x1, - Parea->y1, - Parea->x2, - Parea->y2); - - Parea = Parea->Pnext; - } + while (Parea != Psrc_list->Ptail) + { + 
insert_area_after(Pdst_list, Pdst_list->Phead, Parea->x1, Parea->y1, Parea->x2, Parea->y2); + Parea = Parea->Pnext; + } #endif - Area* Pprev_area = Pdst_list->Phead; + Area* Pprev_area = Pdst_list->Phead; - while (Parea != Psrc_list->Ptail) { - // Area *p, *Pnew_area; - Area* Pnew_area; + while (Parea != Psrc_list->Ptail) + { + // Area *p, *Pnew_area; + Area* Pnew_area; - if (Pdst_list->next_free == Pdst_list->total_areas) - area_fatal_error("Area_List_copy", "Out of areas!"); + if (Pdst_list->next_free == Pdst_list->total_areas) + { + area_fatal_error("Area_List_copy", "Out of areas!"); + } - Pnew_area = Pdst_list->Phead + Pdst_list->next_free; - Pdst_list->next_free++; + Pnew_area = Pdst_list->Phead + Pdst_list->next_free; + Pdst_list->next_free++; - Pnew_area->Pprev = Pprev_area; - Pprev_area->Pnext = Pnew_area; + Pnew_area->Pprev = Pprev_area; + Pprev_area->Pnext = Pnew_area; - Pnew_area->x1 = Parea->x1; - Pnew_area->y1 = Parea->y1; - Pnew_area->x2 = Parea->x2; - Pnew_area->y2 = Parea->y2; + Pnew_area->x1 = Parea->x1; + Pnew_area->y1 = Parea->y1; + Pnew_area->x2 = Parea->x2; + Pnew_area->y2 = Parea->y2; - Pprev_area = Pnew_area; + Pprev_area = Pnew_area; - Parea = Parea->Pnext; - } + Parea = Parea->Pnext; + } - Pprev_area->Pnext = Pdst_list->Ptail; - } -} + Pprev_area->Pnext = Pdst_list->Ptail; + } + } -void Area_List_clear(Area_List* Plist) { - Plist->Phead->Pnext = Plist->Ptail; - Plist->Ptail->Pprev = Plist->Phead; - Plist->Pfree = NULL; - Plist->next_free = 2; -} + void Area_List_clear(Area_List* Plist) + { + Plist->Phead->Pnext = Plist->Ptail; + Plist->Ptail->Pprev = Plist->Phead; + Plist->Pfree = NULL; + Plist->next_free = 2; + } -void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2) { - Plist->Pfree = NULL; + void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2) + { + Plist->Pfree = NULL; - Plist->Phead[2].x1 = x1; - Plist->Phead[2].y1 = y1; - Plist->Phead[2].x2 = x2; - Plist->Phead[2].y2 = y2; + Plist->Phead[2].x1 = x1; + 
Plist->Phead[2].y1 = y1; + Plist->Phead[2].x2 = x2; + Plist->Phead[2].y2 = y2; - Plist->Phead[2].Pprev = Plist->Phead; - Plist->Phead->Pnext = Plist->Phead + 2; + Plist->Phead[2].Pprev = Plist->Phead; + Plist->Phead->Pnext = Plist->Phead + 2; - Plist->Phead[2].Pnext = Plist->Ptail; - Plist->Ptail->Pprev = Plist->Phead + 2; + Plist->Phead[2].Pnext = Plist->Ptail; + Plist->Ptail->Pprev = Plist->Phead + 2; - Plist->next_free = 3; -} + Plist->next_free = 3; + } -void Area_List_remove(Area_List* Plist, - int x1, int y1, int x2, int y2) { - int l, h; - Area* Parea = Plist->Phead->Pnext; + void Area_List_remove(Area_List* Plist, int x1, int y1, int x2, int y2) + { + int l, h; + Area* Parea = Plist->Phead->Pnext; #ifdef RECT_DEBUG - if ((x1 > x2) || (y1 > y2)) - area_fatal_error("area_list_remove", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + if ((x1 > x2) || (y1 > y2)) + { + area_fatal_error("area_list_remove", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + } #endif - while (Parea != Plist->Ptail) { - // Not touching - if ((x2 < Parea->x1) || (x1 > Parea->x2) || - (y2 < Parea->y1) || (y1 > Parea->y2)) { - Parea = Parea->Pnext; - continue; - } + while (Parea != Plist->Ptail) + { + // Not touching + if ((x2 < Parea->x1) || (x1 > Parea->x2) || (y2 < Parea->y1) || (y1 > Parea->y2)) + { + Parea = Parea->Pnext; + continue; + } - // Completely covers - if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && - (y1 <= Parea->y1) && (y2 >= Parea->y2)) { - if ((x1 == Parea->x1) && (x2 == Parea->x2) && - (y1 == Parea->y1) && (y2 == Parea->y2)) { - delete_area(Plist, Parea); - return; - } + // Completely covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + if ((x1 == Parea->x1) && (x2 == Parea->x2) && (y1 == Parea->y1) && (y2 == Parea->y2)) + { + delete_area(Plist, Parea); + return; + } - Parea = delete_area(Plist, Parea); + Parea = delete_area(Plist, Parea); - continue; - } + continue; + } - // top - if (y1 > Parea->y1) { - 
insert_area_before(Plist, Parea, - Parea->x1, Parea->y1, - Parea->x2, y1 - 1); - } + // top + if (y1 > Parea->y1) + { + insert_area_before(Plist, Parea, Parea->x1, Parea->y1, Parea->x2, y1 - 1); + } - // bottom - if (y2 < Parea->y2) { - insert_area_before(Plist, Parea, - Parea->x1, y2 + 1, - Parea->x2, Parea->y2); - } + // bottom + if (y2 < Parea->y2) + { + insert_area_before(Plist, Parea, Parea->x1, y2 + 1, Parea->x2, Parea->y2); + } - l = math::maximum(y1, Parea->y1); - h = math::minimum(y2, Parea->y2); + l = math::maximum(y1, Parea->y1); + h = math::minimum(y2, Parea->y2); - // left middle - if (x1 > Parea->x1) { - insert_area_before(Plist, Parea, - Parea->x1, l, - x1 - 1, h); - } + // left middle + if (x1 > Parea->x1) + { + insert_area_before(Plist, Parea, Parea->x1, l, x1 - 1, h); + } - // right middle - if (x2 < Parea->x2) { - insert_area_before(Plist, Parea, - x2 + 1, l, - Parea->x2, h); - } + // right middle + if (x2 < Parea->x2) + { + insert_area_before(Plist, Parea, x2 + 1, l, Parea->x2, h); + } - // early out - we know there's nothing else to remove, as areas can - // never overlap - if ((x1 >= Parea->x1) && (x2 <= Parea->x2) && - (y1 >= Parea->y1) && (y2 <= Parea->y2)) { - delete_area(Plist, Parea); - return; - } + // early out - we know there's nothing else to remove, as areas can + // never overlap + if ((x1 >= Parea->x1) && (x2 <= Parea->x2) && (y1 >= Parea->y1) && (y2 <= Parea->y2)) + { + delete_area(Plist, Parea); + return; + } - Parea = delete_area(Plist, Parea); - } -} + Parea = delete_area(Plist, Parea); + } + } -void Area_List_insert(Area_List* Plist, - int x1, int y1, int x2, int y2, - bool combine) { - Area* Parea = Plist->Phead->Pnext; + void Area_List_insert(Area_List* Plist, int x1, int y1, int x2, int y2, bool combine) + { + Area* Parea = Plist->Phead->Pnext; #ifdef RECT_DEBUG - if ((x1 > x2) || (y1 > y2)) - area_fatal_error("Area_List_insert", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + if ((x1 > x2) || (y1 > y2)) + { + 
area_fatal_error("Area_List_insert", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + } #endif - while (Parea != Plist->Ptail) { - // totally covers - if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && - (y1 <= Parea->y1) && (y2 >= Parea->y2)) { - Parea = delete_area(Plist, Parea); - continue; - } - - // intersects - if ((x2 >= Parea->x1) && (x1 <= Parea->x2) && - (y2 >= Parea->y1) && (y1 <= Parea->y2)) { - int ax1, ay1, ax2, ay2; - - ax1 = Parea->x1; - ay1 = Parea->y1; - ax2 = Parea->x2; - ay2 = Parea->y2; - - if (x1 < ax1) - Area_List_insert(Plist, x1, math::maximum(y1, ay1), ax1 - 1, math::minimum(y2, ay2), combine); + while (Parea != Plist->Ptail) + { + // totally covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + Parea = delete_area(Plist, Parea); + continue; + } - if (x2 > ax2) - Area_List_insert(Plist, ax2 + 1, math::maximum(y1, ay1), x2, math::minimum(y2, ay2), combine); + // intersects + if ((x2 >= Parea->x1) && (x1 <= Parea->x2) && (y2 >= Parea->y1) && (y1 <= Parea->y2)) + { + int ax1, ay1, ax2, ay2; + + ax1 = Parea->x1; + ay1 = Parea->y1; + ax2 = Parea->x2; + ay2 = Parea->y2; + + if (x1 < ax1) + { + Area_List_insert(Plist, x1, math::maximum(y1, ay1), ax1 - 1, math::minimum(y2, ay2), combine); + } + + if (x2 > ax2) + { + Area_List_insert(Plist, ax2 + 1, math::maximum(y1, ay1), x2, math::minimum(y2, ay2), combine); + } + + if (y1 < ay1) + { + Area_List_insert(Plist, x1, y1, x2, ay1 - 1, combine); + } + if (y2 > ay2) + { + Area_List_insert(Plist, x1, ay2 + 1, x2, y2, combine); + } + + return; + } - if (y1 < ay1) - Area_List_insert(Plist, x1, y1, x2, ay1 - 1, combine); + if (combine) + { + if ((x1 == Parea->x1) && (x2 == Parea->x2)) + { + if ((y2 == Parea->y1 - 1) || (y1 == Parea->y2 + 1)) + { + delete_area(Plist, Parea); + Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), CRNLIB_TRUE); + return; + } + } + else if ((y1 == Parea->y1) && (y2 == Parea->y2)) + { + if ((x2 == 
Parea->x1 - 1) || (x1 == Parea->x2 + 1)) + { + delete_area(Plist, Parea); + Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, CRNLIB_TRUE); + return; + } + } + } - if (y2 > ay2) - Area_List_insert(Plist, x1, ay2 + 1, x2, y2, combine); + Parea = Parea->Pnext; + } - return; + insert_area_before(Plist, Parea, x1, y1, x2, y2); } - if (combine) { - if ((x1 == Parea->x1) && (x2 == Parea->x2)) { - if ((y2 == Parea->y1 - 1) || (y1 == Parea->y2 + 1)) { - delete_area(Plist, Parea); - Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), CRNLIB_TRUE); - return; - } - } else if ((y1 == Parea->y1) && (y2 == Parea->y2)) { - if ((x2 == Parea->x1 - 1) || (x1 == Parea->x2 + 1)) { - delete_area(Plist, Parea); - Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, CRNLIB_TRUE); - return; - } - } - } + void Area_List_intersect_area(Area_List* Plist, int x1, int y1, int x2, int y2) + { + Area* Parea = Plist->Phead->Pnext; - Parea = Parea->Pnext; - } + while (Parea != Plist->Ptail) + { + // doesn't cover + if ((x2 < Parea->x1) || (x1 > Parea->x2) || (y2 < Parea->y1) || (y1 > Parea->y2)) + { + Parea = delete_area(Plist, Parea); + continue; + } - insert_area_before(Plist, Parea, x1, y1, x2, y2); -} + // totally covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + Parea = Parea->Pnext; + continue; + } -void Area_List_intersect_area(Area_List* Plist, - int x1, int y1, int x2, int y2) { - Area* Parea = Plist->Phead->Pnext; + // Oct 21- should insert after, because deleted area will access the NEXT area! 
+ // insert_area_after(Plist, Parea, + // math::maximum(x1, Parea->x1), + // math::maximum(y1, Parea->y1), + // math::minimum(x2, Parea->x2), + // math::minimum(y2, Parea->y2)); - while (Parea != Plist->Ptail) { - // doesn't cover - if ((x2 < Parea->x1) || (x1 > Parea->x2) || - (y2 < Parea->y1) || (y1 > Parea->y2)) { - Parea = delete_area(Plist, Parea); - continue; - } + insert_area_before(Plist, Parea, + math::maximum(x1, Parea->x1), math::maximum(y1, Parea->y1), + math::minimum(x2, Parea->x2), math::minimum(y2, Parea->y2)); - // totally covers - if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && - (y1 <= Parea->y1) && (y2 >= Parea->y2)) { - Parea = Parea->Pnext; - continue; + Parea = delete_area(Plist, Parea); + } } - // Oct 21- should insert after, because deleted area will access the NEXT area! - // insert_area_after(Plist, Parea, - // math::maximum(x1, Parea->x1), - // math::maximum(y1, Parea->y1), - // math::minimum(x2, Parea->x2), - // math::minimum(y2, Parea->y2)); +#if 0 + void Area_List_intersect_Area_List(Area_List* Pouter_list, Area_List* Pinner_list, Area_List* Pdst_list) + { + Area* Parea1 = Pouter_list->Phead->Pnext; - insert_area_before(Plist, Parea, - math::maximum(x1, Parea->x1), - math::maximum(y1, Parea->y1), - math::minimum(x2, Parea->x2), - math::minimum(y2, Parea->y2)); + while (Parea1 != Pouter_list->Ptail) + { + Area* Parea2 = Pinner_list->Phead->Pnext; + int x1, y1, x2, y2; - Parea = delete_area(Plist, Parea); - } -} + x1 = Parea1->x1; x2 = Parea1->x2; + y1 = Parea1->y1; y2 = Parea1->y2; -#if 0 - void Area_List_intersect_Area_List( - Area_List *Pouter_list, - Area_List *Pinner_list, - Area_List *Pdst_list) - { - Area *Parea1 = Pouter_list->Phead->Pnext; - - while (Parea1 != Pouter_list->Ptail) - { - Area *Parea2 = Pinner_list->Phead->Pnext; - int x1, y1, x2, y2; - - x1 = Parea1->x1; x2 = Parea1->x2; - y1 = Parea1->y1; y2 = Parea1->y2; - - while (Parea2 != Pinner_list->Ptail) - { - if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && - (y1 <= 
Parea2->y2) && (y2 >= Parea2->y1)) + while (Parea2 != Pinner_list->Ptail) { - insert_area_after(Pdst_list, Pdst_list->Phead, - math::maximum(x1, Parea2->x1), - math::maximum(y1, Parea2->y1), - math::minimum(x2, Parea2->x2), - math::minimum(y2, Parea2->y2)); + if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) + { + insert_area_after(Pdst_list, Pdst_list->Phead, + math::maximum(x1, Parea2->x1), math::maximum(y1, Parea2->y1), + math::minimum(x2, Parea2->x2), math::minimum(y2, Parea2->y2)); + } + + Parea2 = Parea2->Pnext; } - Parea2 = Parea2->Pnext; - } - - Parea1 = Parea1->Pnext; - } - } + Parea1 = Parea1->Pnext; + } + } #endif -#if 1 -void Area_List_intersect_Area_List(Area_List* Pouter_list, - Area_List* Pinner_list, - Area_List* Pdst_list) { - Area* Parea1 = Pouter_list->Phead->Pnext; - - while (Parea1 != Pouter_list->Ptail) { - Area* Parea2 = Pinner_list->Phead->Pnext; - int x1, y1, x2, y2; - - x1 = Parea1->x1; - x2 = Parea1->x2; - y1 = Parea1->y1; - y2 = Parea1->y2; - - while (Parea2 != Pinner_list->Ptail) { - if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && - (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) { - int nx1, ny1, nx2, ny2; - - nx1 = math::maximum(x1, Parea2->x1); - ny1 = math::maximum(y1, Parea2->y1); - nx2 = math::minimum(x2, Parea2->x2); - ny2 = math::minimum(y2, Parea2->y2); - - if (Pdst_list->Phead->Pnext == Pdst_list->Ptail) { - insert_area_after(Pdst_list, Pdst_list->Phead, - nx1, ny1, nx2, ny2); - } else { - Area_Ptr Ptemp = Pdst_list->Phead->Pnext; - if ((Ptemp->x1 == nx1) && (Ptemp->x2 == nx2)) { - if (Ptemp->y1 == (ny2 + 1)) { - Ptemp->y1 = ny1; - goto next; - } else if (Ptemp->y2 == (ny1 - 1)) { - Ptemp->y2 = ny2; - goto next; - } - } else if ((Ptemp->y1 == ny1) && (Ptemp->y2 == ny2)) { - if (Ptemp->x1 == (nx2 + 1)) { - Ptemp->x1 = nx1; - goto next; - } else if (Ptemp->x2 == (nx1 - 1)) { - Ptemp->x2 = nx2; - goto next; - } - } + void Area_List_intersect_Area_List(Area_List* Pouter_list, Area_List* 
Pinner_list, Area_List* Pdst_list) + { + Area* Parea1 = Pouter_list->Phead->Pnext; - insert_area_after(Pdst_list, Pdst_list->Phead, - nx1, ny1, nx2, ny2); - } - } + while (Parea1 != Pouter_list->Ptail) + { + Area* Parea2 = Pinner_list->Phead->Pnext; + int x1, y1, x2, y2; - next: + x1 = Parea1->x1; + x2 = Parea1->x2; + y1 = Parea1->y1; + y2 = Parea1->y2; - Parea2 = Parea2->Pnext; - } + while (Parea2 != Pinner_list->Ptail) + { + if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) + { + int nx1, ny1, nx2, ny2; + + nx1 = math::maximum(x1, Parea2->x1); + ny1 = math::maximum(y1, Parea2->y1); + nx2 = math::minimum(x2, Parea2->x2); + ny2 = math::minimum(y2, Parea2->y2); + + if (Pdst_list->Phead->Pnext == Pdst_list->Ptail) + { + insert_area_after(Pdst_list, Pdst_list->Phead, nx1, ny1, nx2, ny2); + } + else + { + Area_Ptr Ptemp = Pdst_list->Phead->Pnext; + if ((Ptemp->x1 == nx1) && (Ptemp->x2 == nx2)) + { + if (Ptemp->y1 == (ny2 + 1)) + { + Ptemp->y1 = ny1; + goto next; + } + else if (Ptemp->y2 == (ny1 - 1)) + { + Ptemp->y2 = ny2; + goto next; + } + } + else if ((Ptemp->y1 == ny1) && (Ptemp->y2 == ny2)) + { + if (Ptemp->x1 == (nx2 + 1)) + { + Ptemp->x1 = nx1; + goto next; + } + else if (Ptemp->x2 == (nx1 - 1)) + { + Ptemp->x2 = nx2; + goto next; + } + } + + insert_area_after(Pdst_list, Pdst_list->Phead, nx1, ny1, nx2, ny2); + } + } + + next: + + Parea2 = Parea2->Pnext; + } - Parea1 = Parea1->Pnext; - } -} -#endif + Parea1 = Parea1->Pnext; + } + } -Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist) { - Area_Ptr Parea = Plist->Phead->Pnext, Parea_after; - int num = 2; - Area_List_Ptr Pnew_list; + Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist) + { + Area_Ptr Parea = Plist->Phead->Pnext, Parea_after; + int num = 2; + Area_List_Ptr Pnew_list; - while (Parea != Plist->Ptail) { - num++; - Parea = Parea->Pnext; - } + while (Parea != Plist->Ptail) + { + num++; + Parea = Parea->Pnext; + } - Pnew_list = Area_List_init(num); + 
Pnew_list = Area_List_init(num); - Parea = Plist->Phead->Pnext; + Parea = Plist->Phead->Pnext; - Parea_after = Pnew_list->Phead; + Parea_after = Pnew_list->Phead; - while (Parea != Plist->Ptail) { - Parea_after = insert_area_after(Pnew_list, Parea_after, - Parea->x1, Parea->y1, - Parea->x2, Parea->y2); + while (Parea != Plist->Ptail) + { + Parea_after = insert_area_after(Pnew_list, Parea_after, Parea->x1, Parea->y1, Parea->x2, Parea->y2); - Parea = Parea->Pnext; - } + Parea = Parea->Pnext; + } - return (Pnew_list); -} + return Pnew_list; + } } // namespace crnlib diff --git a/crnlib/crn_arealist.h b/crnlib/crn_arealist.h index 3950cd2..317d7b9 100644 --- a/crnlib/crn_arealist.h +++ b/crnlib/crn_arealist.h @@ -4,70 +4,69 @@ #include "crn_export.h" -namespace crnlib { -struct Area { - struct Area *Pprev, *Pnext; +namespace crnlib +{ + struct Area { + struct Area *Pprev, *Pnext; - int x1, y1, x2, y2; + int x1, y1, x2, y2; - uint get_width() const { return x2 - x1 + 1; } - uint get_height() const { return y2 - y1 + 1; } - uint get_area() const { return get_width() * get_height(); } -}; + uint get_width() const + { + return x2 - x1 + 1; + } + uint get_height() const + { + return y2 - y1 + 1; + } + uint get_area() const + { + return get_width() * get_height(); + } + }; -typedef Area* Area_Ptr; + typedef Area* Area_Ptr; -struct Area_List { - int total_areas; - int next_free; + struct Area_List + { + int total_areas; + int next_free; - Area *Phead, *Ptail, *Pfree; -}; + Area *Phead, *Ptail, *Pfree; + }; -typedef Area_List* Area_List_Ptr; + typedef Area_List* Area_List_Ptr; -CRN_EXPORT Area_List* Area_List_init(int max_areas); -CRN_EXPORT void Area_List_deinit(Area_List* Pobj_base); + CRN_EXPORT Area_List* Area_List_init(int max_areas); + CRN_EXPORT void Area_List_deinit(Area_List* Pobj_base); -CRN_EXPORT void Area_List_print(Area_List* Plist); + CRN_EXPORT void Area_List_print(Area_List* Plist); -CRN_EXPORT Area_List* Area_List_dup_new(Area_List* Plist, - int x_ofs, int 
y_ofs); + CRN_EXPORT Area_List* Area_List_dup_new(Area_List* Plist, int x_ofs, int y_ofs); -CRN_EXPORT uint Area_List_get_num(Area_List* Plist); + CRN_EXPORT uint Area_List_get_num(Area_List* Plist); -// src and dst area lists must have the same number of total areas. -CRN_EXPORT void Area_List_dup(Area_List* Psrc_list, - Area_List* Pdst_list, - int x_ofs, int y_ofs); + // src and dst area lists must have the same number of total areas. + CRN_EXPORT void Area_List_dup(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs); -CRN_EXPORT void Area_List_copy(Area_List* Psrc_list, - Area_List* Pdst_list, - int x_ofs, int y_ofs); + CRN_EXPORT void Area_List_copy(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs); -CRN_EXPORT void Area_List_clear(Area_List* Plist); + CRN_EXPORT void Area_List_clear(Area_List* Plist); -CRN_EXPORT void Area_List_set(Area_List* Plist, - int x1, int y1, int x2, int y2); + CRN_EXPORT void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2); -// logical: x and (not y) -CRN_EXPORT void Area_List_remove(Area_List* Plist, - int x1, int y1, int x2, int y2); + // logical: x and (not y) + CRN_EXPORT void Area_List_remove(Area_List* Plist, int x1, int y1, int x2, int y2); -// logical: x or y -CRN_EXPORT void Area_List_insert(Area_List* Plist, - int x1, int y1, int x2, int y2, - bool combine); + // logical: x or y + CRN_EXPORT void Area_List_insert(Area_List* Plist, int x1, int y1, int x2, int y2, bool combine); -// logical: x and y -CRN_EXPORT void Area_List_intersect_area(Area_List* Plist, - int x1, int y1, int x2, int y2); + // logical: x and y + CRN_EXPORT void Area_List_intersect_area(Area_List* Plist, int x1, int y1, int x2, int y2); -// logical: x and y -CRN_EXPORT void Area_List_intersect_Area_List(Area_List* Pouter_list, - Area_List* Pinner_list, - Area_List* Pdst_list); + // logical: x and y + CRN_EXPORT void Area_List_intersect_Area_List(Area_List* Pouter_list, Area_List* Pinner_list, Area_List* 
Pdst_list); -CRN_EXPORT Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist); + CRN_EXPORT Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist); -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_assert.cpp b/crnlib/crn_assert.cpp index 4fe6b51..91d0500 100644 --- a/crnlib/crn_assert.cpp +++ b/crnlib/crn_assert.cpp @@ -1,6 +1,8 @@ // File: crn_assert.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" + #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif @@ -8,56 +10,72 @@ static bool g_fail_exceptions; static bool g_exit_on_failure = true; -void crnlib_enable_fail_exceptions(bool enabled) { - g_fail_exceptions = enabled; +void crnlib_enable_fail_exceptions(bool enabled) +{ + g_fail_exceptions = enabled; } -void crnlib_assert(const char* pExp, const char* pFile, unsigned line) { - char buf[512]; +void crnlib_assert(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; - sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failed: \"%s\"\n", pFile, line, pExp); + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failed: \"%s\"\n", pFile, line, pExp); - crnlib_output_debug_string(buf); + crnlib_output_debug_string(buf); - fputs(buf, stderr); + fputs(buf, stderr); - if (crnlib_is_debugger_present()) - crnlib_debug_break(); + if (crnlib_is_debugger_present()) + { + crnlib_debug_break(); + } } -void crnlib_fail(const char* pExp, const char* pFile, unsigned line) { - char buf[512]; +void crnlib_fail(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; - sprintf_s(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); + sprintf_s(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); - crnlib_output_debug_string(buf); + crnlib_output_debug_string(buf); - fputs(buf, stderr); + fputs(buf, stderr); - if (crnlib_is_debugger_present()) - crnlib_debug_break(); + if (crnlib_is_debugger_present()) + { + crnlib_debug_break(); + } #if CRNLIB_USE_WIN32_API 
- if (g_fail_exceptions) - RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, NULL); - else + if (g_fail_exceptions) + { + RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, NULL); + } + else #endif - if (g_exit_on_failure) - exit(EXIT_FAILURE); + { + if (g_exit_on_failure) + { + exit(EXIT_FAILURE); + } + } } -void trace(const char* pFmt, va_list args) { - if (crnlib_is_debugger_present()) { - char buf[512]; - vsprintf_s(buf, sizeof(buf), pFmt, args); +void trace(const char* pFmt, va_list args) +{ + if (crnlib_is_debugger_present()) + { + char buf[512]; + vsprintf_s(buf, sizeof(buf), pFmt, args); - crnlib_output_debug_string(buf); - } + crnlib_output_debug_string(buf); + } }; -void trace(const char* pFmt, ...) { - va_list args; - va_start(args, pFmt); - trace(pFmt, args); - va_end(args); +void trace(const char* pFmt, ...) +{ + va_list args; + va_start(args, pFmt); + trace(pFmt, args); + va_end(args); }; diff --git a/crnlib/crn_checksum.cpp b/crnlib/crn_checksum.cpp index e5efad4..fe774a1 100644 --- a/crnlib/crn_checksum.cpp +++ b/crnlib/crn_checksum.cpp @@ -1,58 +1,66 @@ // File: crn_checksum.cpp #include "crn_core.h" -namespace crnlib { -// From the public domain stb.h header. -uint adler32(const void* pBuf, size_t buflen, uint adler32) { - const uint8* buffer = static_cast(pBuf); - - const unsigned long ADLER_MOD = 65521; - unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; - size_t blocklen; - unsigned long i; - - blocklen = buflen % 5552; - while (buflen) { - for (i = 0; i + 7 < blocklen; i += 8) { - s1 += buffer[0], s2 += s1; - s1 += buffer[1], s2 += s1; - s1 += buffer[2], s2 += s1; - s1 += buffer[3], s2 += s1; - s1 += buffer[4], s2 += s1; - s1 += buffer[5], s2 += s1; - s1 += buffer[6], s2 += s1; - s1 += buffer[7], s2 += s1; - - buffer += 8; +namespace crnlib +{ + // From the public domain stb.h header. 
+ uint adler32(const void* pBuf, size_t buflen, uint adler32) + { + const uint8* buffer = static_cast(pBuf); + + const unsigned long ADLER_MOD = 65521; + unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; + size_t blocklen; + unsigned long i; + + blocklen = buflen % 5552; + while (buflen) + { + for (i = 0; i + 7 < blocklen; i += 8) + { + s1 += buffer[0], s2 += s1; + s1 += buffer[1], s2 += s1; + s1 += buffer[2], s2 += s1; + s1 += buffer[3], s2 += s1; + s1 += buffer[4], s2 += s1; + s1 += buffer[5], s2 += s1; + s1 += buffer[6], s2 += s1; + s1 += buffer[7], s2 += s1; + + buffer += 8; + } + + for (; i < blocklen; ++i) + { + s1 += *buffer++, s2 += s1; + } + + s1 %= ADLER_MOD, s2 %= ADLER_MOD; + buflen -= blocklen; + blocklen = 5552; + } + return (s2 << 16) + s1; } - for (; i < blocklen; ++i) - s1 += *buffer++, s2 += s1; - - s1 %= ADLER_MOD, s2 %= ADLER_MOD; - buflen -= blocklen; - blocklen = 5552; - } - return (s2 << 16) + s1; -} - -uint16 crc16(const void* pBuf, size_t len, uint16 crc) { - crc = ~crc; - - const uint8* p = reinterpret_cast(pBuf); - while (len) { - const uint16 q = *p++ ^ (crc >> 8); - crc <<= 8U; - uint16 r = (q >> 4) ^ q; - crc ^= r; - r <<= 5U; - crc ^= r; - r <<= 7U; - crc ^= r; - len--; - } - - return static_cast(~crc); -} + uint16 crc16(const void* pBuf, size_t len, uint16 crc) + { + crc = ~crc; + + const uint8* p = reinterpret_cast(pBuf); + while (len) + { + const uint16 q = *p++ ^ (crc >> 8); + crc <<= 8U; + uint16 r = (q >> 4) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + len--; + } + + return static_cast(~crc); + } } // namespace crnlib diff --git a/crnlib/crn_checksum.h b/crnlib/crn_checksum.h index dcca475..9179284 100644 --- a/crnlib/crn_checksum.h +++ b/crnlib/crn_checksum.h @@ -3,12 +3,13 @@ #include "crn_export.h" -namespace crnlib { -const uint cInitAdler32 = 1U; -CRN_EXPORT uint adler32(const void* pBuf, size_t buflen, uint adler32 = cInitAdler32); - -// crc16() intended for small buffers - doesn't use an 
acceleration table. -const uint cInitCRC16 = 0; -CRN_EXPORT uint16 crc16(const void* pBuf, size_t len, uint16 crc = cInitCRC16); +namespace crnlib +{ + const uint cInitAdler32 = 1U; + CRN_EXPORT uint adler32(const void* pBuf, size_t buflen, uint adler32 = cInitAdler32); + + // crc16() intended for small buffers - doesn't use an acceleration table. + const uint cInitCRC16 = 0; + CRN_EXPORT uint16 crc16(const void* pBuf, size_t len, uint16 crc = cInitCRC16); } // namespace crnlib diff --git a/crnlib/crn_console.cpp b/crnlib/crn_console.cpp index 8a40f4a..f13d445 100644 --- a/crnlib/crn_console.cpp +++ b/crnlib/crn_console.cpp @@ -5,196 +5,231 @@ #include "crn_data_stream.h" #include "crn_threading.h" -namespace crnlib { -eConsoleMessageType console::m_default_category = cInfoConsoleMessage; -crnlib::vector console::m_output_funcs; -bool console::m_crlf = true; -bool console::m_prefixes = true; -bool console::m_output_disabled; -data_stream* console::m_pLog_stream; -mutex* console::m_pMutex; -uint console::m_num_messages[cCMTTotal]; -bool console::m_at_beginning_of_line = true; - -const uint cConsoleBufSize = 4096; - -void console::init() { - if (!m_pMutex) { - m_pMutex = crnlib_new(); - } -} - -void console::deinit() { - if (m_pMutex) { - crnlib_delete(m_pMutex); - m_pMutex = NULL; - } -} - -void console::disable_crlf() { - init(); - - m_crlf = false; -} - -void console::enable_crlf() { - init(); - - m_crlf = true; -} +namespace crnlib +{ + eConsoleMessageType console::m_default_category = cInfoConsoleMessage; + crnlib::vector console::m_output_funcs; + bool console::m_crlf = true; + bool console::m_prefixes = true; + bool console::m_output_disabled; + data_stream* console::m_pLog_stream; + mutex* console::m_pMutex; + uint console::m_num_messages[cCMTTotal]; + bool console::m_at_beginning_of_line = true; + + const uint cConsoleBufSize = 4096; + + void console::init() + { + if (!m_pMutex) + { + m_pMutex = crnlib_new(); + } + } + + void console::deinit() + { + if 
(m_pMutex) + { + crnlib_delete(m_pMutex); + m_pMutex = NULL; + } + } + + void console::disable_crlf() + { + init(); + + m_crlf = false; + } + + void console::enable_crlf() + { + init(); + + m_crlf = true; + } + + void console::vprintf(eConsoleMessageType type, const char* p, va_list args) + { + init(); + + scoped_mutex lock(*m_pMutex); + + m_num_messages[type]++; + + char buf[cConsoleBufSize]; + vsprintf_s(buf, cConsoleBufSize, p, args); + + bool handled = false; + + if (m_output_funcs.size()) + { + for (uint i = 0; i < m_output_funcs.size(); i++) + { + if (m_output_funcs[i].m_func(type, buf, m_output_funcs[i].m_pData)) + { + handled = true; + } + } + } + + const char* pPrefix = NULL; + if ((m_prefixes) && (m_at_beginning_of_line)) + { + switch (type) + { + case cDebugConsoleMessage: + pPrefix = "Debug: "; + break; + case cWarningConsoleMessage: + pPrefix = "Warning: "; + break; + case cErrorConsoleMessage: + pPrefix = "Error: "; + break; + default: + break; + } + } + + if ((!m_output_disabled) && (!handled)) + { + if (pPrefix) + { + ::printf("%s", pPrefix); + } + ::printf(m_crlf ? "%s\n" : "%s", buf); + } + + uint n = strlen(buf); + m_at_beginning_of_line = (m_crlf) || ((n) && (buf[n - 1] == '\n')); + + if ((type != cProgressConsoleMessage) && (m_pLog_stream)) + { + // Yes this is bad. + dynamic_string tmp_buf(buf); + + tmp_buf.translate_lf_to_crlf(); + + m_pLog_stream->printf(m_crlf ? "%s\r\n" : "%s", tmp_buf.get_ptr()); + m_pLog_stream->flush(); + } + } + + void console::printf(eConsoleMessageType type, const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(type, p, args); + va_end(args); + } + + void console::printf(const char* p, ...) 
+ { + va_list args; + va_start(args, p); + vprintf(m_default_category, p, args); + va_end(args); + } + + void console::set_default_category(eConsoleMessageType category) + { + init(); + + m_default_category = category; + } + + eConsoleMessageType console::get_default_category() + { + init(); + + return m_default_category; + } -void console::vprintf(eConsoleMessageType type, const char* p, va_list args) { - init(); - - scoped_mutex lock(*m_pMutex); + void console::add_console_output_func(console_output_func pFunc, void* pData) + { + init(); - m_num_messages[type]++; - - char buf[cConsoleBufSize]; - vsprintf_s(buf, cConsoleBufSize, p, args); - - bool handled = false; - - if (m_output_funcs.size()) { - for (uint i = 0; i < m_output_funcs.size(); i++) - if (m_output_funcs[i].m_func(type, buf, m_output_funcs[i].m_pData)) - handled = true; - } - - const char* pPrefix = NULL; - if ((m_prefixes) && (m_at_beginning_of_line)) { - switch (type) { - case cDebugConsoleMessage: - pPrefix = "Debug: "; - break; - case cWarningConsoleMessage: - pPrefix = "Warning: "; - break; - case cErrorConsoleMessage: - pPrefix = "Error: "; - break; - default: - break; - } - } - - if ((!m_output_disabled) && (!handled)) { - if (pPrefix) - ::printf("%s", pPrefix); - ::printf(m_crlf ? "%s\n" : "%s", buf); - } - - uint n = strlen(buf); - m_at_beginning_of_line = (m_crlf) || ((n) && (buf[n - 1] == '\n')); - - if ((type != cProgressConsoleMessage) && (m_pLog_stream)) { - // Yes this is bad. - dynamic_string tmp_buf(buf); - - tmp_buf.translate_lf_to_crlf(); - - m_pLog_stream->printf(m_crlf ? "%s\r\n" : "%s", tmp_buf.get_ptr()); - m_pLog_stream->flush(); - } -} - -void console::printf(eConsoleMessageType type, const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(type, p, args); - va_end(args); -} - -void console::printf(const char* p, ...) 
{ - va_list args; - va_start(args, p); - vprintf(m_default_category, p, args); - va_end(args); -} - -void console::set_default_category(eConsoleMessageType category) { - init(); - - m_default_category = category; -} - -eConsoleMessageType console::get_default_category() { - init(); - - return m_default_category; -} - -void console::add_console_output_func(console_output_func pFunc, void* pData) { - init(); - - scoped_mutex lock(*m_pMutex); - - m_output_funcs.push_back(console_func(pFunc, pData)); -} - -void console::remove_console_output_func(console_output_func pFunc) { - init(); - - scoped_mutex lock(*m_pMutex); - - for (int i = m_output_funcs.size() - 1; i >= 0; i--) { - if (m_output_funcs[i].m_func == pFunc) { - m_output_funcs.erase(m_output_funcs.begin() + i); - } - } - - if (!m_output_funcs.size()) { - m_output_funcs.clear(); - } -} - -void console::progress(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cProgressConsoleMessage, p, args); - va_end(args); -} - -void console::info(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cInfoConsoleMessage, p, args); - va_end(args); -} - -void console::message(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cMessageConsoleMessage, p, args); - va_end(args); -} - -void console::cons(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cConsoleConsoleMessage, p, args); - va_end(args); -} - -void console::debug(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cDebugConsoleMessage, p, args); - va_end(args); -} - -void console::warning(const char* p, ...) { - va_list args; - va_start(args, p); - vprintf(cWarningConsoleMessage, p, args); - va_end(args); -} - -void console::error(const char* p, ...) 
{ - va_list args; - va_start(args, p); - vprintf(cErrorConsoleMessage, p, args); - va_end(args); -} + scoped_mutex lock(*m_pMutex); + + m_output_funcs.push_back(console_func(pFunc, pData)); + } + + void console::remove_console_output_func(console_output_func pFunc) + { + init(); + + scoped_mutex lock(*m_pMutex); + + for (int i = m_output_funcs.size() - 1; i >= 0; i--) + { + if (m_output_funcs[i].m_func == pFunc) + { + m_output_funcs.erase(m_output_funcs.begin() + i); + } + } + + if (!m_output_funcs.size()) + { + m_output_funcs.clear(); + } + } + + void console::progress(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cProgressConsoleMessage, p, args); + va_end(args); + } + + void console::info(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cInfoConsoleMessage, p, args); + va_end(args); + } + + void console::message(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cMessageConsoleMessage, p, args); + va_end(args); + } + + void console::cons(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cConsoleConsoleMessage, p, args); + va_end(args); + } + + void console::debug(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cDebugConsoleMessage, p, args); + va_end(args); + } + + void console::warning(const char* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cWarningConsoleMessage, p, args); + va_end(args); + } + + void console::error(const char* p, ...) 
+ { + va_list args; + va_start(args, p); + vprintf(cErrorConsoleMessage, p, args); + va_end(args); + } } // namespace crnlib diff --git a/crnlib/crn_console.h b/crnlib/crn_console.h index e10655e..1c811fb 100644 --- a/crnlib/crn_console.h +++ b/crnlib/crn_console.h @@ -1,6 +1,9 @@ // File: crn_console.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + +#include "crn_core.h" #include "crn_dynamic_string.h" #include "crn_export.h" @@ -8,115 +11,130 @@ #include #include #endif -namespace crnlib { -class dynamic_string; -class data_stream; -class mutex; -enum eConsoleMessageType { - cDebugConsoleMessage, // debugging messages - cProgressConsoleMessage, // progress messages - cInfoConsoleMessage, // ordinary messages - cConsoleConsoleMessage, // user console output - cMessageConsoleMessage, // high importance messages - cWarningConsoleMessage, // warnings - cErrorConsoleMessage, // errors +#if defined(CRN_CC_GNU) +#include +#include +#endif + +namespace crnlib +{ + class dynamic_string; + class data_stream; + class mutex; + + enum eConsoleMessageType + { + cDebugConsoleMessage, // debugging messages + cProgressConsoleMessage, // progress messages + cInfoConsoleMessage, // ordinary messages + cConsoleConsoleMessage, // user console output + cMessageConsoleMessage, // high importance messages + cWarningConsoleMessage, // warnings + cErrorConsoleMessage, // errors + + cCMTTotal, + }; - cCMTTotal -}; + typedef bool (*console_output_func)(eConsoleMessageType type, const char* pMsg, void* pData); -typedef bool (*console_output_func)(eConsoleMessageType type, const char* pMsg, void* pData); + class console + { + public: + CRN_EXPORT static void init(); + CRN_EXPORT static void deinit(); -class console { - public: - CRN_EXPORT static void init(); - CRN_EXPORT static void deinit(); + static bool is_initialized() { return m_pMutex != NULL; } - static bool is_initialized() { return m_pMutex != NULL; } + CRN_EXPORT static void 
set_default_category(eConsoleMessageType category); + CRN_EXPORT static eConsoleMessageType get_default_category(); - CRN_EXPORT static void set_default_category(eConsoleMessageType category); - CRN_EXPORT static eConsoleMessageType get_default_category(); + CRN_EXPORT static void add_console_output_func(console_output_func pFunc, void* pData); + CRN_EXPORT static void remove_console_output_func(console_output_func pFunc); - CRN_EXPORT static void add_console_output_func(console_output_func pFunc, void* pData); - CRN_EXPORT static void remove_console_output_func(console_output_func pFunc); + CRN_EXPORT static void printf(const char* p, ...); - CRN_EXPORT static void printf(const char* p, ...); + CRN_EXPORT static void vprintf(eConsoleMessageType type, const char* p, va_list args); + CRN_EXPORT static void printf(eConsoleMessageType type, const char* p, ...); - CRN_EXPORT static void vprintf(eConsoleMessageType type, const char* p, va_list args); - CRN_EXPORT static void printf(eConsoleMessageType type, const char* p, ...); + CRN_EXPORT static void cons(const char* p, ...); + CRN_EXPORT static void debug(const char* p, ...); + CRN_EXPORT static void progress(const char* p, ...); + CRN_EXPORT static void info(const char* p, ...); + CRN_EXPORT static void message(const char* p, ...); + CRN_EXPORT static void warning(const char* p, ...); + CRN_EXPORT static void error(const char* p, ...); - CRN_EXPORT static void cons(const char* p, ...); - CRN_EXPORT static void debug(const char* p, ...); - CRN_EXPORT static void progress(const char* p, ...); - CRN_EXPORT static void info(const char* p, ...); - CRN_EXPORT static void message(const char* p, ...); - CRN_EXPORT static void warning(const char* p, ...); - CRN_EXPORT static void error(const char* p, ...); + // FIXME: All console state is currently global! 
+ CRN_EXPORT static void disable_prefixes(); + CRN_EXPORT static void enable_prefixes(); + static bool get_prefixes() { return m_prefixes; } + static bool get_at_beginning_of_line() { return m_at_beginning_of_line; } - // FIXME: All console state is currently global! - CRN_EXPORT static void disable_prefixes(); - CRN_EXPORT static void enable_prefixes(); - static bool get_prefixes() { return m_prefixes; } - static bool get_at_beginning_of_line() { return m_at_beginning_of_line; } + CRN_EXPORT static void disable_crlf(); + CRN_EXPORT static void enable_crlf(); + static bool get_crlf() { return m_crlf; } - CRN_EXPORT static void disable_crlf(); - CRN_EXPORT static void enable_crlf(); - static bool get_crlf() { return m_crlf; } + static void disable_output() { m_output_disabled = true; } + static void enable_output() { m_output_disabled = false; } + static bool get_output_disabled() { return m_output_disabled; } - static void disable_output() { m_output_disabled = true; } - static void enable_output() { m_output_disabled = false; } - static bool get_output_disabled() { return m_output_disabled; } + static void set_log_stream(data_stream* pStream) { m_pLog_stream = pStream; } + static data_stream* get_log_stream() { return m_pLog_stream; } - static void set_log_stream(data_stream* pStream) { m_pLog_stream = pStream; } - static data_stream* get_log_stream() { return m_pLog_stream; } + static uint get_num_messages(eConsoleMessageType type) { return m_num_messages[type]; } - static uint get_num_messages(eConsoleMessageType type) { return m_num_messages[type]; } + private: + static eConsoleMessageType m_default_category; - private: - static eConsoleMessageType m_default_category; + struct console_func + { + console_func(console_output_func func = NULL, void* pData = NULL): + m_func(func), + m_pData(pData) + { + } - struct console_func { - console_func(console_output_func func = NULL, void* pData = NULL) - : m_func(func), m_pData(pData) {} + console_output_func m_func; + 
void* m_pData; + }; - console_output_func m_func; - void* m_pData; - }; - CRN_EXPORT static crnlib::vector m_output_funcs; + CRN_EXPORT static crnlib::vector m_output_funcs; - CRN_EXPORT static bool m_crlf, m_prefixes, m_output_disabled; + CRN_EXPORT static bool m_crlf, m_prefixes, m_output_disabled; - CRN_EXPORT static data_stream* m_pLog_stream; + CRN_EXPORT static data_stream* m_pLog_stream; - CRN_EXPORT static mutex* m_pMutex; + CRN_EXPORT static mutex* m_pMutex; - CRN_EXPORT static uint m_num_messages[cCMTTotal]; + CRN_EXPORT static uint m_num_messages[cCMTTotal]; - CRN_EXPORT static bool m_at_beginning_of_line; -}; + CRN_EXPORT static bool m_at_beginning_of_line; + }; #if defined(WIN32) -inline int crn_getch() { - return _getch(); -} -#elif defined(__GNUC__) -#include -#include -inline int crn_getch() { - struct termios oldt, newt; - int ch; - tcgetattr(STDIN_FILENO, &oldt); - newt = oldt; - newt.c_lflag &= ~(ICANON | ECHO); - tcsetattr(STDIN_FILENO, TCSANOW, &newt); - ch = getchar(); - tcsetattr(STDIN_FILENO, TCSANOW, &oldt); - return ch; -} + inline int crn_getch() + { + return _getch(); + } +#elif defined(CRN_CC_GNU) + inline int crn_getch() + { + struct termios oldt, newt; + int ch; + tcgetattr(STDIN_FILENO, &oldt); + newt = oldt; + newt.c_lflag &= ~(ICANON | ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &newt); + ch = getchar(); + tcsetattr(STDIN_FILENO, TCSANOW, &oldt); + return ch; + } #else -inline int crn_getch() { - printf("crn_getch: Unimplemented"); - return 0; -} + inline int crn_getch() + { + printf("crn_getch: Unimplemented"); + return 0; + } #endif } // namespace crnlib diff --git a/crnlib/crn_rand.cpp b/crnlib/crn_rand.cpp index 9ea0575..753e16d 100644 --- a/crnlib/crn_rand.cpp +++ b/crnlib/crn_rand.cpp @@ -25,325 +25,384 @@ //#define rot(x,k) (((x)<<(k))|((x)>>(32-(k)))) #define rot(x, k) CRNLIB_ROTATE_LEFT(x, k) -namespace crnlib { -static const double cNorm = 1.0 / (double)0x100000000ULL; - -kiss99::kiss99() { - x = 123456789; - y = 362436000; 
- z = 521288629; - c = 7654321; -} - -void kiss99::seed(uint32 i, uint32 j, uint32 k) { - x = i; - y = j; - z = k; - c = 7654321; -} - -inline uint32 kiss99::next() { - x = 69069 * x + 12345; - - y ^= (y << 13); - y ^= (y >> 17); - y ^= (y << 5); - - uint64 t = c; - t += (698769069ULL * z); - c = static_cast(t >> 32); - z = static_cast(t); - - return (x + y + z); -} - -inline uint32 ranctx::next() { - uint32 e = a - rot(b, 27); - a = b ^ rot(c, 17); - b = c + d; - c = d + e; - d = e + a; - return d; -} - -void ranctx::seed(uint32 seed) { - a = 0xf1ea5eed, b = c = d = seed; - for (uint32 i = 0; i < 20; ++i) - next(); -} - -well512::well512() { - seed(0xDEADBE3F); -} - -void well512::seed(uint32 seed[well512::cStateSize]) { - memcpy(m_state, seed, sizeof(m_state)); - m_index = 0; -} - -void well512::seed(uint32 seed) { - uint32 jsr = utils::swap32(seed) ^ 0xAAC29377; - - for (uint i = 0; i < cStateSize; i++) { - SHR3; - seed = bitmix32c(seed); - - m_state[i] = seed ^ jsr; - } - m_index = 0; -} - -void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3) { - uint32 jsr = seed2; - uint32 jcong = seed3; - - for (uint i = 0; i < cStateSize; i++) { - SHR3; - seed1 = bitmix32c(seed1); - CONG; - - m_state[i] = seed1 ^ jsr ^ jcong; - } - m_index = 0; -} - -inline uint32 well512::next() { - uint32 a, b, c, d; - a = m_state[m_index]; - c = m_state[(m_index + 13) & 15]; - b = a ^ c ^ (a << 16) ^ (c << 15); - c = m_state[(m_index + 9) & 15]; - c ^= (c >> 11); - a = m_state[m_index] = b ^ c; - d = a ^ ((a << 5) & 0xDA442D20UL); - m_index = (m_index + 15) & 15; - a = m_state[m_index]; - m_state[m_index] = a ^ b ^ d ^ (a << 2) ^ (b << 18) ^ (c << 28); - return m_state[m_index]; -} - -random::random() { - seed(12345, 65435, 34221); -} - -random::random(uint32 i) { - seed(i); -} - -void random::seed(uint32 i1, uint32 i2, uint32 i3) { - m_ranctx.seed(i1 ^ i2 ^ i3); - - m_kiss99.seed(i1, i2, i3); - - m_well512.seed(i1, i2, i3); - - for (uint i = 0; i < 100; i++) - urand32(); -} - 
-void random::seed(uint32 i) { - uint32 jsr = i; - SHR3; - SHR3; - uint32 jcong = utils::swap32(~jsr); - CONG; - CONG; - uint32 i1 = SHR3 ^ CONG; - uint32 i2 = SHR3 ^ CONG; - uint32 i3 = SHR3 + CONG; - seed(i1, i2, i3); -} - -uint32 random::urand32() { - return m_kiss99.next() ^ (m_ranctx.next() + m_well512.next()); -} - -uint64 random::urand64() { - uint64 result = urand32(); - result <<= 32ULL; - result |= urand32(); - return result; -} -uint32 random::fast_urand32() { - return m_well512.next(); -} - -uint32 random::bit() { - uint32 k = urand32(); - return (k ^ (k >> 6) ^ (k >> 10) ^ (k >> 30)) & 1; -} - -double random::drand(double l, double h) { - CRNLIB_ASSERT(l <= h); - if (l >= h) - return l; - - return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); -} - -float random::frand(float l, float h) { - CRNLIB_ASSERT(l <= h); - if (l >= h) - return l; - - float r = static_cast(l + (h - l) * (urand32() * cNorm)); - - return math::clamp(r, l, h); -} - -int random::irand(int l, int h) { - CRNLIB_ASSERT(l < h); - if (l >= h) - return l; - - uint32 range = static_cast(h - l); - - uint32 rnd = urand32(); +namespace crnlib +{ + static const double cNorm = 1.0 / (double)0x100000000ULL; + + kiss99::kiss99() + { + x = 123456789; + y = 362436000; + z = 521288629; + c = 7654321; + } + + void kiss99::seed(uint32 i, uint32 j, uint32 k) + { + x = i; + y = j; + z = k; + c = 7654321; + } + + inline uint32 kiss99::next() + { + x = 69069 * x + 12345; + + y ^= (y << 13); + y ^= (y >> 17); + y ^= (y << 5); + + uint64 t = c; + t += (698769069ULL * z); + c = static_cast(t >> 32); + z = static_cast(t); + + return (x + y + z); + } + + inline uint32 ranctx::next() + { + uint32 e = a - rot(b, 27); + a = b ^ rot(c, 17); + b = c + d; + c = d + e; + d = e + a; + return d; + } + + void ranctx::seed(uint32 seed) + { + a = 0xf1ea5eed, b = c = d = seed; + for (uint32 i = 0; i < 20; ++i) + { + next(); + } + } + + well512::well512() + { + seed(0xDEADBE3F); + } + + void well512::seed(uint32 
seed[well512::cStateSize]) + { + memcpy(m_state, seed, sizeof(m_state)); + m_index = 0; + } + + void well512::seed(uint32 seed) + { + uint32 jsr = utils::swap32(seed) ^ 0xAAC29377; + + for (uint i = 0; i < cStateSize; i++) + { + SHR3; + seed = bitmix32c(seed); + + m_state[i] = seed ^ jsr; + } + m_index = 0; + } + + void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3) + { + uint32 jsr = seed2; + uint32 jcong = seed3; + + for (uint i = 0; i < cStateSize; i++) + { + SHR3; + seed1 = bitmix32c(seed1); + CONG; + + m_state[i] = seed1 ^ jsr ^ jcong; + } + m_index = 0; + } + + inline uint32 well512::next() + { + uint32 a, b, c, d; + a = m_state[m_index]; + c = m_state[(m_index + 13) & 15]; + b = a ^ c ^ (a << 16) ^ (c << 15); + c = m_state[(m_index + 9) & 15]; + c ^= (c >> 11); + a = m_state[m_index] = b ^ c; + d = a ^ ((a << 5) & 0xDA442D20UL); + m_index = (m_index + 15) & 15; + a = m_state[m_index]; + m_state[m_index] = a ^ b ^ d ^ (a << 2) ^ (b << 18) ^ (c << 28); + return m_state[m_index]; + } + + random::random() + { + seed(12345, 65435, 34221); + } + + random::random(uint32 i) + { + seed(i); + } + + void random::seed(uint32 i1, uint32 i2, uint32 i3) + { + m_ranctx.seed(i1 ^ i2 ^ i3); + + m_kiss99.seed(i1, i2, i3); + + m_well512.seed(i1, i2, i3); + + for (uint i = 0; i < 100; i++) + { + urand32(); + } + } + + void random::seed(uint32 i) + { + uint32 jsr = i; + SHR3; + SHR3; + uint32 jcong = utils::swap32(~jsr); + CONG; + CONG; + uint32 i1 = SHR3 ^ CONG; + uint32 i2 = SHR3 ^ CONG; + uint32 i3 = SHR3 + CONG; + seed(i1, i2, i3); + } + + uint32 random::urand32() + { + return m_kiss99.next() ^ (m_ranctx.next() + m_well512.next()); + } + + uint64 random::urand64() + { + uint64 result = urand32(); + result <<= 32ULL; + result |= urand32(); + return result; + } + uint32 random::fast_urand32() + { + return m_well512.next(); + } + + uint32 random::bit() + { + uint32 k = urand32(); + return (k ^ (k >> 6) ^ (k >> 10) ^ (k >> 30)) & 1; + } + + double random::drand(double l, 
double h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + { + return l; + } + + return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); + } + + float random::frand(float l, float h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + { + return l; + } + + float r = static_cast(l + (h - l) * (urand32() * cNorm)); + + return math::clamp(r, l, h); + } + + int random::irand(int l, int h) + { + CRNLIB_ASSERT(l < h); + if (l >= h) + { + return l; + } + + uint32 range = static_cast(h - l); + + uint32 rnd = urand32(); #if defined(_M_IX86) && defined(_MSC_VER) - //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); - uint32 x[2]; - *reinterpret_cast(x) = __emulu(range, rnd); - uint32 rnd_range = x[1]; + //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); + uint32 x[2]; + *reinterpret_cast(x) = __emulu(range, rnd); + uint32 rnd_range = x[1]; #else - uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); + uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); #endif - int result = l + rnd_range; - CRNLIB_ASSERT((result >= l) && (result < h)); - return result; -} - -int random::irand_inclusive(int l, int h) { - CRNLIB_ASSERT(h < cINT32_MAX); - return irand(l, h + 1); -} - -/* - ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM. - THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE, - VOL. 18, NO. 4, DECEMBER, 1992, PP. 434-435. - The function returns a normally distributed pseudo-random number - with a given mean and standard devaiation. Calls are made to a - function subprogram which must return independent random - numbers uniform in the interval (0,1). - The algorithm uses the ratio of uniforms method of A.J. Kinderman - and J.F. Monahan augmented with quadratic bounding curves. - */ -double random::gaussian(double mean, double stddev) { - double q, u, v, x, y; - - /* - Generate P = (u,v) uniform in rect. 
enclosing acceptance region - Make sure that any random numbers <= 0 are rejected, since - gaussian() requires uniforms > 0, but RandomUniform() delivers >= 0. - */ - do { - u = drand(0, 1); - v = drand(0, 1); - if (u <= 0.0 || v <= 0.0) { - u = 1.0; - v = 1.0; - } - v = 1.7156 * (v - 0.5); - - /* Evaluate the quadratic form */ - x = u - 0.449871; - y = fabs(v) + 0.386595; - q = x * x + y * (0.19600 * y - 0.25472 * x); - - /* Accept P if inside inner ellipse */ - if (q < 0.27597) - break; - - /* Reject P if outside outer ellipse, or outside acceptance region */ - } while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); - - /* Return ratio of P's coordinates as the normal deviate */ - return (mean + stddev * v / u); -} - -void random::test() { -} - -fast_random::fast_random() - : jsr(0xABCD917A), - jcong(0x17F3DEAD) { -} - -fast_random::fast_random(const fast_random& other) - : jsr(other.jsr), jcong(other.jcong) { -} - -fast_random::fast_random(uint32 i) { - seed(i); -} - -fast_random& fast_random::operator=(const fast_random& other) { - jsr = other.jsr; - jcong = other.jcong; - return *this; -} - -void fast_random::seed(uint32 i) { - jsr = i; - SHR3; - SHR3; - jcong = (~i) ^ 0xDEADBEEF; - - SHR3; - CONG; -} - -uint32 fast_random::urand32() { - return SHR3 ^ CONG; -} - -uint64 fast_random::urand64() { - uint64 result = urand32(); - result <<= 32ULL; - result |= urand32(); - return result; -} -int fast_random::irand(int l, int h) { - CRNLIB_ASSERT(l < h); - if (l >= h) - return l; - - uint32 range = static_cast(h - l); - - uint32 rnd = urand32(); + int result = l + rnd_range; + CRNLIB_ASSERT((result >= l) && (result < h)); + return result; + } + + int random::irand_inclusive(int l, int h) + { + CRNLIB_ASSERT(h < cINT32_MAX); + return irand(l, h + 1); + } + + /* + ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM. + THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE, + VOL. 18, NO. 4, DECEMBER, 1992, PP. 434-435. 
+ The function returns a normally distributed pseudo-random number + with a given mean and standard deviation. Calls are made to a + function subprogram which must return independent random + numbers uniform in the interval (0,1). + The algorithm uses the ratio of uniforms method of A.J. Kinderman + and J.F. Monahan augmented with quadratic bounding curves. + */ + double random::gaussian(double mean, double stddev) + { + double q, u, v, x, y; + + /* + Generate P = (u,v) uniform in rect. enclosing acceptance region + Make sure that any random numbers <= 0 are rejected, since + gaussian() requires uniforms > 0, but RandomUniform() delivers >= 0. + */ + do + { + u = drand(0, 1); + v = drand(0, 1); + if (u <= 0.0 || v <= 0.0) + { + u = 1.0; + v = 1.0; + } + v = 1.7156 * (v - 0.5); + + /* Evaluate the quadratic form */ + x = u - 0.449871; + y = fabs(v) + 0.386595; + q = x * x + y * (0.19600 * y - 0.25472 * x); + + /* Accept P if inside inner ellipse */ + if (q < 0.27597) + { + break; + } + + /* Reject P if outside outer ellipse, or outside acceptance region */ + } + while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); + + /* Return ratio of P's coordinates as the normal deviate */ + return (mean + stddev * v / u); + } + + void random::test() + { + } + + fast_random::fast_random(): + jsr(0xABCD917A), + jcong(0x17F3DEAD) + { + } + + fast_random::fast_random(const fast_random& other): + jsr(other.jsr), + jcong(other.jcong) + { + } + + fast_random::fast_random(uint32 i) + { + seed(i); + } + + fast_random& fast_random::operator=(const fast_random& other) + { + jsr = other.jsr; + jcong = other.jcong; + return *this; + } + + void fast_random::seed(uint32 i) + { + jsr = i; + SHR3; + SHR3; + jcong = (~i) ^ 0xDEADBEEF; + + SHR3; + CONG; + } + + uint32 fast_random::urand32() + { + return SHR3 ^ CONG; + } + + uint64 fast_random::urand64() + { + uint64 result = urand32(); + result <<= 32ULL; + result |= urand32(); + return result; + } + int fast_random::irand(int l, int h) + {
+ CRNLIB_ASSERT(l < h); + if (l >= h) + { + return l; + } + + uint32 range = static_cast(h - l); + + uint32 rnd = urand32(); #if defined(_M_IX86) && defined(_MSC_VER) - //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); - uint32 x[2]; - *reinterpret_cast(x) = __emulu(range, rnd); - uint32 rnd_range = x[1]; + //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); + uint32 x[2]; + *reinterpret_cast(x) = __emulu(range, rnd); + uint32 rnd_range = x[1]; #else - uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); + uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); #endif - int result = l + rnd_range; - CRNLIB_ASSERT((result >= l) && (result < h)); - return result; -} + int result = l + rnd_range; + CRNLIB_ASSERT((result >= l) && (result < h)); + return result; + } -double fast_random::drand(double l, double h) { - CRNLIB_ASSERT(l <= h); - if (l >= h) - return l; + double fast_random::drand(double l, double h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + { + return l; + } - return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); -} + return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); + } -float fast_random::frand(float l, float h) { - CRNLIB_ASSERT(l <= h); - if (l >= h) - return l; + float fast_random::frand(float l, float h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + { + return l; + } - float r = static_cast(l + (h - l) * (urand32() * cNorm)); + float r = static_cast(l + (h - l) * (urand32() * cNorm)); - return math::clamp(r, l, h); -} + return math::clamp(r, l, h); + } } // namespace crnlib diff --git a/crnlib/crn_rand.h b/crnlib/crn_rand.h index 47129a5..7ea38fe 100644 --- a/crnlib/crn_rand.h +++ b/crnlib/crn_rand.h @@ -1,115 +1,125 @@ // File: crn_rand.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT kiss99 { - public: - kiss99(); +namespace crnlib +{ + class CRN_EXPORT kiss99 + { + public: + 
kiss99(); - void seed(uint32 i, uint32 j, uint32 k); + void seed(uint32 i, uint32 j, uint32 k); - inline uint32 next(); + inline uint32 next(); - private: - uint32 x; - uint32 y; - uint32 z; - uint32 c; -}; + private: + uint32 x; + uint32 y; + uint32 z; + uint32 c; + }; -class CRN_EXPORT well512 { - public: - well512(); + class CRN_EXPORT well512 + { + public: + well512(); - enum { cStateSize = 16 }; - void seed(uint32 seed[cStateSize]); - void seed(uint32 seed); - void seed(uint32 seed1, uint32 seed2, uint32 seed3); + enum { cStateSize = 16 }; + void seed(uint32 seed[cStateSize]); + void seed(uint32 seed); + void seed(uint32 seed1, uint32 seed2, uint32 seed3); - inline uint32 next(); + inline uint32 next(); - private: - uint32 m_state[cStateSize]; - uint32 m_index; -}; + private: + uint32 m_state[cStateSize]; + uint32 m_index; + }; -class CRN_EXPORT ranctx { - public: - ranctx() { seed(0xDE149737); } + class CRN_EXPORT ranctx + { + public: + ranctx() + { + seed(0xDE149737); + } - void seed(uint32 seed); + void seed(uint32 seed); - inline uint32 next(); + inline uint32 next(); - private: - uint32 a; - uint32 b; - uint32 c; - uint32 d; -}; + private: + uint32 a; + uint32 b; + uint32 c; + uint32 d; + }; -class CRN_EXPORT random { - public: - random(); - random(uint32 i); + class CRN_EXPORT random + { + public: + random(); + random(uint32 i); - void seed(uint32 i); - void seed(uint32 i1, uint32 i2, uint32 i3); + void seed(uint32 i); + void seed(uint32 i1, uint32 i2, uint32 i3); - uint32 urand32(); - uint64 urand64(); + uint32 urand32(); + uint64 urand64(); - // "Fast" variant uses no multiplies. - uint32 fast_urand32(); + // "Fast" variant uses no multiplies. 
+ uint32 fast_urand32(); - uint32 bit(); + uint32 bit(); - // Returns random between [0, 1) - double drand(double l, double h); + // Returns random between l and h (result is clamped to [l, h]); returns l if l >= h + double drand(double l, double h); - float frand(float l, float h); + float frand(float l, float h); - // Returns random between [l, h) - int irand(int l, int h); + // Returns random between [l, h) + int irand(int l, int h); - // Returns random between [l, h] - int irand_inclusive(int l, int h); + // Returns random between [l, h] + int irand_inclusive(int l, int h); - double gaussian(double mean, double stddev); + double gaussian(double mean, double stddev); - void test(); + void test(); - private: - ranctx m_ranctx; - kiss99 m_kiss99; - well512 m_well512; -}; + private: + ranctx m_ranctx; + kiss99 m_kiss99; + well512 m_well512; + }; -// Simpler, minimal state PRNG -class CRN_EXPORT fast_random { - public: - fast_random(); - fast_random(uint32 i); - fast_random(const fast_random& other); - fast_random& operator=(const fast_random& other); + // Simpler, minimal state PRNG + class CRN_EXPORT fast_random + { + public: + fast_random(); + fast_random(uint32 i); + fast_random(const fast_random& other); + fast_random& operator=(const fast_random& other); - void seed(uint32 i); + void seed(uint32 i); - uint32 urand32(); - uint64 urand64(); + uint32 urand32(); + uint64 urand64(); - int irand(int l, int h); + int irand(int l, int h); - double drand(double l, double h); + double drand(double l, double h); - float frand(float l, float h); + float frand(float l, float h); - private: - uint32 jsr; - uint32 jcong; -}; + private: + uint32 jsr; + uint32 jcong; + }; } // namespace crnlib diff --git a/crnlib/crn_ray.h b/crnlib/crn_ray.h index 34c66ea..60112c4 100644 --- a/crnlib/crn_ray.h +++ b/crnlib/crn_ray.h @@ -1,48 +1,75 @@ // File: crn_ray.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_vec.h" -namespace crnlib { -template -class ray { - public: - typedef vector_type vector_t; - typedef typename vector_type::scalar_type scalar_type; +namespace crnlib +{ + template + class ray + { + public: + typedef vector_type
vector_t; - typedef typename vector_type::scalar_type scalar_type; +namespace crnlib +{ + template + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; - inline ray() {} - inline ray(eClear) { clear(); } - inline ray(const vector_type& origin, const vector_type& direction) - : m_origin(origin), m_direction(direction) {} + inline ray() + { + } + inline ray(eClear) + { + clear(); + } + inline ray(const vector_type& origin, const vector_type& direction): + m_origin(origin), + m_direction(direction) + { + } - inline void clear() { - m_origin.clear(); - m_direction.clear(); - } + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } - inline const vector_type& get_origin(void) const { return m_origin; } - inline void set_origin(const vector_type& origin) { m_origin = origin; } + inline const vector_type& get_origin(void) const + { + return m_origin; + } + inline void set_origin(const vector_type& origin) + { + m_origin = origin; + } - inline const vector_type& get_direction(void) const { return m_direction; } - inline void set_direction(const vector_type& direction) { m_direction = direction; } + inline const vector_type& get_direction(void) const + { + return m_direction; + } + inline void set_direction(const vector_type& direction) + { + m_direction = direction; + } - inline scalar_type set_endpoints(const vector_type& start, const vector_type& end, const vector_type& def) { - m_origin = start; + inline scalar_type set_endpoints(const vector_type& start, const vector_type& end, const vector_type& def) + { + m_origin = start; - m_direction = end - start; - return m_direction.normalize(&def); - } + m_direction = end - start; + return m_direction.normalize(&def); + } - inline vector_type eval(scalar_type t) const { - return m_origin + m_direction * t; - } + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } - private: - vector_type m_origin; - vector_type 
m_direction; -}; + private: + vector_type m_origin; + vector_type m_direction; + }; -typedef ray ray2F; -typedef ray ray3F; + typedef ray ray2F; + typedef ray ray3F; } // namespace crnlib diff --git a/crnlib/crn_strutils.cpp b/crnlib/crn_strutils.cpp index 29ee079..1ee14f8 100644 --- a/crnlib/crn_strutils.cpp +++ b/crnlib/crn_strutils.cpp @@ -3,540 +3,703 @@ #include "crn_core.h" #include "crn_strutils.h" -namespace crnlib { -char* crn_strdup(const char* pStr) { - if (!pStr) - pStr = ""; - - size_t l = strlen(pStr) + 1; - char* p = (char*)crnlib_malloc(l); - if (p) - memcpy(p, pStr, l); - - return p; -} - -int crn_stricmp(const char* p, const char* q) { - return _stricmp(p, q); -} - -char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc) { - CRNLIB_ASSERT(pDst && pSrc && dst_len); - if (!dst_len) - return pDst; - - char* q = pDst; - char c; - - do { - if (dst_len == 1) { - *q++ = '\0'; - break; - } +namespace crnlib +{ + char* crn_strdup(const char* pStr) + { + if (!pStr) + { + pStr = ""; + } - c = *pSrc++; - *q++ = c; + size_t l = strlen(pStr) + 1; + char* p = (char*)crnlib_malloc(l); + if (p) + { + memcpy(p, pStr, l); + } - dst_len--; + return p; + } - } while (c); + int crn_stricmp(const char* p, const char* q) + { + return _stricmp(p, q); + } - CRNLIB_ASSERT((q - pDst) <= (int)dst_len); + char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc) + { + CRNLIB_ASSERT(pDst && pSrc && dst_len); + if (!dst_len) + { + return pDst; + } - return pDst; -} + char* q = pDst; + char c; -bool int_to_string(int value, char* pDst, uint len) { - CRNLIB_ASSERT(pDst); + do + { + if (dst_len == 1) + { + *q++ = '\0'; + break; + } - const uint cBufSize = 16; - char buf[cBufSize]; + c = *pSrc++; + *q++ = c; - uint j = static_cast((value < 0) ? 
-value : value); + dst_len--; - char* p = buf + cBufSize - 1; + } + while (c); - *p-- = '\0'; + CRNLIB_ASSERT((q - pDst) <= (int)dst_len); - do { - *p-- = static_cast('0' + (j % 10)); - j /= 10; - } while (j); + return pDst; + } - if (value < 0) - *p-- = '-'; + bool int_to_string(int value, char* pDst, uint len) + { + CRNLIB_ASSERT(pDst); - const size_t total_bytes = (buf + cBufSize - 1) - p; - if (total_bytes > len) - return false; + const uint cBufSize = 16; + char buf[cBufSize]; - for (size_t i = 0; i < total_bytes; i++) - pDst[i] = p[1 + i]; + uint j = static_cast((value < 0) ? -value : value); - return true; -} + char* p = buf + cBufSize - 1; -bool uint_to_string(uint value, char* pDst, uint len) { - CRNLIB_ASSERT(pDst); + *p-- = '\0'; - const uint cBufSize = 16; - char buf[cBufSize]; + do + { + *p-- = static_cast('0' + (j % 10)); + j /= 10; + } + while (j); - char* p = buf + cBufSize - 1; + if (value < 0) + { + *p-- = '-'; + } - *p-- = '\0'; + const size_t total_bytes = (buf + cBufSize - 1) - p; + if (total_bytes > len) + { + return false; + } - do { - *p-- = static_cast('0' + (value % 10)); - value /= 10; - } while (value); + for (size_t i = 0; i < total_bytes; i++) + { + pDst[i] = p[1 + i]; + } - const size_t total_bytes = (buf + cBufSize - 1) - p; - if (total_bytes > len) - return false; + return true; + } - for (size_t i = 0; i < total_bytes; i++) - pDst[i] = p[1 + i]; + bool uint_to_string(uint value, char* pDst, uint len) + { + CRNLIB_ASSERT(pDst); - return true; -} + const uint cBufSize = 16; + char buf[cBufSize]; -bool string_to_int(const char*& pBuf, int& value) { - value = 0; + char* p = buf + cBufSize - 1; - CRNLIB_ASSERT(pBuf); - const char* p = pBuf; + *p-- = '\0'; - while (*p && isspace(*p)) - p++; + do + { + *p-- = static_cast('0' + (value % 10)); + value /= 10; + } + while (value); - uint result = 0; - bool negative = false; + const size_t total_bytes = (buf + cBufSize - 1) - p; + if (total_bytes > len) + { + return false; + } - if 
(!isdigit(*p)) { - if (p[0] == '-') { - negative = true; - p++; - } else - return false; - } + for (size_t i = 0; i < total_bytes; i++) + { + pDst[i] = p[1 + i]; + } - while (*p && isdigit(*p)) { - if (result & 0xE0000000U) - return false; + return true; + } - const uint result8 = result << 3U; - const uint result2 = result << 1U; + bool string_to_int(const char*& pBuf, int& value) + { + value = 0; - if (result2 > (0xFFFFFFFFU - result8)) - return false; + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; - result = result8 + result2; + while (*p && isspace(*p)) + { + p++; + } - uint c = p[0] - '0'; - if (c > (0xFFFFFFFFU - result)) - return false; + uint result = 0; + bool negative = false; - result += c; + if (!isdigit(*p)) + { + if (p[0] == '-') + { + negative = true; + p++; + } + else + { + return false; + } + } - p++; - } + while (*p && isdigit(*p)) + { + if (result & 0xE0000000U) + { + return false; + } - if (negative) { - if (result > 0x80000000U) { - value = 0; - return false; - } - value = -static_cast(result); - } else { - if (result > 0x7FFFFFFFU) { - value = 0; - return false; - } - value = static_cast(result); - } + const uint result8 = result << 3U; + const uint result2 = result << 1U; - pBuf = p; + if (result2 > (0xFFFFFFFFU - result8)) + { + return false; + } - return true; -} + result = result8 + result2; -bool string_to_int64(const char*& pBuf, int64& value) { - value = 0; + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFU - result)) + { + return false; + } - CRNLIB_ASSERT(pBuf); - const char* p = pBuf; + result += c; - while (*p && isspace(*p)) - p++; + p++; + } - uint64 result = 0; - bool negative = false; + if (negative) + { + if (result > 0x80000000U) + { + value = 0; + return false; + } + value = -static_cast(result); + } + else + { + if (result > 0x7FFFFFFFU) + { + value = 0; + return false; + } + value = static_cast(result); + } - if (!isdigit(*p)) { - if (p[0] == '-') { - negative = true; - p++; - } else - return false; - } + pBuf = p; - while (*p 
&& isdigit(*p)) { - if (result & 0xE000000000000000ULL) - return false; + return true; + } - const uint64 result8 = result << 3U; - const uint64 result2 = result << 1U; + bool string_to_int64(const char*& pBuf, int64& value) + { + value = 0; - if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) - return false; + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; - result = result8 + result2; + while (*p && isspace(*p)) + { + p++; + } - uint c = p[0] - '0'; - if (c > (0xFFFFFFFFFFFFFFFFULL - result)) - return false; + uint64 result = 0; + bool negative = false; - result += c; + if (!isdigit(*p)) + { + if (p[0] == '-') + { + negative = true; + p++; + } + else + { + return false; + } + } - p++; - } + while (*p && isdigit(*p)) + { + if (result & 0xE000000000000000ULL) + { + return false; + } - if (negative) { - if (result > 0x8000000000000000ULL) { - value = 0; - return false; - } - value = -static_cast(result); - } else { - if (result > 0x7FFFFFFFFFFFFFFFULL) { - value = 0; - return false; - } - value = static_cast(result); - } + const uint64 result8 = result << 3U; + const uint64 result2 = result << 1U; - pBuf = p; + if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) + { + return false; + } - return true; -} + result = result8 + result2; -bool string_to_uint(const char*& pBuf, uint& value) { - value = 0; + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFFFFFFFFFULL - result)) + { + return false; + } - CRNLIB_ASSERT(pBuf); - const char* p = pBuf; + result += c; - while (*p && isspace(*p)) - p++; + p++; + } - uint result = 0; + if (negative) + { + if (result > 0x8000000000000000ULL) + { + value = 0; + return false; + } + value = -static_cast(result); + } + else + { + if (result > 0x7FFFFFFFFFFFFFFFULL) + { + value = 0; + return false; + } + value = static_cast(result); + } - if (!isdigit(*p)) - return false; + pBuf = p; - while (*p && isdigit(*p)) { - if (result & 0xE0000000U) - return false; + return true; + } - const uint result8 = result << 3U; - const uint result2 = result << 1U; + 
bool string_to_uint(const char*& pBuf, uint& value) + { + value = 0; - if (result2 > (0xFFFFFFFFU - result8)) - return false; + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; - result = result8 + result2; + while (*p && isspace(*p)) + { + p++; + } - uint c = p[0] - '0'; - if (c > (0xFFFFFFFFU - result)) - return false; + uint result = 0; - result += c; + if (!isdigit(*p)) + { + return false; + } + + while (*p && isdigit(*p)) + { + if (result & 0xE0000000U) + { + return false; + } - p++; - } + const uint result8 = result << 3U; + const uint result2 = result << 1U; - value = result; + if (result2 > (0xFFFFFFFFU - result8)) + { + return false; + } - pBuf = p; + result = result8 + result2; - return true; -} + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFU - result)) + { + return false; + } -bool string_to_uint64(const char*& pBuf, uint64& value) { - value = 0; + result += c; - CRNLIB_ASSERT(pBuf); - const char* p = pBuf; + p++; + } - while (*p && isspace(*p)) - p++; + value = result; - uint64 result = 0; + pBuf = p; - if (!isdigit(*p)) - return false; + return true; + } - while (*p && isdigit(*p)) { - if (result & 0xE000000000000000ULL) - return false; + bool string_to_uint64(const char*& pBuf, uint64& value) + { + value = 0; - const uint64 result8 = result << 3U; - const uint64 result2 = result << 1U; + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; - if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) - return false; + while (*p && isspace(*p)) + { + p++; + } - result = result8 + result2; + uint64 result = 0; - uint c = p[0] - '0'; - if (c > (0xFFFFFFFFFFFFFFFFULL - result)) - return false; + if (!isdigit(*p)) + { + return false; + } - result += c; + while (*p && isdigit(*p)) + { + if (result & 0xE000000000000000ULL) + { + return false; + } - p++; - } + const uint64 result8 = result << 3U; + const uint64 result2 = result << 1U; - value = result; + if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) + { + return false; + } - pBuf = p; + result = result8 + result2; - return true; 
-} + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFFFFFFFFFULL - result)) + { + return false; + } -bool string_to_bool(const char* p, bool& value) { - CRNLIB_ASSERT(p); + result += c; - value = false; + p++; + } - if (_stricmp(p, "false") == 0) - return true; + value = result; - if (_stricmp(p, "true") == 0) { - value = true; - return true; - } + pBuf = p; - const char* q = p; - uint v; - if (string_to_uint(q, v)) { - if (!v) - return true; - else if (v == 1) { - value = true; - return true; - } - } - - return false; -} - -bool string_to_float(const char*& p, float& value, uint round_digit) { - double d; - if (!string_to_double(p, d, round_digit)) { - value = 0; - return false; - } - value = static_cast(d); - return true; -} - -bool string_to_double(const char*& p, double& value, uint round_digit) { - return string_to_double(p, p + 128, value, round_digit); -} - -// I wrote this approx. 20 years ago in C/assembly using a limited FP emulator package, so it's a bit crude. -bool string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit) { - CRNLIB_ASSERT(p); - - value = 0; - - enum { AF_BLANK = 1, - AF_SIGN = 2, - AF_DPOINT = 3, - AF_BADCHAR = 4, - AF_OVRFLOW = 5, - AF_EXPONENT = 6, - AF_NODIGITS = 7 }; - int status = 0; - - const char* buf = p; - - int got_sign_flag = 0, got_dp_flag = 0, got_num_flag = 0; - int got_e_flag = 0, got_e_sign_flag = 0, e_sign = 0; - uint whole_count = 0, frac_count = 0; - - double whole = 0, frac = 0, scale = 1, exponent = 1; - - if (p >= pEnd) { - status = AF_NODIGITS; - goto af_exit; - } - - while (*buf) { - if (!isspace(*buf)) - break; - if (++buf >= pEnd) { - status = AF_NODIGITS; - goto af_exit; + return true; } - } - p = buf; + bool string_to_bool(const char* p, bool& value) + { + CRNLIB_ASSERT(p); + + value = false; - while (*buf) { - p = buf; - if (buf >= pEnd) - break; + if (_stricmp(p, "false") == 0) + { + return true; + } - int i = *buf++; + if (_stricmp(p, "true") == 0) + { + value = true; + return true; + 
} - switch (i) { - case 'e': - case 'E': { - got_e_flag = 1; - goto exit_while; - } - case '+': { - if ((got_num_flag) || (got_sign_flag)) { - status = AF_SIGN; - goto af_exit; + const char* q = p; + uint v; + if (string_to_uint(q, v)) + { + if (!v) + { + return true; + } + else if (v == 1) + { + value = true; + return true; + } } - got_sign_flag = 1; + return false; + } - break; - } - case '-': { - if ((got_num_flag) || (got_sign_flag)) { - status = AF_SIGN; - goto af_exit; + bool string_to_float(const char*& p, float& value, uint round_digit) + { + double d; + if (!string_to_double(p, d, round_digit)) + { + value = 0; + return false; } + value = static_cast(d); + return true; + } - got_sign_flag = -1; + bool string_to_double(const char*& p, double& value, uint round_digit) + { + return string_to_double(p, p + 128, value, round_digit); + } - break; - } - case '.': { - if (got_dp_flag) { - status = AF_DPOINT; - goto af_exit; + // I wrote this approx. 20 years ago in C/assembly using a limited FP emulator package, so it's a bit crude. 
+ bool string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit) + { + CRNLIB_ASSERT(p); + + value = 0; + + enum + { + AF_BLANK = 1, + AF_SIGN = 2, + AF_DPOINT = 3, + AF_BADCHAR = 4, + AF_OVRFLOW = 5, + AF_EXPONENT = 6, + AF_NODIGITS = 7 + }; + int status = 0; + + const char* buf = p; + + int got_sign_flag = 0, got_dp_flag = 0, got_num_flag = 0; + int got_e_flag = 0, got_e_sign_flag = 0, e_sign = 0; + uint whole_count = 0, frac_count = 0; + + double whole = 0, frac = 0, scale = 1, exponent = 1; + + if (p >= pEnd) + { + status = AF_NODIGITS; + goto af_exit; } - got_dp_flag = 1; + while (*buf) + { + if (!isspace(*buf)) + { + break; + } + if (++buf >= pEnd) + { + status = AF_NODIGITS; + goto af_exit; + } + } - break; - } - default: { - if ((i < '0') || (i > '9')) - goto exit_while; - else { - i -= '0'; + p = buf; - got_num_flag = 1; + while (*buf) + { + p = buf; + if (buf >= pEnd) + break; - if (got_dp_flag) { - if (frac_count < round_digit) { - frac = frac * 10.0f + i; + int i = *buf++; - scale = scale * 10.0f; - } else if (frac_count == round_digit) { - if (i >= 5) /* check for round */ - frac = frac + 1.0f; + switch (i) + { + case 'e': + case 'E': + { + got_e_flag = 1; + goto exit_while; } + case '+': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } - frac_count++; - } else { - whole = whole * 10.0f + i; - - whole_count++; + got_sign_flag = 1; - if (whole > 1e+100) { - status = AF_OVRFLOW; - goto af_exit; + break; } - } - } + case '-': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } - break; - } - } - } + got_sign_flag = -1; -exit_while: + break; + } + case '.': + { + if (got_dp_flag) + { + status = AF_DPOINT; + goto af_exit; + } - if (got_e_flag) { - if ((got_num_flag == 0) && (got_dp_flag)) { - status = AF_EXPONENT; - goto af_exit; - } + got_dp_flag = 1; - int e = 0; - e_sign = 1; - got_num_flag = 0; - got_e_sign_flag = 0; + break; + } + default: + { + if 
((i < '0') || (i > '9')) + { + goto exit_while; + } + else + { + i -= '0'; + + got_num_flag = 1; + + if (got_dp_flag) + { + if (frac_count < round_digit) + { + frac = frac * 10.0f + i; + + scale = scale * 10.0f; + } + else if (frac_count == round_digit) + { + if (i >= 5) /* check for round */ + { + frac = frac + 1.0f; + } + } + + frac_count++; + } + else + { + whole = whole * 10.0f + i; + + whole_count++; + + if (whole > 1e+100) + { + status = AF_OVRFLOW; + goto af_exit; + } + } + } + + break; + } + } + } - while (*buf) { - p = buf; - if (buf >= pEnd) - break; + exit_while: - int i = *buf++; + if (got_e_flag) + { + if ((got_num_flag == 0) && (got_dp_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } - if (i == '+') { - if ((got_num_flag) || (got_e_sign_flag)) { - status = AF_EXPONENT; - goto af_exit; - } + int e = 0; + e_sign = 1; + got_num_flag = 0; + got_e_sign_flag = 0; + + while (*buf) + { + p = buf; + if (buf >= pEnd) + { + break; + } + + int i = *buf++; + + if (i == '+') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = 1; + got_e_sign_flag = 1; + } + else if (i == '-') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = -1; + got_e_sign_flag = 1; + } + else if ((i >= '0') && (i <= '9')) + { + got_num_flag = 1; + + if ((e = (e * 10) + (i - 48)) > 100) + { + status = AF_EXPONENT; + goto af_exit; + } + } + else + { + break; + } + } - e_sign = 1; - got_e_sign_flag = 1; - } else if (i == '-') { - if ((got_num_flag) || (got_e_sign_flag)) { - status = AF_EXPONENT; - goto af_exit; + for (int i = 1; i <= e; i++) /* compute 10^e */ + { + exponent = exponent * 10.0f; + } } - e_sign = -1; - got_e_sign_flag = 1; - } else if ((i >= '0') && (i <= '9')) { - got_num_flag = 1; - - if ((e = (e * 10) + (i - 48)) > 100) { - status = AF_EXPONENT; - goto af_exit; + if (((whole_count + frac_count) == 0) && (got_e_flag == 0)) + { + status = AF_NODIGITS; + goto 
af_exit; } - } else - break; - } - - for (int i = 1; i <= e; i++) /* compute 10^e */ - exponent = exponent * 10.0f; - } - - if (((whole_count + frac_count) == 0) && (got_e_flag == 0)) { - status = AF_NODIGITS; - goto af_exit; - } - if (frac) - whole = whole + (frac / scale); + if (frac) + { + whole = whole + (frac / scale); + } - if (got_e_flag) { - if (e_sign > 0) - whole = whole * exponent; - else - whole = whole / exponent; - } + if (got_e_flag) + { + if (e_sign > 0) + { + whole = whole * exponent; + } + else + { + whole = whole / exponent; + } + } - if (got_sign_flag < 0) - whole = -whole; + if (got_sign_flag < 0) + { + whole = -whole; + } - value = whole; + value = whole; -af_exit: - return (status == 0); -} + af_exit: + return status == 0; + } } // namespace crnlib diff --git a/crnlib/crn_timer.cpp b/crnlib/crn_timer.cpp index ac79b5a..5b64a4d 100644 --- a/crnlib/crn_timer.cpp +++ b/crnlib/crn_timer.cpp @@ -1,8 +1,10 @@ // File: crn_win32_timer.cpp // See Copyright Notice and license at the end of inc/crnlib.h + +#include + #include "crn_core.h" #include "crn_timer.h" -#include #include "crn_timer.h" @@ -10,129 +12,164 @@ #include "crn_winhdr.h" #endif -namespace crnlib { -unsigned long long timer::g_init_ticks; -unsigned long long timer::g_freq; -double timer::g_inv_freq; +namespace crnlib +{ + unsigned long long timer::g_init_ticks; + unsigned long long timer::g_freq; + double timer::g_inv_freq; #if defined(CRNLIB_USE_WIN32_API) -inline void query_counter(timer_ticks* pTicks) { - QueryPerformanceCounter(reinterpret_cast(pTicks)); -} -inline void query_counter_frequency(timer_ticks* pTicks) { - QueryPerformanceFrequency(reinterpret_cast(pTicks)); -} -#elif defined(__GNUC__) + inline void query_counter(timer_ticks* pTicks) + { + QueryPerformanceCounter(reinterpret_cast(pTicks)); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + QueryPerformanceFrequency(reinterpret_cast(pTicks)); + } +#elif defined(CRN_CC_GNU) #include -inline void 
query_counter(timer_ticks* pTicks) { - struct timeval cur_time; - gettimeofday(&cur_time, NULL); - *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); -} -inline void query_counter_frequency(timer_ticks* pTicks) { - *pTicks = 1000000; -} + inline void query_counter(timer_ticks* pTicks) + { + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + *pTicks = 1000000; + } #else #error Unimplemented #endif -timer::timer() - : m_start_time(0), - m_stop_time(0), - m_started(false), - m_stopped(false) { - if (!g_inv_freq) - init(); -} - -timer::timer(timer_ticks start_ticks) { - if (!g_inv_freq) - init(); - - m_start_time = start_ticks; - - m_started = true; - m_stopped = false; -} - -void timer::start(timer_ticks start_ticks) { - m_start_time = start_ticks; - - m_started = true; - m_stopped = false; -} - -void timer::start() { - query_counter(&m_start_time); - - m_started = true; - m_stopped = false; -} - -void timer::stop() { - CRNLIB_ASSERT(m_started); - - query_counter(&m_stop_time); - - m_stopped = true; -} - -double timer::get_elapsed_secs() const { - CRNLIB_ASSERT(m_started); - if (!m_started) - return 0; - - timer_ticks stop_time = m_stop_time; - if (!m_stopped) - query_counter(&stop_time); - - timer_ticks delta = stop_time - m_start_time; - return delta * g_inv_freq; -} - -timer_ticks timer::get_elapsed_us() const { - CRNLIB_ASSERT(m_started); - if (!m_started) - return 0; - - timer_ticks stop_time = m_stop_time; - if (!m_stopped) - query_counter(&stop_time); - - timer_ticks delta = stop_time - m_start_time; - return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; -} - -void timer::init() { - if (!g_inv_freq) { - query_counter_frequency(&g_freq); - g_inv_freq = 1.0f / g_freq; - - query_counter(&g_init_ticks); - } -} - -timer_ticks timer::get_init_ticks() { - if (!g_inv_freq) 
- init(); - - return g_init_ticks; -} - -timer_ticks timer::get_ticks() { - if (!g_inv_freq) - init(); - - timer_ticks ticks; - query_counter(&ticks); - return ticks - g_init_ticks; -} - -double timer::ticks_to_secs(timer_ticks ticks) { - if (!g_inv_freq) - init(); - - return ticks * g_inv_freq; -} + timer::timer(): + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) + { + if (!g_inv_freq) + { + init(); + } + } + + timer::timer(timer_ticks start_ticks) + { + if (!g_inv_freq) + { + init(); + } + + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void timer::start(timer_ticks start_ticks) + { + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void timer::start() + { + query_counter(&m_start_time); + + m_started = true; + m_stopped = false; + } + + void timer::stop() + { + CRNLIB_ASSERT(m_started); + + query_counter(&m_stop_time); + + m_stopped = true; + } + + double timer::get_elapsed_secs() const + { + CRNLIB_ASSERT(m_started); + if (!m_started) + { + return 0; + } + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + { + query_counter(&stop_time); + } + + timer_ticks delta = stop_time - m_start_time; + return delta * g_inv_freq; + } + + timer_ticks timer::get_elapsed_us() const + { + CRNLIB_ASSERT(m_started); + if (!m_started) + { + return 0; + } + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + { + query_counter(&stop_time); + } + + timer_ticks delta = stop_time - m_start_time; + return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; + } + + void timer::init() + { + if (!g_inv_freq) + { + query_counter_frequency(&g_freq); + g_inv_freq = 1.0f / g_freq; + + query_counter(&g_init_ticks); + } + } + + timer_ticks timer::get_init_ticks() + { + if (!g_inv_freq) + { + init(); + } + + return g_init_ticks; + } + + timer_ticks timer::get_ticks() + { + if (!g_inv_freq) + { + init(); + } + + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; + } + + 
double timer::ticks_to_secs(timer_ticks ticks) + { + if (!g_inv_freq) + { + init(); + } + + return ticks * g_inv_freq; + } } // namespace crnlib diff --git a/crnlib/crn_timer.h b/crnlib/crn_timer.h index 927f1dc..895b9ab 100644 --- a/crnlib/crn_timer.h +++ b/crnlib/crn_timer.h @@ -1,66 +1,101 @@ // File: crn_win32_timer.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -typedef unsigned long long timer_ticks; - -class CRN_EXPORT timer { - public: - timer(); - timer(timer_ticks start_ticks); - - void start(); - void start(timer_ticks start_ticks); - - void stop(); - - double get_elapsed_secs() const; - inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } - timer_ticks get_elapsed_us() const; - - static void init(); - static inline timer_ticks get_ticks_per_sec() { return g_freq; } - static timer_ticks get_init_ticks(); - static timer_ticks get_ticks(); - static double ticks_to_secs(timer_ticks ticks); - static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } - static inline double get_secs() { return ticks_to_secs(get_ticks()); } - static inline double get_ms() { return ticks_to_ms(get_ticks()); } - - private: - static timer_ticks g_init_ticks; - static timer_ticks g_freq; - static double g_inv_freq; - - timer_ticks m_start_time; - timer_ticks m_stop_time; - - bool m_started : 1; - bool m_stopped : 1; -}; - -// Prints object's lifetime to stdout -class timed_scope { - const char* m_pName; - timer m_tm; - - public: - inline timed_scope(const char* pName = "timed_scope") - : m_pName(pName) { m_tm.start(); } - - inline double get_elapsed_secs() const { return m_tm.get_elapsed_secs(); } - inline double get_elapsed_ms() const { return m_tm.get_elapsed_ms(); } - - const timer& get_timer() const { return m_tm; } - timer& get_timer() { return m_tm; } - - inline ~timed_scope() { - double secs = m_tm.get_elapsed_secs(); - printf("%s: %f 
secs, %f ms\n", m_pName, secs, secs * 1000.0f); - } -}; +namespace crnlib +{ + typedef unsigned long long timer_ticks; + + class CRN_EXPORT timer + { + public: + timer(); + timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const + { + return get_elapsed_secs() * 1000.0f; + } + timer_ticks get_elapsed_us() const; + + static void init(); + static inline timer_ticks get_ticks_per_sec() + { + return g_freq; + } + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) + { + return ticks_to_secs(ticks) * 1000.0f; + } + static inline double get_secs() + { + return ticks_to_secs(get_ticks()); + } + static inline double get_ms() + { + return ticks_to_ms(get_ticks()); + } + + private: + static timer_ticks g_init_ticks; + static timer_ticks g_freq; + static double g_inv_freq; + + timer_ticks m_start_time; + timer_ticks m_stop_time; + + bool m_started : 1; + bool m_stopped : 1; + }; + + // Prints object's lifetime to stdout + class timed_scope + { + public: + inline timed_scope(const char* pName = "timed_scope"): + m_pName(pName) + { + m_tm.start(); + } + + inline double get_elapsed_secs() const + { + return m_tm.get_elapsed_secs(); + } + inline double get_elapsed_ms() const + { + return m_tm.get_elapsed_ms(); + } + + const timer& get_timer() const + { + return m_tm; + } + timer& get_timer() + { + return m_tm; + } + + inline ~timed_scope() + { + double secs = m_tm.get_elapsed_secs(); + printf("%s: %f secs, %f ms\n", m_pName, secs, secs * 1000.0f); + } + private: + const char* m_pName; + timer m_tm; + }; } // namespace crnlib diff --git a/crnlib/crn_traits.h b/crnlib/crn_traits.h index 06bef69..8031b03 100644 --- a/crnlib/crn_traits.h +++ b/crnlib/crn_traits.h @@ -2,7 +2,8 @@ // See Copyright Notice and license at the end of 
inc/crnlib.h #pragma once -namespace crnlib { +namespace crnlib +{ template struct int_traits { enum { cMin = crnlib::cINT32_MIN, diff --git a/crnlib/crn_utils.cpp b/crnlib/crn_utils.cpp index 0c64e1b..5bde3ad 100644 --- a/crnlib/crn_utils.cpp +++ b/crnlib/crn_utils.cpp @@ -2,59 +2,81 @@ #include "crn_core.h" #include "crn_utils.h" -namespace crnlib { -namespace utils { -void endian_switch_words(uint16* p, uint num) { - uint16* p_end = p + num; - while (p != p_end) { - uint16 k = *p; - *p++ = swap16(k); - } -} - -void endian_switch_dwords(uint32* p, uint num) { - uint32* p_end = p + num; - while (p != p_end) { - uint32 k = *p; - *p++ = swap32(k); - } -} - -void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch) { - if (!endian_switch) - memcpy(pDst, pSrc, num << 1U); - else { - uint16* pDst_end = pDst + num; - while (pDst != pDst_end) - *pDst++ = swap16(*pSrc++); - } -} - -void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch) { - if (!endian_switch) - memcpy(pDst, pSrc, num << 2U); - else { - uint32* pDst_end = pDst + num; - while (pDst != pDst_end) - *pDst++ = swap32(*pSrc++); - } -} - -uint compute_max_mips(uint width, uint height) { - if ((width | height) == 0) - return 0; - - uint num_mips = 1; - - while ((width > 1U) || (height > 1U)) { - width >>= 1U; - height >>= 1U; - num_mips++; - } - - return num_mips; -} - -} // namespace utils +namespace crnlib +{ + namespace utils + { + void endian_switch_words(uint16* p, uint num) + { + uint16* p_end = p + num; + while (p != p_end) + { + uint16 k = *p; + *p++ = swap16(k); + } + } + + void endian_switch_dwords(uint32* p, uint num) + { + uint32* p_end = p + num; + while (p != p_end) + { + uint32 k = *p; + *p++ = swap32(k); + } + } + + void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch) + { + if (!endian_switch) + { + memcpy(pDst, pSrc, num << 1U); + } + else + { + uint16* pDst_end = pDst + num; + while (pDst != pDst_end) + { + *pDst++ = 
swap16(*pSrc++); + } + } + } + + void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch) + { + if (!endian_switch) + { + memcpy(pDst, pSrc, num << 2U); + } + else + { + uint32* pDst_end = pDst + num; + while (pDst != pDst_end) + { + *pDst++ = swap32(*pSrc++); + } + } + } + + uint compute_max_mips(uint width, uint height) + { + if ((width | height) == 0) + { + return 0; + } + + uint num_mips = 1; + + while ((width > 1U) || (height > 1U)) + { + width >>= 1U; + height >>= 1U; + num_mips++; + } + + return num_mips; + } + + } // namespace utils } // namespace crnlib diff --git a/crnlib/crn_utils.h b/crnlib/crn_utils.h index 3b68069..302481a 100644 --- a/crnlib/crn_utils.h +++ b/crnlib/crn_utils.h @@ -2,12 +2,14 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once +#include "crn_core.h" + #define CRNLIB_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define CRNLIB_MAX(a, b) (((a) < (b)) ? (b) : (a)) #define CRNLIB_ARRAYSIZE(x) (sizeof(x) / sizeof(x[0])) -#ifdef _MSC_VER +#if defined(CRN_CC_MSVC) // Need to explictly extern these with MSVC, but not MinGW. 
extern "C" unsigned long __cdecl _lrotl(unsigned long, int); #pragma intrinsic(_lrotl) @@ -25,341 +27,425 @@ extern "C" unsigned long __cdecl _lrotr(unsigned long, int); #endif template -T decay_array_to_subtype(T (&a)[N]); +T decay_array_to_subtype(T(&a)[N]); #define CRNLIB_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) #define CRNLIB_SIZEOF_U32(x) static_cast(sizeof(x)) -namespace crnlib { -namespace utils { -template -inline void swap(T& l, T& r) { - T temp(l); - l = r; - r = temp; -} - -template -inline void zero_object(T& obj) { - memset((void*)&obj, 0, sizeof(obj)); -} - -template -inline void zero_this(T* pObj) { - memset((void*)pObj, 0, sizeof(*pObj)); -} - -inline bool is_bit_set(uint bits, uint mask) { - return (bits & mask) != 0; -} - -inline void set_bit(uint& bits, uint mask, bool state) { - if (state) - bits |= mask; - else - bits &= ~mask; -} - -inline bool is_flag_set(uint bits, uint flag) { - CRNLIB_ASSERT(flag < 32U); - return is_bit_set(bits, 1U << flag); -} - -inline void set_flag(uint& bits, uint flag, bool state) { - CRNLIB_ASSERT(flag < 32U); - set_bit(bits, 1U << flag, state); -} - -inline void invert_buf(void* pBuf, uint size) { - uint8* p = static_cast(pBuf); - - const uint half_size = size >> 1; - for (uint i = 0; i < half_size; i++) - utils::swap(p[i], p[size - 1U - i]); -} - -// buffer_is_little_endian is the endianness of the buffer's data -template -inline void write_obj(const T& obj, void* pBuf, bool buffer_is_little_endian) { - const uint8* pSrc = reinterpret_cast(&obj); - uint8* pDst = static_cast(pBuf); - - if (c_crnlib_little_endian_platform == buffer_is_little_endian) - memcpy(pDst, pSrc, sizeof(T)); - else { - for (uint i = 0; i < sizeof(T); i++) - pDst[i] = pSrc[sizeof(T) - 1 - i]; - } -} - -// buffer_is_little_endian is the endianness of the buffer's data -template -inline void read_obj(T& obj, const void* pBuf, bool buffer_is_little_endian) { - const uint8* pSrc = reinterpret_cast(pBuf); - uint8* pDst = 
reinterpret_cast(&obj); - - if (c_crnlib_little_endian_platform == buffer_is_little_endian) - memcpy(pDst, pSrc, sizeof(T)); - else { - for (uint i = 0; i < sizeof(T); i++) - pDst[i] = pSrc[sizeof(T) - 1 - i]; - } -} - -template -inline bool write_obj(const T& obj, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - if (buf_size < sizeof(T)) - return false; - - utils::write_obj(obj, pBuf, buffer_is_little_endian); - - pBuf = static_cast(pBuf) + sizeof(T); - buf_size -= sizeof(T); - - return true; -} - -inline bool write_val(uint8 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(uint16 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(uint val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(int val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(uint64 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(float val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} -inline bool write_val(double val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - return write_obj(val, pBuf, buf_size, buffer_is_little_endian); -} - -template -inline bool read_obj(T& obj, const void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { - if (buf_size < sizeof(T)) { - zero_object(obj); - return false; - } - - utils::read_obj(obj, pBuf, buffer_is_little_endian); - - pBuf = static_cast(pBuf) + sizeof(T); - buf_size -= sizeof(T); - - return true; -} - -#if 
defined(_MSC_VER) -static CRN_FORCE_INLINE uint16 swap16(uint16 x) { - return _byteswap_ushort(x); -} -static CRN_FORCE_INLINE uint32 swap32(uint32 x) { - return _byteswap_ulong(x); -} -static CRN_FORCE_INLINE uint64 swap64(uint64 x) { - return _byteswap_uint64(x); -} -#elif defined(__GNUC__) -static CRN_FORCE_INLINE uint16 swap16(uint16 x) { - return static_cast((x << 8U) | (x >> 8U)); -} -static CRN_FORCE_INLINE uint32 swap32(uint32 x) { - return __builtin_bswap32(x); -} -static CRN_FORCE_INLINE uint64 swap64(uint64 x) { - return __builtin_bswap64(x); -} +namespace crnlib +{ + namespace utils + { + template + inline void swap(T& l, T& r) + { + T temp(l); + l = r; + r = temp; + } + + template + inline void zero_object(T& obj) + { + memset((void*)&obj, 0, sizeof(obj)); + } + + template + inline void zero_this(T* pObj) + { + memset((void*)pObj, 0, sizeof(*pObj)); + } + + inline bool is_bit_set(uint bits, uint mask) + { + return (bits & mask) != 0; + } + + inline void set_bit(uint& bits, uint mask, bool state) + { + if (state) + { + bits |= mask; + } + else + { + bits &= ~mask; + } + } + + inline bool is_flag_set(uint bits, uint flag) + { + CRNLIB_ASSERT(flag < 32U); + return is_bit_set(bits, 1U << flag); + } + + inline void set_flag(uint& bits, uint flag, bool state) + { + CRNLIB_ASSERT(flag < 32U); + set_bit(bits, 1U << flag, state); + } + + inline void invert_buf(void* pBuf, uint size) + { + uint8* p = static_cast(pBuf); + + const uint half_size = size >> 1; + for (uint i = 0; i < half_size; i++) + { + utils::swap(p[i], p[size - 1U - i]); + } + } + + // buffer_is_little_endian is the endianness of the buffer's data + template + inline void write_obj(const T& obj, void* pBuf, bool buffer_is_little_endian) + { + const uint8* pSrc = reinterpret_cast(&obj); + uint8* pDst = static_cast(pBuf); + + if (c_crnlib_little_endian_platform == buffer_is_little_endian) + { + memcpy(pDst, pSrc, sizeof(T)); + } + else + { + for (uint i = 0; i < sizeof(T); i++) + { + pDst[i] = 
pSrc[sizeof(T) - 1 - i]; + } + } + } + + // buffer_is_little_endian is the endianness of the buffer's data + template + inline void read_obj(T& obj, const void* pBuf, bool buffer_is_little_endian) + { + const uint8* pSrc = reinterpret_cast(pBuf); + uint8* pDst = reinterpret_cast(&obj); + + if (c_crnlib_little_endian_platform == buffer_is_little_endian) + { + memcpy(pDst, pSrc, sizeof(T)); + } + else + { + for (uint i = 0; i < sizeof(T); i++) + { + pDst[i] = pSrc[sizeof(T) - 1 - i]; + } + } + } + + template + inline bool write_obj(const T& obj, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + if (buf_size < sizeof(T)) + { + return false; + } + + utils::write_obj(obj, pBuf, buffer_is_little_endian); + + pBuf = static_cast(pBuf) + sizeof(T); + buf_size -= sizeof(T); + + return true; + } + + inline bool write_val(uint8 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(uint16 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(uint val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(int val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(uint64 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(float val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + inline bool write_val(double val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + return write_obj(val, pBuf, buf_size, buffer_is_little_endian); + } + + 
template + inline bool read_obj(T& obj, const void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + if (buf_size < sizeof(T)) + { + zero_object(obj); + return false; + } + + utils::read_obj(obj, pBuf, buffer_is_little_endian); + + pBuf = static_cast(pBuf) + sizeof(T); + buf_size -= sizeof(T); + + return true; + } + +#if defined(CRN_CC_MSVC) + static CRN_FORCE_INLINE uint16 swap16(uint16 x) + { + return _byteswap_ushort(x); + } + static CRN_FORCE_INLINE uint32 swap32(uint32 x) + { + return _byteswap_ulong(x); + } + static CRN_FORCE_INLINE uint64 swap64(uint64 x) + { + return _byteswap_uint64(x); + } +#elif defined(CRN_CC_GNU) + static CRN_FORCE_INLINE uint16 swap16(uint16 x) + { + return static_cast((x << 8U) | (x >> 8U)); + } + static CRN_FORCE_INLINE uint32 swap32(uint32 x) + { + return __builtin_bswap32(x); + } + static CRN_FORCE_INLINE uint64 swap64(uint64 x) + { + return __builtin_bswap64(x); + } #else -static CRN_FORCE_INLINE uint16 swap16(uint16 x) { - return static_cast((x << 8U) | (x >> 8U)); -} -static CRN_FORCE_INLINE uint32 swap32(uint32 x) { - return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); -} -static CRN_FORCE_INLINE uint64 swap64(uint64 x) { - return (static_cast(swap32(static_cast(x))) << 32ULL) | swap32(static_cast(x >> 32U)); -} + static CRN_FORCE_INLINE uint16 swap16(uint16 x) { + return static_cast((x << 8U) | (x >> 8U)); + } + static CRN_FORCE_INLINE uint32 swap32(uint32 x) { + return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); + } + static CRN_FORCE_INLINE uint64 swap64(uint64 x) { + return (static_cast(swap32(static_cast(x))) << 32ULL) | swap32(static_cast(x >> 32U)); + } #endif -// Assumes x has been read from memory as a little endian value, converts to native endianness for manipulation. -CRN_FORCE_INLINE uint16 swap_le16_to_native(uint16 x) { - return c_crnlib_little_endian_platform ? 
x : swap16(x); -} -CRN_FORCE_INLINE uint32 swap_le32_to_native(uint32 x) { - return c_crnlib_little_endian_platform ? x : swap32(x); -} -CRN_FORCE_INLINE uint64 swap_le64_to_native(uint64 x) { - return c_crnlib_little_endian_platform ? x : swap64(x); -} - -// Assumes x has been read from memory as a big endian value, converts to native endianness for manipulation. -CRN_FORCE_INLINE uint16 swap_be16_to_native(uint16 x) { - return c_crnlib_big_endian_platform ? x : swap16(x); -} -CRN_FORCE_INLINE uint32 swap_be32_to_native(uint32 x) { - return c_crnlib_big_endian_platform ? x : swap32(x); -} -CRN_FORCE_INLINE uint64 swap_be64_to_native(uint64 x) { - return c_crnlib_big_endian_platform ? x : swap64(x); -} - -CRN_FORCE_INLINE uint32 read_le32(const void* p) { - return swap_le32_to_native(*static_cast(p)); -} -CRN_FORCE_INLINE void write_le32(void* p, uint32 x) { - *static_cast(p) = swap_le32_to_native(x); -} -CRN_FORCE_INLINE uint64 read_le64(const void* p) { - return swap_le64_to_native(*static_cast(p)); -} -CRN_FORCE_INLINE void write_le64(void* p, uint64 x) { - *static_cast(p) = swap_le64_to_native(x); -} - -CRN_FORCE_INLINE uint32 read_be32(const void* p) { - return swap_be32_to_native(*static_cast(p)); -} -CRN_FORCE_INLINE void write_be32(void* p, uint32 x) { - *static_cast(p) = swap_be32_to_native(x); -} -CRN_FORCE_INLINE uint64 read_be64(const void* p) { - return swap_be64_to_native(*static_cast(p)); -} -CRN_FORCE_INLINE void write_be64(void* p, uint64 x) { - *static_cast(p) = swap_be64_to_native(x); -} - -inline void endian_swap_mem16(uint16* p, uint n) { - while (n--) { - *p = swap16(*p); - ++p; - } -} -inline void endian_swap_mem32(uint32* p, uint n) { - while (n--) { - *p = swap32(*p); - ++p; - } -} -inline void endian_swap_mem64(uint64* p, uint n) { - while (n--) { - *p = swap64(*p); - ++p; - } -} - -inline void endian_swap_mem(void* p, uint size_in_bytes, uint type_size) { - switch (type_size) { - case sizeof(uint16): - endian_swap_mem16(static_cast(p), 
size_in_bytes / type_size); - break; - case sizeof(uint32): - endian_swap_mem32(static_cast(p), size_in_bytes / type_size); - break; - case sizeof(uint64): - endian_swap_mem64(static_cast(p), size_in_bytes / type_size); - break; - } -} - -inline void fast_memset(void* pDst, int val, size_t size) { - memset(pDst, val, size); -} - -inline void fast_memcpy(void* pDst, const void* pSrc, size_t size) { - memcpy(pDst, pSrc, size); -} - -inline uint count_leading_zeros(uint v) { - uint temp; - uint n = 32; - - temp = v >> 16; - if (temp) { - n -= 16; - v = temp; - } - - temp = v >> 8; - if (temp) { - n -= 8; - v = temp; - } - - temp = v >> 4; - if (temp) { - n -= 4; - v = temp; - } - - temp = v >> 2; - if (temp) { - n -= 2; - v = temp; - } - - temp = v >> 1; - if (temp) { - n -= 1; - v = temp; - } - - if (v & 1) - n--; - - return n; -} - -inline uint count_leading_zeros16(uint v) { - CRNLIB_ASSERT(v < 0x10000); - - uint temp; - uint n = 16; - - temp = v >> 8; - if (temp) { - n -= 8; - v = temp; - } - - temp = v >> 4; - if (temp) { - n -= 4; - v = temp; - } - - temp = v >> 2; - if (temp) { - n -= 2; - v = temp; - } - - temp = v >> 1; - if (temp) { - n -= 1; - v = temp; - } - - if (v & 1) - n--; - - return n; -} - -void endian_switch_words(uint16* p, uint num); -void endian_switch_dwords(uint32* p, uint num); -void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch); -void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch); - -uint compute_max_mips(uint width, uint height); - -} // namespace utils + // Assumes x has been read from memory as a little endian value, converts to native endianness for manipulation. + CRN_FORCE_INLINE uint16 swap_le16_to_native(uint16 x) + { + return c_crnlib_little_endian_platform ? x : swap16(x); + } + CRN_FORCE_INLINE uint32 swap_le32_to_native(uint32 x) + { + return c_crnlib_little_endian_platform ? 
x : swap32(x); + } + CRN_FORCE_INLINE uint64 swap_le64_to_native(uint64 x) + { + return c_crnlib_little_endian_platform ? x : swap64(x); + } + + // Assumes x has been read from memory as a big endian value, converts to native endianness for manipulation. + CRN_FORCE_INLINE uint16 swap_be16_to_native(uint16 x) + { + return c_crnlib_big_endian_platform ? x : swap16(x); + } + CRN_FORCE_INLINE uint32 swap_be32_to_native(uint32 x) + { + return c_crnlib_big_endian_platform ? x : swap32(x); + } + CRN_FORCE_INLINE uint64 swap_be64_to_native(uint64 x) + { + return c_crnlib_big_endian_platform ? x : swap64(x); + } + + CRN_FORCE_INLINE uint32 read_le32(const void* p) + { + return swap_le32_to_native(*static_cast(p)); + } + CRN_FORCE_INLINE void write_le32(void* p, uint32 x) + { + *static_cast(p) = swap_le32_to_native(x); + } + CRN_FORCE_INLINE uint64 read_le64(const void* p) + { + return swap_le64_to_native(*static_cast(p)); + } + CRN_FORCE_INLINE void write_le64(void* p, uint64 x) + { + *static_cast(p) = swap_le64_to_native(x); + } + + CRN_FORCE_INLINE uint32 read_be32(const void* p) + { + return swap_be32_to_native(*static_cast(p)); + } + CRN_FORCE_INLINE void write_be32(void* p, uint32 x) + { + *static_cast(p) = swap_be32_to_native(x); + } + CRN_FORCE_INLINE uint64 read_be64(const void* p) + { + return swap_be64_to_native(*static_cast(p)); + } + CRN_FORCE_INLINE void write_be64(void* p, uint64 x) + { + *static_cast(p) = swap_be64_to_native(x); + } + + inline void endian_swap_mem16(uint16* p, uint n) + { + while (n--) + { + *p = swap16(*p); + ++p; + } + } + inline void endian_swap_mem32(uint32* p, uint n) + { + while (n--) + { + *p = swap32(*p); + ++p; + } + } + inline void endian_swap_mem64(uint64* p, uint n) + { + while (n--) + { + *p = swap64(*p); + ++p; + } + } + + inline void endian_swap_mem(void* p, uint size_in_bytes, uint type_size) { + switch (type_size) + { + case sizeof(uint16) : + endian_swap_mem16(static_cast(p), size_in_bytes / type_size); + break; + case 
sizeof(uint32) : + endian_swap_mem32(static_cast(p), size_in_bytes / type_size); + break; + case sizeof(uint64) : + endian_swap_mem64(static_cast(p), size_in_bytes / type_size); + break; + } + } + + inline void fast_memset(void* pDst, int val, size_t size) + { + memset(pDst, val, size); + } + + inline void fast_memcpy(void* pDst, const void* pSrc, size_t size) + { + memcpy(pDst, pSrc, size); + } + + inline uint count_leading_zeros(uint v) + { + uint temp; + uint n = 32; + + temp = v >> 16; + if (temp) + { + n -= 16; + v = temp; + } + + temp = v >> 8; + if (temp) + { + n -= 8; + v = temp; + } + + temp = v >> 4; + if (temp) + { + n -= 4; + v = temp; + } + + temp = v >> 2; + if (temp) + { + n -= 2; + v = temp; + } + + temp = v >> 1; + if (temp) + { + n -= 1; + v = temp; + } + + if (v & 1) + { + n--; + } + + return n; + } + + inline uint count_leading_zeros16(uint v) + { + CRNLIB_ASSERT(v < 0x10000); + + uint temp; + uint n = 16; + + temp = v >> 8; + if (temp) + { + n -= 8; + v = temp; + } + + temp = v >> 4; + if (temp) + { + n -= 4; + v = temp; + } + + temp = v >> 2; + if (temp) + { + n -= 2; + v = temp; + } + + temp = v >> 1; + if (temp) + { + n -= 1; + v = temp; + } + + if (v & 1) + { + n--; + } + + return n; + } + + void endian_switch_words(uint16* p, uint num); + void endian_switch_dwords(uint32* p, uint num); + void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch); + void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch); + + uint compute_max_mips(uint width, uint height); + + } // namespace utils } // namespace crnlib From 597fc508af90037bf2e59d323722d19f4110f415 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 21:50:38 -0400 Subject: [PATCH 07/18] Format some files --- crnlib/crn_comp.h | 245 ++-- crnlib/crn_data_stream.cpp | 218 ++-- crnlib/crn_data_stream.h | 239 ++-- crnlib/crn_dxt.cpp | 787 +++++++------ crnlib/crn_dxt.h | 675 ++++++----- crnlib/crn_dxt_hc.h | 418 +++---- 
crnlib/crn_dxt_hc_common.cpp | 14 +- crnlib/crn_dxt_hc_common.h | 72 +- crnlib/crn_etc.h | 2 +- crnlib/crn_pixel_format.h | 4 +- crnlib/crn_resample_filters.cpp | 574 +++++---- crnlib/crn_resample_filters.h | 23 +- crnlib/crn_texture_comp.cpp | 888 ++++++++------ crnlib/crn_texture_comp.h | 58 +- crnlib/crn_texture_file_types.cpp | 210 ++-- crnlib/crn_texture_file_types.h | 90 +- crnlib/crn_vec.h | 1795 +++++++++++++++++------------ crnlib/crn_vec_interval.h | 96 +- crnlib/crnlib.cpp | 2 +- 19 files changed, 3618 insertions(+), 2792 deletions(-) diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h index c65007e..b71f6ea 100644 --- a/crnlib/crn_comp.h +++ b/crnlib/crn_comp.h @@ -2,9 +2,9 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once -#include "../inc/crn_defs.h" +#include "crn_defs.h" -#include "../inc/crnlib.h" +#include "crnlib.h" #include "crn_symbol_codec.h" #include "crn_dxt_hc.h" #include "crn_image.h" @@ -12,115 +12,136 @@ #include "crn_texture_comp.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT crn_comp : public itexture_comp { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_comp); - - public: - crn_comp(); - virtual ~crn_comp(); - - virtual const char* get_ext() const { return "CRN"; } - - virtual bool compress_init(const crn_comp_params&) { return true; }; - virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); - virtual void compress_deinit(); - - virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } - virtual crnlib::vector& get_comp_data() { return m_comp_data; } - - uint get_comp_data_size() const { return m_comp_data.size(); } - const uint8* get_comp_data_ptr() const { return m_comp_data.size() ? 
&m_comp_data[0] : NULL; } - - private: - task_pool m_task_pool; - const crn_comp_params* m_pParams; - - image_u8 m_images[cCRNMaxFaces][cCRNMaxLevels]; - - enum comp { - cColor, - cAlpha0, - cAlpha1, - cNumComps - }; - - bool m_has_comp[cNumComps]; - bool m_has_etc_color_blocks; - bool m_has_subblocks; - - struct level_details { - uint first_block; - uint num_blocks; - uint block_width; - }; - crnlib::vector m_levels; - - uint m_total_blocks; - crnlib::vector m_color_endpoints; - crnlib::vector m_alpha_endpoints; - crnlib::vector m_color_selectors; - crnlib::vector m_alpha_selectors; - crnlib::vector m_endpoint_indices; - crnlib::vector m_selector_indices; - - crnd::crn_header m_crn_header; - crnlib::vector m_comp_data; - - dxt_hc m_hvq; - - symbol_histogram m_reference_hist; - static_huffman_data_model m_reference_dm; - - crnlib::vector m_endpoint_remaping[2]; - symbol_histogram m_endpoint_index_hist[2]; - static_huffman_data_model m_endpoint_index_dm[2]; - - crnlib::vector m_selector_remaping[2]; - symbol_histogram m_selector_index_hist[2]; - static_huffman_data_model m_selector_index_dm[2]; - - crnlib::vector m_packed_blocks[cCRNMaxLevels]; - crnlib::vector m_packed_data_models; - crnlib::vector m_packed_color_endpoints; - crnlib::vector m_packed_color_selectors; - crnlib::vector m_packed_alpha_endpoints; - crnlib::vector m_packed_alpha_selectors; - - bool pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); - bool pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping); - bool pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); - bool pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); - bool pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); - bool pack_blocks( - uint group, - bool clear_histograms, - symbol_codec* pCodec, - const crnlib::vector* pColor_endpoint_remap, - const crnlib::vector* 
pColor_selector_remap, - const crnlib::vector* pAlpha_endpoint_remap, - const crnlib::vector* pAlpha_selector_remap - ); - - bool alias_images(); - void clear(); - bool quantize_images(); - - void optimize_color_endpoints_task(uint64 data, void* pData_ptr); - void optimize_color_selectors(); - void optimize_color(); - - void optimize_alpha_endpoints_task(uint64 data, void* pData_ptr); - void optimize_alpha_selectors(); - void optimize_alpha(); - - bool pack_data_models(); - static void append_vec(crnlib::vector& a, const void* p, uint size); - static void append_vec(crnlib::vector& a, const crnlib::vector& b); - bool create_comp_data(); - - bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); - bool compress_internal(); -}; +namespace crnlib +{ + class CRN_EXPORT crn_comp : public itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_comp); + public: + crn_comp(); + virtual ~crn_comp(); + + virtual const char* get_ext() const + { + return "CRN"; + } + + virtual bool compress_init(const crn_comp_params&) + { + return true; + }; + virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); + virtual void compress_deinit(); + + virtual const crnlib::vector& get_comp_data() const + { + return m_comp_data; + } + virtual crnlib::vector& get_comp_data() + { + return m_comp_data; + } + + uint get_comp_data_size() const + { + return m_comp_data.size(); + } + const uint8* get_comp_data_ptr() const + { + return m_comp_data.size() ? 
&m_comp_data[0] : NULL; + } + + private: + task_pool m_task_pool; + const crn_comp_params* m_pParams; + + image_u8 m_images[cCRNMaxFaces][cCRNMaxLevels]; + + enum comp + { + cColor, + cAlpha0, + cAlpha1, + cNumComps + }; + + bool m_has_comp[cNumComps]; + bool m_has_etc_color_blocks; + bool m_has_subblocks; + + struct level_details + { + uint first_block; + uint num_blocks; + uint block_width; + }; + crnlib::vector m_levels; + + uint m_total_blocks; + crnlib::vector m_color_endpoints; + crnlib::vector m_alpha_endpoints; + crnlib::vector m_color_selectors; + crnlib::vector m_alpha_selectors; + crnlib::vector m_endpoint_indices; + crnlib::vector m_selector_indices; + + crnd::crn_header m_crn_header; + crnlib::vector m_comp_data; + + dxt_hc m_hvq; + + symbol_histogram m_reference_hist; + static_huffman_data_model m_reference_dm; + + crnlib::vector m_endpoint_remaping[2]; + symbol_histogram m_endpoint_index_hist[2]; + static_huffman_data_model m_endpoint_index_dm[2]; + + crnlib::vector m_selector_remaping[2]; + symbol_histogram m_selector_index_hist[2]; + static_huffman_data_model m_selector_index_dm[2]; + + crnlib::vector m_packed_blocks[cCRNMaxLevels]; + crnlib::vector m_packed_data_models; + crnlib::vector m_packed_color_endpoints; + crnlib::vector m_packed_color_selectors; + crnlib::vector m_packed_alpha_endpoints; + crnlib::vector m_packed_alpha_selectors; + + bool pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_blocks( + uint group, + bool clear_histograms, + symbol_codec* pCodec, + const crnlib::vector* pColor_endpoint_remap, + const crnlib::vector* 
pColor_selector_remap, + const crnlib::vector* pAlpha_endpoint_remap, + const crnlib::vector* pAlpha_selector_remap + ); + + bool alias_images(); + void clear(); + bool quantize_images(); + + void optimize_color_endpoints_task(uint64 data, void* pData_ptr); + void optimize_color_selectors(); + void optimize_color(); + + void optimize_alpha_endpoints_task(uint64 data, void* pData_ptr); + void optimize_alpha_selectors(); + void optimize_alpha(); + + bool pack_data_models(); + static void append_vec(crnlib::vector& a, const void* p, uint size); + static void append_vec(crnlib::vector& a, const crnlib::vector& b); + bool create_comp_data(); + + bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); + bool compress_internal(); + }; } // namespace crnlib diff --git a/crnlib/crn_data_stream.cpp b/crnlib/crn_data_stream.cpp index c1ed214..f2a100e 100644 --- a/crnlib/crn_data_stream.cpp +++ b/crnlib/crn_data_stream.cpp @@ -3,112 +3,142 @@ #include "crn_core.h" #include "crn_data_stream.h" -namespace crnlib { -data_stream::data_stream() - : m_attribs(0), - m_opened(false), - m_error(false), - m_got_cr(false) { -} - -data_stream::data_stream(const char* pName, uint attribs) - : m_name(pName), - m_attribs(static_cast(attribs)), - m_opened(false), - m_error(false), - m_got_cr(false) { -} - -uint64 data_stream::skip(uint64 len) { - uint64 total_bytes_read = 0; - - const uint cBufSize = 1024; - uint8 buf[cBufSize]; - - while (len) { - const uint64 bytes_to_read = math::minimum(sizeof(buf), len); - const uint64 bytes_read = read(buf, static_cast(bytes_to_read)); - total_bytes_read += bytes_read; - - if (bytes_read != bytes_to_read) - break; - - len -= bytes_read; - } - - return total_bytes_read; -} - -bool data_stream::read_line(dynamic_string& str) { - str.empty(); - - for (;;) { - const int c = read_byte(); - - const bool prev_got_cr = m_got_cr; - m_got_cr = false; - - if (c < 0) { - if (!str.is_empty()) - break; - - return false; - } else if ((26 == 
c) || (!c)) - continue; - else if (13 == c) { - m_got_cr = true; - break; - } else if (10 == c) { - if (prev_got_cr) - continue; - - break; +namespace crnlib +{ + data_stream::data_stream(): + m_attribs(0), + m_opened(false), + m_error(false), + m_got_cr(false) + { } - str.append_char(static_cast(c)); - } + data_stream::data_stream(const char* pName, uint attribs): + m_name(pName), + m_attribs(static_cast(attribs)), + m_opened(false), + m_error(false), + m_got_cr(false) + { + } + + uint64 data_stream::skip(uint64 len) + { + uint64 total_bytes_read = 0; + + const uint cBufSize = 1024; + uint8 buf[cBufSize]; - return true; -} + while (len) + { + const uint64 bytes_to_read = math::minimum(sizeof(buf), len); + const uint64 bytes_read = read(buf, static_cast(bytes_to_read)); + total_bytes_read += bytes_read; -bool data_stream::printf(const char* p, ...) { - va_list args; + if (bytes_read != bytes_to_read) + { + break; + } - va_start(args, p); - dynamic_string buf; - buf.format_args(p, args); - va_end(args); + len -= bytes_read; + } + + return total_bytes_read; + } - return write(buf.get_ptr(), buf.get_len() * sizeof(char)) == buf.get_len() * sizeof(char); -} + bool data_stream::read_line(dynamic_string& str) + { + str.empty(); + + for (;;) + { + const int c = read_byte(); + + const bool prev_got_cr = m_got_cr; + m_got_cr = false; + + if (c < 0) + { + if (!str.is_empty()) + { + break; + } + return false; + } + else if ((26 == c) || (!c)) + { + continue; + } + else if (13 == c) + { + m_got_cr = true; + break; + } + else if (10 == c) + { + if (prev_got_cr) + { + continue; + } + break; + } + + str.append_char(static_cast(c)); + } + + return true; + } -bool data_stream::write_line(const dynamic_string& str) { - if (!str.is_empty()) - return write(str.get_ptr(), str.get_len()) == str.get_len(); + bool data_stream::printf(const char* p, ...) 
+ { + va_list args; - return true; -} + va_start(args, p); + dynamic_string buf; + buf.format_args(p, args); + va_end(args); -bool data_stream::read_array(vector& buf) { - if (buf.size() < get_remaining()) { - if (get_remaining() > 1024U * 1024U * 1024U) - return false; + return write(buf.get_ptr(), buf.get_len() * sizeof(char)) == buf.get_len() * sizeof(char); + } - buf.resize((uint)get_remaining()); - } + bool data_stream::write_line(const dynamic_string& str) + { + if (!str.is_empty()) + { + return write(str.get_ptr(), str.get_len()) == str.get_len(); + } - if (!get_remaining()) { - buf.resize(0); - return true; - } + return true; + } - return read(&buf[0], buf.size()) == buf.size(); -} + bool data_stream::read_array(vector& buf) + { + if (buf.size() < get_remaining()) + { + if (get_remaining() > 1024U * 1024U * 1024U) + { + return false; + } + + buf.resize((uint)get_remaining()); + } + + if (!get_remaining()) + { + buf.resize(0); + return true; + } + + return read(&buf[0], buf.size()) == buf.size(); + } -bool data_stream::write_array(const vector& buf) { - if (!buf.empty()) - return write(&buf[0], buf.size()) == buf.size(); - return true; -} + bool data_stream::write_array(const vector& buf) + { + if (!buf.empty()) + { + return write(&buf[0], buf.size()) == buf.size(); + } + return true; + } } // namespace crnlib diff --git a/crnlib/crn_data_stream.h b/crnlib/crn_data_stream.h index 16487df..ceb118e 100644 --- a/crnlib/crn_data_stream.h +++ b/crnlib/crn_data_stream.h @@ -4,97 +4,152 @@ #include "crn_export.h" -namespace crnlib { -enum data_stream_attribs { - cDataStreamReadable = 1, - cDataStreamWritable = 2, - cDataStreamSeekable = 4 -}; - -const int64 DATA_STREAM_SIZE_UNKNOWN = cINT64_MAX; -const int64 DATA_STREAM_SIZE_INFINITE = cUINT64_MAX; - -class CRN_EXPORT data_stream { - data_stream(const data_stream&); - data_stream& operator=(const data_stream&); - - public: - data_stream(); - data_stream(const char* pName, uint attribs); - - virtual ~data_stream() 
{} - - virtual data_stream* get_parent() { return NULL; } - - virtual bool close() { - m_opened = false; - m_error = false; - m_got_cr = false; - return true; - } - - typedef uint16 attribs_t; - inline attribs_t get_attribs() const { return m_attribs; } - - inline bool is_opened() const { return m_opened; } - - inline bool is_readable() const { return utils::is_bit_set(m_attribs, cDataStreamReadable); } - inline bool is_writable() const { return utils::is_bit_set(m_attribs, cDataStreamWritable); } - inline bool is_seekable() const { return utils::is_bit_set(m_attribs, cDataStreamSeekable); } - - inline bool get_error() const { return m_error; } - - inline const dynamic_string& get_name() const { return m_name; } - inline void set_name(const char* pName) { m_name.set(pName); } - - virtual uint read(void* pBuf, uint len) = 0; - virtual uint64 skip(uint64 len); - - virtual uint write(const void* pBuf, uint len) = 0; - virtual bool flush() = 0; - - virtual bool is_size_known() const { return true; } - - // Returns DATA_STREAM_SIZE_UNKNOWN if size hasn't been determined yet, or DATA_STREAM_SIZE_INFINITE for infinite streams. 
- virtual uint64 get_size() = 0; - virtual uint64 get_remaining() = 0; - - virtual uint64 get_ofs() = 0; - virtual bool seek(int64 ofs, bool relative) = 0; - - virtual const void* get_ptr() const { return NULL; } - - inline int read_byte() { - uint8 c; - if (read(&c, 1) != 1) - return -1; - return c; - } - inline bool write_byte(uint8 c) { return write(&c, 1) == 1; } - - bool read_line(dynamic_string& str); - bool printf(const char* p, ...); - bool write_line(const dynamic_string& str); - bool write_bom() { - uint16 bom = 0xFEFF; - return write(&bom, sizeof(bom)) == sizeof(bom); - } - - bool read_array(vector& buf); - bool write_array(const vector& buf); - - protected: - dynamic_string m_name; - - attribs_t m_attribs; - bool m_opened : 1; - bool m_error : 1; - bool m_got_cr : 1; - - inline void set_error() { m_error = true; } - inline void clear_error() { m_error = false; } - - inline void post_seek() { m_got_cr = false; } -}; +namespace crnlib +{ + enum data_stream_attribs + { + cDataStreamReadable = 1, + cDataStreamWritable = 2, + cDataStreamSeekable = 4 + }; + + const int64 DATA_STREAM_SIZE_UNKNOWN = cINT64_MAX; + const int64 DATA_STREAM_SIZE_INFINITE = cUINT64_MAX; + + class CRN_EXPORT data_stream + { + data_stream(const data_stream&); + data_stream& operator=(const data_stream&); + + public: + data_stream(); + data_stream(const char* pName, uint attribs); + + virtual ~data_stream() + { + } + + virtual data_stream* get_parent() + { + return NULL; + } + + virtual bool close() + { + m_opened = false; + m_error = false; + m_got_cr = false; + return true; + } + + typedef uint16 attribs_t; + inline attribs_t get_attribs() const + { + return m_attribs; + } + + inline bool is_opened() const + { + return m_opened; + } + + inline bool is_readable() const + { + return utils::is_bit_set(m_attribs, cDataStreamReadable); + } + inline bool is_writable() const + { + return utils::is_bit_set(m_attribs, cDataStreamWritable); + } + inline bool is_seekable() const + { + return 
utils::is_bit_set(m_attribs, cDataStreamSeekable); + } + + inline bool get_error() const + { + return m_error; + } + + inline const dynamic_string& get_name() const + { + return m_name; + } + inline void set_name(const char* pName) + { + m_name.set(pName); + } + + virtual uint read(void* pBuf, uint len) = 0; + virtual uint64 skip(uint64 len); + + virtual uint write(const void* pBuf, uint len) = 0; + virtual bool flush() = 0; + + virtual bool is_size_known() const + { + return true; + } + + // Returns DATA_STREAM_SIZE_UNKNOWN if size hasn't been determined yet, or DATA_STREAM_SIZE_INFINITE for infinite streams. + virtual uint64 get_size() = 0; + virtual uint64 get_remaining() = 0; + + virtual uint64 get_ofs() = 0; + virtual bool seek(int64 ofs, bool relative) = 0; + + virtual const void* get_ptr() const + { + return NULL; + } + + inline int read_byte() + { + uint8 c; + if (read(&c, 1) != 1) + { + return -1; + } + return c; + } + inline bool write_byte(uint8 c) + { + return write(&c, 1) == 1; + } + + bool read_line(dynamic_string& str); + bool printf(const char* p, ...); + bool write_line(const dynamic_string& str); + bool write_bom() + { + uint16 bom = 0xFEFF; + return write(&bom, sizeof(bom)) == sizeof(bom); + } + + bool read_array(vector& buf); + bool write_array(const vector& buf); + + protected: + dynamic_string m_name; + + attribs_t m_attribs; + bool m_opened : 1; + bool m_error : 1; + bool m_got_cr : 1; + + inline void set_error() + { + m_error = true; + } + inline void clear_error() + { + m_error = false; + } + + inline void post_seek() + { + m_got_cr = false; + } + }; } // namespace crnlib diff --git a/crnlib/crn_dxt.cpp b/crnlib/crn_dxt.cpp index c13a8ad..33b4c36 100644 --- a/crnlib/crn_dxt.cpp +++ b/crnlib/crn_dxt.cpp @@ -1,5 +1,6 @@ // File: crn_dxt.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_dxt.h" #include "crn_dxt1.h" @@ -7,371 +8,427 @@ #include "crn_dxt_fast.h" #include "crn_intersect.h" 
-namespace crnlib { -const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = {0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U}; -const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = {0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U}; - -const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues] = {0U, 5U, 1U, 2U, 3U, 4U, 0U, 0U}; - -const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = {0U, 2U, 3U, 1U}; -const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; - -const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 5, 4, 3, 2, 6, 7}; -const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 7, 6, 5, 4, 3, 2}; - -const char* get_dxt_format_string(dxt_format fmt) { - switch (fmt) { - case cDXT1: - return "DXT1"; - case cDXT1A: - return "DXT1A"; - case cDXT3: - return "DXT3"; - case cDXT5: - return "DXT5"; - case cDXT5A: - return "DXT5A"; - case cDXN_XY: - return "DXN_XY"; - case cDXN_YX: - return "DXN_YX"; - case cETC1: - return "ETC1"; - case cETC2: - return "ETC2"; - case cETC2A: - return "ETC2A"; - case cETC1S: - return "ETC1S"; - case cETC2AS: - return "ETC2AS"; - default: - break; - } - CRNLIB_ASSERT(false); - return "?"; -} - -const char* get_dxt_compressor_name(crn_dxt_compressor_type c) { - switch (c) { - case cCRNDXTCompressorCRN: - return "CRN"; - case cCRNDXTCompressorCRNF: - return "CRNF"; - case cCRNDXTCompressorRYG: - return "RYG"; +namespace crnlib +{ + const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = { 0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U }; + const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = { 0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U }; + + const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues] = { 0U, 5U, 1U, 2U, 3U, 4U, 0U, 0U }; + + const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = { 0U, 2U, 3U, 1U }; + const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = { 0U, 3U, 1U, 2U }; + + const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 5, 4, 3, 2, 6, 7 }; + const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 7, 6, 5, 4, 3, 2 
}; + + const char* get_dxt_format_string(dxt_format fmt) + { + switch (fmt) + { + case cDXT1: + return "DXT1"; + case cDXT1A: + return "DXT1A"; + case cDXT3: + return "DXT3"; + case cDXT5: + return "DXT5"; + case cDXT5A: + return "DXT5A"; + case cDXN_XY: + return "DXN_XY"; + case cDXN_YX: + return "DXN_YX"; + case cETC1: + return "ETC1"; + case cETC2: + return "ETC2"; + case cETC2A: + return "ETC2A"; + case cETC1S: + return "ETC1S"; + case cETC2AS: + return "ETC2AS"; + default: + break; + } + CRNLIB_ASSERT(false); + return "?"; + } + + const char* get_dxt_compressor_name(crn_dxt_compressor_type c) + { + switch (c) + { + case cCRNDXTCompressorCRN: + return "CRN"; + case cCRNDXTCompressorCRNF: + return "CRNF"; + case cCRNDXTCompressorRYG: + return "RYG"; #if CRNLIB_SUPPORT_ATI_COMPRESS - case cCRNDXTCompressorATI: - return "ATI"; + case cCRNDXTCompressorATI: + return "ATI"; #endif - default: - break; - } - CRNLIB_ASSERT(false); - return "?"; -} - -uint get_dxt_format_bits_per_pixel(dxt_format fmt) { - switch (fmt) { - case cDXT1: - case cDXT1A: - case cDXT5A: - case cETC1: - case cETC2: - case cETC1S: - return 4; - case cDXT3: - case cDXT5: - case cDXN_XY: - case cDXN_YX: - case cETC2A: - case cETC2AS: - return 8; - default: - break; - } - CRNLIB_ASSERT(false); - return 0; -} - -bool get_dxt_format_has_alpha(dxt_format fmt) { - switch (fmt) { - case cDXT1A: - case cDXT3: - case cDXT5: - case cDXT5A: - case cETC2A: - case cETC2AS: - return true; - default: - break; - } - return false; -} - -uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint bias) { - uint r = color.r; - uint g = color.g; - uint b = color.b; - - if (scaled) { - r = (r * 31U + bias) / 255U; - g = (g * 63U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - - r = math::minimum(r, 31U); - g = math::minimum(g, 63U); - b = math::minimum(b, 31U); - - return static_cast(b | (g << 5U) | (r << 11U)); -} - -uint16 dxt1_block::pack_color(uint r, uint g, uint b, bool scaled, uint bias) { 
- return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); -} - -color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint alpha) { - uint b = packed_color & 31U; - uint g = (packed_color >> 5U) & 63U; - uint r = (packed_color >> 11U) & 31U; - - if (scaled) { - b = (b << 3U) | (b >> 2U); - g = (g << 2U) | (g >> 4U); - r = (r << 3U) | (r >> 2U); - } - - return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); -} - -void dxt1_block::unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled) { - color_quad_u8 c(unpack_color(packed_color, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; -} - -void dxt1_block::get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4) { - color_quad_u8 col0(unpack_color(packed_col0, false)); - color_quad_u8 col1(unpack_color(packed_col1, false)); - - pDst[0].r = (3 * col0.r * 22) / 8; - pDst[0].b = (3 * col0.b * 22) / 8; - pDst[0].g = (col0.g << 2) | (col0.g >> 4); - pDst[0].a = 0xFF; - - pDst[1].r = (3 * col1.r * 22) / 8; - pDst[1].g = (col1.g << 2) | (col1.g >> 4); - pDst[1].b = (3 * col1.b * 22) / 8; - pDst[1].a = 0xFF; - - int gdiff = pDst[1].g - pDst[0].g; - - if (color4) //(packed_col0 > packed_col1) - { - pDst[2].r = static_cast(((2 * col0.r + col1.r) * 22) / 8); - pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 80) / 256); - pDst[2].b = static_cast(((2 * col0.b + col1.b) * 22) / 8); - pDst[2].a = 0xFF; - - pDst[3].r = static_cast(((2 * col1.r + col0.r) * 22) / 8); - pDst[3].g = static_cast((256 * pDst[1].g - gdiff / 4 + 128 - gdiff * 80) / 256); - pDst[3].b = static_cast(((2 * col1.b + col0.b) * 22) / 8); - pDst[3].a = 0xFF; - } else { - pDst[2].r = static_cast(((col0.r + col1.r) * 33) / 8); - pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 128) / 256); - pDst[2].b = static_cast(((col0.b + col1.b) * 33) / 8); - pDst[2].a = 0xFF; - - pDst[3].r = 0x00; - pDst[3].g = 0x00; - pDst[3].b = 0x00; - pDst[3].a = 
0x00; - } -} - -uint dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); - - pDst[0] = c0; - pDst[1] = c1; - pDst[2].set_noclamp_rgba((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); - pDst[3].set_noclamp_rgba(0, 0, 0, 0); - - return 3; -} - -uint dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); - - pDst[0] = c0; - pDst[1] = c1; - - // The compiler changes the div3 into a mul by recip+shift. - pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); - pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); - - return 4; -} - -uint dxt1_block::get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); - - pDst[0] = c0; - pDst[1] = c1; - pDst[2].set_noclamp_rgba((c0.r + c1.r + 1) >> 1U, (c0.g + c1.g + 1) >> 1U, (c0.b + c1.b + 1) >> 1U, 255U); - pDst[3].set_noclamp_rgba(0, 0, 0, 0); - - return 3; -} - -uint dxt1_block::get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); - - pDst[0] = c0; - pDst[1] = c1; - - // 12/14/08 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? - // The compiler changes the div3 into a mul by recip+shift. 
- pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); - pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); - - return 4; -} - -uint dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) { - if (color0 > color1) - return get_block_colors4(pDst, color0, color1); - else - return get_block_colors3(pDst, color0, color1); -} - -uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { - if (color0 > color1) - return get_block_colors4_round(pDst, color0, color1); - else - return get_block_colors3_round(pDst, color0, color1); -} - -color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha) { - CRNLIB_ASSERT(index < 2); - return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); -} - -uint dxt1_block::pack_endpoints(uint lo, uint hi) { - CRNLIB_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); - return lo | (hi << 16U); -} - -void dxt3_block::set_alpha(uint x, uint y, uint value, bool scaled) { - CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); - - if (scaled) { - CRNLIB_ASSERT(value <= 0xFF); - value = (value * 15U + 128U) / 255U; - } else { - CRNLIB_ASSERT(value <= 0xF); - } - - uint ofs = (y << 1U) + (x >> 1U); - uint c = m_alpha[ofs]; - - c &= ~(0xF << ((x & 1U) << 2U)); - c |= (value << ((x & 1U) << 2U)); - - m_alpha[ofs] = static_cast(c); -} - -uint dxt3_block::get_alpha(uint x, uint y, bool scaled) const { - CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); - - uint value = m_alpha[(y << 1U) + (x >> 1U)]; - if (x & 1) - value >>= 4; - value &= 0xF; - - if (scaled) - value = (value << 4U) | value; - - return value; -} - -uint dxt5_block::get_block_values6(color_quad_u8* pDst, uint l, uint h) { - pDst[0].a = static_cast(l); - pDst[1].a = static_cast(h); - pDst[2].a = static_cast((l * 4 + h) / 5); - pDst[3].a 
= static_cast((l * 3 + h * 2) / 5); - pDst[4].a = static_cast((l * 2 + h * 3) / 5); - pDst[5].a = static_cast((l + h * 4) / 5); - pDst[6].a = 0; - pDst[7].a = 255; - return 6; -} - -uint dxt5_block::get_block_values8(color_quad_u8* pDst, uint l, uint h) { - pDst[0].a = static_cast(l); - pDst[1].a = static_cast(h); - pDst[2].a = static_cast((l * 6 + h) / 7); - pDst[3].a = static_cast((l * 5 + h * 2) / 7); - pDst[4].a = static_cast((l * 4 + h * 3) / 7); - pDst[5].a = static_cast((l * 3 + h * 4) / 7); - pDst[6].a = static_cast((l * 2 + h * 5) / 7); - pDst[7].a = static_cast((l + h * 6) / 7); - return 8; -} - -uint dxt5_block::get_block_values(color_quad_u8* pDst, uint l, uint h) { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); -} - -uint dxt5_block::get_block_values6(uint* pDst, uint l, uint h) { - pDst[0] = l; - pDst[1] = h; - pDst[2] = (l * 4 + h) / 5; - pDst[3] = (l * 3 + h * 2) / 5; - pDst[4] = (l * 2 + h * 3) / 5; - pDst[5] = (l + h * 4) / 5; - pDst[6] = 0; - pDst[7] = 255; - return 6; -} - -uint dxt5_block::get_block_values8(uint* pDst, uint l, uint h) { - pDst[0] = l; - pDst[1] = h; - pDst[2] = (l * 6 + h) / 7; - pDst[3] = (l * 5 + h * 2) / 7; - pDst[4] = (l * 4 + h * 3) / 7; - pDst[5] = (l * 3 + h * 4) / 7; - pDst[6] = (l * 2 + h * 5) / 7; - pDst[7] = (l + h * 6) / 7; - return 8; -} - -uint dxt5_block::unpack_endpoint(uint packed, uint index) { - CRNLIB_ASSERT(index < 2); - return (packed >> (8 * index)) & 0xFF; -} - -uint dxt5_block::pack_endpoints(uint lo, uint hi) { - CRNLIB_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); - return lo | (hi << 8U); -} - -uint dxt5_block::get_block_values(uint* pDst, uint l, uint h) { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); -} - + default: + break; + } + CRNLIB_ASSERT(false); + return "?"; + } + + uint get_dxt_format_bits_per_pixel(dxt_format fmt) + { + switch (fmt) + { + case cDXT1: + case cDXT1A: + case cDXT5A: + case 
cETC1: + case cETC2: + case cETC1S: + return 4; + case cDXT3: + case cDXT5: + case cDXN_XY: + case cDXN_YX: + case cETC2A: + case cETC2AS: + return 8; + default: + break; + } + CRNLIB_ASSERT(false); + return 0; + } + + bool get_dxt_format_has_alpha(dxt_format fmt) + { + switch (fmt) { + case cDXT1A: + case cDXT3: + case cDXT5: + case cDXT5A: + case cETC2A: + case cETC2AS: + return true; + default: + break; + } + return false; + } + + uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint bias) + { + uint r = color.r; + uint g = color.g; + uint b = color.b; + + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 11U)); + } + + uint16 dxt1_block::pack_color(uint r, uint g, uint b, bool scaled, uint bias) + { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); + } + + color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint alpha) + { + uint b = packed_color & 31U; + uint g = (packed_color >> 5U) & 63U; + uint r = (packed_color >> 11U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); + } + + void dxt1_block::unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled) + { + color_quad_u8 c(unpack_color(packed_color, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void dxt1_block::get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4) + { + color_quad_u8 col0(unpack_color(packed_col0, false)); + color_quad_u8 col1(unpack_color(packed_col1, false)); + + pDst[0].r = (3 * col0.r * 22) / 8; + pDst[0].b = (3 * col0.b * 22) / 8; + pDst[0].g = (col0.g << 2) | (col0.g >> 4); + pDst[0].a = 0xFF; + + pDst[1].r = (3 * col1.r * 22) / 8; + 
pDst[1].g = (col1.g << 2) | (col1.g >> 4); + pDst[1].b = (3 * col1.b * 22) / 8; + pDst[1].a = 0xFF; + + int gdiff = pDst[1].g - pDst[0].g; + + if (color4) //(packed_col0 > packed_col1) + { + pDst[2].r = static_cast(((2 * col0.r + col1.r) * 22) / 8); + pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 80) / 256); + pDst[2].b = static_cast(((2 * col0.b + col1.b) * 22) / 8); + pDst[2].a = 0xFF; + + pDst[3].r = static_cast(((2 * col1.r + col0.r) * 22) / 8); + pDst[3].g = static_cast((256 * pDst[1].g - gdiff / 4 + 128 - gdiff * 80) / 256); + pDst[3].b = static_cast(((2 * col1.b + col0.b) * 22) / 8); + pDst[3].a = 0xFF; + } + else + { + pDst[2].r = static_cast(((col0.r + col1.r) * 33) / 8); + pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 128) / 256); + pDst[2].b = static_cast(((col0.b + col1.b) * 33) / 8); + pDst[2].a = 0xFF; + + pDst[3].r = 0x00; + pDst[3].g = 0x00; + pDst[3].b = 0x00; + pDst[3].a = 0x00; + } + } + + uint dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set_noclamp_rgba((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + pDst[3].set_noclamp_rgba(0, 0, 0, 0); + + return 3; + } + + uint dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // The compiler changes the div3 into a mul by recip+shift. 
+ pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + + return 4; + } + + uint dxt1_block::get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set_noclamp_rgba((c0.r + c1.r + 1) >> 1U, (c0.g + c1.g + 1) >> 1U, (c0.b + c1.b + 1) >> 1U, 255U); + pDst[3].set_noclamp_rgba(0, 0, 0, 0); + + return 3; + } + + uint dxt1_block::get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // 12/14/08 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // The compiler changes the div3 into a mul by recip+shift. 
+ pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + + return 4; + } + + uint dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + { + return get_block_colors4(pDst, color0, color1); + } + else + { + return get_block_colors3(pDst, color0, color1); + } + } + + uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { + if (color0 > color1) + { + return get_block_colors4_round(pDst, color0, color1); + } + else + { + return get_block_colors3_round(pDst, color0, color1); + } + } + + color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha) + { + CRNLIB_ASSERT(index < 2); + return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); + } + + uint dxt1_block::pack_endpoints(uint lo, uint hi) + { + CRNLIB_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); + } + + void dxt3_block::set_alpha(uint x, uint y, uint value, bool scaled) + { + CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + if (scaled) + { + CRNLIB_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } + else + { + CRNLIB_ASSERT(value <= 0xF); + } + + uint ofs = (y << 1U) + (x >> 1U); + uint c = m_alpha[ofs]; + + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); + + m_alpha[ofs] = static_cast(c); + } + + uint dxt3_block::get_alpha(uint x, uint y, bool scaled) const + { + CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + uint value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + { + value >>= 4; + } + value &= 0xF; + + if (scaled) + { + value = (value << 4U) | value; + } + + return value; + } + + uint dxt5_block::get_block_values6(color_quad_u8* pDst, uint l, uint h) + { + pDst[0].a = static_cast(l); + 
pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h) / 5); + pDst[3].a = static_cast((l * 3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } + + uint dxt5_block::get_block_values8(color_quad_u8* pDst, uint l, uint h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } + + uint dxt5_block::get_block_values(color_quad_u8* pDst, uint l, uint h) + { + if (l > h) + { + return get_block_values8(pDst, l, h); + } + else + { + return get_block_values6(pDst, l, h); + } + } + + uint dxt5_block::get_block_values6(uint* pDst, uint l, uint h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + uint dxt5_block::get_block_values8(uint* pDst, uint l, uint h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; + } + + uint dxt5_block::unpack_endpoint(uint packed, uint index) + { + CRNLIB_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; + } + + uint dxt5_block::pack_endpoints(uint lo, uint hi) + { + CRNLIB_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); + } + + uint dxt5_block::get_block_values(uint* pDst, uint l, uint h) + { + if (l > h) + { + return get_block_values8(pDst, l, h); + } + else + { + return get_block_values6(pDst, l, h); + } + } } // namespace crnlib diff --git a/crnlib/crn_dxt.h 
b/crnlib/crn_dxt.h index 4fdc51c..bde8a2b 100644 --- a/crnlib/crn_dxt.h +++ b/crnlib/crn_dxt.h @@ -1,7 +1,7 @@ // File: crn_dxt.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once -#include "../inc/crnlib.h" +#include "crnlib.h" #include "crn_color.h" #include "crn_vec.h" #include "crn_rand.h" @@ -12,315 +12,372 @@ #define CRNLIB_DXT_ALT_ROUNDING 1 -namespace crnlib { -enum dxt_constants { - cDXT1BytesPerBlock = 8U, - cDXT5NBytesPerBlock = 16U, +namespace crnlib +{ + enum dxt_constants + { + cDXT1BytesPerBlock = 8U, + cDXT5NBytesPerBlock = 16U, - cDXT5SelectorBits = 3U, - cDXT5SelectorValues = 1U << cDXT5SelectorBits, - cDXT5SelectorMask = cDXT5SelectorValues - 1U, + cDXT5SelectorBits = 3U, + cDXT5SelectorValues = 1U << cDXT5SelectorBits, + cDXT5SelectorMask = cDXT5SelectorValues - 1U, - cDXT1SelectorBits = 2U, - cDXT1SelectorValues = 1U << cDXT1SelectorBits, - cDXT1SelectorMask = cDXT1SelectorValues - 1U, - - cDXTBlockShift = 2U, - cDXTBlockSize = 1U << cDXTBlockShift -}; - -enum dxt_format { - cDXTInvalid = -1, - - // cDXT1/1A must appear first! - cDXT1, - cDXT1A, - - cDXT3, - cDXT5, - cDXT5A, - - cDXN_XY, // inverted relative to standard ATI2, 360's DXN - cDXN_YX, // standard ATI2, - - cETC1, - cETC2, - cETC2A, - cETC1S, - cETC2AS, -}; - -const float cDXT1MaxLinearValue = 3.0f; -const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; - -const float cDXT5MaxLinearValue = 7.0f; -const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; - -// Converts DXT1 raw color selector index to a linear value. -CRN_EXPORT extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; - -// Converts DXT5 raw alpha selector index to a linear value. -CRN_EXPORT extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; - -// Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). 
-CRN_EXPORT extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; - -// Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). -CRN_EXPORT extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; - -CRN_EXPORT extern const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues]; - -CRN_EXPORT extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; -CRN_EXPORT extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; - -CRN_EXPORT const char* get_dxt_format_string(dxt_format fmt); -CRN_EXPORT uint get_dxt_format_bits_per_pixel(dxt_format fmt); -CRN_EXPORT bool get_dxt_format_has_alpha(dxt_format fmt); - -CRN_EXPORT const char* get_dxt_quality_string(crn_dxt_quality q); - -CRN_EXPORT const char* get_dxt_compressor_name(crn_dxt_compressor_type c); - -struct dxt1_block { - uint8 m_low_color[2]; - uint8 m_high_color[2]; - - enum { cNumSelectorBytes = 4 }; - uint8 m_selectors[cNumSelectorBytes]; - - inline void clear() { - utils::zero_this(this); - } - - // These methods assume the in-memory rep is in LE byte order. 
- inline uint get_low_color() const { - return m_low_color[0] | (m_low_color[1] << 8U); - } - - inline uint get_high_color() const { - return m_high_color[0] | (m_high_color[1] << 8U); - } - - inline void set_low_color(uint16 c) { - m_low_color[0] = static_cast(c & 0xFF); - m_low_color[1] = static_cast((c >> 8) & 0xFF); - } - - inline void set_high_color(uint16 c) { - m_high_color[0] = static_cast(c & 0xFF); - m_high_color[1] = static_cast((c >> 8) & 0xFF); - } - - inline bool is_constant_color_block() const { return get_low_color() == get_high_color(); } - inline bool is_alpha_block() const { return get_low_color() <= get_high_color(); } - inline bool is_non_alpha_block() const { return !is_alpha_block(); } - - inline uint get_selector(uint x, uint y) const { - CRNLIB_ASSERT((x < 4U) && (y < 4U)); - return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; - } - - inline void set_selector(uint x, uint y, uint val) { - CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); - - m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); - m_selectors[y] |= (val << (x * cDXT1SelectorBits)); - } - - inline void flip_x(uint w = 4, uint h = 4) { - for (uint x = 0; x < (w / 2); x++) { - for (uint y = 0; y < h; y++) { - const uint c = get_selector(x, y); - set_selector(x, y, get_selector((w - 1) - x, y)); - set_selector((w - 1) - x, y, c); - } - } - } - - inline void flip_y(uint w = 4, uint h = 4) { - for (uint y = 0; y < (h / 2); y++) { - for (uint x = 0; x < w; x++) { - const uint c = get_selector(x, y); - set_selector(x, y, get_selector(x, (h - 1) - y)); - set_selector(x, (h - 1) - y, c); - } - } - } - - static uint16 pack_color(const color_quad_u8& color, bool scaled, uint bias = 127U); - static uint16 pack_color(uint r, uint g, uint b, bool scaled, uint bias = 127U); - - static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint alpha = 255U); - static void unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); - - 
static uint get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); - static uint get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1); - - static uint get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); - static uint get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1); - - // pDst must point to an array at least cDXT1SelectorValues long. - static uint get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); - - static uint get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1); - - static color_quad_u8 unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha = 255U); - static uint pack_endpoints(uint lo, uint hi); - - static void get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4); -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_block); - -struct dxt3_block { - enum { cNumAlphaBytes = 8 }; - uint8 m_alpha[cNumAlphaBytes]; - - void set_alpha(uint x, uint y, uint value, bool scaled); - uint get_alpha(uint x, uint y, bool scaled) const; - - inline void flip_x(uint w = 4, uint h = 4) { - for (uint x = 0; x < (w / 2); x++) { - for (uint y = 0; y < h; y++) { - const uint c = get_alpha(x, y, false); - set_alpha(x, y, get_alpha((w - 1) - x, y, false), false); - set_alpha((w - 1) - x, y, c, false); - } - } - } - - inline void flip_y(uint w = 4, uint h = 4) { - for (uint y = 0; y < (h / 2); y++) { - for (uint x = 0; x < w; x++) { - const uint c = get_alpha(x, y, false); - set_alpha(x, y, get_alpha(x, (h - 1) - y, false), false); - set_alpha(x, (h - 1) - y, c, false); - } - } - } -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt3_block); - -struct dxt5_block { - uint8 m_endpoints[2]; - - enum { cNumSelectorBytes = 6 }; - uint8 m_selectors[cNumSelectorBytes]; - - inline void clear() { - utils::zero_this(this); - } - - inline uint get_low_alpha() const { - return m_endpoints[0]; - } - - inline uint get_high_alpha() const { - 
return m_endpoints[1]; - } - - inline void set_low_alpha(uint i) { - CRNLIB_ASSERT(i <= cUINT8_MAX); - m_endpoints[0] = static_cast(i); - } - - inline void set_high_alpha(uint i) { - CRNLIB_ASSERT(i <= cUINT8_MAX); - m_endpoints[1] = static_cast(i); - } - - inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - - uint get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } - uint get_selectors_as_word(uint index) { - CRNLIB_ASSERT(index < 3); - return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); - } - - inline uint get_selector(uint x, uint y) const { - CRNLIB_ASSERT((x < 4U) && (y < 4U)); - - uint selector_index = (y * 4) + x; - uint bit_index = selector_index * cDXT5SelectorBits; - - uint byte_index = bit_index >> 3; - uint bit_ofs = bit_index & 7; - - uint v = m_selectors[byte_index]; - if (byte_index < (cNumSelectorBytes - 1)) - v |= (m_selectors[byte_index + 1] << 8); - - return (v >> bit_ofs) & 7; - } - - inline void set_selector(uint x, uint y, uint val) { - CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); - - uint selector_index = (y * 4) + x; - uint bit_index = selector_index * cDXT5SelectorBits; - - uint byte_index = bit_index >> 3; - uint bit_ofs = bit_index & 7; - - uint v = m_selectors[byte_index]; - if (byte_index < (cNumSelectorBytes - 1)) - v |= (m_selectors[byte_index + 1] << 8); - - v &= (~(7 << bit_ofs)); - v |= (val << bit_ofs); - - m_selectors[byte_index] = static_cast(v); - if (byte_index < (cNumSelectorBytes - 1)) - m_selectors[byte_index + 1] = static_cast(v >> 8); - } - - inline void flip_x(uint w = 4, uint h = 4) { - for (uint x = 0; x < (w / 2); x++) { - for (uint y = 0; y < h; y++) { - const uint c = get_selector(x, y); - set_selector(x, y, get_selector((w - 1) - x, y)); - set_selector((w - 1) - x, y, c); - } - } - } - - inline void flip_y(uint w = 4, uint h = 4) { - for (uint y = 0; y < (h / 2); y++) { - for (uint x = 0; x < w; x++) { - const uint c = 
get_selector(x, y); - set_selector(x, y, get_selector(x, (h - 1) - y)); - set_selector(x, (h - 1) - y, c); - } - } - } - - enum { cMaxSelectorValues = 8 }; - - // Results written to alpha channel. - static uint get_block_values6(color_quad_u8* pDst, uint l, uint h); - static uint get_block_values8(color_quad_u8* pDst, uint l, uint h); - static uint get_block_values(color_quad_u8* pDst, uint l, uint h); - - static uint get_block_values6(uint* pDst, uint l, uint h); - static uint get_block_values8(uint* pDst, uint l, uint h); - // pDst must point to an array at least cDXT5SelectorValues long. - static uint get_block_values(uint* pDst, uint l, uint h); - - static uint unpack_endpoint(uint packed, uint index); - static uint pack_endpoints(uint lo, uint hi); -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt5_block); - -struct dxt_pixel_block { - color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] - - inline void clear() { - utils::zero_object(*this); - } -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_pixel_block); + cDXT1SelectorBits = 2U, + cDXT1SelectorValues = 1U << cDXT1SelectorBits, + cDXT1SelectorMask = cDXT1SelectorValues - 1U, + + cDXTBlockShift = 2U, + cDXTBlockSize = 1U << cDXTBlockShift, + }; + + enum dxt_format + { + cDXTInvalid = -1, + + // cDXT1/1A must appear first! + cDXT1, + cDXT1A, + + cDXT3, + cDXT5, + cDXT5A, + + cDXN_XY, // inverted relative to standard ATI2, 360's DXN + cDXN_YX, // standard ATI2, + + cETC1, + cETC2, + cETC2A, + cETC1S, + cETC2AS, + }; + + const float cDXT1MaxLinearValue = 3.0f; + const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; + + const float cDXT5MaxLinearValue = 7.0f; + const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; + + // Converts DXT1 raw color selector index to a linear value. + CRN_EXPORT extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; + + // Converts DXT5 raw alpha selector index to a linear value. 
+ CRN_EXPORT extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; + + // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). + CRN_EXPORT extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; + + // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). + CRN_EXPORT extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; + + CRN_EXPORT extern const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues]; + + CRN_EXPORT extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; + CRN_EXPORT extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; + + CRN_EXPORT const char* get_dxt_format_string(dxt_format fmt); + CRN_EXPORT uint get_dxt_format_bits_per_pixel(dxt_format fmt); + CRN_EXPORT bool get_dxt_format_has_alpha(dxt_format fmt); + + CRN_EXPORT const char* get_dxt_quality_string(crn_dxt_quality q); + + CRN_EXPORT const char* get_dxt_compressor_name(crn_dxt_compressor_type c); + + struct dxt1_block + { + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + // These methods assume the in-memory rep is in LE byte order. 
+ inline uint get_low_color() const + { + return m_low_color[0] | (m_low_color[1] << 8U); + } + + inline uint get_high_color() const + { + return m_high_color[0] | (m_high_color[1] << 8U); + } + + inline void set_low_color(uint16 c) + { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline void set_high_color(uint16 c) + { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline bool is_constant_color_block() const + { + return get_low_color() == get_high_color(); + } + inline bool is_alpha_block() const + { + return get_low_color() <= get_high_color(); + } + inline bool is_non_alpha_block() const { + return !is_alpha_block(); + } + + inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } + + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } + + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector((w - 1) - x, y)); + set_selector((w - 1) - x, y, c); + } + } + } + + inline void flip_y(uint w = 4, uint h = 4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector(x, (h - 1) - y)); + set_selector(x, (h - 1) - y, c); + } + } + } + + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint alpha = 255U); + static void unpack_color(uint& r, uint& g, uint& 
b, uint16 packed_color, bool scaled); + + static uint get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static uint get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + // pDst must point to an array at least cDXT1SelectorValues long. + static uint get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static uint get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha = 255U); + static uint pack_endpoints(uint lo, uint hi); + + static void get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4); + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_block); + + struct dxt3_block + { + enum { cNumAlphaBytes = 8 }; + uint8 m_alpha[cNumAlphaBytes]; + + void set_alpha(uint x, uint y, uint value, bool scaled); + uint get_alpha(uint x, uint y, bool scaled) const; + + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_alpha(x, y, false); + set_alpha(x, y, get_alpha((w - 1) - x, y, false), false); + set_alpha((w - 1) - x, y, c, false); + } + } + } + + inline void flip_y(uint w = 4, uint h = 4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_alpha(x, y, false); + set_alpha(x, y, get_alpha(x, (h - 1) - y, false), false); + set_alpha(x, (h - 1) - y, c, false); + } + } + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt3_block); + + struct dxt5_block + { + uint8 m_endpoints[2]; + + enum { cNumSelectorBytes = 6 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint get_low_alpha() const + { + 
return m_endpoints[0]; + } + + inline uint get_high_alpha() const + { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint i) + { + CRNLIB_ASSERT(i <= cUINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint i) + { + CRNLIB_ASSERT(i <= cUINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + inline bool is_alpha6_block() const + { + return get_low_alpha() <= get_high_alpha(); + } + + uint get_endpoints_as_word() const + { + return m_endpoints[0] | (m_endpoints[1] << 8); + } + uint get_selectors_as_word(uint index) + { + CRNLIB_ASSERT(index < 3); + return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); + } + + inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x < 4U) && (y < 4U)); + + uint selector_index = (y * 4) + x; + uint bit_index = selector_index * cDXT5SelectorBits; + + uint byte_index = bit_index >> 3; + uint bit_ofs = bit_index & 7; + + uint v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + + uint selector_index = (y * 4) + x; + uint bit_index = selector_index * cDXT5SelectorBits; + + uint byte_index = bit_index >> 3; + uint bit_ofs = bit_index & 7; + + uint v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + { + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + } + + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector((w - 1) - x, y)); + set_selector((w - 1) - x, y, c); + } + } + } + + inline void flip_y(uint w = 4, uint h = 
4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector(x, (h - 1) - y)); + set_selector(x, (h - 1) - y, c); + } + } + } + + enum { cMaxSelectorValues = 8 }; + + // Results written to alpha channel. + static uint get_block_values6(color_quad_u8* pDst, uint l, uint h); + static uint get_block_values8(color_quad_u8* pDst, uint l, uint h); + static uint get_block_values(color_quad_u8* pDst, uint l, uint h); + + static uint get_block_values6(uint* pDst, uint l, uint h); + static uint get_block_values8(uint* pDst, uint l, uint h); + // pDst must point to an array at least cDXT5SelectorValues long. + static uint get_block_values(uint* pDst, uint l, uint h); + + static uint unpack_endpoint(uint packed, uint index); + static uint pack_endpoints(uint lo, uint hi); + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt5_block); + + struct dxt_pixel_block + { + color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] + + inline void clear() + { + utils::zero_object(*this); + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_pixel_block); } // namespace crnlib diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h index e259c9e..7e31217 100644 --- a/crnlib/crn_dxt_hc.h +++ b/crnlib/crn_dxt_hc.h @@ -12,202 +12,228 @@ #include "crn_threading.h" #define CRN_NO_FUNCTION_DEFINITIONS -#include "../inc/crnlib.h" - -namespace crnlib { -const uint cTotalCompressionPhases = 25; - -class dxt_hc { - public: - dxt_hc(); - ~dxt_hc(); - - struct endpoint_indices_details { - union { - struct { - uint16 color; - uint16 alpha0; - uint16 alpha1; - }; - uint16 component[3]; +#include "crnlib.h" + +namespace crnlib +{ + const uint cTotalCompressionPhases = 25; + + class dxt_hc + { + public: + dxt_hc(); + ~dxt_hc(); + + struct endpoint_indices_details + { + union { + struct { + uint16 color; + uint16 alpha0; + uint16 alpha1; + }; + uint16 component[3]; + }; + uint8 reference; + endpoint_indices_details() + { + 
utils::zero_object(*this); + } + }; + + struct selector_indices_details + { + union { + struct { + uint16 color; + uint16 alpha0; + uint16 alpha1; + }; + uint16 component[3]; + }; + selector_indices_details() + { + utils::zero_object(*this); + } + }; + + struct tile_details + { + crnlib::vector pixels; + float weight; + vec<6, float> color_endpoint; + vec<2, float> alpha_endpoints[2]; + uint16 cluster_indices[3]; + }; + crnlib::vector m_tiles; + uint m_num_tiles; + float m_color_derating[cCRNMaxLevels][8]; + float m_alpha_derating[8]; + float m_uint8_to_float[256]; + + color_quad_u8(*m_blocks)[16]; + uint m_num_blocks; + crnlib::vector m_block_weights; + crnlib::vector m_block_encodings; + crnlib::vector m_block_selectors[3]; + crnlib::vector m_color_selectors; + crnlib::vector m_alpha_selectors; + crnlib::vector m_color_selectors_used; + crnlib::vector m_alpha_selectors_used; + crnlib::vector m_tile_indices; + crnlib::vector m_endpoint_indices; + crnlib::vector m_selector_indices; + + struct params + { + params(): + m_num_blocks(0), + m_num_levels(0), + m_num_faces(0), + m_format(cDXT1), + m_perceptual(true), + m_hierarchical(true), + m_color_endpoint_codebook_size(3072), + m_color_selector_codebook_size(3072), + m_alpha_endpoint_codebook_size(3072), + m_alpha_selector_codebook_size(3072), + m_adaptive_tile_color_psnr_derating(2.0f), + m_adaptive_tile_alpha_psnr_derating(2.0f), + m_adaptive_tile_color_alpha_weighting_ratio(3.0f), + m_debugging(false), + m_pProgress_func(0), + m_pProgress_func_data(0) + { + m_alpha_component_indices[0] = 3; + m_alpha_component_indices[1] = 0; + for (uint i = 0; i < cCRNMaxLevels; i++) + { + m_levels[i].m_first_block = 0; + m_levels[i].m_num_blocks = 0; + m_levels[i].m_block_width = 0; + } + } + + uint m_num_blocks; + uint m_num_levels; + uint m_num_faces; + + struct { + uint m_first_block; + uint m_num_blocks; + uint m_block_width; + float m_weight; + } m_levels[cCRNMaxLevels]; + + dxt_format m_format; + bool m_perceptual; + bool 
m_hierarchical; + + uint m_color_endpoint_codebook_size; + uint m_color_selector_codebook_size; + uint m_alpha_endpoint_codebook_size; + uint m_alpha_selector_codebook_size; + + float m_adaptive_tile_color_psnr_derating; + float m_adaptive_tile_alpha_psnr_derating; + float m_adaptive_tile_color_alpha_weighting_ratio; + uint m_alpha_component_indices[2]; + + task_pool* m_pTask_pool; + bool m_debugging; + crn_progress_callback_func m_pProgress_func; + void* m_pProgress_func_data; + }; + + void clear(); + bool compress( + color_quad_u8(*blocks)[16], + crnlib::vector& endpoint_indices, + crnlib::vector& selector_indices, + crnlib::vector& color_endpoints, + crnlib::vector& alpha_endpoints, + crnlib::vector& color_selectors, + crnlib::vector& alpha_selectors, + const params& p + ); + + private: + params m_params; + + uint m_num_alpha_blocks; + bool m_has_color_blocks; + bool m_has_etc_color_blocks; + bool m_has_subblocks; + + enum { + cColor = 0, + cAlpha0 = 1, + cAlpha1 = 2, + cNumComps = 3 + }; + + struct color_cluster + { + color_cluster() : + first_endpoint(0), + second_endpoint(0) + { + } + + crnlib::vector blocks[3]; + crnlib::vector pixels; + uint first_endpoint; + uint second_endpoint; + color_quad_u8 color_values[4]; + }; + crnlib::vector m_color_clusters; + + struct alpha_cluster + { + alpha_cluster(): + first_endpoint(0), + second_endpoint(0) + { + } + + crnlib::vector blocks[3]; + crnlib::vector pixels; + uint first_endpoint; + uint second_endpoint; + uint alpha_values[8]; + bool refined_alpha; + uint refined_alpha_values[8]; + }; + crnlib::vector m_alpha_clusters; + + crn_thread_id_t m_main_thread_id; + bool m_canceled; + task_pool* m_pTask_pool; + + int m_prev_phase_index; + int m_prev_percentage_complete; + + vec<6, float> palettize_color(color_quad_u8* pixels, uint pixels_count); + vec<2, float> palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index); + void determine_tiles_task(uint64 data, void* pData_ptr); + void 
determine_tiles_task_etc(uint64 data, void* pData_ptr); + + void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr); + void determine_color_endpoint_codebook_task_etc(uint64 data, void* pData_ptr); + void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr); + void determine_color_endpoints(); + + void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr); + void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr); + void determine_alpha_endpoints(); + + void create_color_selector_codebook_task(uint64 data, void* pData_ptr); + void create_color_selector_codebook(); + + void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr); + void create_alpha_selector_codebook(); + + bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); }; - uint8 reference; - endpoint_indices_details() { utils::zero_object(*this); } - }; - - struct selector_indices_details { - union { - struct { - uint16 color; - uint16 alpha0; - uint16 alpha1; - }; - uint16 component[3]; - }; - selector_indices_details() { utils::zero_object(*this); } - }; - - struct tile_details { - crnlib::vector pixels; - float weight; - vec<6, float> color_endpoint; - vec<2, float> alpha_endpoints[2]; - uint16 cluster_indices[3]; - }; - crnlib::vector m_tiles; - uint m_num_tiles; - float m_color_derating[cCRNMaxLevels][8]; - float m_alpha_derating[8]; - float m_uint8_to_float[256]; - - color_quad_u8 (*m_blocks)[16]; - uint m_num_blocks; - crnlib::vector m_block_weights; - crnlib::vector m_block_encodings; - crnlib::vector m_block_selectors[3]; - crnlib::vector m_color_selectors; - crnlib::vector m_alpha_selectors; - crnlib::vector m_color_selectors_used; - crnlib::vector m_alpha_selectors_used; - crnlib::vector m_tile_indices; - crnlib::vector m_endpoint_indices; - crnlib::vector m_selector_indices; - - struct params { - params() - : m_num_blocks(0), - m_num_levels(0), - m_num_faces(0), - m_format(cDXT1), - 
m_perceptual(true), - m_hierarchical(true), - m_color_endpoint_codebook_size(3072), - m_color_selector_codebook_size(3072), - m_alpha_endpoint_codebook_size(3072), - m_alpha_selector_codebook_size(3072), - m_adaptive_tile_color_psnr_derating(2.0f), - m_adaptive_tile_alpha_psnr_derating(2.0f), - m_adaptive_tile_color_alpha_weighting_ratio(3.0f), - m_debugging(false), - m_pProgress_func(0), - m_pProgress_func_data(0) { - m_alpha_component_indices[0] = 3; - m_alpha_component_indices[1] = 0; - for (uint i = 0; i < cCRNMaxLevels; i++) { - m_levels[i].m_first_block = 0; - m_levels[i].m_num_blocks = 0; - m_levels[i].m_block_width = 0; - } - } - - uint m_num_blocks; - uint m_num_levels; - uint m_num_faces; - - struct { - uint m_first_block; - uint m_num_blocks; - uint m_block_width; - float m_weight; - } m_levels[cCRNMaxLevels]; - - dxt_format m_format; - bool m_perceptual; - bool m_hierarchical; - - uint m_color_endpoint_codebook_size; - uint m_color_selector_codebook_size; - uint m_alpha_endpoint_codebook_size; - uint m_alpha_selector_codebook_size; - - float m_adaptive_tile_color_psnr_derating; - float m_adaptive_tile_alpha_psnr_derating; - float m_adaptive_tile_color_alpha_weighting_ratio; - uint m_alpha_component_indices[2]; - - task_pool* m_pTask_pool; - bool m_debugging; - crn_progress_callback_func m_pProgress_func; - void* m_pProgress_func_data; - }; - - void clear(); - bool compress( - color_quad_u8 (*blocks)[16], - crnlib::vector& endpoint_indices, - crnlib::vector& selector_indices, - crnlib::vector& color_endpoints, - crnlib::vector& alpha_endpoints, - crnlib::vector& color_selectors, - crnlib::vector& alpha_selectors, - const params& p - ); - - private: - params m_params; - - uint m_num_alpha_blocks; - bool m_has_color_blocks; - bool m_has_etc_color_blocks; - bool m_has_subblocks; - - enum { - cColor = 0, - cAlpha0 = 1, - cAlpha1 = 2, - cNumComps = 3 - }; - - struct color_cluster { - color_cluster() : first_endpoint(0), second_endpoint(0) {} - crnlib::vector 
blocks[3]; - crnlib::vector pixels; - uint first_endpoint; - uint second_endpoint; - color_quad_u8 color_values[4]; - }; - crnlib::vector m_color_clusters; - - struct alpha_cluster { - alpha_cluster() : first_endpoint(0), second_endpoint(0) {} - crnlib::vector blocks[3]; - crnlib::vector pixels; - uint first_endpoint; - uint second_endpoint; - uint alpha_values[8]; - bool refined_alpha; - uint refined_alpha_values[8]; - }; - crnlib::vector m_alpha_clusters; - - crn_thread_id_t m_main_thread_id; - bool m_canceled; - task_pool* m_pTask_pool; - - int m_prev_phase_index; - int m_prev_percentage_complete; - - vec<6, float> palettize_color(color_quad_u8* pixels, uint pixels_count); - vec<2, float> palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index); - void determine_tiles_task(uint64 data, void* pData_ptr); - void determine_tiles_task_etc(uint64 data, void* pData_ptr); - - void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr); - void determine_color_endpoint_codebook_task_etc(uint64 data, void* pData_ptr); - void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr); - void determine_color_endpoints(); - - void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr); - void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr); - void determine_alpha_endpoints(); - - void create_color_selector_codebook_task(uint64 data, void* pData_ptr); - void create_color_selector_codebook(); - - void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr); - void create_alpha_selector_codebook(); - - bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); -}; } // namespace crnlib diff --git a/crnlib/crn_dxt_hc_common.cpp b/crnlib/crn_dxt_hc_common.cpp index 2d3d2ef..d43bb9f 100644 --- a/crnlib/crn_dxt_hc_common.cpp +++ b/crnlib/crn_dxt_hc_common.cpp @@ -1,10 +1,12 @@ // File: crn_dxt_hc_common.cpp // See Copyright Notice and license at the end of inc/crnlib.h + 
#include "crn_core.h" #include "crn_dxt_hc_common.h" -namespace crnlib { -chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = +namespace crnlib +{ + chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = { {1, {{0, 0, 8, 8, 0}}}, @@ -17,9 +19,10 @@ chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = {3, {{0, 0, 4, 8, 3}, {4, 0, 4, 4, 6}, {4, 4, 4, 4, 8}}}, {3, {{4, 0, 4, 8, 4}, {0, 0, 4, 4, 5}, {0, 4, 4, 4, 7}}}, - {4, {{0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}}; + {4, {{0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}} + }; -chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = + chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = { // 2x2 {0, 0, 8, 8, 0}, @@ -36,6 +39,7 @@ chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = {0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, - {4, 4, 4, 4, 8}}; + {4, 4, 4, 4, 8} + }; } // namespace crnlib diff --git a/crnlib/crn_dxt_hc_common.h b/crnlib/crn_dxt_hc_common.h index 1f098fb..33e4fd2 100644 --- a/crnlib/crn_dxt_hc_common.h +++ b/crnlib/crn_dxt_hc_common.h @@ -1,42 +1,46 @@ // File: crn_dxt_hc_common.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -struct chunk_tile_desc { - // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. 
- uint m_x_ofs; - uint m_y_ofs; - uint m_width; - uint m_height; - uint m_layout_index; -}; - -struct chunk_encoding_desc { - uint m_num_tiles; - chunk_tile_desc m_tiles[4]; -}; - -const uint cChunkPixelWidth = 8; -const uint cChunkPixelHeight = 8; -const uint cChunkBlockWidth = 2; -const uint cChunkBlockHeight = 2; - -const uint cChunkMaxTiles = 4; - -const uint cBlockPixelWidthShift = 2; -const uint cBlockPixelHeightShift = 2; - -const uint cBlockPixelWidth = 4; -const uint cBlockPixelHeight = 4; - -const uint cNumChunkEncodings = 8; -CRN_EXPORT extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; - -const uint cNumChunkTileLayouts = 9; -const uint cFirst4x4ChunkTileLayout = 5; -CRN_EXPORT extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; +namespace crnlib +{ + struct chunk_tile_desc + { + // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. + uint m_x_ofs; + uint m_y_ofs; + uint m_width; + uint m_height; + uint m_layout_index; + }; + + struct chunk_encoding_desc + { + uint m_num_tiles; + chunk_tile_desc m_tiles[4]; + }; + + const uint cChunkPixelWidth = 8; + const uint cChunkPixelHeight = 8; + const uint cChunkBlockWidth = 2; + const uint cChunkBlockHeight = 2; + + const uint cChunkMaxTiles = 4; + + const uint cBlockPixelWidthShift = 2; + const uint cBlockPixelHeightShift = 2; + + const uint cBlockPixelWidth = 4; + const uint cBlockPixelHeight = 4; + + const uint cNumChunkEncodings = 8; + CRN_EXPORT extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; + + const uint cNumChunkTileLayouts = 9; + const uint cFirst4x4ChunkTileLayout = 5; + CRN_EXPORT extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; } // namespace crnlib diff --git a/crnlib/crn_etc.h b/crnlib/crn_etc.h index b312b2b..322c990 100644 --- a/crnlib/crn_etc.h +++ b/crnlib/crn_etc.h @@ -1,7 +1,7 @@ // File: crn_etc.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once -#include 
"../inc/crnlib.h" +#include "crnlib.h" #include "crn_dxt.h" #include "crn_export.h" diff --git a/crnlib/crn_pixel_format.h b/crnlib/crn_pixel_format.h index fa288cf..c8654c8 100644 --- a/crnlib/crn_pixel_format.h +++ b/crnlib/crn_pixel_format.h @@ -2,8 +2,8 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" -#include "../inc/crnlib.h" -#include "../inc/dds_defs.h" +#include "crnlib.h" +#include "dds_defs.h" namespace crnlib { namespace pixel_format_helpers { diff --git a/crnlib/crn_resample_filters.cpp b/crnlib/crn_resample_filters.cpp index 852ac70..7b315ef 100644 --- a/crnlib/crn_resample_filters.cpp +++ b/crnlib/crn_resample_filters.cpp @@ -1,278 +1,371 @@ // File: crn_resample_filters.cpp // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ + #include "crn_core.h" #include "crn_resample_filters.h" -namespace crnlib { #ifndef M_PI #define M_PI 3.14159265358979323846 #endif -// To add your own filter, insert the new function below and update the filter table. -// There is no need to make the filter function particularly fast, because it's -// only called during initializing to create the X and Y axis contributor tables. +namespace crnlib +{ + // To add your own filter, insert the new function below and update the filter table. + // There is no need to make the filter function particularly fast, because it's + // only called during initializing to create the X and Y axis contributor tables. 
#define BOX_FILTER_SUPPORT (0.5f) -static float box_filter(float t) /* pulse/Fourier window */ -{ - // make_clist() calls the filter function with t inverted (pos = left, neg = right) - if ((t >= -0.5f) && (t < 0.5f)) - return 1.0f; - else - return 0.0f; -} + static float box_filter(float t) /* pulse/Fourier window */ + { + // make_clist() calls the filter function with t inverted (pos = left, neg = right) + if ((t >= -0.5f) && (t < 0.5f)) + { + return 1.0f; + } + else + { + return 0.0f; + } + } #define TENT_FILTER_SUPPORT (1.0f) -static float tent_filter(float t) /* box (*) box, bilinear/triangle */ -{ - if (t < 0.0f) - t = -t; - - if (t < 1.0f) - return 1.0f - t; - else - return 0.0f; -} + static float tent_filter(float t) /* box (*) box, bilinear/triangle */ + { + if (t < 0.0f) + { + t = -t; + } + if (t < 1.0f) + { + return 1.0f - t; + } + else + { + return 0.0f; + } + } #define BELL_SUPPORT (1.5f) -static float bell_filter(float t) /* box (*) box (*) box */ -{ - if (t < 0.0f) - t = -t; + static float bell_filter(float t) /* box (*) box (*) box */ + { + if (t < 0.0f) + { + t = -t; + } - if (t < .5f) - return (.75f - (t * t)); + if (t < .5f) + { + return (.75f - (t * t)); + } - if (t < 1.5f) { - t = (t - 1.5f); - return (.5f * (t * t)); - } + if (t < 1.5f) + { + t = (t - 1.5f); + return (.5f * (t * t)); + } - return (0.0f); -} + return 0.0f; + } #define B_SPLINE_SUPPORT (2.0f) -static float B_spline_filter(float t) /* box (*) box (*) box (*) box */ -{ - float tt; + static float B_spline_filter(float t) /* box (*) box (*) box (*) box */ + { + float tt; + + if (t < 0.0f) + { + t = -t; + } + + if (t < 1.0f) + { + tt = t * t; + return ((.5f * tt * t) - tt + (2.0f / 3.0f)); + } + else if (t < 2.0f) + { + t = 2.0f - t; + return ((1.0f / 6.0f) * (t * t * t)); + } + + return (0.0f); + } + + // Dodgson, N., "Quadratic Interpolation for Image Resampling" +#define QUADRATIC_SUPPORT 1.5f + static float quadratic(float t, const float R) + { + if (t < 0.0f) + { + t = -t; + } + 
if (t < QUADRATIC_SUPPORT) + { + float tt = t * t; + if (t <= .5f) + { + return (-2.0f * R) * tt + .5f * (R + 1.0f); + } + else + { + return (R * tt) + (-2.0f * R - .5f) * t + (3.0f / 4.0f) * (R + 1.0f); + } + } + else + { + return 0.0f; + } + } + + static float quadratic_interp_filter(float t) + { + return quadratic(t, 1.0f); + } - if (t < 0.0f) - t = -t; + static float quadratic_approx_filter(float t) + { + return quadratic(t, .5f); + } - if (t < 1.0f) { - tt = t * t; - return ((.5f * tt * t) - tt + (2.0f / 3.0f)); - } else if (t < 2.0f) { - t = 2.0f - t; - return ((1.0f / 6.0f) * (t * t * t)); - } + static float quadratic_mix_filter(float t) + { + return quadratic(t, .8f); + } + + // Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics." + // Computer Graphics, Vol. 22, No. 4, pp. 221-228. + // (B, C) + // (1/3, 1/3) - Defaults recommended by Mitchell and Netravali + // (1, 0) - Equivalent to the Cubic B-Spline + // (0, 0.5) - Equivalent to the Catmull-Rom Spline + // (0, C) - The family of Cardinal Cubic Splines + // (B, 0) - Duff's tensioned B-Splines. + static float mitchell(float t, const float B, const float C) + { + float tt; - return (0.0f); -} + tt = t * t; -// Dodgson, N., "Quadratic Interpolation for Image Resampling" -#define QUADRATIC_SUPPORT 1.5f -static float quadratic(float t, const float R) { - if (t < 0.0f) - t = -t; - if (t < QUADRATIC_SUPPORT) { - float tt = t * t; - if (t <= .5f) - return (-2.0f * R) * tt + .5f * (R + 1.0f); - else - return (R * tt) + (-2.0f * R - .5f) * t + (3.0f / 4.0f) * (R + 1.0f); - } else - return 0.0f; -} - -static float quadratic_interp_filter(float t) { - return quadratic(t, 1.0f); -} - -static float quadratic_approx_filter(float t) { - return quadratic(t, .5f); -} - -static float quadratic_mix_filter(float t) { - return quadratic(t, .8f); -} - -// Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics." -// Computer Graphics, Vol. 22, No. 4, pp. 221-228. 
-// (B, C) -// (1/3, 1/3) - Defaults recommended by Mitchell and Netravali -// (1, 0) - Equivalent to the Cubic B-Spline -// (0, 0.5) - Equivalent to the Catmull-Rom Spline -// (0, C) - The family of Cardinal Cubic Splines -// (B, 0) - Duff's tensioned B-Splines. -static float mitchell(float t, const float B, const float C) { - float tt; - - tt = t * t; - - if (t < 0.0f) - t = -t; - - if (t < 1.0f) { - t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt)) + ((-18.0f + 12.0f * B + 6.0f * C) * tt) + (6.0f - 2.0f * B)); - - return (t / 6.0f); - } else if (t < 2.0f) { - t = (((-1.0f * B - 6.0f * C) * (t * tt)) + ((6.0f * B + 30.0f * C) * tt) + ((-12.0f * B - 48.0f * C) * t) + (8.0f * B + 24.0f * C)); - - return (t / 6.0f); - } - - return (0.0f); -} + if (t < 0.0f) + { + t = -t; + } -#define MITCHELL_SUPPORT (2.0f) -static float mitchell_filter(float t) { - return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f); -} + if (t < 1.0f) + { + t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt)) + ((-18.0f + 12.0f * B + 6.0f * C) * tt) + (6.0f - 2.0f * B)); -#define CATMULL_ROM_SUPPORT (2.0f) -static float catmull_rom_filter(float t) { - return mitchell(t, 0.0f, .5f); -} + return (t / 6.0f); + } + else if (t < 2.0f) + { + t = (((-1.0f * B - 6.0f * C) * (t * tt)) + ((6.0f * B + 30.0f * C) * tt) + ((-12.0f * B - 48.0f * C) * t) + (8.0f * B + 24.0f * C)); -static double sinc(double x) { - x = (x * M_PI); + return (t / 6.0f); + } - if ((x < 0.01f) && (x > -0.01f)) - return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f); + return 0.0f; + } - return sin(x) / x; -} +#define MITCHELL_SUPPORT (2.0f) + static float mitchell_filter(float t) + { + return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f); + } + +#define CATMULL_ROM_SUPPORT (2.0f) + static float catmull_rom_filter(float t) + { + return mitchell(t, 0.0f, .5f); + } -static float clean(double t) { - const float EPSILON = .0000125f; - if (fabs(t) < EPSILON) - return 0.0f; - return (float)t; -} + static double sinc(double x) + { + x = (x * M_PI); 
-//static double blackman_window(double x) -//{ -// return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x); -//} + if ((x < 0.01f) && (x > -0.01f)) + { + return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f); + } + return sin(x) / x; + } -static double blackman_exact_window(double x) { - return 0.42659071f + 0.49656062f * cos(M_PI * x) + 0.07684867f * cos(2.0f * M_PI * x); -} + static float clean(double t) + { + const float EPSILON = .0000125f; + if (fabs(t) < EPSILON) + { + return 0.0f; + } + return static_cast(t); + } + + //static double blackman_window(double x) + //{ + // return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x); + //} + + static double blackman_exact_window(double x) + { + return 0.42659071f + 0.49656062f * cos(M_PI * x) + 0.07684867f * cos(2.0f * M_PI * x); + } #define BLACKMAN_SUPPORT (3.0f) -static float blackman_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < 3.0f) - //return clean(sinc(t) * blackman_window(t / 3.0f)); - return clean(sinc(t) * blackman_exact_window(t / 3.0f)); - else - return (0.0f); -} + static float blackman_filter(float t) + { + if (t < 0.0f) + { + t = -t; + } + + if (t < 3.0f) + { + //return clean(sinc(t) * blackman_window(t / 3.0f)); + return clean(sinc(t) * blackman_exact_window(t / 3.0f)); + } + else + { + return 0.0f; + } + } #define GAUSSIAN_SUPPORT (1.25f) -static float gaussian_filter(float t) // with blackman window -{ - if (t < 0) - t = -t; - if (t < GAUSSIAN_SUPPORT) - return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT)); - else - return 0.0f; -} - -// Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. 
+ static float gaussian_filter(float t) // with blackman window + { + if (t < 0) + { + t = -t; + } + if (t < GAUSSIAN_SUPPORT) + { + return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT)); + } + else + { + return 0.0f; + } + } + + // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. #define LANCZOS3_SUPPORT (3.0f) -static float lanczos3_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < 3.0f) - return clean(sinc(t) * sinc(t / 3.0f)); - else - return (0.0f); -} + static float lanczos3_filter(float t) { + if (t < 0.0f) + { + t = -t; + } + + if (t < 3.0f) + { + return clean(sinc(t) * sinc(t / 3.0f)); + } + else + { + return 0.0f; + } + } #define LANCZOS4_SUPPORT (4.0f) -static float lanczos4_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < 4.0f) - return clean(sinc(t) * sinc(t / 4.0f)); - else - return (0.0f); -} + static float lanczos4_filter(float t) + { + if (t < 0.0f) + { + t = -t; + } + + if (t < 4.0f) + { + return clean(sinc(t) * sinc(t / 4.0f)); + } + else + { + return 0.0f; + } + } #define LANCZOS6_SUPPORT (6.0f) -static float lanczos6_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < 6.0f) - return clean(sinc(t) * sinc(t / 6.0f)); - else - return (0.0f); -} + static float lanczos6_filter(float t) { + if (t < 0.0f) + { + t = -t; + } + + if (t < 6.0f) + { + return clean(sinc(t) * sinc(t / 6.0f)); + } + else + { + return (0.0f); + } + } #define LANCZOS12_SUPPORT (12.0f) -static float lanczos12_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < 12.0f) - return clean(sinc(t) * sinc(t / 12.0f)); - else - return (0.0f); -} - -static double bessel0(double x) { - const double EPSILON_RATIO = 1E-16; - double xh, sum, pow, ds; - int k; - - xh = 0.5 * x; - sum = 1.0; - pow = 1.0; - k = 0; - ds = 1.0; - while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? 
- { - ++k; - pow = pow * (xh / k); - ds = pow * pow; - sum = sum + ds; - } - - return sum; -} - -static const float KAISER_ALPHA = 4.0; -static double kaiser(double alpha, double half_width, double x) { - const double ratio = (x / half_width); - return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); -} + static float lanczos12_filter(float t) { + if (t < 0.0f) + { + t = -t; + } + + if (t < 12.0f) + { + return clean(sinc(t) * sinc(t / 12.0f)); + } + else + { + return (0.0f); + } + } + + static double bessel0(double x) + { + const double EPSILON_RATIO = 1E-16; + double xh, sum, pow, ds; + int k; + + xh = 0.5 * x; + sum = 1.0; + pow = 1.0; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? + { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + static const float KAISER_ALPHA = 4.0; + static double kaiser(double alpha, double half_width, double x) + { + const double ratio = (x / half_width); + return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); + } #define KAISER_SUPPORT 3 -static float kaiser_filter(float t) { - if (t < 0.0f) - t = -t; - - if (t < KAISER_SUPPORT) { - // db atten - const float att = 40.0f; - const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); - //const float alpha = KAISER_ALPHA; - return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); - } - - return 0.0f; -} - -const resample_filter g_resample_filters[] = + static float kaiser_filter(float t) + { + if (t < 0.0f) + { + t = -t; + } + + if (t < KAISER_SUPPORT) + { + // db atten + const float att = 40.0f; + const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); + //const float alpha = KAISER_ALPHA; + return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); + } + + return 0.0f; + } + + const resample_filter g_resample_filters[] = { {"box", box_filter, 
BOX_FILTER_SUPPORT}, {"tent", tent_filter, TENT_FILTER_SUPPORT}, @@ -290,15 +383,20 @@ const resample_filter g_resample_filters[] = {"quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT}, {"quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT}, {"quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT}, -}; + }; -const int g_num_resample_filters = sizeof(g_resample_filters) / sizeof(g_resample_filters[0]); + const int g_num_resample_filters = sizeof(g_resample_filters) / sizeof(g_resample_filters[0]); -int find_resample_filter(const char* pName) { - for (int i = 0; i < g_num_resample_filters; i++) - if (_stricmp(pName, g_resample_filters[i].name) == 0) - return i; - return cInvalidIndex; -} + int find_resample_filter(const char* pName) + { + for (int i = 0; i < g_num_resample_filters; i++) + { + if (_stricmp(pName, g_resample_filters[i].name) == 0) + { + return i; + } + } + return cInvalidIndex; + } } // namespace crnlib diff --git a/crnlib/crn_resample_filters.h b/crnlib/crn_resample_filters.h index 9a3bf30..1ce4928 100644 --- a/crnlib/crn_resample_filters.h +++ b/crnlib/crn_resample_filters.h @@ -1,21 +1,24 @@ // File: crn_resample_filters.h // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ + #pragma once #include "crn_export.h" -namespace crnlib { -typedef float (*resample_filter_func)(float t); +namespace crnlib +{ + typedef float (*resample_filter_func)(float t); -struct resample_filter { - char name[32]; - resample_filter_func func; - float support; -}; + struct resample_filter + { + char name[32]; + resample_filter_func func; + float support; + }; -CRN_EXPORT extern const resample_filter g_resample_filters[]; -CRN_EXPORT extern const int g_num_resample_filters; + CRN_EXPORT extern const resample_filter g_resample_filters[]; + CRN_EXPORT extern const int g_num_resample_filters; -CRN_EXPORT int find_resample_filter(const char* pName); + CRN_EXPORT int 
find_resample_filter(const char* pName); } // namespace crnlib diff --git a/crnlib/crn_texture_comp.cpp b/crnlib/crn_texture_comp.cpp index 63f7fde..e2d9365 100644 --- a/crnlib/crn_texture_comp.cpp +++ b/crnlib/crn_texture_comp.cpp @@ -1,460 +1,592 @@ // File: crn_texture_comp.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_texture_comp.h" #include "crn_dds_comp.h" #include "crn_console.h" #include "crn_rect.h" -namespace crnlib { -static itexture_comp* create_texture_comp(crn_file_type file_type) { - if (file_type == cCRNFileTypeCRN) - return crnlib_new(); - else if (file_type == cCRNFileTypeDDS) - return crnlib_new(); - else - return NULL; -} - -bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) { - crn_comp_params local_params(params); - - if (pixel_format_helpers::is_crn_format_non_srgb(local_params.m_format)) { - if (local_params.get_flag(cCRNCompFlagPerceptual)) { - console::info("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); - - // Destination compressed pixel format is swizzled or not RGB at all, so be sure perceptual colorspace metrics are disabled. 
- local_params.set_flag(cCRNCompFlagPerceptual, false); - } - } - - if (pActual_quality_level) - *pActual_quality_level = 0; - if (pActual_bitrate) - *pActual_bitrate = 0.0f; - - comp_data.resize(0); - - itexture_comp* pTexture_comp = create_texture_comp(local_params.m_file_type); - if (!pTexture_comp) - return false; - - if (!pTexture_comp->compress_init(local_params)) { - crnlib_delete(pTexture_comp); - return false; - } - - if ((local_params.m_target_bitrate <= 0.0f) || - (local_params.m_format == cCRNFmtDXT3) || - ((local_params.m_file_type == cCRNFileTypeCRN) && ((local_params.m_flags & cCRNCompFlagManualPaletteSizes) != 0))) { - if ((local_params.m_file_type == cCRNFileTypeCRN) || - ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel))) { - console::info("Compressing using quality level %i", local_params.m_quality_level); - } - if (local_params.m_format == cCRNFmtDXT3) { - if (local_params.m_file_type == cCRNFileTypeCRN) - console::warning("CRN format doesn't support DXT3"); - else if ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel)) - console::warning("Clustered DDS compressor doesn't support DXT3"); - } - if (!pTexture_comp->compress_pass(local_params, pActual_bitrate)) { - crnlib_delete(pTexture_comp); - return false; +namespace crnlib +{ + static itexture_comp* create_texture_comp(crn_file_type file_type) + { + if (file_type == cCRNFileTypeCRN) + { + return crnlib_new(); + } + else if (file_type == cCRNFileTypeDDS) + { + return crnlib_new(); + } + else + { + return NULL; + } } - comp_data.swap(pTexture_comp->get_comp_data()); + bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) + { + crn_comp_params local_params(params); - if ((pActual_quality_level) && (local_params.m_target_bitrate <= 0.0)) - *pActual_quality_level = local_params.m_quality_level; + if 
(pixel_format_helpers::is_crn_format_non_srgb(local_params.m_format)) + { + if (local_params.get_flag(cCRNCompFlagPerceptual)) + { + console::info("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); - crnlib_delete(pTexture_comp); - return true; - } + // Destination compressed pixel format is swizzled or not RGB at all, so be sure perceptual colorspace metrics are disabled. + local_params.set_flag(cCRNCompFlagPerceptual, false); + } + } - // Interpolative search to find closest quality level to target bitrate. - const int cLowestQuality = 0; - const int cHighestQuality = cCRNMaxQualityLevel; - const int cNumQualityLevels = cHighestQuality - cLowestQuality + 1; + if (pActual_quality_level) + { + *pActual_quality_level = 0; + } + if (pActual_bitrate) + { + *pActual_bitrate = 0.0f; + } - float best_bitrate = 1e+10f; - int best_quality_level = -1; - const uint cMaxIterations = 8; + comp_data.resize(0); - for (;;) { - int low_quality = cLowestQuality; - int high_quality = cHighestQuality; + itexture_comp* pTexture_comp = create_texture_comp(local_params.m_file_type); + if (!pTexture_comp) + { + return false; + } - float cached_bitrates[cNumQualityLevels]; - for (int i = 0; i < cNumQualityLevels; i++) - cached_bitrates[i] = -1.0f; + if (!pTexture_comp->compress_init(local_params)) + { + crnlib_delete(pTexture_comp); + return false; + } - float highest_bitrate = 0.0f; + if ((local_params.m_target_bitrate <= 0.0f) || (local_params.m_format == cCRNFmtDXT3) || ((local_params.m_file_type == cCRNFileTypeCRN) && ((local_params.m_flags & cCRNCompFlagManualPaletteSizes) != 0))) + { + if ((local_params.m_file_type == cCRNFileTypeCRN) || ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel))) + { + console::info("Compressing using quality level %i", local_params.m_quality_level); + } + if (local_params.m_format == cCRNFmtDXT3) + { + if (local_params.m_file_type == cCRNFileTypeCRN) + { + 
console::warning("CRN format doesn't support DXT3"); + } + else if ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel)) + { + console::warning("Clustered DDS compressor doesn't support DXT3"); + } + } + if (!pTexture_comp->compress_pass(local_params, pActual_bitrate)) + { + crnlib_delete(pTexture_comp); + return false; + } - uint iter_count = 0; - bool force_binary_search = false; + comp_data.swap(pTexture_comp->get_comp_data()); - while (low_quality <= high_quality) { - if (params.m_flags & cCRNCompFlagDebugging) { - console::debug("Quality level bracket: [%u, %u]", low_quality, high_quality); - } + if ((pActual_quality_level) && (local_params.m_target_bitrate <= 0.0)) + { + *pActual_quality_level = local_params.m_quality_level; + } + + crnlib_delete(pTexture_comp); + return true; + } - int trial_quality = (low_quality + high_quality) / 2; + // Interpolative search to find closest quality level to target bitrate. + const int cLowestQuality = 0; + const int cHighestQuality = cCRNMaxQualityLevel; + const int cNumQualityLevels = cHighestQuality - cLowestQuality + 1; - if ((iter_count) && (!force_binary_search)) { - int bracket_low = trial_quality; - while ((cached_bitrates[bracket_low] < 0) && (bracket_low > cLowestQuality)) - bracket_low--; + float best_bitrate = 1e+10f; + int best_quality_level = -1; + const uint cMaxIterations = 8; - if (cached_bitrates[bracket_low] < 0) - trial_quality = static_cast(math::lerp((float)low_quality, (float)high_quality, .33f)); - else { - int bracket_high = trial_quality + 1; - if (bracket_high <= cHighestQuality) { - while ((cached_bitrates[bracket_high] < 0) && (bracket_high < cHighestQuality)) - bracket_high++; + for (;;) + { + int low_quality = cLowestQuality; + int high_quality = cHighestQuality; - if (cached_bitrates[bracket_high] >= 0) { - float bracket_low_bitrate = cached_bitrates[bracket_low]; - float bracket_high_bitrate = cached_bitrates[bracket_high]; + float 
cached_bitrates[cNumQualityLevels]; + for (int i = 0; i < cNumQualityLevels; i++) + cached_bitrates[i] = -1.0f; - if ((bracket_low_bitrate < bracket_high_bitrate) && - (bracket_low_bitrate < local_params.m_target_bitrate) && - (bracket_high_bitrate >= local_params.m_target_bitrate)) { - int quality = low_quality + static_cast(((local_params.m_target_bitrate - bracket_low_bitrate) * (high_quality - low_quality)) / (bracket_high_bitrate - bracket_low_bitrate)); + float highest_bitrate = 0.0f; - if ((quality >= low_quality) && (quality <= high_quality)) { - trial_quality = quality; + uint iter_count = 0; + bool force_binary_search = false; + + while (low_quality <= high_quality) + { + if (params.m_flags & cCRNCompFlagDebugging) + { + console::debug("Quality level bracket: [%u, %u]", low_quality, high_quality); } - } - } - } - } - } - console::info("Compressing to quality level %u", trial_quality); + int trial_quality = (low_quality + high_quality) / 2; + + if ((iter_count) && (!force_binary_search)) + { + int bracket_low = trial_quality; + while ((cached_bitrates[bracket_low] < 0) && (bracket_low > cLowestQuality)) + bracket_low--; + + if (cached_bitrates[bracket_low] < 0) + { + trial_quality = static_cast(math::lerp((float)low_quality, (float)high_quality, .33f)); + } + else + { + int bracket_high = trial_quality + 1; + if (bracket_high <= cHighestQuality) + { + while ((cached_bitrates[bracket_high] < 0) && (bracket_high < cHighestQuality)) + { + bracket_high++; + } + + if (cached_bitrates[bracket_high] >= 0) + { + float bracket_low_bitrate = cached_bitrates[bracket_low]; + float bracket_high_bitrate = cached_bitrates[bracket_high]; + + if ((bracket_low_bitrate < bracket_high_bitrate) && + (bracket_low_bitrate < local_params.m_target_bitrate) && + (bracket_high_bitrate >= local_params.m_target_bitrate)) + { + int quality = low_quality + static_cast(((local_params.m_target_bitrate - bracket_low_bitrate) * (high_quality - low_quality)) / (bracket_high_bitrate - 
bracket_low_bitrate)); + + if ((quality >= low_quality) && (quality <= high_quality)) + { + trial_quality = quality; + } + } + } + } + } + } - float bitrate = 0.0f; + console::info("Compressing to quality level %u", trial_quality); - local_params.m_quality_level = trial_quality; + float bitrate = 0.0f; - if (!pTexture_comp->compress_pass(local_params, &bitrate)) { - crnlib_delete(pTexture_comp); - return false; - } + local_params.m_quality_level = trial_quality; + + if (!pTexture_comp->compress_pass(local_params, &bitrate)) + { + crnlib_delete(pTexture_comp); + return false; + } - cached_bitrates[trial_quality] = bitrate; + cached_bitrates[trial_quality] = bitrate; - highest_bitrate = math::maximum(highest_bitrate, bitrate); + highest_bitrate = math::maximum(highest_bitrate, bitrate); - console::info("\nTried quality level %u, bpp: %3.3f", trial_quality, bitrate); + console::info("\nTried quality level %u, bpp: %3.3f", trial_quality, bitrate); - if ((best_quality_level < 0) || - ((bitrate <= local_params.m_target_bitrate) && (best_bitrate > local_params.m_target_bitrate)) || - (((bitrate <= local_params.m_target_bitrate) || (best_bitrate > local_params.m_target_bitrate)) && (fabs(bitrate - local_params.m_target_bitrate) < fabs(best_bitrate - local_params.m_target_bitrate)))) { - best_bitrate = bitrate; - comp_data.swap(pTexture_comp->get_comp_data()); - best_quality_level = trial_quality; - if (params.m_flags & cCRNCompFlagDebugging) { - console::debug("Choose new best quality level"); - } + if ((best_quality_level < 0) || + ((bitrate <= local_params.m_target_bitrate) && (best_bitrate > local_params.m_target_bitrate)) || + (((bitrate <= local_params.m_target_bitrate) || (best_bitrate > local_params.m_target_bitrate)) && (fabs(bitrate - local_params.m_target_bitrate) < fabs(best_bitrate - local_params.m_target_bitrate)))) + { + best_bitrate = bitrate; + comp_data.swap(pTexture_comp->get_comp_data()); + best_quality_level = trial_quality; + if (params.m_flags & 
cCRNCompFlagDebugging) + { + console::debug("Choose new best quality level"); + } - if ((best_bitrate <= local_params.m_target_bitrate) && (fabs(best_bitrate - local_params.m_target_bitrate) < .005f)) - break; - } + if ((best_bitrate <= local_params.m_target_bitrate) && (fabs(best_bitrate - local_params.m_target_bitrate) < .005f)) + { + break; + } + } - if (bitrate > local_params.m_target_bitrate) - high_quality = trial_quality - 1; - else - low_quality = trial_quality + 1; + if (bitrate > local_params.m_target_bitrate) + { + high_quality = trial_quality - 1; + } + else + { + low_quality = trial_quality + 1; + } - iter_count++; - if (iter_count > cMaxIterations) { - force_binary_search = true; - } - } + iter_count++; + if (iter_count > cMaxIterations) + { + force_binary_search = true; + } + } - if (((local_params.m_flags & cCRNCompFlagHierarchical) != 0) && - (highest_bitrate < local_params.m_target_bitrate) && - (fabs(best_bitrate - local_params.m_target_bitrate) >= .005f)) { - console::info("Unable to achieve desired bitrate - disabling adaptive block sizes and retrying search."); + if (((local_params.m_flags & cCRNCompFlagHierarchical) != 0) && + (highest_bitrate < local_params.m_target_bitrate) && + (fabs(best_bitrate - local_params.m_target_bitrate) >= .005f)) + { + console::info("Unable to achieve desired bitrate - disabling adaptive block sizes and retrying search."); - local_params.m_flags &= ~cCRNCompFlagHierarchical; + local_params.m_flags &= ~cCRNCompFlagHierarchical; - crnlib_delete(pTexture_comp); - pTexture_comp = create_texture_comp(local_params.m_file_type); + crnlib_delete(pTexture_comp); + pTexture_comp = create_texture_comp(local_params.m_file_type); + + if (!pTexture_comp->compress_init(local_params)) + { + crnlib_delete(pTexture_comp); + return false; + } + } + else + { + break; + } + } - if (!pTexture_comp->compress_init(local_params)) { crnlib_delete(pTexture_comp); - return false; - } - } else - break; - } + pTexture_comp = NULL; - 
crnlib_delete(pTexture_comp); - pTexture_comp = NULL; + if (best_quality_level < 0) + { + return false; + } - if (best_quality_level < 0) - return false; + if (pActual_quality_level) + { + *pActual_quality_level = best_quality_level; + } + if (pActual_bitrate) + { + *pActual_bitrate = best_bitrate; + } - if (pActual_quality_level) - *pActual_quality_level = best_quality_level; - if (pActual_bitrate) - *pActual_bitrate = best_bitrate; + console::printf("Selected quality level %u bpp: %f", best_quality_level, best_bitrate); - console::printf("Selected quality level %u bpp: %f", best_quality_level, best_bitrate); + return true; + } - return true; -} + static bool create_dds_tex(const crn_comp_params& params, mipmapped_texture& dds_tex) + { + image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; + + bool has_alpha = false; + for (uint face_index = 0; face_index < params.m_faces; face_index++) + { + for (uint level_index = 0; level_index < params.m_levels; level_index++) + { + const uint width = math::maximum(1U, params.m_width >> level_index); + const uint height = math::maximum(1U, params.m_height >> level_index); + + if (!params.m_pImages[face_index][level_index]) + { + return false; + } -static bool create_dds_tex(const crn_comp_params& params, mipmapped_texture& dds_tex) { - image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; + images[face_index][level_index].alias((color_quad_u8*)params.m_pImages[face_index][level_index], width, height); + if (!has_alpha) + { + has_alpha = image_utils::has_alpha(images[face_index][level_index]); + } + } + } - bool has_alpha = false; - for (uint face_index = 0; face_index < params.m_faces; face_index++) { - for (uint level_index = 0; level_index < params.m_levels; level_index++) { - const uint width = math::maximum(1U, params.m_width >> level_index); - const uint height = math::maximum(1U, params.m_height >> level_index); + for (uint face_index = 0; face_index < params.m_faces; face_index++) + { + for (uint level_index = 0; level_index < 
params.m_levels; level_index++) + { + images[face_index][level_index].set_component_valid(3, has_alpha); + } + } - if (!params.m_pImages[face_index][level_index]) - return false; + face_vec faces(params.m_faces); - images[face_index][level_index].alias((color_quad_u8*)params.m_pImages[face_index][level_index], width, height); - if (!has_alpha) - has_alpha = image_utils::has_alpha(images[face_index][level_index]); - } - } + for (uint face_index = 0; face_index < params.m_faces; face_index++) + { + for (uint level_index = 0; level_index < params.m_levels; level_index++) + { + mip_level* pMip = crnlib_new(); - for (uint face_index = 0; face_index < params.m_faces; face_index++) - for (uint level_index = 0; level_index < params.m_levels; level_index++) - images[face_index][level_index].set_component_valid(3, has_alpha); + image_u8* pImage = crnlib_new(); + pImage->swap(images[face_index][level_index]); + pMip->assign(pImage); - face_vec faces(params.m_faces); + faces[face_index].push_back(pMip); + } + } - for (uint face_index = 0; face_index < params.m_faces; face_index++) { - for (uint level_index = 0; level_index < params.m_levels; level_index++) { - mip_level* pMip = crnlib_new(); + dds_tex.assign(faces); - image_u8* pImage = crnlib_new(); - pImage->swap(images[face_index][level_index]); - pMip->assign(pImage); +#ifdef CRNLIB_BUILD_DEBUG + CRNLIB_ASSERT(dds_tex.check()); +#endif - faces[face_index].push_back(pMip); + return true; } - } - dds_tex.assign(faces); + bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, const crn_mipmap_params& mipmap_params, bool generate_mipmaps) + { + crn_comp_params new_params(params); -#ifdef CRNLIB_BUILD_DEBUG - CRNLIB_ASSERT(dds_tex.check()); -#endif + bool generate_new_mips = false; - return true; -} + switch (mipmap_params.m_mode) + { + case cCRNMipModeUseSourceOrGenerateMips: + { + if (work_tex.get_num_levels() == 1) + generate_new_mips = true; + break; + } + case cCRNMipModeUseSourceMips: + { 
+ break; + } + case cCRNMipModeGenerateMips: + { + generate_new_mips = true; + break; + } + case cCRNMipModeNoMips: + { + work_tex.discard_mipmaps(); + break; + } + default: + { + CRNLIB_ASSERT(0); + break; + } + } -bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, const crn_mipmap_params& mipmap_params, bool generate_mipmaps) { - crn_comp_params new_params(params); + rect window_rect(mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); - bool generate_new_mips = false; + if (!window_rect.is_empty()) + { + if (work_tex.get_num_faces() > 1) + { + console::warning("Can't crop cubemap textures"); + } + else + { + console::info("Cropping input texture from window (%ux%u)-(%ux%u)", window_rect.get_left(), window_rect.get_top(), window_rect.get_right(), window_rect.get_bottom()); - switch (mipmap_params.m_mode) { - case cCRNMipModeUseSourceOrGenerateMips: { - if (work_tex.get_num_levels() == 1) - generate_new_mips = true; - break; - } - case cCRNMipModeUseSourceMips: { - break; - } - case cCRNMipModeGenerateMips: { - generate_new_mips = true; - break; - } - case cCRNMipModeNoMips: { - work_tex.discard_mipmaps(); - break; - } - default: { - CRNLIB_ASSERT(0); - break; - } - } + if (!work_tex.crop(window_rect.get_left(), window_rect.get_top(), window_rect.get_width(), window_rect.get_height())) + { + console::warning("Failed cropping window rect"); + } + } + } - rect window_rect(mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); + int new_width = work_tex.get_width(); + int new_height = work_tex.get_height(); + + if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) + { + if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) + { + if (!mipmap_params.m_clamp_scale) + { + if (work_tex.get_num_faces() > 1) + { + console::warning("Can't crop 
cubemap textures"); + } + else + { + new_width = math::minimum(mipmap_params.m_clamp_width, new_width); + new_height = math::minimum(mipmap_params.m_clamp_height, new_height); + console::info("Clamping input texture to %ux%u", new_width, new_height); + work_tex.crop(0, 0, new_width, new_height); + } + } + } + } - if (!window_rect.is_empty()) { - if (work_tex.get_num_faces() > 1) { - console::warning("Can't crop cubemap textures"); - } else { - console::info("Cropping input texture from window (%ux%u)-(%ux%u)", window_rect.get_left(), window_rect.get_top(), window_rect.get_right(), window_rect.get_bottom()); + if (mipmap_params.m_scale_mode != cCRNSMDisabled) + { + bool is_pow2 = math::is_power_of_2((uint32)new_width) && math::is_power_of_2((uint32)new_height); + + switch (mipmap_params.m_scale_mode) + { + case cCRNSMAbsolute: + { + new_width = (uint)mipmap_params.m_scale_x; + new_height = (uint)mipmap_params.m_scale_y; + break; + } + case cCRNSMRelative: + { + new_width = (uint)(mipmap_params.m_scale_x * new_width + .5f); + new_height = (uint)(mipmap_params.m_scale_y * new_height + .5f); + break; + } + case cCRNSMLowerPow2: + { + if (!is_pow2) + { + math::compute_lower_pow2_dim(new_width, new_height); + } + break; + } + case cCRNSMNearestPow2: + { + if (!is_pow2) + { + int lwidth = new_width; + int lheight = new_height; + math::compute_lower_pow2_dim(lwidth, lheight); + + int uwidth = new_width; + int uheight = new_height; + math::compute_upper_pow2_dim(uwidth, uheight); + + if (labs(new_width - lwidth) < labs(new_width - uwidth)) + { + new_width = lwidth; + } + else + { + new_width = uwidth; + } + + if (labs(new_height - lheight) < labs(new_height - uheight)) + { + new_height = lheight; + } + else + { + new_height = uheight; + } + } + break; + } + case cCRNSMNextPow2: { + if (!is_pow2) + { + math::compute_upper_pow2_dim(new_width, new_height); + } + break; + } + default: + break; + } + } - if (!work_tex.crop(window_rect.get_left(), window_rect.get_top(), 
window_rect.get_width(), window_rect.get_height())) - console::warning("Failed cropping window rect"); - } - } - - int new_width = work_tex.get_width(); - int new_height = work_tex.get_height(); - - if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) { - if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) { - if (!mipmap_params.m_clamp_scale) { - if (work_tex.get_num_faces() > 1) { - console::warning("Can't crop cubemap textures"); - } else { - new_width = math::minimum(mipmap_params.m_clamp_width, new_width); - new_height = math::minimum(mipmap_params.m_clamp_height, new_height); - console::info("Clamping input texture to %ux%u", new_width, new_height); - work_tex.crop(0, 0, new_width, new_height); - } - } - } - } - - if (mipmap_params.m_scale_mode != cCRNSMDisabled) { - bool is_pow2 = math::is_power_of_2((uint32)new_width) && math::is_power_of_2((uint32)new_height); - - switch (mipmap_params.m_scale_mode) { - case cCRNSMAbsolute: { - new_width = (uint)mipmap_params.m_scale_x; - new_height = (uint)mipmap_params.m_scale_y; - break; - } - case cCRNSMRelative: { - new_width = (uint)(mipmap_params.m_scale_x * new_width + .5f); - new_height = (uint)(mipmap_params.m_scale_y * new_height + .5f); - break; - } - case cCRNSMLowerPow2: { - if (!is_pow2) - math::compute_lower_pow2_dim(new_width, new_height); - break; - } - case cCRNSMNearestPow2: { - if (!is_pow2) { - int lwidth = new_width; - int lheight = new_height; - math::compute_lower_pow2_dim(lwidth, lheight); - - int uwidth = new_width; - int uheight = new_height; - math::compute_upper_pow2_dim(uwidth, uheight); - - if (labs(new_width - lwidth) < labs(new_width - uwidth)) - new_width = lwidth; - else - new_width = uwidth; - - if (labs(new_height - lheight) < labs(new_height - uheight)) - new_height = lheight; - else - new_height = uheight; - } - break; - } - case cCRNSMNextPow2: { - if (!is_pow2) - math::compute_upper_pow2_dim(new_width, new_height); 
- break; - } - default: - break; - } - } - - if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) { - if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) { - if (mipmap_params.m_clamp_scale) { - new_width = math::minimum(mipmap_params.m_clamp_width, new_width); - new_height = math::minimum(mipmap_params.m_clamp_height, new_height); - } - } - } + if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) + { + if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) + { + if (mipmap_params.m_clamp_scale) + { + new_width = math::minimum(mipmap_params.m_clamp_width, new_width); + new_height = math::minimum(mipmap_params.m_clamp_height, new_height); + } + } + } - new_width = math::clamp(new_width, 1, cCRNMaxLevelResolution); - new_height = math::clamp(new_height, 1, cCRNMaxLevelResolution); + new_width = math::clamp(new_width, 1, cCRNMaxLevelResolution); + new_height = math::clamp(new_height, 1, cCRNMaxLevelResolution); - if ((new_width != (int)work_tex.get_width()) || (new_height != (int)work_tex.get_height()) || (mipmap_params.m_renormalize == true && mipmap_params.m_rtopmip == true)) { - console::info("Resampling input texture to %ux%u", new_width, new_height); + if ((new_width != (int)work_tex.get_width()) || (new_height != (int)work_tex.get_height()) || (mipmap_params.m_renormalize == true && mipmap_params.m_rtopmip == true)) + { + console::info("Resampling input texture to %ux%u", new_width, new_height); - const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); + const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); - bool srgb = mipmap_params.m_gamma_filtering != 0; + bool srgb = mipmap_params.m_gamma_filtering != 0; - mipmapped_texture::resample_params res_params; - res_params.m_pFilter = pFilter; - res_params.m_wrapping = mipmap_params.m_tiled != 0; - if (work_tex.get_num_faces()) - res_params.m_wrapping = 
false; - res_params.m_renormalize = mipmap_params.m_renormalize != 0; - res_params.m_filter_scale = 1.0f; - res_params.m_gamma = mipmap_params.m_gamma; - res_params.m_srgb = srgb; - res_params.m_multithreaded = (params.m_num_helper_threads > 0); + mipmapped_texture::resample_params res_params; + res_params.m_pFilter = pFilter; + res_params.m_wrapping = mipmap_params.m_tiled != 0; + if (work_tex.get_num_faces()) + { + res_params.m_wrapping = false; + } + res_params.m_renormalize = mipmap_params.m_renormalize != 0; + res_params.m_filter_scale = 1.0f; + res_params.m_gamma = mipmap_params.m_gamma; + res_params.m_srgb = srgb; + res_params.m_multithreaded = (params.m_num_helper_threads > 0); + + if (!work_tex.resize(new_width, new_height, res_params)) + { + console::error("Failed resizing texture!"); + return false; + } + } - if (!work_tex.resize(new_width, new_height, res_params)) { - console::error("Failed resizing texture!"); - return false; - } - } - - if ((generate_new_mips) && (generate_mipmaps)) { - bool srgb = mipmap_params.m_gamma_filtering != 0; - - const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); - - mipmapped_texture::generate_mipmap_params gen_params; - gen_params.m_pFilter = pFilter; - gen_params.m_wrapping = mipmap_params.m_tiled != 0; - gen_params.m_renormalize = mipmap_params.m_renormalize != 0; - gen_params.m_filter_scale = mipmap_params.m_blurriness; - gen_params.m_gamma = mipmap_params.m_gamma; - gen_params.m_srgb = srgb; - gen_params.m_multithreaded = params.m_num_helper_threads > 0; - gen_params.m_max_mips = mipmap_params.m_max_levels; - gen_params.m_min_mip_size = mipmap_params.m_min_mip_size; - - console::info("Generating mipmaps using filter \"%s\"", pFilter); - - timer tm; - tm.start(); - if (!work_tex.generate_mipmaps(gen_params, true)) { - console::error("Failed generating mipmaps!"); - return false; - } - double t = tm.get_elapsed_secs(); + if ((generate_new_mips) && (generate_mipmaps)) + { + bool srgb = 
mipmap_params.m_gamma_filtering != 0; + + const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); + + mipmapped_texture::generate_mipmap_params gen_params; + gen_params.m_pFilter = pFilter; + gen_params.m_wrapping = mipmap_params.m_tiled != 0; + gen_params.m_renormalize = mipmap_params.m_renormalize != 0; + gen_params.m_filter_scale = mipmap_params.m_blurriness; + gen_params.m_gamma = mipmap_params.m_gamma; + gen_params.m_srgb = srgb; + gen_params.m_multithreaded = params.m_num_helper_threads > 0; + gen_params.m_max_mips = mipmap_params.m_max_levels; + gen_params.m_min_mip_size = mipmap_params.m_min_mip_size; + + console::info("Generating mipmaps using filter \"%s\"", pFilter); + + timer tm; + tm.start(); + if (!work_tex.generate_mipmaps(gen_params, true)) + { + console::error("Failed generating mipmaps!"); + return false; + } + double t = tm.get_elapsed_secs(); - console::info("Generated %u mipmap levels in %3.3fs", work_tex.get_num_levels() - 1, t); - } + console::info("Generated %u mipmap levels in %3.3fs", work_tex.get_num_levels() - 1, t); + } - return true; -} + return true; + } -bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) { - comp_data.resize(0); - if (pActual_bitrate) - *pActual_bitrate = 0.0f; - if (pActual_quality_level) - *pActual_quality_level = 0; + bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) + { + comp_data.resize(0); + if (pActual_bitrate) + { + *pActual_bitrate = 0.0f; + } + if (pActual_quality_level) + { + *pActual_quality_level = 0; + } - mipmapped_texture work_tex; - if (!create_dds_tex(params, work_tex)) { - console::error("Failed creating DDS texture from crn_comp_params!"); - return false; - } + mipmapped_texture work_tex; + if (!create_dds_tex(params, 
work_tex)) + { + console::error("Failed creating DDS texture from crn_comp_params!"); + return false; + } - if (!create_texture_mipmaps(work_tex, params, mipmap_params, true)) - return false; + if (!create_texture_mipmaps(work_tex, params, mipmap_params, true)) + { + return false; + } - crn_comp_params new_params(params); - new_params.m_levels = work_tex.get_num_levels(); - memset(new_params.m_pImages, 0, sizeof(new_params.m_pImages)); + crn_comp_params new_params(params); + new_params.m_levels = work_tex.get_num_levels(); + memset(new_params.m_pImages, 0, sizeof(new_params.m_pImages)); - for (uint f = 0; f < work_tex.get_num_faces(); f++) - for (uint l = 0; l < work_tex.get_num_levels(); l++) - new_params.m_pImages[f][l] = (uint32*)work_tex.get_level(f, l)->get_image()->get_ptr(); + for (uint f = 0; f < work_tex.get_num_faces(); f++) + { + for (uint l = 0; l < work_tex.get_num_levels(); l++) + { + new_params.m_pImages[f][l] = (uint32*)work_tex.get_level(f, l)->get_image()->get_ptr(); + } + } - return create_compressed_texture(new_params, comp_data, pActual_quality_level, pActual_bitrate); -} + return create_compressed_texture(new_params, comp_data, pActual_quality_level, pActual_bitrate); + } } // namespace crnlib diff --git a/crnlib/crn_texture_comp.h b/crnlib/crn_texture_comp.h index 9c1a3ea..867b1d4 100644 --- a/crnlib/crn_texture_comp.h +++ b/crnlib/crn_texture_comp.h @@ -1,32 +1,42 @@ // File: crn_texture_comp.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once -#include "../inc/crnlib.h" +#include "crnlib.h" #include "crn_export.h" -namespace crnlib { -class mipmapped_texture; - -class itexture_comp { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(itexture_comp); - - public: - itexture_comp() {} - virtual ~itexture_comp() {} - - virtual const char* get_ext() const = 0; - - virtual bool compress_init(const crn_comp_params& params) = 0; - virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate) = 0; - virtual void 
compress_deinit() = 0; - - virtual const crnlib::vector& get_comp_data() const = 0; - virtual crnlib::vector& get_comp_data() = 0; -}; - -CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate); -CRN_EXPORT bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, const crn_mipmap_params& mipmap_params, bool generate_mipmaps); -CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate); +namespace crnlib +{ + class mipmapped_texture; + + class itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(itexture_comp); + public: + itexture_comp() + { + } + virtual ~itexture_comp() + { + } + + virtual const char* get_ext() const = 0; + + virtual bool compress_init(const crn_comp_params& params) = 0; + virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate) = 0; + virtual void compress_deinit() = 0; + + virtual const crnlib::vector& get_comp_data() const = 0; + virtual crnlib::vector& get_comp_data() = 0; + }; + + CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, + uint32* pActual_quality_level, float* pActual_bitrate); + CRN_EXPORT bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, + const crn_mipmap_params& mipmap_params, bool generate_mipmaps); + CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, + crnlib::vector& comp_data, uint32* pActual_quality_level, + float* pActual_bitrate); } // namespace crnlib diff --git a/crnlib/crn_texture_file_types.cpp b/crnlib/crn_texture_file_types.cpp index edcba06..d1d214a 100644 --- a/crnlib/crn_texture_file_types.cpp +++ b/crnlib/crn_texture_file_types.cpp @@ -1,103 +1,121 @@ // File: 
crn_texture_file_types.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_texture_file_types.h" #include "crn_file_utils.h" -namespace crnlib { -const char* texture_file_types::get_extension(format fmt) { - CRNLIB_ASSERT(fmt < cNumFileFormats); - if (fmt >= cNumFileFormats) - return NULL; - - static const char* extensions[cNumFileFormats] = - { - "dds", - "crn", - "ktx", - - "tga", - "png", - "jpg", - "jpeg", - "bmp", - "gif", - "tif", - "tiff", - "ppm", - "pgm", - "psd", - "jp2", - - "", - ""}; - return extensions[fmt]; -} - -texture_file_types::format texture_file_types::determine_file_format(const char* pFilename) { - dynamic_string ext; - if (!file_utils::split_path(pFilename, NULL, NULL, NULL, &ext)) - return cFormatInvalid; - - if (ext.is_empty()) - return cFormatInvalid; - - if (ext[0] == '.') - ext.right(1); - - for (uint i = 0; i < cNumFileFormats; i++) - if (ext == get_extension(static_cast(i))) - return static_cast(i); - - return cFormatInvalid; -} - -bool texture_file_types::supports_mipmaps(format fmt) { - switch (fmt) { - case cFormatCRN: - case cFormatDDS: - case cFormatKTX: - return true; - default: - break; - } - - return false; -} - -bool texture_file_types::supports_alpha(format fmt) { - switch (fmt) { - case cFormatJPG: - case cFormatJPEG: - case cFormatGIF: - case cFormatJP2: - return false; - default: - break; - } - - return true; -} - -const char* get_texture_type_desc(texture_type t) { - switch (t) { - case cTextureTypeUnknown: - return "Unknown"; - case cTextureTypeRegularMap: - return "2D map"; - case cTextureTypeNormalMap: - return "Normal map"; - case cTextureTypeVerticalCrossCubemap: - return "Vertical Cross Cubemap"; - case cTextureTypeCubemap: - return "Cubemap"; - default: - break; - } - - CRNLIB_ASSERT(false); - - return "?"; -} - +namespace crnlib +{ + const char* texture_file_types::get_extension(format fmt) + { + CRNLIB_ASSERT(fmt < cNumFileFormats); + if (fmt >= 
cNumFileFormats) + { + return NULL; + } + + static const char* extensions[cNumFileFormats] = + { + "dds", + "crn", + "ktx", + + "tga", + "png", + "jpg", + "jpeg", + "bmp", + "gif", + "tif", + "tiff", + "ppm", + "pgm", + "psd", + "jp2", + + "", + "" + }; + return extensions[fmt]; + } + + texture_file_types::format texture_file_types::determine_file_format(const char* pFilename) + { + dynamic_string ext; + if (!file_utils::split_path(pFilename, NULL, NULL, NULL, &ext)) + { + return cFormatInvalid; + } + + if (ext.is_empty()) + { + return cFormatInvalid; + } + + if (ext[0] == '.') + { + ext.right(1); + } + + for (uint i = 0; i < cNumFileFormats; i++) + { + if (ext == get_extension(static_cast(i))) + { + return static_cast(i); + } + } + return cFormatInvalid; + } + + bool texture_file_types::supports_mipmaps(format fmt) + { + switch (fmt) { + case cFormatCRN: + case cFormatDDS: + case cFormatKTX: + return true; + default: + break; + } + + return false; + } + + bool texture_file_types::supports_alpha(format fmt) + { + switch (fmt) { + case cFormatJPG: + case cFormatJPEG: + case cFormatGIF: + case cFormatJP2: + return false; + default: + break; + } + + return true; + } + + const char* get_texture_type_desc(texture_type t) + { + switch (t) { + case cTextureTypeUnknown: + return "Unknown"; + case cTextureTypeRegularMap: + return "2D map"; + case cTextureTypeNormalMap: + return "Normal map"; + case cTextureTypeVerticalCrossCubemap: + return "Vertical Cross Cubemap"; + case cTextureTypeCubemap: + return "Cubemap"; + default: + break; + } + + CRNLIB_ASSERT(false); + + return "?"; + } } // namespace crnlib diff --git a/crnlib/crn_texture_file_types.h b/crnlib/crn_texture_file_types.h index a40a62a..ca0e8d2 100644 --- a/crnlib/crn_texture_file_types.h +++ b/crnlib/crn_texture_file_types.h @@ -1,64 +1,70 @@ // File: crn_texture_file_types.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once -#include "../inc/crnlib.h" + +#include "crnlib.h" #include 
"crn_vec.h" #include "crn_pixel_format.h" #include "crn_export.h" -namespace crnlib { -struct texture_file_types { - enum format { - cFormatInvalid = -1, +namespace crnlib +{ + struct texture_file_types + { + enum format + { + cFormatInvalid = -1, - cFormatDDS, - cFormatCRN, - cFormatKTX, + cFormatDDS, + cFormatCRN, + cFormatKTX, - cNumMipmappedFileFormats, + cNumMipmappedFileFormats, - cFormatTGA = cNumMipmappedFileFormats, - cFormatPNG, - cFormatJPG, - cFormatJPEG, - cFormatBMP, - cFormatGIF, - cFormatTIF, - cFormatTIFF, - cFormatPPM, - cFormatPGM, - cFormatPSD, - cFormatJP2, + cFormatTGA = cNumMipmappedFileFormats, + cFormatPNG, + cFormatJPG, + cFormatJPEG, + cFormatBMP, + cFormatGIF, + cFormatTIF, + cFormatTIFF, + cFormatPPM, + cFormatPGM, + cFormatPSD, + cFormatJP2, - cNumRegularFileFormats, + cNumRegularFileFormats, - cNumImageFileFormats = cNumRegularFileFormats - cNumMipmappedFileFormats, + cNumImageFileFormats = cNumRegularFileFormats - cNumMipmappedFileFormats, - // Not really a file format - cFormatClipboard = cNumRegularFileFormats, - cFormatDragDrop, + // Not really a file format + cFormatClipboard = cNumRegularFileFormats, + cFormatDragDrop, - cNumFileFormats - }; + cNumFileFormats, + }; - CRN_EXPORT static const char* get_extension(format fmt); + CRN_EXPORT static const char* get_extension(format fmt); - CRN_EXPORT static format determine_file_format(const char* pFilename); + CRN_EXPORT static format determine_file_format(const char* pFilename); - CRN_EXPORT static bool supports_mipmaps(format fmt); - CRN_EXPORT static bool supports_alpha(format fmt); -}; + CRN_EXPORT static bool supports_mipmaps(format fmt); + CRN_EXPORT static bool supports_alpha(format fmt); + }; -enum texture_type { - cTextureTypeUnknown = 0, - cTextureTypeRegularMap, - cTextureTypeNormalMap, - cTextureTypeVerticalCrossCubemap, - cTextureTypeCubemap, + enum texture_type + { + cTextureTypeUnknown = 0, + cTextureTypeRegularMap, + cTextureTypeNormalMap, + 
cTextureTypeVerticalCrossCubemap, + cTextureTypeCubemap, - cNumTextureTypes -}; + cNumTextureTypes + }; -CRN_EXPORT const char* get_texture_type_desc(texture_type t); + CRN_EXPORT const char* get_texture_type_desc(texture_type t); } // namespace crnlib diff --git a/crnlib/crn_vec.h b/crnlib/crn_vec.h index e4896cf..05d59b9 100644 --- a/crnlib/crn_vec.h +++ b/crnlib/crn_vec.h @@ -5,756 +5,1049 @@ #include "crn_core.h" #include "crn_rand.h" -namespace crnlib { -template -class vec : public helpers::rel_ops > { - public: - typedef T scalar_type; - enum { num_elements = N }; - - inline vec() {} - - inline vec(eClear) { clear(); } - - inline vec(const vec& other) { - for (uint i = 0; i < N; i++) - m_s[i] = other.m_s[i]; - } - - template - inline vec(const vec& other) { - set(other); - } - - template - inline vec(const vec& other, T w) { - *this = other; - m_s[N - 1] = w; - } - - explicit inline vec(T val) { - set(val); - } - - inline vec(T val0, T val1) { - set(val0, val1); - } - - inline vec(T val0, T val1, T val2) { - set(val0, val1, val2); - } - - inline vec(T val0, T val1, T val2, T val3) { - set(val0, val1, val2, val3); - } - - inline void clear() { - if (N > 4) - memset(m_s, 0, sizeof(m_s)); - else { - for (uint i = 0; i < N; i++) - m_s[i] = 0; - } - } - - template - inline vec& set(const vec& other) { - if ((void*)this == (void*)&other) - return *this; - const uint m = math::minimum(N, ON); - uint i; - for (i = 0; i < m; i++) - m_s[i] = static_cast(other[i]); - for (; i < N; i++) - m_s[i] = 0; - return *this; - } - - inline vec& set_component(uint index, T val) { - CRNLIB_ASSERT(index < N); - m_s[index] = val; - return *this; - } - - inline vec& set(T val) { - for (uint i = 0; i < N; i++) - m_s[i] = val; - return *this; - } - - inline vec& set(T val0, T val1) { - m_s[0] = val0; - if (N >= 2) { - m_s[1] = val1; - - for (uint i = 2; i < N; i++) - m_s[i] = 0; - } - return *this; - } - - inline vec& set(T val0, T val1, T val2) { - m_s[0] = val0; - if (N >= 2) { - 
m_s[1] = val1; - - if (N >= 3) { - m_s[2] = val2; - - for (uint i = 3; i < N; i++) - m_s[i] = 0; - } - } - return *this; - } - - inline vec& set(T val0, T val1, T val2, T val3) { - m_s[0] = val0; - if (N >= 2) { - m_s[1] = val1; - - if (N >= 3) { - m_s[2] = val2; - - if (N >= 4) { - m_s[3] = val3; - - for (uint i = 4; i < N; i++) - m_s[i] = 0; - } - } - } - return *this; - } - - inline vec& set(const T* pValues) { - for (uint i = 0; i < N; i++) - m_s[i] = pValues[i]; - return *this; - } - - template - inline vec& swizzle_set(const vec& other, uint i) { - return set(static_cast(other[i])); - } - - template - inline vec& swizzle_set(const vec& other, uint i, uint j) { - return set(static_cast(other[i]), static_cast(other[j])); - } - - template - inline vec& swizzle_set(const vec& other, uint i, uint j, uint k) { - return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); - } - - template - inline vec& swizzle_set(const vec& other, uint i, uint j, uint k, uint l) { - return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); - } - - inline vec& operator=(const vec& rhs) { - if (this != &rhs) { - for (uint i = 0; i < N; i++) - m_s[i] = rhs.m_s[i]; - } - return *this; - } - - template - inline vec& operator=(const vec& other) { - if ((void*)this == (void*)&other) - return *this; - - uint s = math::minimum(N, O); - - uint i; - for (i = 0; i < s; i++) - m_s[i] = static_cast(other[i]); - - for (; i < N; i++) - m_s[i] = 0; - - return *this; - } - - inline bool operator==(const vec& rhs) const { - for (uint i = 0; i < N; i++) - if (!(m_s[i] == rhs.m_s[i])) - return false; - return true; - } - - inline bool operator<(const vec& rhs) const { - for (uint i = 0; i < N; i++) { - if (m_s[i] < rhs.m_s[i]) - return true; - else if (!(m_s[i] == rhs.m_s[i])) - return false; - } - - return false; - } - - inline T operator[](uint i) const { - CRNLIB_ASSERT(i < N); - return m_s[i]; - } - - inline T& operator[](uint i) { 
- CRNLIB_ASSERT(i < N); - return m_s[i]; - } - - inline operator size_t() const { - return (size_t)fast_hash(this, sizeof(*this)); - } - - inline T get_x(void) const { return m_s[0]; } - inline T get_y(void) const { - CRNLIB_ASSUME(N >= 2); - return m_s[1]; - } - inline T get_z(void) const { - CRNLIB_ASSUME(N >= 3); - return m_s[2]; - } - inline T get_w(void) const { - CRNLIB_ASSUME(N >= 4); - return m_s[3]; - } - - inline vec& set_x(T v) { - m_s[0] = v; - return *this; - } - inline vec& set_y(T v) { - CRNLIB_ASSUME(N >= 2); - m_s[1] = v; - return *this; - } - inline vec& set_z(T v) { - CRNLIB_ASSUME(N >= 3); - m_s[2] = v; - return *this; - } - inline vec& set_w(T v) { - CRNLIB_ASSUME(N >= 4); - m_s[3] = v; - return *this; - } - - inline vec as_point() const { - vec result(*this); - result[N - 1] = 1; - return result; - } - - inline vec as_dir() const { - vec result(*this); - result[N - 1] = 0; - return result; - } - - inline vec<2, T> select2(uint i, uint j) const { - CRNLIB_ASSERT((i < N) && (j < N)); - return vec<2, T>(m_s[i], m_s[j]); - } - - inline vec<3, T> select3(uint i, uint j, uint k) const { - CRNLIB_ASSERT((i < N) && (j < N) && (k < N)); - return vec<3, T>(m_s[i], m_s[j], m_s[k]); - } - - inline vec<4, T> select4(uint i, uint j, uint k, uint l) const { - CRNLIB_ASSERT((i < N) && (j < N) && (k < N) && (l < N)); - return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); - } - - inline bool is_dir() const { return m_s[N - 1] == 0; } - inline bool is_vector() const { return is_dir(); } - inline bool is_point() const { return m_s[N - 1] == 1; } - - inline vec project() const { - vec result(*this); - if (result[N - 1]) - result /= result[N - 1]; - return result; - } - - inline vec broadcast(unsigned i) const { - return vec((*this)[i]); - } - - inline vec swizzle(uint i, uint j) const { - return vec((*this)[i], (*this)[j]); - } - - inline vec swizzle(uint i, uint j, uint k) const { - return vec((*this)[i], (*this)[j], (*this)[k]); - } - - inline vec swizzle(uint i, 
uint j, uint k, uint l) const { - return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); - } - - inline vec operator-() const { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = -m_s[i]; - return result; - } - - inline vec operator+() const { - return *this; - } - - inline vec& operator+=(const vec& other) { - for (uint i = 0; i < N; i++) - m_s[i] += other.m_s[i]; - return *this; - } - - inline vec& operator-=(const vec& other) { - for (uint i = 0; i < N; i++) - m_s[i] -= other.m_s[i]; - return *this; - } - - inline vec& operator*=(const vec& other) { - for (uint i = 0; i < N; i++) - m_s[i] *= other.m_s[i]; - return *this; - } - - inline vec& operator/=(const vec& other) { - for (uint i = 0; i < N; i++) - m_s[i] /= other.m_s[i]; - return *this; - } - - inline vec& operator*=(T s) { - for (uint i = 0; i < N; i++) - m_s[i] *= s; - return *this; - } - - inline vec& operator/=(T s) { - for (uint i = 0; i < N; i++) - m_s[i] /= s; - return *this; - } - - friend inline T operator*(const vec& lhs, const vec& rhs) { - T result = lhs.m_s[0] * rhs.m_s[0]; - for (uint i = 1; i < N; i++) - result += lhs.m_s[i] * rhs.m_s[i]; - return result; - } - - friend inline vec operator*(const vec& lhs, T val) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] * val; - return result; - } - - friend inline vec operator*(T val, const vec& lhs) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] * val; - return result; - } - - friend inline vec operator/(const vec& lhs, const vec& rhs) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; - return result; - } - - friend inline vec operator/(const vec& lhs, T val) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] / val; - return result; - } - - friend inline vec operator+(const vec& lhs, const vec& rhs) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; - return result; - } - - 
friend inline vec operator-(const vec& lhs, const vec& rhs) { - vec result; - for (uint i = 0; i < N; i++) - result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; - return result; - } - - static inline vec<3, T> cross2(const vec& a, const vec& b) { - CRNLIB_ASSUME(N >= 2); - return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); - } - - static inline vec<3, T> cross3(const vec& a, const vec& b) { - CRNLIB_ASSUME(N >= 3); - return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); - } - - static inline vec<3, T> cross(const vec& a, const vec& b) { - CRNLIB_ASSUME(N >= 2); - - if (N == 2) - return cross2(a, b); - else - return cross3(a, b); - } - - inline T dot(const vec& rhs) const { - return *this * rhs; - } - - inline T dot2(const vec& rhs) const { - CRNLIB_ASSUME(N >= 2); - return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; - } - - inline T dot3(const vec& rhs) const { - CRNLIB_ASSUME(N >= 3); - return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; - } - - inline T norm(void) const { - T sum = m_s[0] * m_s[0]; - for (uint i = 1; i < N; i++) - sum += m_s[i] * m_s[i]; - return sum; - } - - inline T length(void) const { - return sqrt(norm()); - } - - inline T squared_distance(const vec& rhs) const { - T dist2 = 0; - for (uint i = 0; i < N; i++) { - T d = m_s[i] - rhs.m_s[i]; - dist2 += d * d; - } - return dist2; - } - - inline T squared_distance(const vec& rhs, T early_out) const { - T dist2 = 0; - for (uint i = 0; i < N; i++) { - T d = m_s[i] - rhs.m_s[i]; - dist2 += d * d; - if (dist2 > early_out) - break; - } - return dist2; - } - - inline T distance(const vec& rhs) const { - T dist2 = 0; - for (uint i = 0; i < N; i++) { - T d = m_s[i] - rhs.m_s[i]; - dist2 += d * d; - } - return sqrt(dist2); - } - - inline vec inverse() const { - vec result; - for (uint i = 0; i < N; i++) - result[i] = m_s[i] ? 
(1.0f / m_s[i]) : 0; - return result; - } - - inline double normalize(const vec* pDefaultVec = NULL) { - double n = m_s[0] * m_s[0]; - for (uint i = 1; i < N; i++) - n += m_s[i] * m_s[i]; - - if (n != 0) - *this *= static_cast((1.0f / sqrt(n))); - else if (pDefaultVec) - *this = *pDefaultVec; - return n; - } - - inline double normalize3(const vec* pDefaultVec = NULL) { - CRNLIB_ASSUME(N >= 3); - - double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; - - if (n != 0) - *this *= static_cast((1.0f / sqrt(n))); - else if (pDefaultVec) - *this = *pDefaultVec; - return n; - } - - inline vec& normalize_in_place(const vec* pDefaultVec = NULL) { - normalize(pDefaultVec); - return *this; - } - - inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) { - normalize3(pDefaultVec); - return *this; - } - - inline vec get_normalized(const vec* pDefaultVec = NULL) const { - vec result(*this); - result.normalize(pDefaultVec); - return result; - } - - inline vec get_normalized3(const vec* pDefaultVec = NULL) const { - vec result(*this); - result.normalize3(pDefaultVec); - return result; - } - - inline vec& clamp(T l, T h) { - for (uint i = 0; i < N; i++) - m_s[i] = static_cast(math::clamp(m_s[i], l, h)); - return *this; - } - - inline vec& clamp(const vec& l, const vec& h) { - for (uint i = 0; i < N; i++) - m_s[i] = static_cast(math::clamp(m_s[i], l[i], h[i])); - return *this; - } - - inline bool is_within_bounds(const vec& l, const vec& h) const { - for (uint i = 0; i < N; i++) - if ((m_s[i] < l[i]) || (m_s[i] > h[i])) - return false; - - return true; - } - - inline bool is_within_bounds(T l, T h) const { - for (uint i = 0; i < N; i++) - if ((m_s[i] < l) || (m_s[i] > h)) - return false; - - return true; - } - - inline uint get_major_axis(void) const { - T m = fabs(m_s[0]); - uint r = 0; - for (uint i = 1; i < N; i++) { - const T c = fabs(m_s[i]); - if (c > m) { - m = c; - r = i; - } - } - return r; - } - - inline uint get_minor_axis(void) const { - T m = 
fabs(m_s[0]); - uint r = 0; - for (uint i = 1; i < N; i++) { - const T c = fabs(m_s[i]); - if (c < m) { - m = c; - r = i; - } - } - return r; - } - - inline T get_absolute_minimum(void) const { - T result = fabs(m_s[0]); - for (uint i = 1; i < N; i++) - result = math::minimum(result, fabs(m_s[i])); - return result; - } - - inline T get_absolute_maximum(void) const { - T result = fabs(m_s[0]); - for (uint i = 1; i < N; i++) - result = math::maximum(result, fabs(m_s[i])); - return result; - } - - inline T get_minimum(void) const { - T result = m_s[0]; - for (uint i = 1; i < N; i++) - result = math::minimum(result, m_s[i]); - return result; - } - - inline T get_maximum(void) const { - T result = m_s[0]; - for (uint i = 1; i < N; i++) - result = math::maximum(result, m_s[i]); - return result; - } - - inline vec& remove_unit_direction(const vec& dir) { - T p = *this * dir; - *this -= (p * dir); - return *this; - } - - inline bool all_less(const vec& b) const { - for (uint i = 0; i < N; i++) - if (m_s[i] >= b.m_s[i]) - return false; - return true; - } - - inline bool all_less_equal(const vec& b) const { - for (uint i = 0; i < N; i++) - if (m_s[i] > b.m_s[i]) - return false; - return true; - } - - inline bool all_greater(const vec& b) const { - for (uint i = 0; i < N; i++) - if (m_s[i] <= b.m_s[i]) - return false; - return true; - } - - inline bool all_greater_equal(const vec& b) const { - for (uint i = 0; i < N; i++) - if (m_s[i] < b.m_s[i]) - return false; - return true; - } - - inline vec get_negate_xyz() const { - vec ret; - - ret[0] = -m_s[0]; - if (N >= 2) - ret[1] = -m_s[1]; - if (N >= 3) - ret[2] = -m_s[2]; - - for (uint i = 3; i < N; i++) - ret[i] = m_s[i]; - - return ret; - } - - inline vec& invert() { - for (uint i = 0; i < N; i++) - if (m_s[i] != 0.0f) - m_s[i] = 1.0f / m_s[i]; - return *this; - } - - static inline vec mul_components(const vec& lhs, const vec& rhs) { - vec result; - for (uint i = 0; i < N; i++) - result[i] = lhs.m_s[i] * rhs.m_s[i]; - return 
result; - } - - static inline vec make_axis(uint i) { - vec result; - result.clear(); - result[i] = 1; - return result; - } - - static inline vec component_max(const vec& a, const vec& b) { - vec ret; - for (uint i = 0; i < N; i++) - ret.m_s[i] = math::maximum(a.m_s[i], b.m_s[i]); - return ret; - } - - static inline vec component_min(const vec& a, const vec& b) { - vec ret; - for (uint i = 0; i < N; i++) - ret.m_s[i] = math::minimum(a.m_s[i], b.m_s[i]); - return ret; - } - - static inline vec lerp(const vec& a, const vec& b, float t) { - vec ret; - for (uint i = 0; i < N; i++) - ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; - return ret; - } - - static inline vec make_random(random& r, float l, float h) { - vec result; - for (uint i = 0; i < N; i++) - result[i] = r.frand(l, h); - return result; - } - - static inline vec make_random(fast_random& r, float l, float h) { - vec result; - for (uint i = 0; i < N; i++) - result[i] = r.frand(l, h); - return result; - } - - static inline vec make_random(random& r, const vec& l, const vec& h) { - vec result; - for (uint i = 0; i < N; i++) - result[i] = r.frand(l[i], h[i]); - return result; - } - - static inline vec make_random(fast_random& r, const vec& l, const vec& h) { - vec result; - for (uint i = 0; i < N; i++) - result[i] = r.frand(l[i], h[i]); - return result; - } - - private: - T m_s[N]; -}; - -typedef vec<1, double> vec1D; -typedef vec<2, double> vec2D; -typedef vec<3, double> vec3D; -typedef vec<4, double> vec4D; - -typedef vec<1, float> vec1F; - -typedef vec<2, float> vec2F; -typedef crnlib::vector vec2F_array; - -typedef vec<3, float> vec3F; -typedef crnlib::vector vec3F_array; - -typedef vec<4, float> vec4F; -typedef crnlib::vector vec4F_array; - -typedef vec<2, int> vec2I; -typedef vec<3, int> vec3I; - -typedef vec<2, int16> vec2I16; -typedef vec<3, int16> vec3I16; - -template -struct scalar_type > { - enum { cFlag = true }; - static inline void construct(vec* p) {} - static inline void construct(vec* p, 
const vec& init) { memcpy(p, &init, sizeof(vec)); } - static inline void construct_array(vec*, uint) {} - static inline void destruct(vec*) {} - static inline void destruct_array(vec*, uint) {} -}; +namespace crnlib +{ + template + class CRN_EXPORT vec : public helpers::rel_ops> + { + public: + typedef T scalar_type; + enum { num_elements = N }; + + inline vec() + { + } + + inline vec(eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = other.m_s[i]; + } + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + explicit inline vec(T val) + { + set(val); + } + + inline vec(T val0, T val1) + { + set(val0, val1); + } + + inline vec(T val0, T val1, T val2) + { + set(val0, val1, val2); + } + + inline vec(T val0, T val1, T val2, T val3) + { + set(val0, val1, val2, val3); + } + + inline void clear() + { + if (N > 4) + { + memset(m_s, 0, sizeof(m_s)); + } + else + { + for (uint i = 0; i < N; i++) + { + m_s[i] = 0; + } + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + { + return *this; + } + const uint m = math::minimum(N, ON); + uint i; + for (i = 0; i < m; i++) + { + m_s[i] = static_cast(other[i]); + } + for (; i < N; i++) + { + m_s[i] = 0; + } + return *this; + } + + inline vec& set_component(uint index, T val) + { + CRNLIB_ASSERT(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set(T val) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = val; + } + return *this; + } + + inline vec& set(T val0, T val1) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + for (uint i = 2; i < N; i++) + { + m_s[i] = 0; + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + for (uint i = 3; i < N; i++) + { + m_s[i] = 0; + } + } + } + return 
*this; + } + + inline vec& set(T val0, T val1, T val2, T val3) { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + for (uint i = 4; i < N; i++) + { + m_s[i] = 0; + } + } + } + } + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = pValues[i]; + } + return *this; + } + + template + inline vec& swizzle_set(const vec& other, uint i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint i, uint j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint i, uint j, uint k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint i, uint j, uint k, uint l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = rhs.m_s[i]; + } + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + { + return *this; + } + + uint s = math::minimum(N, O); + + uint i; + for (i = 0; i < s; i++) + { + m_s[i] = static_cast(other[i]); + } + + for (; i < N; i++) + { + m_s[i] = 0; + } + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint i = 0; i < N; i++) + { + if (!(m_s[i] == rhs.m_s[i])) + { + return false; + } + } + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + { + return true; + } + else if (!(m_s[i] == rhs.m_s[i])) + { + return false; + } + } + + return false; + } + + inline T operator[](uint i) const + { + CRNLIB_ASSERT(i < N); + return m_s[i]; + } + + inline T& operator[](uint i) + { + 
CRNLIB_ASSERT(i < N); + return m_s[i]; + } + + inline operator size_t() const + { + return static_cast(fast_hash(this, sizeof(*this))); + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + CRNLIB_ASSUME(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + CRNLIB_ASSUME(N >= 3); + return m_s[2]; + } + inline T get_w(void) const + { + CRNLIB_ASSUME(N >= 4); + return m_s[3]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + CRNLIB_ASSUME(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + CRNLIB_ASSUME(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + CRNLIB_ASSUME(N >= 4); + m_s[3] = v; + return *this; + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint i, uint j) const + { + CRNLIB_ASSERT((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint i, uint j, uint k) const + { + CRNLIB_ASSERT((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint i, uint j, uint k, uint l) const + { + CRNLIB_ASSERT((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + { + result /= result[N - 1]; + } + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint i, uint j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint i, uint j, uint k) const { + return 
vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint i, uint j, uint k, uint l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = -m_s[i]; + } + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint i = 0; i < N; i++) + { + m_s[i] += other.m_s[i]; + } + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint i = 0; i < N; i++) + { + m_s[i] -= other.m_s[i]; + } + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint i = 0; i < N; i++) + { + m_s[i] *= other.m_s[i]; + } + return *this; + } + + inline vec& operator/=(const vec& other) + { + for (uint i = 0; i < N; i++) + { + m_s[i] /= other.m_s[i]; + } + return *this; + } + + inline vec& operator*=(T s) + { + for (uint i = 0; i < N; i++) + { + m_s[i] *= s; + } + return *this; + } + + inline vec& operator/=(T s) + { + for (uint i = 0; i < N; i++) + { + m_s[i] /= s; + } + return *this; + } + + friend inline T operator*(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint i = 1; i < N; i++) + { + result += lhs.m_s[i] * rhs.m_s[i]; + } + return result; + } + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] * val; + } + return result; + } + + friend inline vec operator*(T val, const vec& lhs) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] * val; + } + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + } + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] / val; + } 
+ return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + } + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + { + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + } + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 3); + return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 2); + + if (N == 2) + { + return cross2(a, b); + } + else + { + return cross3(a, b); + } + } + + inline T dot(const vec& rhs) const + { + return *this * rhs; + } + + inline T dot2(const vec& rhs) const + { + CRNLIB_ASSUME(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + CRNLIB_ASSUME(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint i = 1; i < N; i++) + { + sum += m_s[i] * m_s[i]; + } + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + { + break; + } + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; 
+ dist2 += d * d; + } + return sqrt(dist2); + } + + inline vec inverse() const + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; + } + return result; + } + + inline double normalize(const vec* pDefaultVec = NULL) { + double n = m_s[0] * m_s[0]; + for (uint i = 1; i < N; i++) + { + n += m_s[i] * m_s[i]; + } + + if (n != 0) + { + *this *= static_cast((1.0f / sqrt(n))); + } + else if (pDefaultVec) + { + *this = *pDefaultVec; + } + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + CRNLIB_ASSUME(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + { + *this *= static_cast((1.0f / sqrt(n))); + } + else if (pDefaultVec) + { + *this = *pDefaultVec; + } + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = static_cast(math::clamp(m_s[i], l, h)); + } + return *this; + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint i = 0; i < N; i++) + { + m_s[i] = static_cast(math::clamp(m_s[i], l[i], h[i])); + } + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint i = 0; i < N; i++) + { + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + { + return false; + } + } + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint i = 0; i < N; i++) + { + if ((m_s[i] < l) || (m_s[i] > h)) + { + return false; + } + } + return true; + } + 
+ inline uint get_major_axis(void) const { + T m = fabs(m_s[0]); + uint r = 0; + for (uint i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint get_minor_axis(void) const { + T m = fabs(m_s[0]); + uint r = 0; + for (uint i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint i = 1; i < N; i++) + { + result = math::minimum(result, fabs(m_s[i])); + } + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint i = 1; i < N; i++) + { + result = math::maximum(result, fabs(m_s[i])); + } + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint i = 1; i < N; i++) + { + result = math::minimum(result, m_s[i]); + } + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint i = 1; i < N; i++) + { + result = math::maximum(result, m_s[i]); + } + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + T p = *this * dir; + *this -= (p * dir); + return *this; + } + + inline bool all_less(const vec& b) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] >= b.m_s[i]) + { + return false; + } + } + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] > b.m_s[i]) + { + return false; + } + } + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] <= b.m_s[i]) + { + return false; + } + } + return true; + } + + inline bool all_greater_equal(const vec& b) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] < b.m_s[i]) + { + return false; + } + } + return true; + } + + inline vec get_negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + { + ret[1] = -m_s[1]; + } + if (N >= 3) + 
{ + ret[2] = -m_s[2]; + } + for (uint i = 3; i < N; i++) + { + ret[i] = m_s[i]; + } + return ret; + } + + inline vec& invert() + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] != 0.0f) + { + m_s[i] = 1.0f / m_s[i]; + } + } + return *this; + } + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = lhs.m_s[i] * rhs.m_s[i]; + } + return result; + } + + static inline vec make_axis(uint i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint i = 0; i < N; i++) + { + ret.m_s[i] = math::maximum(a.m_s[i], b.m_s[i]); + } + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint i = 0; i < N; i++) + { + ret.m_s[i] = math::minimum(a.m_s[i], b.m_s[i]); + } + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint i = 0; i < N; i++) + { + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + } + return ret; + } + + static inline vec make_random(random& r, float l, float h) + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = r.frand(l, h); + } + return result; + } + + static inline vec make_random(fast_random& r, float l, float h) + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = r.frand(l, h); + } + return result; + } + + static inline vec make_random(random& r, const vec& l, const vec& h) + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = r.frand(l[i], h[i]); + } + return result; + } + + static inline vec make_random(fast_random& r, const vec& l, const vec& h) + { + vec result; + for (uint i = 0; i < N; i++) + { + result[i] = r.frand(l[i], h[i]); + } + return result; + } + + private: + T m_s[N]; + }; + + typedef vec<1, double> vec1D; + typedef vec<2, double> vec2D; + typedef vec<3, double> vec3D; + typedef vec<4, double> vec4D; + + 
typedef vec<1, float> vec1F; + + typedef vec<2, float> vec2F; + typedef crnlib::vector vec2F_array; + + typedef vec<3, float> vec3F; + typedef crnlib::vector vec3F_array; + + typedef vec<4, float> vec4F; + typedef crnlib::vector vec4F_array; + + typedef vec<2, int> vec2I; + typedef vec<3, int> vec3I; + + typedef vec<2, int16> vec2I16; + typedef vec<3, int16> vec3I16; + + template + struct scalar_type> + { + enum { cFlag = true }; + static inline void construct(vec* p) + { + } + + static inline void construct(vec* p, const vec& init) + { + memcpy(p, &init, sizeof(vec)); + } + + static inline void construct_array(vec*, uint) + { + } + + static inline void destruct(vec*) + { + } + + static inline void destruct_array(vec*, uint) + { + } + }; } // namespace crnlib diff --git a/crnlib/crn_vec_interval.h b/crnlib/crn_vec_interval.h index 75bd63b..7d05aba 100644 --- a/crnlib/crn_vec_interval.h +++ b/crnlib/crn_vec_interval.h @@ -1,48 +1,60 @@ // File: crn_vec_interval.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once -#include "crn_vec.h" -namespace crnlib { -template -class vec_interval { - public: - enum { N = T::num_elements }; - typedef typename T::scalar_type scalar_type; - - inline vec_interval(const T& v) { - m_bounds[0] = v; - m_bounds[1] = v; - } - inline vec_interval(const T& low, const T& high) { - m_bounds[0] = low; - m_bounds[1] = high; - } - - inline void clear() { - m_bounds[0].clear(); - m_bounds[1].clear(); - } - - inline const T& operator[](uint i) const { - CRNLIB_ASSERT(i < 2); - return m_bounds[i]; - } - inline T& operator[](uint i) { - CRNLIB_ASSERT(i < 2); - return m_bounds[i]; - } - - private: - T m_bounds[2]; -}; - -typedef vec_interval vec_interval1F; -typedef vec_interval vec_interval2F; -typedef vec_interval vec_interval3F; -typedef vec_interval vec_interval4F; - -typedef vec_interval2F aabb2F; -typedef vec_interval3F aabb3F; +#include "crn_vec.h" +#include "crn_export.h" + +namespace crnlib +{ + template + class 
CRN_EXPORT vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) + { + m_bounds[0] = v; + m_bounds[1] = v; + } + + inline vec_interval(const T& low, const T& high) + { + m_bounds[0] = low; + m_bounds[1] = high; + } + + inline void clear() + { + m_bounds[0].clear(); + m_bounds[1].clear(); + } + + inline const T& operator[](uint i) const + { + CRNLIB_ASSERT(i < 2); + return m_bounds[i]; + } + + inline T& operator[](uint i) + { + CRNLIB_ASSERT(i < 2); + return m_bounds[i]; + } + + private: + T m_bounds[2]; + }; + + typedef vec_interval vec_interval1F; + typedef vec_interval vec_interval2F; + typedef vec_interval vec_interval3F; + typedef vec_interval vec_interval4F; + + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; } // namespace crnlib diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp index b4c3d56..e471a29 100644 --- a/crnlib/crnlib.cpp +++ b/crnlib/crnlib.cpp @@ -1,7 +1,7 @@ // File: crnlib.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" -#include "../inc/crnlib.h" +#include "crnlib.h" #include "crn_comp.h" #include "crn_dds_comp.h" #include "crn_dynamic_stream.h" From 4ae511b21f5374733c53f2558b4aab8709970523 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 6 Sep 2020 23:46:37 -0400 Subject: [PATCH 08/18] Format some files --- crnlib/crn_color.h | 2050 ++++++++++++++++++++--------------- crnlib/crn_file_utils.cpp | 1018 +++++++++-------- crnlib/crn_file_utils.h | 73 +- crnlib/crn_find_files.cpp | 506 +++++---- crnlib/crn_find_files.h | 123 ++- crnlib/crn_hash.cpp | 96 +- crnlib/crn_hash.h | 51 +- crnlib/crn_lzma_codec.cpp | 247 +++-- crnlib/crn_lzma_codec.h | 80 +- crnlib/crn_threading_null.h | 364 ++++--- inc/crnlib.h | 688 ++++++------ inc/dds_defs.h | 279 ++--- 12 files changed, 3120 insertions(+), 2455 deletions(-) diff --git a/crnlib/crn_color.h b/crnlib/crn_color.h index ed76006..2536424 100644 --- 
a/crnlib/crn_color.h +++ b/crnlib/crn_color.h @@ -1,900 +1,1178 @@ // File: crn_color.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_core.h" -namespace crnlib { -template -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT8_MIN, - cMax = cUINT8_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = true, - cFloat = false, - cMin = cINT8_MIN, - cMax = cINT8_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = true, - cFloat = false, - cMin = cINT16_MIN, - cMax = cINT16_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT16_MIN, - cMax = cUINT16_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = true, - cFloat = false, - cMin = cINT32_MIN, - cMax = cINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT32_MIN, - cMax = cUINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = true, - cMin = cINT32_MIN, - cMax = cINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = true, - cMin = cINT32_MIN, - cMax = cINT32_MAX - }; -}; - -template -class color_quad : public helpers::rel_ops > { - template - static inline parameter_type clamp(T v) { - parameter_type result = static_cast(v); - if (!component_traits::cFloat) { - if (v < component_traits::cMin) - result = static_cast(component_traits::cMin); - else if (v > component_traits::cMax) - result = static_cast(component_traits::cMax); - } - return result; - } - -#ifdef _MSC_VER - template <> - static inline parameter_type clamp(int v) { - if (!component_traits::cFloat) { - if ((!component_traits::cSigned) && (component_traits::cMin == 0) && (component_traits::cMax == 0xFF)) 
{ - if (v & 0xFFFFFF00U) - v = (~(static_cast(v) >> 31)) & 0xFF; - } else { - if (v < component_traits::cMin) - v = component_traits::cMin; - else if (v > component_traits::cMax) - v = component_traits::cMax; - } - } - return static_cast(v); - } +namespace crnlib +{ + template + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT8_MIN, + cMax = cUINT8_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT8_MIN, + cMax = cINT8_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT16_MIN, + cMax = cINT16_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT16_MIN, + cMax = cUINT16_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT32_MIN, + cMax = cUINT32_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template <> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template + class color_quad : public helpers::rel_ops> + { + template + static inline parameter_type clamp(T v) + { + parameter_type result = static_cast(v); + if (!component_traits::cFloat) + { + if (v < component_traits::cMin) + { + result = static_cast(component_traits::cMin); + } + else if (v > component_traits::cMax) + { + result = static_cast(component_traits::cMax); + } + } + return result; + } + +#if defined(CRN_CC_MSVC) + template <> + static inline parameter_type clamp(int 
v) + { + if (!component_traits::cFloat) + { + if ((!component_traits::cSigned) && (component_traits::cMin == 0) && (component_traits::cMax == 0xFF)) + { + if (v & 0xFFFFFF00U) + { + v = (~(static_cast(v) >> 31)) & 0xFF; + } + } + else + { + if (v < component_traits::cMin) + { + v = component_traits::cMin; + } + else if (v > component_traits::cMax) + { + v = component_traits::cMax; + } + } + } + return static_cast(v); + } #endif - public: - typedef component_type component_t; - typedef parameter_type parameter_t; - typedef color_quad_component_traits component_traits; + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; + + enum { cNumComps = 4 }; + + union { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; + + component_type c[cNumComps]; + + uint32 m_u32; + }; + + inline color_quad() + { + } + + inline color_quad(eClear): + r(0), g(0), b(0), a(0) + { + } + + inline color_quad(const color_quad& other): + r(other.r), g(other.g), b(other.b), a(other.a) { + } + + explicit inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) + { + set(y, alpha); + } + + inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + explicit inline color_quad(eNoClamp, parameter_type y, parameter_type alpha = component_traits::cMax) + { + set_noclamp_y_alpha(y, alpha); + } + + inline color_quad(eNoClamp, parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set_noclamp_rgba(red, green, blue, alpha); + } + + template + inline color_quad(const color_quad& other): + r(static_cast(clamp(other.r))), g(static_cast(clamp(other.g))), b(static_cast(clamp(other.b))), a(static_cast(clamp(other.a))) + { + } + + inline void clear() + { + r = 0; + g = 0; + b = 0; + a 
= 0; + } + + inline color_quad& operator=(const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } + + inline color_quad& set_rgb(const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + template + inline color_quad& operator=(const color_quad& other) + { + r = static_cast(clamp(other.r)); + g = static_cast(clamp(other.g)); + b = static_cast(clamp(other.b)); + a = static_cast(clamp(other.a)); + return *this; + } + + inline color_quad& operator=(parameter_type y) + { + set(y, component_traits::cMax); + return *this; + } + + inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { + y = clamp(y); + alpha = clamp(alpha); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_y_alpha(parameter_type y, parameter_type alpha = component_traits::cMax) + { + CRNLIB_ASSERT((y >= component_traits::cMin) && (y <= component_traits::cMax)); + CRNLIB_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); + + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) + { + CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); + CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); + CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); + CRNLIB_ASSERT((alpha >= component_traits::cMin) && (alpha 
<= component_traits::cMax)); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) + { + CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); + CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); + CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static inline parameter_type get_min_comp() + { + return component_traits::cMin; + } + static inline parameter_type get_max_comp() + { + return component_traits::cMax; + } + static inline bool get_comps_are_signed() + { + return component_traits::cSigned; + } + + inline component_type operator[](uint i) const + { + CRNLIB_ASSERT(i < cNumComps); + return c[i]; + } + inline component_type& operator[](uint i) + { + CRNLIB_ASSERT(i < cNumComps); + return c[i]; + } + + inline color_quad& set_component(uint i, parameter_type f) + { + CRNLIB_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + inline color_quad& set_grayscale(parameter_t l) + { + component_t x = static_cast(clamp(l)); + c[0] = x; + c[1] = x; + c[2] = x; + return *this; + } + + inline color_quad& clamp(const color_quad& l, const color_quad& h) + { + for (uint i = 0; i < cNumComps; i++) + { + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + } + return *this; + } + + inline color_quad& clamp(parameter_type l, parameter_type h) + { + for (uint i = 0; i < cNumComps; i++) + { + c[i] = static_cast(math::clamp(c[i], l, h)); + } + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); + } + + // Returns REC 709 luma. 
+ inline parameter_type get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + // Beware of endianness! + inline uint32 get_uint32() const + { + CRNLIB_ASSERT(sizeof(*this) == sizeof(uint32)); + return *reinterpret_cast(this); + } + + // Beware of endianness! + inline uint64 get_uint64() const + { + CRNLIB_ASSERT(sizeof(*this) == sizeof(uint64)); + return *reinterpret_cast(this); + } + + inline uint squared_distance(const color_quad& c, bool alpha = true) const + { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator==(const color_quad& rhs) const + { + if (sizeof(color_quad) == sizeof(uint32)) + { + return m_u32 == rhs.m_u32; + } + else + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } + } + + inline bool operator<(const color_quad& rhs) const + { + for (uint i = 0; i < cNumComps; i++) + { + if (c[i] < rhs.c[i]) + { + return true; + } + else if (!(c[i] == rhs.c[i])) + { + return false; + } + } + return false; + } + + color_quad& operator+=(const color_quad& other) + { + for (uint i = 0; i < 4; i++) + { + c[i] = static_cast(clamp(c[i] + other.c[i])); + } + return *this; + } + + color_quad& operator-=(const color_quad& other) + { + for (uint i = 0; i < 4; i++) + { + c[i] = static_cast(clamp(c[i] - other.c[i])); + } + return *this; + } + + color_quad& operator*=(parameter_type v) + { + for (uint i = 0; i < 4; i++) + { + c[i] = static_cast(clamp(c[i] * v)); + } + return *this; + } + + color_quad& operator/=(parameter_type v) + { + for (uint i = 0; i < 4; i++) + { + c[i] = static_cast(c[i] / v); + } + return *this; + } + + color_quad get_swizzled(uint x, uint y, uint z, uint w) const + { + CRNLIB_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } + 
+ friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result += rhs; + return result; + } + + friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result -= rhs; + return result; + } + + friend color_quad operator*(const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result *= v; + return result; + } + + friend color_quad operator/(const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result /= v; + return result; + } + + friend color_quad operator*(parameter_type v, const color_quad& rhs) { + color_quad result(rhs); + result *= v; + return result; + } + + inline bool is_grayscale() const + { + return (c[0] == c[1]) && (c[1] == c[2]); + } + + uint get_min_component_index(bool alpha = true) const + { + uint index = 0; + uint limit = alpha ? cNumComps : (cNumComps - 1); + for (uint i = 1; i < limit; i++) + { + if (c[i] < c[index]) + { + index = i; + } + } + return index; + } + + uint get_max_component_index(bool alpha = true) const + { + uint index = 0; + uint limit = alpha ? 
cNumComps : (cNumComps - 1); + for (uint i = 1; i < limit; i++) + { + if (c[i] > c[index]) + { + index = i; + } + } + return index; + } + + operator size_t() const + { + return (size_t)fast_hash(this, sizeof(*this)); + } + + void get_float4(float* pDst) + { + for (uint i = 0; i < 4; i++) + { + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + } - enum { cNumComps = 4 }; + void get_float3(float* pDst) + { + for (uint i = 0; i < 3; i++) + { + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + } + + static color_quad component_min(const color_quad& a, const color_quad& b) + { + color_quad result; + for (uint i = 0; i < 4; i++) + { + result[i] = static_cast(math::minimum(a[i], b[i])); + } + return result; + } + + static color_quad component_max(const color_quad& a, const color_quad& b) + { + color_quad result; + for (uint i = 0; i < 4; i++) + { + result[i] = static_cast(math::maximum(a[i], b[i])); + } + return result; + } + + static color_quad make_black() + { + return color_quad(0, 0, 0, component_traits::cMax); + } - union { - struct + static color_quad make_white() + { + return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } + }; // class color_quad + + template + struct scalar_type> { - component_type r; - component_type g; - component_type b; - component_type a; + enum { cFlag = true }; + static inline void construct(color_quad* p) + { + } + static inline void construct(color_quad* p, const color_quad& init) + { + memcpy(p, &init, sizeof(color_quad)); + } + static inline void construct_array(color_quad*, uint) + { + } + static inline void destruct(color_quad*) + { + } + static inline void destruct_array(color_quad*, uint) + { + } }; - component_type c[cNumComps]; - - uint32 m_u32; - }; - - inline color_quad() { - } - - inline color_quad(eClear) - : r(0), g(0), b(0), a(0) { - } - - 
inline color_quad(const color_quad& other) - : r(other.r), g(other.g), b(other.b), a(other.a) { - } - - explicit inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) { - set(y, alpha); - } - - inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { - set(red, green, blue, alpha); - } - - explicit inline color_quad(eNoClamp, parameter_type y, parameter_type alpha = component_traits::cMax) { - set_noclamp_y_alpha(y, alpha); - } - - inline color_quad(eNoClamp, parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { - set_noclamp_rgba(red, green, blue, alpha); - } - - template - inline color_quad(const color_quad& other) - : r(static_cast(clamp(other.r))), g(static_cast(clamp(other.g))), b(static_cast(clamp(other.b))), a(static_cast(clamp(other.a))) { - } - - inline void clear() { - r = 0; - g = 0; - b = 0; - a = 0; - } - - inline color_quad& operator=(const color_quad& other) { - r = other.r; - g = other.g; - b = other.b; - a = other.a; - return *this; - } - - inline color_quad& set_rgb(const color_quad& other) { - r = other.r; - g = other.g; - b = other.b; - return *this; - } - - template - inline color_quad& operator=(const color_quad& other) { - r = static_cast(clamp(other.r)); - g = static_cast(clamp(other.g)); - b = static_cast(clamp(other.b)); - a = static_cast(clamp(other.a)); - return *this; - } - - inline color_quad& operator=(parameter_type y) { - set(y, component_traits::cMax); - return *this; - } - - inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { - y = clamp(y); - alpha = clamp(alpha); - r = static_cast(y); - g = static_cast(y); - b = static_cast(y); - a = static_cast(alpha); - return *this; - } - - inline color_quad& set_noclamp_y_alpha(parameter_type y, parameter_type alpha = component_traits::cMax) { - CRNLIB_ASSERT((y >= component_traits::cMin) && (y <= 
component_traits::cMax)); - CRNLIB_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); - - r = static_cast(y); - g = static_cast(y); - b = static_cast(y); - a = static_cast(alpha); - return *this; - } - - inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { - r = static_cast(clamp(red)); - g = static_cast(clamp(green)); - b = static_cast(clamp(blue)); - a = static_cast(clamp(alpha)); - return *this; - } - - inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) { - CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); - CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); - CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); - CRNLIB_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); - - r = static_cast(red); - g = static_cast(green); - b = static_cast(blue); - a = static_cast(alpha); - return *this; - } - - inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) { - CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); - CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); - CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); - - r = static_cast(red); - g = static_cast(green); - b = static_cast(blue); - return *this; - } - - static inline parameter_type get_min_comp() { return component_traits::cMin; } - static inline parameter_type get_max_comp() { return component_traits::cMax; } - static inline bool get_comps_are_signed() { return component_traits::cSigned; } - - inline component_type operator[](uint i) const { - CRNLIB_ASSERT(i < cNumComps); - return c[i]; - } - inline component_type& operator[](uint i) { - CRNLIB_ASSERT(i < 
cNumComps); - return c[i]; - } - - inline color_quad& set_component(uint i, parameter_type f) { - CRNLIB_ASSERT(i < cNumComps); - - c[i] = static_cast(clamp(f)); - - return *this; - } - - inline color_quad& set_grayscale(parameter_t l) { - component_t x = static_cast(clamp(l)); - c[0] = x; - c[1] = x; - c[2] = x; - return *this; - } - - inline color_quad& clamp(const color_quad& l, const color_quad& h) { - for (uint i = 0; i < cNumComps; i++) - c[i] = static_cast(math::clamp(c[i], l[i], h[i])); - return *this; - } - - inline color_quad& clamp(parameter_type l, parameter_type h) { - for (uint i = 0; i < cNumComps; i++) - c[i] = static_cast(math::clamp(c[i], l, h)); - return *this; - } - - // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). - inline parameter_type get_luma() const { - return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); - } - - // Returns REC 709 luma. - inline parameter_type get_luma_rec709() const { - return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); - } - - // Beware of endianness! - inline uint32 get_uint32() const { - CRNLIB_ASSERT(sizeof(*this) == sizeof(uint32)); - return *reinterpret_cast(this); - } - - // Beware of endianness! - inline uint64 get_uint64() const { - CRNLIB_ASSERT(sizeof(*this) == sizeof(uint64)); - return *reinterpret_cast(this); - } - - inline uint squared_distance(const color_quad& c, bool alpha = true) const { - return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? 
math::square(a - c.a) : 0); - } - - inline bool rgb_equals(const color_quad& rhs) const { - return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); - } - - inline bool operator==(const color_quad& rhs) const { - if (sizeof(color_quad) == sizeof(uint32)) - return m_u32 == rhs.m_u32; - else - return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); - } - - inline bool operator<(const color_quad& rhs) const { - for (uint i = 0; i < cNumComps; i++) { - if (c[i] < rhs.c[i]) - return true; - else if (!(c[i] == rhs.c[i])) - return false; - } - return false; - } - - color_quad& operator+=(const color_quad& other) { - for (uint i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] + other.c[i])); - return *this; - } - - color_quad& operator-=(const color_quad& other) { - for (uint i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] - other.c[i])); - return *this; - } - - color_quad& operator*=(parameter_type v) { - for (uint i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] * v)); - return *this; - } - - color_quad& operator/=(parameter_type v) { - for (uint i = 0; i < 4; i++) - c[i] = static_cast(c[i] / v); - return *this; - } - - color_quad get_swizzled(uint x, uint y, uint z, uint w) const { - CRNLIB_ASSERT((x | y | z | w) < 4); - return color_quad(c[x], c[y], c[z], c[w]); - } - - friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) { - color_quad result(lhs); - result += rhs; - return result; - } - - friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) { - color_quad result(lhs); - result -= rhs; - return result; - } - - friend color_quad operator*(const color_quad& lhs, parameter_type v) { - color_quad result(lhs); - result *= v; - return result; - } - - friend color_quad operator/(const color_quad& lhs, parameter_type v) { - color_quad result(lhs); - result /= v; - return result; - } - - friend color_quad operator*(parameter_type v, const color_quad& rhs) { - color_quad result(rhs); - result *= v; - return result; - } - 
- inline bool is_grayscale() const { - return (c[0] == c[1]) && (c[1] == c[2]); - } - - uint get_min_component_index(bool alpha = true) const { - uint index = 0; - uint limit = alpha ? cNumComps : (cNumComps - 1); - for (uint i = 1; i < limit; i++) - if (c[i] < c[index]) - index = i; - return index; - } - - uint get_max_component_index(bool alpha = true) const { - uint index = 0; - uint limit = alpha ? cNumComps : (cNumComps - 1); - for (uint i = 1; i < limit; i++) - if (c[i] > c[index]) - index = i; - return index; - } - - operator size_t() const { - return (size_t)fast_hash(this, sizeof(*this)); - } - - void get_float4(float* pDst) { - for (uint i = 0; i < 4; i++) - pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); - } - - void get_float3(float* pDst) { - for (uint i = 0; i < 3; i++) - pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); - } - - static color_quad component_min(const color_quad& a, const color_quad& b) { - color_quad result; - for (uint i = 0; i < 4; i++) - result[i] = static_cast(math::minimum(a[i], b[i])); - return result; - } - - static color_quad component_max(const color_quad& a, const color_quad& b) { - color_quad result; - for (uint i = 0; i < 4; i++) - result[i] = static_cast(math::maximum(a[i], b[i])); - return result; - } - - static color_quad make_black() { - return color_quad(0, 0, 0, component_traits::cMax); - } - - static color_quad make_white() { - return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); - } -}; // class color_quad - -template -struct scalar_type > { - enum { cFlag = true }; - static inline void construct(color_quad* p) {} - static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } - static inline void construct_array(color_quad*, uint) {} - static inline void destruct(color_quad*) {} - static inline 
void destruct_array(color_quad*, uint) {} -}; - -typedef color_quad color_quad_u8; -typedef color_quad color_quad_i8; -typedef color_quad color_quad_i16; -typedef color_quad color_quad_u16; -typedef color_quad color_quad_i32; -typedef color_quad color_quad_u32; -typedef color_quad color_quad_f; -typedef color_quad color_quad_d; - -namespace color { -inline uint elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1) { - int dr = (int)r0 - (int)r1; - int dg = (int)g0 - (int)g1; - int db = (int)b0 - (int)b1; - - return static_cast(dr * dr + dg * dg + db * db); -} - -inline uint elucidian_distance(uint r0, uint g0, uint b0, uint a0, uint r1, uint g1, uint b1, uint a1) { - int dr = (int)r0 - (int)r1; - int dg = (int)g0 - (int)g1; - int db = (int)b0 - (int)b1; - int da = (int)a0 - (int)a1; - - return static_cast(dr * dr + dg * dg + db * db + da * da); -} - -inline uint elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, bool alpha) { - if (alpha) - return elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); - else - return elucidian_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); -} - -inline uint weighted_elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1, uint wr, uint wg, uint wb) { - int dr = (int)r0 - (int)r1; - int dg = (int)g0 - (int)g1; - int db = (int)b0 - (int)b1; - - return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db)); -} - -inline uint weighted_elucidian_distance( - uint r0, uint g0, uint b0, uint a0, - uint r1, uint g1, uint b1, uint a1, - uint wr, uint wg, uint wb, uint wa) { - int dr = (int)r0 - (int)r1; - int dg = (int)g0 - (int)g1; - int db = (int)b0 - (int)b1; - int da = (int)a0 - (int)a1; - - return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db) + (wa * da * da)); -} - -inline uint weighted_elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, uint wr, uint wg, uint wb, uint wa) { - return weighted_elucidian_distance(c0.r, c0.g, c0.b, 
c0.a, c1.r, c1.g, c1.b, c1.a, wr, wg, wb, wa); -} - -//const uint cRWeight = 8;//24; -//const uint cGWeight = 24;//73; -//const uint cBWeight = 1;//3; - -const uint cRWeight = 8; //24; -const uint cGWeight = 25; //73; -const uint cBWeight = 1; //3; - -inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { - if (perceptual) { - if (alpha) - return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, cRWeight + cGWeight + cBWeight); - else - return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, 0); - } else - return elucidian_distance(e1, e2, alpha); -} - -inline uint peak_color_error(const color_quad_u8& e1, const color_quad_u8& e2) { - return math::maximum(labs(e1[0] - e2[0]), labs(e1[1] - e2[1]), labs(e1[2] - e2[2])); - //return math::square(e1[0] - e2[0]) + math::square(e1[1] - e2[1]) + math::square(e1[2] - e2[2]); -} - -// y - [0,255] -// co - [-127,127] -// cg - [-126,127] -inline void RGB_to_YCoCg(int r, int g, int b, int& y, int& co, int& cg) { - y = (r >> 2) + (g >> 1) + (b >> 2); - co = (r >> 1) - (b >> 1); - cg = -(r >> 2) + (g >> 1) - (b >> 2); -} - -inline void YCoCg_to_RGB(int y, int co, int cg, int& r, int& g, int& b) { - int tmp = y - cg; - g = y + cg; - r = tmp + co; - b = tmp - co; -} - -static inline uint8 clamp_component(int i) { - if (static_cast(i) > 255U) { - if (i < 0) - i = 0; - else if (i > 255) - i = 255; - } - return static_cast(i); -} - -// RGB->YCbCr constants, scaled by 2^16 -const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; -// YCbCr->RGB constants, scaled by 2^16 -const int R_CR = 91881, B_CB = 116130, G_CR = -46802, G_CB = -22554; - -inline int RGB_to_Y(const color_quad_u8& rgb) { - const int r = rgb[0], g = rgb[1], b = rgb[2]; - return (r * YR + g * YG + b * YB + 32768) >> 16; -} - -// RGB to YCbCr (same as JFIF JPEG). 
-// Odd default biases account for 565 endpoint packing. -inline void RGB_to_YCC(color_quad_u8& ycc, const color_quad_u8& rgb, int cb_bias = 123, int cr_bias = 125) { - const int r = rgb[0], g = rgb[1], b = rgb[2]; - ycc.a = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); - ycc.r = clamp_component(cb_bias + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); - ycc.g = clamp_component(cr_bias + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); - ycc.b = 0; -} - -// YCbCr to RGB. -// Odd biases account for 565 endpoint packing. -inline void YCC_to_RGB(color_quad_u8& rgb, const color_quad_u8& ycc, int cb_bias = 123, int cr_bias = 125) { - const int y = ycc.a; - const int cb = ycc.r - cb_bias; - const int cr = ycc.g - cr_bias; - rgb.r = clamp_component(y + ((R_CR * cr + 32768) >> 16)); - rgb.g = clamp_component(y + ((G_CR * cr + G_CB * cb + 32768) >> 16)); - rgb.b = clamp_component(y + ((B_CB * cb + 32768) >> 16)); - rgb.a = 255; -} - -// Float RGB->YCbCr constants -const float S = 1.0f / 65536.0f; -const float F_YR = S * YR, F_YG = S * YG, F_YB = S * YB, F_CB_R = S * CB_R, F_CB_G = S * CB_G, F_CB_B = S * CB_B, F_CR_R = S * CR_R, F_CR_G = S * CR_G, F_CR_B = S * CR_B; -// Float YCbCr->RGB constants -const float F_R_CR = S * R_CR, F_B_CB = S * B_CB, F_G_CR = S * G_CR, F_G_CB = S * G_CB; - -inline void RGB_to_YCC_float(color_quad_f& ycc, const color_quad_u8& rgb) { - const int r = rgb[0], g = rgb[1], b = rgb[2]; - ycc.a = r * F_YR + g * F_YG + b * F_YB; - ycc.r = r * F_CB_R + g * F_CB_G + b * F_CB_B; - ycc.g = r * F_CR_R + g * F_CR_G + b * F_CR_B; - ycc.b = 0; -} - -inline void YCC_float_to_RGB(color_quad_u8& rgb, const color_quad_f& ycc) { - float y = ycc.a, cb = ycc.r, cr = ycc.g; - rgb.r = color::clamp_component(static_cast(.5f + y + F_R_CR * cr)); - rgb.g = color::clamp_component(static_cast(.5f + y + F_G_CR * cr + F_G_CB * cb)); - rgb.b = color::clamp_component(static_cast(.5f + y + F_B_CB * cb)); - rgb.a = 255; -} - -} // namespace color - -// This class 
purposely trades off speed for extremely flexibility. It can handle any component swizzle, any pixel type from 1-4 components and 1-32 bits/component, -// any pixel size between 1-16 bytes/pixel, any pixel stride, any color_quad data type (signed/unsigned/float 8/16/32 bits/component), and scaled/non-scaled components. -// On the downside, it's freaking slow. -class pixel_packer { - public: - pixel_packer() { - clear(); - } - - pixel_packer(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) { - init(num_comps, bits_per_comp, pixel_stride, reversed); - } - - pixel_packer(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) { - init(pComp_map, pixel_stride, force_comp_size); - } - - void clear() { - utils::zero_this(this); - } - - inline bool is_valid() const { return m_pixel_stride > 0; } - - inline uint get_pixel_stride() const { return m_pixel_stride; } - void set_pixel_stride(uint n) { m_pixel_stride = n; } - - uint get_num_comps() const { return m_num_comps; } - uint get_comp_size(uint index) const { - CRNLIB_ASSERT(index < 4); - return m_comp_size[index]; - } - uint get_comp_ofs(uint index) const { - CRNLIB_ASSERT(index < 4); - return m_comp_ofs[index]; - } - uint get_comp_max(uint index) const { - CRNLIB_ASSERT(index < 4); - return m_comp_max[index]; - } - bool get_rgb_is_luma() const { return m_rgb_is_luma; } - - template - const void* unpack(const void* p, color_quad_type& color, bool rescale = true) const { - const uint8* pSrc = static_cast(p); - - for (uint i = 0; i < 4; i++) { - const uint comp_size = m_comp_size[i]; - if (!comp_size) { - if (color_quad_type::component_traits::cFloat) - color[i] = static_cast((i == 3) ? 1 : 0); - else - color[i] = static_cast((i == 3) ? 
color_quad_type::component_traits::cMax : 0); - continue; - } - - uint n = 0, dst_bit_ofs = 0; - uint src_bit_ofs = m_comp_ofs[i]; - while (dst_bit_ofs < comp_size) { - const uint byte_bit_ofs = src_bit_ofs & 7; - n |= ((pSrc[src_bit_ofs >> 3] >> byte_bit_ofs) << dst_bit_ofs); - - const uint bits_read = 8 - byte_bit_ofs; - src_bit_ofs += bits_read; - dst_bit_ofs += bits_read; - } - - const uint32 mx = m_comp_max[i]; - n &= mx; - - const uint32 h = static_cast(color_quad_type::component_traits::cMax); - - if (color_quad_type::component_traits::cFloat) - color.set_component(i, static_cast(n)); - else if (rescale) - color.set_component(i, static_cast((static_cast(n) * h + (mx >> 1U)) / mx)); - else if (color_quad_type::component_traits::cSigned) - color.set_component(i, static_cast(math::minimum(n, h))); - else - color.set_component(i, static_cast(n)); - } - - if (m_rgb_is_luma) { - color[0] = color[1]; - color[2] = color[1]; - } - - return pSrc + m_pixel_stride; - } - - template - void* pack(const color_quad_type& color, void* p, bool rescale = true) const { - uint8* pDst = static_cast(p); - - for (uint i = 0; i < 4; i++) { - const uint comp_size = m_comp_size[i]; - if (!comp_size) - continue; - - uint32 mx = m_comp_max[i]; - - uint32 n; - if (color_quad_type::component_traits::cFloat) { - typename color_quad_type::parameter_t t = color[i]; - if (t < 0.0f) - n = 0; - else if (t > static_cast(mx)) - n = mx; - else - n = math::minimum(static_cast(floor(t + .5f)), mx); - } else if (rescale) { - if (color_quad_type::component_traits::cSigned) - n = math::maximum(static_cast(color[i]), 0); - else - n = static_cast(color[i]); - - const uint32 h = static_cast(color_quad_type::component_traits::cMax); - n = static_cast((static_cast(n) * mx + (h >> 1)) / h); - } else { - if (color_quad_type::component_traits::cSigned) - n = math::minimum(static_cast(math::maximum(static_cast(color[i]), 0)), mx); - else - n = math::minimum(static_cast(color[i]), mx); - } - - uint src_bit_ofs = 
0; - uint dst_bit_ofs = m_comp_ofs[i]; - while (src_bit_ofs < comp_size) { - const uint cur_byte_bit_ofs = (dst_bit_ofs & 7); - const uint cur_byte_bits = 8 - cur_byte_bit_ofs; - - uint byte_val = pDst[dst_bit_ofs >> 3]; - uint bit_mask = (mx << cur_byte_bit_ofs) & 0xFF; - byte_val &= ~bit_mask; - byte_val |= (n << cur_byte_bit_ofs); - pDst[dst_bit_ofs >> 3] = static_cast(byte_val); - - mx >>= cur_byte_bits; - n >>= cur_byte_bits; - - dst_bit_ofs += cur_byte_bits; - src_bit_ofs += cur_byte_bits; - } - } - - return pDst + m_pixel_stride; - } - - bool init(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) { - clear(); - - if ((num_comps < 1) || (num_comps > 4) || (bits_per_comp < 1) || (bits_per_comp > 32)) { - CRNLIB_ASSERT(0); - return false; - } - - for (uint i = 0; i < num_comps; i++) { - m_comp_size[i] = bits_per_comp; - m_comp_ofs[i] = i * bits_per_comp; - if (reversed) - m_comp_ofs[i] = ((num_comps - 1) * bits_per_comp) - m_comp_ofs[i]; - } - - for (uint i = 0; i < 4; i++) - m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); - - m_pixel_stride = (pixel_stride >= 0) ? pixel_stride : (num_comps * bits_per_comp + 7) / 8; - - return true; - } - - // Format examples: - // R16G16B16 - // B5G6R5 - // B5G5R5x1 - // Y8A8 - // A8R8G8B8 - // First component is at LSB in memory. Assumes unsigned integer components, 1-32bits each. 
- bool init(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) { - clear(); - - uint cur_bit_ofs = 0; - - while (*pComp_map) { - char c = *pComp_map++; - - int comp_index = -1; - if (c == 'R') - comp_index = 0; - else if (c == 'G') - comp_index = 1; - else if (c == 'B') - comp_index = 2; - else if (c == 'A') - comp_index = 3; - else if (c == 'Y') - comp_index = 4; - else if (c != 'x') - return false; - - uint comp_size = 0; - - uint n = *pComp_map; - if ((n >= '0') && (n <= '9')) { - comp_size = n - '0'; - pComp_map++; - - n = *pComp_map; - if ((n >= '0') && (n <= '9')) { - comp_size = (comp_size * 10) + (n - '0'); - pComp_map++; - } - } - - if (force_comp_size != -1) - comp_size = force_comp_size; - - if ((!comp_size) || (comp_size > 32)) - return false; - - if (comp_index == 4) { - if (m_comp_size[0] || m_comp_size[1] || m_comp_size[2]) - return false; - - //m_comp_ofs[0] = m_comp_ofs[1] = m_comp_ofs[2] = cur_bit_ofs; - //m_comp_size[0] = m_comp_size[1] = m_comp_size[2] = comp_size; - m_comp_ofs[1] = cur_bit_ofs; - m_comp_size[1] = comp_size; - m_rgb_is_luma = true; - m_num_comps++; - } else if (comp_index >= 0) { - if (m_comp_size[comp_index]) - return false; - - m_comp_ofs[comp_index] = cur_bit_ofs; - m_comp_size[comp_index] = comp_size; - m_num_comps++; - } - - cur_bit_ofs += comp_size; - } - - for (uint i = 0; i < 4; i++) - m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); - - if (pixel_stride >= 0) - m_pixel_stride = pixel_stride; - else - m_pixel_stride = (cur_bit_ofs + 7) / 8; - return true; - } - - private: - uint m_pixel_stride; - uint m_num_comps; - uint m_comp_size[4]; - uint m_comp_ofs[4]; - uint m_comp_max[4]; - bool m_rgb_is_luma; -}; + typedef color_quad color_quad_u8; + typedef color_quad color_quad_i8; + typedef color_quad color_quad_i16; + typedef color_quad color_quad_u16; + typedef color_quad color_quad_i32; + typedef color_quad color_quad_u32; + typedef color_quad color_quad_f; + typedef color_quad 
color_quad_d; + + namespace color + { + inline uint elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + + return static_cast(dr * dr + dg * dg + db * db); + } + + inline uint elucidian_distance(uint r0, uint g0, uint b0, uint a0, uint r1, uint g1, uint b1, uint a1) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + int da = (int)a0 - (int)a1; + + return static_cast(dr * dr + dg * dg + db * db + da * da); + } + + inline uint elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, bool alpha) + { + if (alpha) + { + return elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); + } + else + { + return elucidian_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); + } + } + + inline uint weighted_elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1, uint wr, uint wg, uint wb) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + + return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db)); + } + + inline uint weighted_elucidian_distance( + uint r0, uint g0, uint b0, uint a0, + uint r1, uint g1, uint b1, uint a1, + uint wr, uint wg, uint wb, uint wa) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + int da = (int)a0 - (int)a1; + + return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db) + (wa * da * da)); + } + + inline uint weighted_elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, uint wr, uint wg, uint wb, uint wa) + { + return weighted_elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a, wr, wg, wb, wa); + } + + //const uint cRWeight = 8;//24; + //const uint cGWeight = 24;//73; + //const uint cBWeight = 1;//3; + + const uint cRWeight = 8; //24; + const uint cGWeight = 25; //73; + const uint cBWeight = 1; //3; + + inline uint 
color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) + { + if (perceptual) + { + if (alpha) + { + return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, cRWeight + cGWeight + cBWeight); + } + else + { + return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, 0); + } + } + else + { + return elucidian_distance(e1, e2, alpha); + } + } + + inline uint peak_color_error(const color_quad_u8& e1, const color_quad_u8& e2) + { + return math::maximum(labs(e1[0] - e2[0]), labs(e1[1] - e2[1]), labs(e1[2] - e2[2])); + //return math::square(e1[0] - e2[0]) + math::square(e1[1] - e2[1]) + math::square(e1[2] - e2[2]); + } + + // y - [0,255] + // co - [-127,127] + // cg - [-126,127] + inline void RGB_to_YCoCg(int r, int g, int b, int& y, int& co, int& cg) + { + y = (r >> 2) + (g >> 1) + (b >> 2); + co = (r >> 1) - (b >> 1); + cg = -(r >> 2) + (g >> 1) - (b >> 2); + } + + inline void YCoCg_to_RGB(int y, int co, int cg, int& r, int& g, int& b) + { + int tmp = y - cg; + g = y + cg; + r = tmp + co; + b = tmp - co; + } + + static inline uint8 clamp_component(int i) + { + if (static_cast(i) > 255U) + { + if (i < 0) + { + i = 0; + } + else if (i > 255) + { + i = 255; + } + } + return static_cast(i); + } + + // RGB->YCbCr constants, scaled by 2^16 + const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; + // YCbCr->RGB constants, scaled by 2^16 + const int R_CR = 91881, B_CB = 116130, G_CR = -46802, G_CB = -22554; + + inline int RGB_to_Y(const color_quad_u8& rgb) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + return (r * YR + g * YG + b * YB + 32768) >> 16; + } + + // RGB to YCbCr (same as JFIF JPEG). + // Odd default biases account for 565 endpoint packing. 
+ inline void RGB_to_YCC(color_quad_u8& ycc, const color_quad_u8& rgb, int cb_bias = 123, int cr_bias = 125) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + ycc.a = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + ycc.r = clamp_component(cb_bias + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + ycc.g = clamp_component(cr_bias + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + ycc.b = 0; + } + + // YCbCr to RGB. + // Odd biases account for 565 endpoint packing. + inline void YCC_to_RGB(color_quad_u8& rgb, const color_quad_u8& ycc, int cb_bias = 123, int cr_bias = 125) + { + const int y = ycc.a; + const int cb = ycc.r - cb_bias; + const int cr = ycc.g - cr_bias; + rgb.r = clamp_component(y + ((R_CR * cr + 32768) >> 16)); + rgb.g = clamp_component(y + ((G_CR * cr + G_CB * cb + 32768) >> 16)); + rgb.b = clamp_component(y + ((B_CB * cb + 32768) >> 16)); + rgb.a = 255; + } + + // Float RGB->YCbCr constants + const float S = 1.0f / 65536.0f; + const float F_YR = S * YR, F_YG = S * YG, F_YB = S * YB, F_CB_R = S * CB_R, F_CB_G = S * CB_G, F_CB_B = S * CB_B, F_CR_R = S * CR_R, F_CR_G = S * CR_G, F_CR_B = S * CR_B; + // Float YCbCr->RGB constants + const float F_R_CR = S * R_CR, F_B_CB = S * B_CB, F_G_CR = S * G_CR, F_G_CB = S * G_CB; + + inline void RGB_to_YCC_float(color_quad_f& ycc, const color_quad_u8& rgb) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + ycc.a = r * F_YR + g * F_YG + b * F_YB; + ycc.r = r * F_CB_R + g * F_CB_G + b * F_CB_B; + ycc.g = r * F_CR_R + g * F_CR_G + b * F_CR_B; + ycc.b = 0; + } + + inline void YCC_float_to_RGB(color_quad_u8& rgb, const color_quad_f& ycc) + { + float y = ycc.a, cb = ycc.r, cr = ycc.g; + rgb.r = color::clamp_component(static_cast(.5f + y + F_R_CR * cr)); + rgb.g = color::clamp_component(static_cast(.5f + y + F_G_CR * cr + F_G_CB * cb)); + rgb.b = color::clamp_component(static_cast(.5f + y + F_B_CB * cb)); + rgb.a = 255; + } + + } // namespace color + + // This class purposely trades off speed for 
extremely flexibility. It can handle any component swizzle, any pixel type from 1-4 components and 1-32 bits/component, + // any pixel size between 1-16 bytes/pixel, any pixel stride, any color_quad data type (signed/unsigned/float 8/16/32 bits/component), and scaled/non-scaled components. + // On the downside, it's freaking slow. + class pixel_packer { + public: + pixel_packer() + { + clear(); + } + + pixel_packer(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) + { + init(num_comps, bits_per_comp, pixel_stride, reversed); + } + + pixel_packer(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) + { + init(pComp_map, pixel_stride, force_comp_size); + } + + void clear() + { + utils::zero_this(this); + } + + inline bool is_valid() const + { + return m_pixel_stride > 0; + } + + inline uint get_pixel_stride() const + { + return m_pixel_stride; + } + void set_pixel_stride(uint n) + { + m_pixel_stride = n; + } + + uint get_num_comps() const + { + return m_num_comps; + } + uint get_comp_size(uint index) const + { + CRNLIB_ASSERT(index < 4); + return m_comp_size[index]; + } + uint get_comp_ofs(uint index) const + { + CRNLIB_ASSERT(index < 4); + return m_comp_ofs[index]; + } + uint get_comp_max(uint index) const + { + CRNLIB_ASSERT(index < 4); + return m_comp_max[index]; + } + bool get_rgb_is_luma() const + { + return m_rgb_is_luma; + } + + template + const void* unpack(const void* p, color_quad_type& color, bool rescale = true) const + { + const uint8* pSrc = static_cast(p); + + for (uint i = 0; i < 4; i++) + { + const uint comp_size = m_comp_size[i]; + if (!comp_size) + { + if (color_quad_type::component_traits::cFloat) + { + color[i] = static_cast((i == 3) ? 1 : 0); + } + else + { + color[i] = static_cast((i == 3) ? 
color_quad_type::component_traits::cMax : 0); + } + continue; + } + + uint n = 0, dst_bit_ofs = 0; + uint src_bit_ofs = m_comp_ofs[i]; + while (dst_bit_ofs < comp_size) + { + const uint byte_bit_ofs = src_bit_ofs & 7; + n |= ((pSrc[src_bit_ofs >> 3] >> byte_bit_ofs) << dst_bit_ofs); + + const uint bits_read = 8 - byte_bit_ofs; + src_bit_ofs += bits_read; + dst_bit_ofs += bits_read; + } + + const uint32 mx = m_comp_max[i]; + n &= mx; + + const uint32 h = static_cast(color_quad_type::component_traits::cMax); + + if (color_quad_type::component_traits::cFloat) + { + color.set_component(i, static_cast(n)); + } + else if (rescale) + { + color.set_component(i, static_cast((static_cast(n) * h + (mx >> 1U)) / mx)); + } + else if (color_quad_type::component_traits::cSigned) + { + color.set_component(i, static_cast(math::minimum(n, h))); + } + else + { + color.set_component(i, static_cast(n)); + } + } + + if (m_rgb_is_luma) + { + color[0] = color[1]; + color[2] = color[1]; + } + + return pSrc + m_pixel_stride; + } + + template + void* pack(const color_quad_type& color, void* p, bool rescale = true) const + { + uint8* pDst = static_cast(p); + + for (uint i = 0; i < 4; i++) + { + const uint comp_size = m_comp_size[i]; + if (!comp_size) + { + continue; + } + + uint32 mx = m_comp_max[i]; + + uint32 n; + if (color_quad_type::component_traits::cFloat) + { + typename color_quad_type::parameter_t t = color[i]; + if (t < 0.0f) + { + n = 0; + } + else if (t > static_cast(mx)) + { + n = mx; + } + else + { + n = math::minimum(static_cast(floor(t + .5f)), mx); + } + } + else if (rescale) { + if (color_quad_type::component_traits::cSigned) + { + n = math::maximum(static_cast(color[i]), 0); + } + else + { + n = static_cast(color[i]); + } + const uint32 h = static_cast(color_quad_type::component_traits::cMax); + n = static_cast((static_cast(n) * mx + (h >> 1)) / h); + } + else { + if (color_quad_type::component_traits::cSigned) + { + n = 
math::minimum(static_cast(math::maximum(static_cast(color[i]), 0)), mx); + } + else + { + n = math::minimum(static_cast(color[i]), mx); + } + } + + uint src_bit_ofs = 0; + uint dst_bit_ofs = m_comp_ofs[i]; + while (src_bit_ofs < comp_size) + { + const uint cur_byte_bit_ofs = (dst_bit_ofs & 7); + const uint cur_byte_bits = 8 - cur_byte_bit_ofs; + + uint byte_val = pDst[dst_bit_ofs >> 3]; + uint bit_mask = (mx << cur_byte_bit_ofs) & 0xFF; + byte_val &= ~bit_mask; + byte_val |= (n << cur_byte_bit_ofs); + pDst[dst_bit_ofs >> 3] = static_cast(byte_val); + + mx >>= cur_byte_bits; + n >>= cur_byte_bits; + + dst_bit_ofs += cur_byte_bits; + src_bit_ofs += cur_byte_bits; + } + } + + return pDst + m_pixel_stride; + } + + bool init(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) + { + clear(); + + if ((num_comps < 1) || (num_comps > 4) || (bits_per_comp < 1) || (bits_per_comp > 32)) + { + CRNLIB_ASSERT(0); + return false; + } + + for (uint i = 0; i < num_comps; i++) + { + m_comp_size[i] = bits_per_comp; + m_comp_ofs[i] = i * bits_per_comp; + if (reversed) + { + m_comp_ofs[i] = ((num_comps - 1) * bits_per_comp) - m_comp_ofs[i]; + } + } + + for (uint i = 0; i < 4; i++) + { + m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); + } + + m_pixel_stride = (pixel_stride >= 0) ? pixel_stride : (num_comps * bits_per_comp + 7) / 8; + + return true; + } + + // Format examples: + // R16G16B16 + // B5G6R5 + // B5G5R5x1 + // Y8A8 + // A8R8G8B8 + // First component is at LSB in memory. Assumes unsigned integer components, 1-32bits each. 
+ bool init(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) + { + clear(); + + uint cur_bit_ofs = 0; + + while (*pComp_map) + { + char c = *pComp_map++; + + int comp_index = -1; + if (c == 'R') + { + comp_index = 0; + } + else if (c == 'G') + { + comp_index = 1; + } + else if (c == 'B') + { + comp_index = 2; + } + else if (c == 'A') + { + comp_index = 3; + } + else if (c == 'Y') + { + comp_index = 4; + } + else if (c != 'x') + { + return false; + } + + uint comp_size = 0; + + uint n = *pComp_map; + if ((n >= '0') && (n <= '9')) + { + comp_size = n - '0'; + pComp_map++; + + n = *pComp_map; + if ((n >= '0') && (n <= '9')) + { + comp_size = (comp_size * 10) + (n - '0'); + pComp_map++; + } + } + + if (force_comp_size != -1) + { + comp_size = force_comp_size; + } + + if ((!comp_size) || (comp_size > 32)) + { + return false; + } + + if (comp_index == 4) + { + if (m_comp_size[0] || m_comp_size[1] || m_comp_size[2]) + { + return false; + } + + //m_comp_ofs[0] = m_comp_ofs[1] = m_comp_ofs[2] = cur_bit_ofs; + //m_comp_size[0] = m_comp_size[1] = m_comp_size[2] = comp_size; + m_comp_ofs[1] = cur_bit_ofs; + m_comp_size[1] = comp_size; + m_rgb_is_luma = true; + m_num_comps++; + } + else if (comp_index >= 0) + { + if (m_comp_size[comp_index]) + { + return false; + } + + m_comp_ofs[comp_index] = cur_bit_ofs; + m_comp_size[comp_index] = comp_size; + m_num_comps++; + } + + cur_bit_ofs += comp_size; + } + + for (uint i = 0; i < 4; i++) + { + m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); + } + + if (pixel_stride >= 0) + { + m_pixel_stride = pixel_stride; + } + else + { + m_pixel_stride = (cur_bit_ofs + 7) / 8; + } + return true; + } + + private: + uint m_pixel_stride; + uint m_num_comps; + uint m_comp_size[4]; + uint m_comp_ofs[4]; + uint m_comp_max[4]; + bool m_rgb_is_luma; + }; } // namespace crnlib diff --git a/crnlib/crn_file_utils.cpp b/crnlib/crn_file_utils.cpp index ec1f3d8..f96929f 100644 --- a/crnlib/crn_file_utils.cpp +++ 
b/crnlib/crn_file_utils.cpp @@ -1,5 +1,6 @@ // File: crn_file_utils.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_file_utils.h" #include "crn_strutils.h" @@ -18,509 +19,664 @@ #include #endif -namespace crnlib { +namespace crnlib +{ #if CRNLIB_USE_WIN32_API -bool file_utils::is_read_only(const char* pFilename) { - uint32 dst_file_attribs = GetFileAttributesA(pFilename); - if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) - return false; - if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) - return true; - return false; -} - -bool file_utils::disable_read_only(const char* pFilename) { - uint32 dst_file_attribs = GetFileAttributesA(pFilename); - if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) - return false; - if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) { - dst_file_attribs &= ~FILE_ATTRIBUTE_READONLY; - if (SetFileAttributesA(pFilename, dst_file_attribs)) - return true; - } - return false; -} - -bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) { - WIN32_FILE_ATTRIBUTE_DATA src_file_attribs; - const BOOL src_file_exists = GetFileAttributesExA(pSrcFilename, GetFileExInfoStandard, &src_file_attribs); - - WIN32_FILE_ATTRIBUTE_DATA dst_file_attribs; - const BOOL dest_file_exists = GetFileAttributesExA(pDstFilename, GetFileExInfoStandard, &dst_file_attribs); - - if ((dest_file_exists) && (src_file_exists)) { - LONG timeComp = CompareFileTime(&src_file_attribs.ftLastWriteTime, &dst_file_attribs.ftLastWriteTime); - if (timeComp < 0) - return true; - } - return false; -} - -bool file_utils::does_file_exist(const char* pFilename) { - const DWORD fullAttributes = GetFileAttributesA(pFilename); - - if (fullAttributes == INVALID_FILE_ATTRIBUTES) - return false; - - if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) - return false; - - return true; -} - -bool file_utils::does_dir_exist(const char* pDir) { - //-- Get the file attributes. 
- DWORD fullAttributes = GetFileAttributesA(pDir); - - if (fullAttributes == INVALID_FILE_ATTRIBUTES) - return false; - - if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) - return true; - - return false; -} - -bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { - file_size = 0; - - WIN32_FILE_ATTRIBUTE_DATA attr; - - if (0 == GetFileAttributesExA(pFilename, GetFileExInfoStandard, &attr)) - return false; - - if (attr.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - return false; - - file_size = static_cast(attr.nFileSizeLow) | (static_cast(attr.nFileSizeHigh) << 32U); - - return true; -} + bool file_utils::is_read_only(const char* pFilename) + { + uint32 dst_file_attribs = GetFileAttributesA(pFilename); + if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) + { + return false; + } + if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) + { + return true; + } + return false; + } + + bool file_utils::disable_read_only(const char* pFilename) + { + uint32 dst_file_attribs = GetFileAttributesA(pFilename); + if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) + { + return false; + } + if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) + { + dst_file_attribs &= ~FILE_ATTRIBUTE_READONLY; + if (SetFileAttributesA(pFilename, dst_file_attribs)) + { + return true; + } + } + return false; + } + + bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) + { + WIN32_FILE_ATTRIBUTE_DATA src_file_attribs; + const BOOL src_file_exists = GetFileAttributesExA(pSrcFilename, GetFileExInfoStandard, &src_file_attribs); + + WIN32_FILE_ATTRIBUTE_DATA dst_file_attribs; + const BOOL dest_file_exists = GetFileAttributesExA(pDstFilename, GetFileExInfoStandard, &dst_file_attribs); + + if ((dest_file_exists) && (src_file_exists)) + { + LONG timeComp = CompareFileTime(&src_file_attribs.ftLastWriteTime, &dst_file_attribs.ftLastWriteTime); + if (timeComp < 0) + { + return true; + } + } + return false; + } + + bool file_utils::does_file_exist(const char* pFilename) { + 
const DWORD fullAttributes = GetFileAttributesA(pFilename); + + if (fullAttributes == INVALID_FILE_ATTRIBUTES) + return false; + + if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) + return false; + + return true; + } + + bool file_utils::does_dir_exist(const char* pDir) + { + //-- Get the file attributes. + DWORD fullAttributes = GetFileAttributesA(pDir); + + if (fullAttributes == INVALID_FILE_ATTRIBUTES) + { + return false; + } + + if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + return true; + } + + return false; + } + + bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { + file_size = 0; + + WIN32_FILE_ATTRIBUTE_DATA attr; + + if (0 == GetFileAttributesExA(pFilename, GetFileExInfoStandard, &attr)) + { + return false; + } + + if (attr.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + return false; + } + + file_size = static_cast(attr.nFileSizeLow) | (static_cast(attr.nFileSizeHigh) << 32U); + + return true; + } #elif defined(__GNUC__) -bool file_utils::is_read_only(const char* pFilename) { - pFilename; - // TODO - return false; -} - -bool file_utils::disable_read_only(const char* pFilename) { - pFilename; - // TODO - return false; -} - -bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) { - pSrcFilename, pDstFilename; - // TODO - return false; -} - -bool file_utils::does_file_exist(const char* pFilename) { - struct stat stat_buf; - int result = stat(pFilename, &stat_buf); - if (result) - return false; - if (S_ISREG(stat_buf.st_mode)) - return true; - return false; -} - -bool file_utils::does_dir_exist(const char* pDir) { - struct stat stat_buf; - int result = stat(pDir, &stat_buf); - if (result) - return false; - if (S_ISDIR(stat_buf.st_mode) || S_ISLNK(stat_buf.st_mode)) - return true; - return false; -} - -bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { - file_size = 0; - struct stat stat_buf; - int result = stat(pFilename, &stat_buf); - if (result) - return false; - if 
(!S_ISREG(stat_buf.st_mode)) - return false; - file_size = stat_buf.st_size; - return true; -} + bool file_utils::is_read_only(const char* pFilename) + { + pFilename; + // TODO + return false; + } + + bool file_utils::disable_read_only(const char* pFilename) + { + pFilename; + // TODO + return false; + } + + bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) + { + pSrcFilename, pDstFilename; + // TODO + return false; + } + + bool file_utils::does_file_exist(const char* pFilename) + { + struct stat stat_buf; + int result = stat(pFilename, &stat_buf); + if (result) + { + return false; + } + if (S_ISREG(stat_buf.st_mode)) + { + return true; + } + return false; + } + + bool file_utils::does_dir_exist(const char* pDir) + { + struct stat stat_buf; + int result = stat(pDir, &stat_buf); + if (result) + { + return false; + } + if (S_ISDIR(stat_buf.st_mode) || S_ISLNK(stat_buf.st_mode)) + { + return true; + } + return false; + } + + bool file_utils::get_file_size(const char* pFilename, uint64& file_size) + { + file_size = 0; + struct stat stat_buf; + int result = stat(pFilename, &stat_buf); + if (result) + { + return false; + } + if (!S_ISREG(stat_buf.st_mode)) + { + return false; + } + file_size = stat_buf.st_size; + return true; + } #else -bool file_utils::is_read_only(const char* pFilename) { - return false; -} - -bool file_utils::disable_read_only(const char* pFilename) { - pFilename; - // TODO - return false; -} - -bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) { - return false; -} - -bool file_utils::does_file_exist(const char* pFilename) { - FILE* pFile; - crn_fopen(&pFile, pFilename, "rb"); - if (!pFile) - return false; - fclose(pFile); - return true; -} - -bool file_utils::does_dir_exist(const char* pDir) { - return false; -} - -bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { - FILE* pFile; - crn_fopen(&pFile, pFilename, "rb"); - if (!pFile) - return false; - 
crn_fseek(pFile, 0, SEEK_END); - file_size = crn_ftell(pFile); - fclose(pFile); - return true; -} -#endif + bool file_utils::is_read_only(const char* pFilename) + { + return false; + } + + bool file_utils::disable_read_only(const char* pFilename) + { + pFilename; + // TODO + return false; + } + + bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) + { + return false; + } -bool file_utils::get_file_size(const char* pFilename, uint32& file_size) { - uint64 file_size64; - if (!get_file_size(pFilename, file_size64)) { - file_size = 0; - return false; - } + bool file_utils::does_file_exist(const char* pFilename) + { + FILE* pFile; + crn_fopen(&pFile, pFilename, "rb"); + if (!pFile) + { + return false; + } + fclose(pFile); + return true; + } - if (file_size64 > cUINT32_MAX) - file_size64 = cUINT32_MAX; + bool file_utils::does_dir_exist(const char* pDir) + { + return false; + } + + bool file_utils::get_file_size(const char* pFilename, uint64& file_size) + { + FILE* pFile; + crn_fopen(&pFile, pFilename, "rb"); + if (!pFile) + { + return false; + } + crn_fseek(pFile, 0, SEEK_END); + file_size = crn_ftell(pFile); + fclose(pFile); + return true; + } +#endif - file_size = static_cast(file_size64); - return true; -} + bool file_utils::get_file_size(const char* pFilename, uint32& file_size) + { + uint64 file_size64; + if (!get_file_size(pFilename, file_size64)) + { + file_size = 0; + return false; + } + + if (file_size64 > cUINT32_MAX) + { + file_size64 = cUINT32_MAX; + } + + file_size = static_cast(file_size64); + return true; + } -bool file_utils::is_path_separator(char c) { + bool file_utils::is_path_separator(char c) + { #ifdef WIN32 - return (c == '/') || (c == '\\'); + return (c == '/') || (c == '\\'); #else - return (c == '/'); + return (c == '/'); #endif -} + } -bool file_utils::is_path_or_drive_separator(char c) { + bool file_utils::is_path_or_drive_separator(char c) + { #ifdef WIN32 - return (c == '/') || (c == '\\') || (c == ':'); + 
return (c == '/') || (c == '\\') || (c == ':'); #else - return (c == '/'); + return (c == '/'); #endif -} + } -bool file_utils::is_drive_separator(char c) { + bool file_utils::is_drive_separator(char c) + { #ifdef WIN32 - return (c == ':'); + return (c == ':'); #else - c; - return false; + c; + return false; #endif -} + } -bool file_utils::split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt) { - CRNLIB_ASSERT(p); + bool file_utils::split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt) + { + CRNLIB_ASSERT(p); #ifdef WIN32 - char drive_buf[_MAX_DRIVE]; - char dir_buf[_MAX_DIR]; - char fname_buf[_MAX_FNAME]; - char ext_buf[_MAX_EXT]; + char drive_buf[_MAX_DRIVE]; + char dir_buf[_MAX_DIR]; + char fname_buf[_MAX_FNAME]; + char ext_buf[_MAX_EXT]; #ifdef _MSC_VER - // Compiling with MSVC - errno_t error = _splitpath_s(p, - pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, - pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, - pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, - pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); - if (error != 0) - return false; + // Compiling with MSVC + errno_t error = _splitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + if (error != 0) + { + return false; + } #else - // Compiling with MinGW - _splitpath(p, - pDrive ? drive_buf : NULL, - pDir ? dir_buf : NULL, - pFilename ? fname_buf : NULL, - pExt ? ext_buf : NULL); + // Compiling with MinGW + _splitpath(p, + pDrive ? drive_buf : NULL, + pDir ? dir_buf : NULL, + pFilename ? fname_buf : NULL, + pExt ? 
ext_buf : NULL); #endif - if (pDrive) - *pDrive = drive_buf; - if (pDir) - *pDir = dir_buf; - if (pFilename) - *pFilename = fname_buf; - if (pExt) - *pExt = ext_buf; + if (pDrive) + { + *pDrive = drive_buf; + } + if (pDir) + { + *pDir = dir_buf; + } + if (pFilename) + { + *pFilename = fname_buf; + } + if (pExt) + { + *pExt = ext_buf; + } #else - char dirtmp[1024]; - char nametmp[1024]; - strcpy_safe(dirtmp, sizeof(dirtmp), p); - strcpy_safe(nametmp, sizeof(nametmp), p); - - if (pDrive) - pDrive->clear(); - - const char* pDirName = dirname(dirtmp); - if (!pDirName) - return false; - - if (pDir) { - pDir->set(pDirName); - if ((!pDir->is_empty()) && (pDir->back() != '/')) - pDir->append_char('/'); - } - - const char* pBaseName = basename(nametmp); - if (!pBaseName) - return false; - - if (pFilename) { - pFilename->set(pBaseName); - remove_extension(*pFilename); - } - - if (pExt) { - pExt->set(pBaseName); - get_extension(*pExt); - *pExt = "." + *pExt; - } + char dirtmp[1024]; + char nametmp[1024]; + strcpy_safe(dirtmp, sizeof(dirtmp), p); + strcpy_safe(nametmp, sizeof(nametmp), p); + + if (pDrive) + { + pDrive->clear(); + } + + const char* pDirName = dirname(dirtmp); + if (!pDirName) + { + return false; + } + + if (pDir) + { + pDir->set(pDirName); + if ((!pDir->is_empty()) && (pDir->back() != '/')) + { + pDir->append_char('/'); + } + } + + const char* pBaseName = basename(nametmp); + if (!pBaseName) + { + return false; + } + + if (pFilename) + { + pFilename->set(pBaseName); + remove_extension(*pFilename); + } + + if (pExt) + { + pExt->set(pBaseName); + get_extension(*pExt); + *pExt = "." 
+ *pExt; + } #endif // #ifdef WIN32 - return true; -} - -bool file_utils::split_path(const char* p, dynamic_string& path, dynamic_string& filename) { - dynamic_string temp_drive, temp_path, temp_ext; - if (!split_path(p, &temp_drive, &temp_path, &filename, &temp_ext)) - return false; - - filename += temp_ext; - - combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); - return true; -} - -bool file_utils::get_pathname(const char* p, dynamic_string& path) { - dynamic_string temp_drive, temp_path; - if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) - return false; - - combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); - return true; -} - -bool file_utils::get_filename(const char* p, dynamic_string& filename) { - dynamic_string temp_ext; - if (!split_path(p, NULL, NULL, &filename, &temp_ext)) - return false; - - filename += temp_ext; - return true; -} - -void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB) { - dynamic_string temp(pA); - if ((!temp.is_empty()) && (!is_path_separator(pB[0]))) { - char c = temp[temp.get_len() - 1]; - if (!is_path_separator(c)) - temp.append_char(CRNLIB_PATH_SEPERATOR_CHAR); - } - temp += pB; - dst.swap(temp); -} - -void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC) { - combine_path(dst, pA, pB); - combine_path(dst, dst.get_ptr(), pC); -} - -bool file_utils::full_path(dynamic_string& path) { + return true; + } + + bool file_utils::split_path(const char* p, dynamic_string& path, dynamic_string& filename) + { + dynamic_string temp_drive, temp_path, temp_ext; + if (!split_path(p, &temp_drive, &temp_path, &filename, &temp_ext)) + { + return false; + } + + filename += temp_ext; + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; + } + + bool file_utils::get_pathname(const char* p, dynamic_string& path) + { + dynamic_string temp_drive, temp_path; + if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) + { + 
return false; + } + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; + } + + bool file_utils::get_filename(const char* p, dynamic_string& filename) + { + dynamic_string temp_ext; + if (!split_path(p, NULL, NULL, &filename, &temp_ext)) + { + return false; + } + + filename += temp_ext; + return true; + } + + void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB) + { + dynamic_string temp(pA); + if ((!temp.is_empty()) && (!is_path_separator(pB[0]))) + { + char c = temp[temp.get_len() - 1]; + if (!is_path_separator(c)) + { + temp.append_char(CRNLIB_PATH_SEPERATOR_CHAR); + } + } + temp += pB; + dst.swap(temp); + } + + void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC) + { + combine_path(dst, pA, pB); + combine_path(dst, dst.get_ptr(), pC); + } + + bool file_utils::full_path(dynamic_string& path) { #ifdef WIN32 - char buf[1024]; - char* p = _fullpath(buf, path.get_ptr(), sizeof(buf)); - if (!p) - return false; + char buf[1024]; + char* p = _fullpath(buf, path.get_ptr(), sizeof(buf)); + if (!p) + { + return false; + } #else - char buf[PATH_MAX]; - char* p; - dynamic_string pn, fn; - split_path(path.get_ptr(), pn, fn); - if ((fn == ".") || (fn == "..")) { - p = realpath(path.get_ptr(), buf); - if (!p) - return false; - path.set(buf); - } else { - if (pn.is_empty()) - pn = "./"; - p = realpath(pn.get_ptr(), buf); - if (!p) - return false; - combine_path(path, buf, fn.get_ptr()); - } + char buf[PATH_MAX]; + char* p; + dynamic_string pn, fn; + split_path(path.get_ptr(), pn, fn); + if ((fn == ".") || (fn == "..")) + { + p = realpath(path.get_ptr(), buf); + if (!p) + { + return false; + } + path.set(buf); + } + else + { + if (pn.is_empty()) + { + pn = "./"; + } + p = realpath(pn.get_ptr(), buf); + if (!p) + { + return false; + } + combine_path(path, buf, fn.get_ptr()); + } #endif - return true; -} + return true; + } -bool file_utils::get_extension(dynamic_string& filename) 
{ - int sep = -1; + bool file_utils::get_extension(dynamic_string& filename) + { + int sep = -1; #ifdef WIN32 - sep = filename.find_right('\\'); + sep = filename.find_right('\\'); #endif - if (sep < 0) - sep = filename.find_right('/'); + if (sep < 0) + { + sep = filename.find_right('/'); + } - int dot = filename.find_right('.'); - if (dot < sep) { - filename.clear(); - return false; - } + int dot = filename.find_right('.'); + if (dot < sep) + { + filename.clear(); + return false; + } - filename.right(dot + 1); + filename.right(dot + 1); - return true; -} + return true; + } -bool file_utils::remove_extension(dynamic_string& filename) { - int sep = -1; + bool file_utils::remove_extension(dynamic_string& filename) + { + int sep = -1; #ifdef WIN32 - sep = filename.find_right('\\'); + sep = filename.find_right('\\'); #endif - if (sep < 0) - sep = filename.find_right('/'); + if (sep < 0) + { + sep = filename.find_right('/'); + } - int dot = filename.find_right('.'); - if (dot < sep) - return false; + int dot = filename.find_right('.'); + if (dot < sep) + { + return false; + } - filename.left(dot); + filename.left(dot); - return true; -} + return true; + } -bool file_utils::create_path(const dynamic_string& fullpath) { + bool file_utils::create_path(const dynamic_string& fullpath) + { #ifdef WIN32 - bool got_unc = false; + bool got_unc = false; #endif - dynamic_string cur_path; + dynamic_string cur_path; - const int l = fullpath.get_len(); + const int l = fullpath.get_len(); - int n = 0; - while (n < l) { - const char c = fullpath.get_ptr()[n]; + int n = 0; + while (n < l) + { + const char c = fullpath.get_ptr()[n]; - const bool sep = is_path_separator(c); - const bool back_sep = is_path_separator(cur_path.back()); - const bool is_last_char = (n == (l - 1)); + const bool sep = is_path_separator(c); + const bool back_sep = is_path_separator(cur_path.back()); + const bool is_last_char = (n == (l - 1)); - if (((sep) && (!back_sep)) || (is_last_char)) { - if ((is_last_char) 
&& (!sep)) - cur_path.append_char(c); + if (((sep) && (!back_sep)) || (is_last_char)) + { + if ((is_last_char) && (!sep)) + { + cur_path.append_char(c); + } - bool valid = !cur_path.is_empty(); + bool valid = !cur_path.is_empty(); #ifdef WIN32 - // reject obvious stuff (drives, beginning of UNC paths): - // c:\b\cool - // \\machine\blah - // \cool\blah - if ((cur_path.get_len() == 2) && (cur_path[1] == ':')) - valid = false; - else if ((cur_path.get_len() >= 2) && (cur_path[0] == '\\') && (cur_path[1] == '\\')) { - if (!got_unc) - valid = false; - got_unc = true; - } else if (cur_path == "\\") - valid = false; + // reject obvious stuff (drives, beginning of UNC paths): + // c:\b\cool + // \\machine\blah + // \cool\blah + if ((cur_path.get_len() == 2) && (cur_path[1] == ':')) + { + valid = false; + } + + else if ((cur_path.get_len() >= 2) && (cur_path[0] == '\\') && (cur_path[1] == '\\')) + { + if (!got_unc) + { + valid = false; + } + got_unc = true; + } + else if (cur_path == "\\") + { + valid = false; + } #endif - if (cur_path == "/") - valid = false; + if (cur_path == "/") + { + valid = false; + } - if ((valid) && (cur_path.get_len())) { + if ((valid) && (cur_path.get_len())) + { #ifdef WIN32 - _mkdir(cur_path.get_ptr()); + _mkdir(cur_path.get_ptr()); #else - mkdir(cur_path.get_ptr(), S_IRWXU | S_IRWXG | S_IRWXO); + mkdir(cur_path.get_ptr(), S_IRWXU | S_IRWXG | S_IRWXO); #endif - } - } + } + } - cur_path.append_char(c); + cur_path.append_char(c); - n++; - } + n++; + } - return true; -} - -void file_utils::trim_trailing_seperator(dynamic_string& path) { - if ((path.get_len()) && (is_path_separator(path.back()))) - path.truncate(path.get_len() - 1); -} - -// See http://www.codeproject.com/KB/string/wildcmp.aspx -int file_utils::wildcmp(const char* pWild, const char* pString) { - const char *cp = NULL, *mp = NULL; - - while ((*pString) && (*pWild != '*')) { - if ((*pWild != *pString) && (*pWild != '?')) - return 0; - pWild++; - pString++; - } - - // Either 
*pString=='\0' or *pWild='*' here. - - while (*pString) { - if (*pWild == '*') { - if (!*++pWild) - return 1; - mp = pWild; - cp = pString + 1; - } else if ((*pWild == *pString) || (*pWild == '?')) { - pWild++; - pString++; - } else { - pWild = mp; - pString = cp++; + return true; } - } - while (*pWild == '*') - pWild++; + void file_utils::trim_trailing_seperator(dynamic_string& path) + { + if ((path.get_len()) && (is_path_separator(path.back()))) + { + path.truncate(path.get_len() - 1); + } + } - return !*pWild; -} + // See http://www.codeproject.com/KB/string/wildcmp.aspx + int file_utils::wildcmp(const char* pWild, const char* pString) + { + const char* cp = NULL, * mp = NULL; + + while ((*pString) && (*pWild != '*')) + { + if ((*pWild != *pString) && (*pWild != '?')) + { + return 0; + } + pWild++; + pString++; + } + + // Either *pString=='\0' or *pWild='*' here. + + while (*pString) + { + if (*pWild == '*') + { + if (!*++pWild) + { + return 1; + } + mp = pWild; + cp = pString + 1; + } + else if ((*pWild == *pString) || (*pWild == '?')) + { + pWild++; + pString++; + } + else + { + pWild = mp; + pString = cp++; + } + } + + while (*pWild == '*') + { + pWild++; + } + + return !*pWild; + } -bool file_utils::write_buf_to_file(const char* pPath, const void* pData, size_t data_size) { - FILE* pFile = NULL; + bool file_utils::write_buf_to_file(const char* pPath, const void* pData, size_t data_size) + { + FILE* pFile = NULL; -#ifdef _MSC_VER - // Compiling with MSVC - if (fopen_s(&pFile, pPath, "wb")) - return false; +#if defined(CRN_CC_MSVC) + // Compiling with MSVC + if (fopen_s(&pFile, pPath, "wb")) + { + return false; + } #else - pFile = fopen(pPath, "wb"); + pFile = fopen(pPath, "wb"); #endif - if (!pFile) - return false; + if (!pFile) + { + return false; + } - bool success = fwrite(pData, 1, data_size, pFile) == data_size; + bool success = fwrite(pData, 1, data_size, pFile) == data_size; - fclose(pFile); + fclose(pFile); - return success; -} + return success; + } } 
// namespace crnlib diff --git a/crnlib/crn_file_utils.h b/crnlib/crn_file_utils.h index a6ac874..ca02b04 100644 --- a/crnlib/crn_file_utils.h +++ b/crnlib/crn_file_utils.h @@ -4,40 +4,41 @@ #include "crn_export.h" -namespace crnlib { -struct CRN_EXPORT file_utils { - // Returns true if pSrcFilename is older than pDstFilename - static bool is_read_only(const char* pFilename); - static bool disable_read_only(const char* pFilename); - static bool is_older_than(const char* pSrcFilename, const char* pDstFilename); - static bool does_file_exist(const char* pFilename); - static bool does_dir_exist(const char* pDir); - static bool get_file_size(const char* pFilename, uint64& file_size); - static bool get_file_size(const char* pFilename, uint32& file_size); - - static bool is_path_separator(char c); - static bool is_path_or_drive_separator(char c); - static bool is_drive_separator(char c); - - static bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt); - static bool split_path(const char* p, dynamic_string& path, dynamic_string& filename); - - static bool get_pathname(const char* p, dynamic_string& path); - static bool get_filename(const char* p, dynamic_string& filename); - - static void combine_path(dynamic_string& dst, const char* pA, const char* pB); - static void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC); - - static bool full_path(dynamic_string& path); - static bool get_extension(dynamic_string& filename); - static bool remove_extension(dynamic_string& filename); - static bool create_path(const dynamic_string& path); - static void trim_trailing_seperator(dynamic_string& path); - - static int wildcmp(const char* pWild, const char* pString); - - static bool write_buf_to_file(const char* pPath, const void* pData, size_t data_size); - -}; // struct file_utils - +namespace crnlib +{ + struct CRN_EXPORT file_utils + { + // Returns true if pSrcFilename is older 
than pDstFilename + static bool is_read_only(const char* pFilename); + static bool disable_read_only(const char* pFilename); + static bool is_older_than(const char* pSrcFilename, const char* pDstFilename); + static bool does_file_exist(const char* pFilename); + static bool does_dir_exist(const char* pDir); + static bool get_file_size(const char* pFilename, uint64& file_size); + static bool get_file_size(const char* pFilename, uint32& file_size); + + static bool is_path_separator(char c); + static bool is_path_or_drive_separator(char c); + static bool is_drive_separator(char c); + + static bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt); + static bool split_path(const char* p, dynamic_string& path, dynamic_string& filename); + + static bool get_pathname(const char* p, dynamic_string& path); + static bool get_filename(const char* p, dynamic_string& filename); + + static void combine_path(dynamic_string& dst, const char* pA, const char* pB); + static void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC); + + static bool full_path(dynamic_string& path); + static bool get_extension(dynamic_string& filename); + static bool remove_extension(dynamic_string& filename); + static bool create_path(const dynamic_string& path); + static void trim_trailing_seperator(dynamic_string& path); + + static int wildcmp(const char* pWild, const char* pString); + + static bool write_buf_to_file(const char* pPath, const void* pData, size_t data_size); + + }; // struct file_utils } // namespace crnlib diff --git a/crnlib/crn_find_files.cpp b/crnlib/crn_find_files.cpp index 47d11b5..09bb972 100644 --- a/crnlib/crn_find_files.cpp +++ b/crnlib/crn_find_files.cpp @@ -13,239 +13,333 @@ #include #endif -namespace crnlib { +namespace crnlib +{ #ifdef CRNLIB_USE_WIN32_API -bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) { - m_last_error = S_OK; - 
m_files.resize(0); + bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) + { + m_last_error = S_OK; + m_files.resize(0); - return find_internal(pBasepath, "", pFilespec, flags, 0); -} - -bool find_files::find(const char* pSpec, uint flags) { - dynamic_string find_name(pSpec); - - if (!file_utils::full_path(find_name)) - return false; - - dynamic_string find_pathname, find_filename; - if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) - return false; - - return find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); -} - -bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) { - WIN32_FIND_DATAA find_data; - - dynamic_string filename; + return find_internal(pBasepath, "", pFilespec, flags, 0); + } - dynamic_string_array child_paths; - if (flags & cFlagRecursive) { - if (strlen(pRelpath)) - file_utils::combine_path(filename, pBasepath, pRelpath, "*"); - else - file_utils::combine_path(filename, pBasepath, "*"); + bool find_files::find(const char* pSpec, uint flags) + { + dynamic_string find_name(pSpec); - HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); - if (handle == INVALID_HANDLE_VALUE) { - HRESULT hres = GetLastError(); - if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) { - m_last_error = hres; - return false; - } - } else { - do { - const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + if (!file_utils::full_path(find_name)) + { + return false; + } - bool skip = !is_dir; - if (is_dir) - skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); + dynamic_string find_pathname, find_filename; + if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) + { + return false; + } - if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) - skip = true; + return 
find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); + } - if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) { - if ((flags & cFlagAllowHidden) == 0) - skip = true; + bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) + { + WIN32_FIND_DATAA find_data; + + dynamic_string filename; + + dynamic_string_array child_paths; + if (flags & cFlagRecursive) + { + if (strlen(pRelpath)) + { + file_utils::combine_path(filename, pBasepath, pRelpath, "*"); + } + else + { + file_utils::combine_path(filename, pBasepath, "*"); + } + + HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); + if (handle == INVALID_HANDLE_VALUE) + { + HRESULT hres = GetLastError(); + if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) + { + m_last_error = hres; + return false; + } + } + else + { + do + { + const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + + bool skip = !is_dir; + if (is_dir) + { + skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); + } + + if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) + { + skip = true; + } + + if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) + { + if ((flags & cFlagAllowHidden) == 0) + { + skip = true; + } + } + + if (!skip) + { + dynamic_string child_path(find_data.cFileName); + if ((!child_path.count_char('?')) && (!child_path.count_char('*'))) + { + child_paths.push_back(child_path); + } + } + + } + while (FindNextFileA(handle, &find_data) != 0); + + HRESULT hres = GetLastError(); + + FindClose(handle); + handle = INVALID_HANDLE_VALUE; + + if (hres != ERROR_NO_MORE_FILES) + { + m_last_error = hres; + return false; + } + } } - if (!skip) { - dynamic_string child_path(find_data.cFileName); - if ((!child_path.count_char('?')) && (!child_path.count_char('*'))) - child_paths.push_back(child_path); + if (strlen(pRelpath)) + { + 
file_utils::combine_path(filename, pBasepath, pRelpath, pFilespec); + } + else + { + file_utils::combine_path(filename, pBasepath, pFilespec); } - } while (FindNextFileA(handle, &find_data) != 0); - - HRESULT hres = GetLastError(); + HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); + if (handle == INVALID_HANDLE_VALUE) + { + HRESULT hres = GetLastError(); + if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) + { + m_last_error = hres; + return false; + } + } + else + { + do + { + const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + + bool skip = false; + if (is_dir) + { + skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); + } + + if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) + { + skip = true; + } + + if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) + { + if ((flags & cFlagAllowHidden) == 0) + { + skip = true; + } + } + + if (!skip) + { + if (((is_dir) && (flags & cFlagAllowDirs)) || ((!is_dir) && (flags & cFlagAllowFiles))) + { + m_files.resize(m_files.size() + 1); + file_desc& file = m_files.back(); + file.m_is_dir = is_dir; + file.m_base = pBasepath; + file.m_name = find_data.cFileName; + file.m_rel = pRelpath; + if (strlen(pRelpath)) + { + file_utils::combine_path(file.m_fullname, pBasepath, pRelpath, find_data.cFileName); + } + else + { + file_utils::combine_path(file.m_fullname, pBasepath, find_data.cFileName); + } + } + } + + } + while (FindNextFileA(handle, &find_data) != 0); + + HRESULT hres = GetLastError(); + + FindClose(handle); + + if (hres != ERROR_NO_MORE_FILES) + { + m_last_error = hres; + return false; + } + } - FindClose(handle); - handle = INVALID_HANDLE_VALUE; + for (uint i = 0; i < child_paths.size(); i++) + { + dynamic_string child_path; + if (strlen(pRelpath)) + { + file_utils::combine_path(child_path, pRelpath, child_paths[i].get_ptr()); + } + else + { + child_path = child_paths[i]; + } + 
+ if (!find_internal(pBasepath, child_path.get_ptr(), pFilespec, flags, level + 1)) + { + return false; + } + } - if (hres != ERROR_NO_MORE_FILES) { - m_last_error = hres; - return false; - } + return true; } - } - - if (strlen(pRelpath)) - file_utils::combine_path(filename, pBasepath, pRelpath, pFilespec); - else - file_utils::combine_path(filename, pBasepath, pFilespec); - - HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); - if (handle == INVALID_HANDLE_VALUE) { - HRESULT hres = GetLastError(); - if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) { - m_last_error = hres; - return false; +#elif defined(CRN_CC_GNU) + bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) + { + m_files.resize(0); + return find_internal(pBasepath, "", pFilespec, flags, 0); } - } else { - do { - const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; - - bool skip = false; - if (is_dir) - skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); - - if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) - skip = true; - - if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) { - if ((flags & cFlagAllowHidden) == 0) - skip = true; - } - - if (!skip) { - if (((is_dir) && (flags & cFlagAllowDirs)) || ((!is_dir) && (flags & cFlagAllowFiles))) { - m_files.resize(m_files.size() + 1); - file_desc& file = m_files.back(); - file.m_is_dir = is_dir; - file.m_base = pBasepath; - file.m_name = find_data.cFileName; - file.m_rel = pRelpath; - if (strlen(pRelpath)) - file_utils::combine_path(file.m_fullname, pBasepath, pRelpath, find_data.cFileName); - else - file_utils::combine_path(file.m_fullname, pBasepath, find_data.cFileName); - } - } - - } while (FindNextFileA(handle, &find_data) != 0); - - HRESULT hres = GetLastError(); - - FindClose(handle); - - if (hres != ERROR_NO_MORE_FILES) { - m_last_error = hres; - return false; - } - } - - for (uint 
i = 0; i < child_paths.size(); i++) { - dynamic_string child_path; - if (strlen(pRelpath)) - file_utils::combine_path(child_path, pRelpath, child_paths[i].get_ptr()); - else - child_path = child_paths[i]; - - if (!find_internal(pBasepath, child_path.get_ptr(), pFilespec, flags, level + 1)) - return false; - } - - return true; -} -#elif defined(__GNUC__) -bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) { - m_files.resize(0); - return find_internal(pBasepath, "", pFilespec, flags, 0); -} - -bool find_files::find(const char* pSpec, uint flags) { - dynamic_string find_name(pSpec); - - if (!file_utils::full_path(find_name)) - return false; - - dynamic_string find_pathname, find_filename; - if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) - return false; - - return find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); -} -bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) { - dynamic_string pathname; - if (strlen(pRelpath)) - file_utils::combine_path(pathname, pBasepath, pRelpath); - else - pathname = pBasepath; + bool find_files::find(const char* pSpec, uint flags) + { + dynamic_string find_name(pSpec); - if (!pathname.is_empty()) { - char c = pathname.back(); - if (c != '/') - pathname += "/"; - } - - DIR* dp = opendir(pathname.get_ptr()); - - if (!dp) - return level ? 
true : false; + if (!file_utils::full_path(find_name)) + { + return false; + } - dynamic_string_array paths; + dynamic_string find_pathname, find_filename; + if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) + { + return false; + } - for (;;) { - struct dirent* ep = readdir(dp); - if (!ep) - break; - if ((strcmp(ep->d_name, ".") == 0) || (strcmp(ep->d_name, "..") == 0)) - continue; + return find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); + } - const bool is_directory = (ep->d_type & DT_DIR) != 0; - const bool is_file = (ep->d_type & DT_REG) != 0; + bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) + { + dynamic_string pathname; + if (strlen(pRelpath)) + { + file_utils::combine_path(pathname, pBasepath, pRelpath); + } + else + { + pathname = pBasepath; + } - dynamic_string filename(ep->d_name); + if (!pathname.is_empty()) + { + char c = pathname.back(); + if (c != '/') + { + pathname += "/"; + } + } - if (is_directory) { - if (flags & cFlagRecursive) { - paths.push_back(filename); - } - } + DIR* dp = opendir(pathname.get_ptr()); - if (((is_file) && (flags & cFlagAllowFiles)) || ((is_directory) && (flags & cFlagAllowDirs))) { - if (0 == fnmatch(pFilespec, filename.get_ptr(), 0)) { - m_files.resize(m_files.size() + 1); - file_desc& file = m_files.back(); - file.m_is_dir = is_directory; - file.m_base = pBasepath; - file.m_rel = pRelpath; - file.m_name = filename; - file.m_fullname = pathname + filename; - } - } - } + if (!dp) + { + return level ? 
true : false; + } - closedir(dp); - dp = NULL; + dynamic_string_array paths; + + for (;;) + { + struct dirent* ep = readdir(dp); + if (!ep) + { + break; + } + if ((strcmp(ep->d_name, ".") == 0) || (strcmp(ep->d_name, "..") == 0)) + { + continue; + } + + const bool is_directory = (ep->d_type & DT_DIR) != 0; + const bool is_file = (ep->d_type & DT_REG) != 0; + + dynamic_string filename(ep->d_name); + + if (is_directory) + { + if (flags & cFlagRecursive) + { + paths.push_back(filename); + } + } + + if (((is_file) && (flags & cFlagAllowFiles)) || ((is_directory) && (flags & cFlagAllowDirs))) + { + if (0 == fnmatch(pFilespec, filename.get_ptr(), 0)) + { + m_files.resize(m_files.size() + 1); + file_desc& file = m_files.back(); + file.m_is_dir = is_directory; + file.m_base = pBasepath; + file.m_rel = pRelpath; + file.m_name = filename; + file.m_fullname = pathname + filename; + } + } + } - if (flags & cFlagRecursive) { - for (uint i = 0; i < paths.size(); i++) { - dynamic_string childpath; - if (strlen(pRelpath)) - file_utils::combine_path(childpath, pRelpath, paths[i].get_ptr()); - else - childpath = paths[i]; + closedir(dp); + dp = NULL; + + if (flags & cFlagRecursive) + { + for (uint i = 0; i < paths.size(); i++) + { + dynamic_string childpath; + if (strlen(pRelpath)) + { + file_utils::combine_path(childpath, pRelpath, paths[i].get_ptr()); + } + else + { + childpath = paths[i]; + } + + if (!find_internal(pBasepath, childpath.get_ptr(), pFilespec, flags, level + 1)) + { + return false; + } + } + } - if (!find_internal(pBasepath, childpath.get_ptr(), pFilespec, flags, level + 1)) - return false; + return true; } - } - - return true; -} #else #error Unimplemented #endif diff --git a/crnlib/crn_find_files.h b/crnlib/crn_find_files.h index 158c87f..4cab3c6 100644 --- a/crnlib/crn_find_files.h +++ b/crnlib/crn_find_files.h @@ -1,58 +1,81 @@ // File: crn_win32_find_files.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" 
-namespace crnlib { -class CRN_EXPORT find_files { - public: - struct file_desc { - inline file_desc() - : m_is_dir(false) {} - - dynamic_string m_fullname; - dynamic_string m_base; - dynamic_string m_rel; - dynamic_string m_name; - bool m_is_dir; - - inline bool operator==(const file_desc& other) const { return m_fullname == other.m_fullname; } - inline bool operator<(const file_desc& other) const { return m_fullname < other.m_fullname; } - - inline operator size_t() const { return static_cast(m_fullname); } - }; - - typedef crnlib::vector file_desc_vec; - - inline find_files() { - m_last_error = 0; // S_OK; - } - - enum flags { - cFlagRecursive = 1, - cFlagAllowDirs = 2, - cFlagAllowFiles = 4, - cFlagAllowHidden = 8 - }; - - bool find(const char* pBasepath, const char* pFilespec, uint flags = cFlagAllowFiles); - - bool find(const char* pSpec, uint flags = cFlagAllowFiles); - - // An HRESULT under Win32. FIXME: Abstract this better? - inline int64 get_last_error() const { return m_last_error; } - - const file_desc_vec& get_files() const { return m_files; } - - private: - file_desc_vec m_files; - - // A HRESULT under Win32 - int64 m_last_error; - - bool find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level); - -}; // class find_files +namespace crnlib +{ + class CRN_EXPORT find_files + { + public: + struct file_desc + { + inline file_desc(): + m_is_dir(false) + { + } + + dynamic_string m_fullname; + dynamic_string m_base; + dynamic_string m_rel; + dynamic_string m_name; + bool m_is_dir; + + inline bool operator==(const file_desc& other) const + { + return m_fullname == other.m_fullname; + } + inline bool operator<(const file_desc& other) const + { + return m_fullname < other.m_fullname; + } + + inline operator size_t() const + { + return static_cast(m_fullname); + } + }; + + typedef crnlib::vector file_desc_vec; + + inline find_files() + { + m_last_error = 0; // S_OK; + } + + enum flags + { + cFlagRecursive = 1, + 
cFlagAllowDirs = 2, + cFlagAllowFiles = 4, + cFlagAllowHidden = 8 + }; + + bool find(const char* pBasepath, const char* pFilespec, uint flags = cFlagAllowFiles); + + bool find(const char* pSpec, uint flags = cFlagAllowFiles); + + // An HRESULT under Win32. FIXME: Abstract this better? + inline int64 get_last_error() const + { + return m_last_error; + } + + const file_desc_vec& get_files() const + { + return m_files; + } + + private: + file_desc_vec m_files; + + // A HRESULT under Win32 + int64 m_last_error; + + bool find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level); + + }; // class find_files } // namespace crnlib diff --git a/crnlib/crn_hash.cpp b/crnlib/crn_hash.cpp index 0d5b38a..aa6e6e3 100644 --- a/crnlib/crn_hash.cpp +++ b/crnlib/crn_hash.cpp @@ -2,6 +2,7 @@ // See Paul Hsieh's page at: http://www.azillionmonkeys.com/qed/hash.html // Also see http://www.concentric.net/~Ttwang/tech/inthash.htm, // http://burtleburtle.net/bob/hash/integer.html + #include "crn_core.h" #undef get16bits @@ -13,56 +14,61 @@ #define get16bits(d) ((((uint32)(((const uint8*)(d))[1])) << 8) + (uint32)(((const uint8*)(d))[0])) #endif -namespace crnlib { -uint32 fast_hash(const void* p, int len) { - const char* data = static_cast(p); - - uint32 hash = len, tmp; - int rem; +namespace crnlib +{ + uint32 fast_hash(const void* p, int len) + { + const char* data = static_cast(p); - if (len <= 0 || data == NULL) - return 0; + uint32 hash = len, tmp; + int rem; - rem = len & 3; - len >>= 2; + if (len <= 0 || data == NULL) + { + return 0; + } - /* Main loop */ - for (; len > 0; len--) { - hash += get16bits(data); - tmp = (get16bits(data + 2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - data += 2 * sizeof(uint16); - hash += hash >> 11; - } + rem = len & 3; + len >>= 2; - /* Handle end cases */ - switch (rem) { - case 3: - hash += get16bits(data); - hash ^= hash << 16; - hash ^= data[sizeof(uint16)] << 18; - hash += hash >> 11; - break; - 
case 2: - hash += get16bits(data); - hash ^= hash << 11; - hash += hash >> 17; - break; - case 1: - hash += *data; - hash ^= hash << 10; - hash += hash >> 1; - } + /* Main loop */ + for (; len > 0; len--) + { + hash += get16bits(data); + tmp = (get16bits(data + 2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2 * sizeof(uint16); + hash += hash >> 11; + } - /* Force "avalanching" of final 127 bits */ - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + /* Handle end cases */ + switch (rem) + { + case 3: + hash += get16bits(data); + hash ^= hash << 16; + hash ^= data[sizeof(uint16)] << 18; + hash += hash >> 11; + break; + case 2: + hash += get16bits(data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: + hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } - return hash; -} + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; + } } // namespace crnlib diff --git a/crnlib/crn_hash.h b/crnlib/crn_hash.h index 612ec4c..0fc7b40 100644 --- a/crnlib/crn_hash.h +++ b/crnlib/crn_hash.h @@ -1,33 +1,36 @@ // File: crn_hash.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -CRN_EXPORT uint32 fast_hash(const void* p, int len); - -// 4-byte integer hash, full avalanche -inline uint32 bitmix32c(uint32 a) { - a = (a + 0x7ed55d16) + (a << 12); - a = (a ^ 0xc761c23c) ^ (a >> 19); - a = (a + 0x165667b1) + (a << 5); - a = (a + 0xd3a2646c) ^ (a << 9); - a = (a + 0xfd7046c5) + (a << 3); - a = (a ^ 0xb55a4f09) ^ (a >> 16); - return a; -} +namespace crnlib +{ + CRN_EXPORT uint32 fast_hash(const void* p, int len); -// 4-byte integer hash, full avalanche, no constants -inline uint32 bitmix32(uint32 a) { - a -= (a << 6); - a ^= (a >> 17); - a -= (a << 9); - a ^= (a << 4); - a -= 
(a << 3); - a ^= (a << 10); - a ^= (a >> 15); - return a; -} + // 4-byte integer hash, full avalanche + inline uint32 bitmix32c(uint32 a) + { + a = (a + 0x7ed55d16) + (a << 12); + a = (a ^ 0xc761c23c) ^ (a >> 19); + a = (a + 0x165667b1) + (a << 5); + a = (a + 0xd3a2646c) ^ (a << 9); + a = (a + 0xfd7046c5) + (a << 3); + a = (a ^ 0xb55a4f09) ^ (a >> 16); + return a; + } + // 4-byte integer hash, full avalanche, no constants + inline uint32 bitmix32(uint32 a) + { + a -= (a << 6); + a ^= (a >> 17); + a -= (a << 9); + a ^= (a << 4); + a -= (a << 3); + a ^= (a << 10); + a ^= (a >> 15); + return a; + } } // namespace crnlib diff --git a/crnlib/crn_lzma_codec.cpp b/crnlib/crn_lzma_codec.cpp index 7d41280..e075a4b 100644 --- a/crnlib/crn_lzma_codec.cpp +++ b/crnlib/crn_lzma_codec.cpp @@ -9,126 +9,155 @@ #include "crn_checksum.h" #include "crn_threading.h" -namespace crnlib { -lzma_codec::lzma_codec() - : m_pCompress(LzmaCompress), - m_pUncompress(LzmaUncompress) { - CRNLIB_ASSUME(cLZMAPropsSize == LZMA_PROPS_SIZE); -} - -lzma_codec::~lzma_codec() { -} - -bool lzma_codec::pack(const void* p, uint n, crnlib::vector& buf) { - if (n > 1024U * 1024U * 1024U) - return false; - - uint max_comp_size = n + math::maximum(128, n >> 8); - buf.resize(sizeof(header) + max_comp_size); - - header* pHDR = reinterpret_cast(&buf[0]); - uint8* pComp_data = &buf[sizeof(header)]; - - utils::zero_object(*pHDR); - - pHDR->m_uncomp_size = n; - pHDR->m_adler32 = adler32(p, n); - - if (n) { - size_t destLen = 0; - size_t outPropsSize = 0; - int status = SZ_ERROR_INPUT_EOF; - - for (uint trial = 0; trial < 3; trial++) { - destLen = max_comp_size; - outPropsSize = cLZMAPropsSize; - - status = (*m_pCompress)(pComp_data, &destLen, reinterpret_cast(p), n, - pHDR->m_lzma_props, &outPropsSize, - -1, /* 0 <= level <= 9, default = 5 */ - 0, /* default = (1 << 24) */ - -1, /* 0 <= lc <= 8, default = 3 */ - -1, /* 0 <= lp <= 4, default = 0 */ - -1, /* 0 <= pb <= 4, default = 2 */ - -1, /* 5 <= fb <= 273, 
default = 32 */ -#ifdef WIN32 - (g_number_of_processors > 1) ? 2 : 1 -#else - 1 -#endif - ); - - if (status != SZ_ERROR_OUTPUT_EOF) - break; - - max_comp_size += ((n + 1) / 2); - buf.resize(sizeof(header) + max_comp_size); - pHDR = reinterpret_cast(&buf[0]); - pComp_data = &buf[sizeof(header)]; +namespace crnlib +{ + lzma_codec::lzma_codec(): + m_pCompress(LzmaCompress), + m_pUncompress(LzmaUncompress) + { + CRNLIB_ASSUME(cLZMAPropsSize == LZMA_PROPS_SIZE); } - if (status != SZ_OK) { - buf.clear(); - return false; + lzma_codec::~lzma_codec() + { } - pHDR->m_comp_size = static_cast(destLen); - - buf.resize(CRNLIB_SIZEOF_U32(header) + static_cast(destLen)); - } - - pHDR->m_sig = header::cSig; - pHDR->m_checksum = static_cast(adler32((uint8*)pHDR + header::cChecksumSkipBytes, sizeof(header) - header::cChecksumSkipBytes)); - - return true; -} - -bool lzma_codec::unpack(const void* p, uint n, crnlib::vector& buf) { - buf.resize(0); - - if (n < sizeof(header)) - return false; - - const header& hdr = *static_cast(p); - if (hdr.m_sig != header::cSig) - return false; - - if (static_cast(adler32((const uint8*)&hdr + header::cChecksumSkipBytes, sizeof(hdr) - header::cChecksumSkipBytes)) != hdr.m_checksum) - return false; - - if (!hdr.m_uncomp_size) - return true; - - if (!hdr.m_comp_size) - return false; + bool lzma_codec::pack(const void* p, uint n, crnlib::vector& buf) + { + if (n > 1024U * 1024U * 1024U) + { + return false; + } + + uint max_comp_size = n + math::maximum(128, n >> 8); + buf.resize(sizeof(header) + max_comp_size); + + header* pHDR = reinterpret_cast(&buf[0]); + uint8* pComp_data = &buf[sizeof(header)]; + + utils::zero_object(*pHDR); + + pHDR->m_uncomp_size = n; + pHDR->m_adler32 = adler32(p, n); + + if (n) + { + size_t destLen = 0; + size_t outPropsSize = 0; + int status = SZ_ERROR_INPUT_EOF; + + for (uint trial = 0; trial < 3; trial++) + { + destLen = max_comp_size; + outPropsSize = cLZMAPropsSize; + + status = (*m_pCompress)(pComp_data, &destLen, 
reinterpret_cast(p), n, + pHDR->m_lzma_props, &outPropsSize, + -1, /* 0 <= level <= 9, default = 5 */ + 0, /* default = (1 << 24) */ + -1, /* 0 <= lc <= 8, default = 3 */ + -1, /* 0 <= lp <= 4, default = 0 */ + -1, /* 0 <= pb <= 4, default = 2 */ + -1, /* 5 <= fb <= 273, default = 32 */ +#ifdef WIN32 + (g_number_of_processors > 1) ? 2 : 1 +#else + 1 +#endif + ); - if (hdr.m_uncomp_size > 1024U * 1024U * 1024U) - return false; + if (status != SZ_ERROR_OUTPUT_EOF) + { + break; + } - if (!buf.try_resize(hdr.m_uncomp_size)) - return false; + max_comp_size += ((n + 1) / 2); + buf.resize(sizeof(header) + max_comp_size); + pHDR = reinterpret_cast(&buf[0]); + pComp_data = &buf[sizeof(header)]; + } - const uint8* pComp_data = static_cast(p) + sizeof(header); - size_t srcLen = n - sizeof(header); - if (srcLen < hdr.m_comp_size) - return false; + if (status != SZ_OK) + { + buf.clear(); + return false; + } - size_t destLen = hdr.m_uncomp_size; + pHDR->m_comp_size = static_cast(destLen); - int status = (*m_pUncompress)(&buf[0], &destLen, pComp_data, &srcLen, - hdr.m_lzma_props, cLZMAPropsSize); + buf.resize(CRNLIB_SIZEOF_U32(header) + static_cast(destLen)); + } - if ((status != SZ_OK) || (destLen != hdr.m_uncomp_size)) { - buf.clear(); - return false; - } + pHDR->m_sig = header::cSig; + pHDR->m_checksum = static_cast(adler32((uint8*)pHDR + header::cChecksumSkipBytes, sizeof(header) - header::cChecksumSkipBytes)); - if (adler32(&buf[0], buf.size()) != hdr.m_adler32) { - buf.clear(); - return false; - } + return true; + } - return true; -} + bool lzma_codec::unpack(const void* p, uint n, crnlib::vector& buf) + { + buf.resize(0); + + if (n < sizeof(header)) + { + return false; + } + + const header& hdr = *static_cast(p); + if (hdr.m_sig != header::cSig) + { + return false; + } + + if (static_cast(adler32((const uint8*)&hdr + header::cChecksumSkipBytes, sizeof(hdr) - header::cChecksumSkipBytes)) != hdr.m_checksum) + { + return false; + } + + if (!hdr.m_uncomp_size) + { + return 
true; + } + + if (!hdr.m_comp_size) + { + return false; + } + + if (hdr.m_uncomp_size > 1024U * 1024U * 1024U) + { + return false; + } + + if (!buf.try_resize(hdr.m_uncomp_size)) + { + return false; + } + + const uint8* pComp_data = static_cast(p) + sizeof(header); + size_t srcLen = n - sizeof(header); + if (srcLen < hdr.m_comp_size) + { + return false; + } + + size_t destLen = hdr.m_uncomp_size; + + int status = (*m_pUncompress)(&buf[0], &destLen, pComp_data, &srcLen, hdr.m_lzma_props, cLZMAPropsSize); + + if ((status != SZ_OK) || (destLen != hdr.m_uncomp_size)) + { + buf.clear(); + return false; + } + + if (adler32(&buf[0], buf.size()) != hdr.m_adler32) + { + buf.clear(); + return false; + } + + return true; + } } // namespace crnlib diff --git a/crnlib/crn_lzma_codec.h b/crnlib/crn_lzma_codec.h index cd51151..bfba7f0 100644 --- a/crnlib/crn_lzma_codec.h +++ b/crnlib/crn_lzma_codec.h @@ -5,55 +5,63 @@ #include "crn_packed_uint.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT lzma_codec { - public: - lzma_codec(); - ~lzma_codec(); +namespace crnlib +{ + class CRN_EXPORT lzma_codec + { + public: + lzma_codec(); + ~lzma_codec(); - // Always available, because we're statically linking in lzmalib now vs. dynamically loading the DLL. - bool is_initialized() const { return true; } + // Always available, because we're statically linking in lzmalib now vs. dynamically loading the DLL. 
+ bool is_initialized() const + { + return true; + } - bool pack(const void* p, uint n, crnlib::vector& buf); + bool pack(const void* p, uint n, crnlib::vector& buf); - bool unpack(const void* p, uint n, crnlib::vector& buf); + bool unpack(const void* p, uint n, crnlib::vector& buf); - private: - typedef int(CRNLIB_STDCALL* LzmaCompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, - unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* default = (1 << 24) */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ); + private: + typedef int(CRNLIB_STDCALL* LzmaCompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, + unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); - typedef int(CRNLIB_STDCALL* LzmaUncompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, - const unsigned char* props, size_t propsSize); + typedef int(CRNLIB_STDCALL* LzmaUncompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, + const unsigned char* props, size_t propsSize); - LzmaCompressFuncPtr m_pCompress; - LzmaUncompressFuncPtr m_pUncompress; + LzmaCompressFuncPtr m_pCompress; + LzmaUncompressFuncPtr m_pUncompress; - enum { cLZMAPropsSize = 5 }; + enum { cLZMAPropsSize = 5 }; #pragma pack(push) #pragma pack(1) - struct header { - enum { cSig = 
'L' | ('0' << 8), - cChecksumSkipBytes = 3 }; - packed_uint<2> m_sig; - uint8 m_checksum; + struct header + { + enum { + cSig = 'L' | ('0' << 8), + cChecksumSkipBytes = 3 + }; + packed_uint<2> m_sig; + uint8 m_checksum; - uint8 m_lzma_props[cLZMAPropsSize]; + uint8 m_lzma_props[cLZMAPropsSize]; - packed_uint<4> m_comp_size; - packed_uint<4> m_uncomp_size; + packed_uint<4> m_comp_size; + packed_uint<4> m_uncomp_size; - packed_uint<4> m_adler32; - }; + packed_uint<4> m_adler32; + }; #pragma pack(pop) -}; + }; } // namespace crnlib diff --git a/crnlib/crn_threading_null.h b/crnlib/crn_threading_null.h index 9db6c2b..f5f2fe8 100644 --- a/crnlib/crn_threading_null.h +++ b/crnlib/crn_threading_null.h @@ -1,166 +1,226 @@ // File: crn_threading_null.h // See Copyright Notice and license at the end of include/crnlib.h + #pragma once +#include "crn_core.h" #include "crn_atomics.h" -namespace crnlib { -const uint g_number_of_processors = 1; - -inline void crn_threading_init() { -} +namespace crnlib +{ + const uint g_number_of_processors = 1; -typedef uint64 crn_thread_id_t; -inline crn_thread_id_t crn_get_current_thread_id() { - return 0; -} + inline void crn_threading_init() + { + } -inline void crn_sleep(unsigned int milliseconds) { - milliseconds; -} + typedef uint64 crn_thread_id_t; + inline crn_thread_id_t crn_get_current_thread_id() + { + return 0; + } -inline uint crn_get_max_helper_threads() { - return 0; -} - -class mutex { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); + inline void crn_sleep(unsigned int milliseconds) + { + milliseconds; + } - public: - inline mutex(unsigned int spin_count = 0) { - spin_count; - } - - inline ~mutex() { - } - - inline void lock() { - } - - inline void unlock() { - } - - inline void set_spin_count(unsigned int count) { - count; - } -}; - -class scoped_mutex { - scoped_mutex(const scoped_mutex&); - scoped_mutex& operator=(const scoped_mutex&); - - public: - inline scoped_mutex(mutex& lock) - : m_lock(lock) { m_lock.lock(); } - inline 
~scoped_mutex() { m_lock.unlock(); } - - private: - mutex& m_lock; -}; - -// Simple non-recursive spinlock. -class spinlock { - public: - inline spinlock() { - } - - inline void lock(uint32 max_spins = 4096, bool yielding = true, bool memoryBarrier = true) { - max_spins, yielding, memoryBarrier; - } - - inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) { - max_spins, yielding; - } - - inline void unlock() { - } - - inline void unlock_no_barrier() { - } -}; - -class scoped_spinlock { - scoped_spinlock(const scoped_spinlock&); - scoped_spinlock& operator=(const scoped_spinlock&); - - public: - inline scoped_spinlock(spinlock& lock) - : m_lock(lock) { m_lock.lock(); } - inline ~scoped_spinlock() { m_lock.unlock(); } - - private: - spinlock& m_lock; -}; - -class semaphore { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); - - public: - inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) { - initialCount, maximumCount, pName; - } - - inline ~semaphore() { - } - - inline void release(long releaseCount = 1, long* pPreviousCount = NULL) { - releaseCount, pPreviousCount; - } - - inline bool wait(uint32 milliseconds = cUINT32_MAX) { - milliseconds; - return true; - } -}; - -class task_pool { - public: - inline task_pool() {} - inline task_pool(uint num_threads) { num_threads; } - inline ~task_pool() {} - - inline bool init(uint num_threads) { - num_threads; - return true; - } - inline void deinit() {} - - inline uint get_num_threads() const { return 0; } - inline uint get_num_outstanding_tasks() const { return 0; } - - // C-style task callback - typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL) { - pFunc(data, pData_ptr); - return true; - } - - class executable_task { - public: - virtual void execute_task(uint64 data, void* pData_ptr) = 0; - }; - - // It's the caller's responsibility to delete pObj within the 
execute_task() method, if needed! - inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL) { - pObj->execute_task(data, pData_ptr); - return true; - } - - template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL) { - (pObject->*pObject_method)(data, pData_ptr); - return true; - } - - template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL) { - for (uint i = 0; i < num_tasks; i++) { - (pObject->*pObject_method)(first_data + i, pData_ptr); + inline uint crn_get_max_helper_threads() + { + return 0; } - return true; - } - inline void join() {} -}; + class mutex + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); + public: + inline mutex(unsigned int spin_count = 0) + { + spin_count; + } + + inline ~mutex() + { + } + + inline void lock() + { + } + + inline void unlock() + { + } + + inline void set_spin_count(unsigned int count) + { + count; + } + }; + + class scoped_mutex + { + scoped_mutex(const scoped_mutex&); + scoped_mutex& operator=(const scoped_mutex&); + + public: + inline scoped_mutex(mutex& lock): + m_lock(lock) + { + m_lock.lock(); + } + inline ~scoped_mutex() + { + m_lock.unlock(); + } + + private: + mutex& m_lock; + }; + + // Simple non-recursive spinlock. 
+ class spinlock + { + public: + inline spinlock() + { + } + + inline void lock(uint32 max_spins = 4096, bool yielding = true, bool memoryBarrier = true) + { + max_spins, yielding, memoryBarrier; + } + + inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) + { + max_spins, yielding; + } + + inline void unlock() + { + } + + inline void unlock_no_barrier() + { + } + }; + + class scoped_spinlock + { + scoped_spinlock(const scoped_spinlock&); + scoped_spinlock& operator=(const scoped_spinlock&); + public: + inline scoped_spinlock(spinlock& lock): + m_lock(lock) + { + m_lock.lock(); + } + inline ~scoped_spinlock() + { + m_lock.unlock(); + } + + private: + spinlock& m_lock; + }; + + class semaphore + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + public: + inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) + { + initialCount, maximumCount, pName; + } + + inline ~semaphore() + { + } + + inline void release(long releaseCount = 1, long* pPreviousCount = NULL) + { + releaseCount, pPreviousCount; + } + + inline bool wait(uint32 milliseconds = cUINT32_MAX) + { + milliseconds; + return true; + } + }; + + class task_pool + { + public: + inline task_pool() + { + } + + inline task_pool(uint num_threads) + { + num_threads; + } + inline ~task_pool() + { + } + + inline bool init(uint num_threads) + { + num_threads; + return true; + } + inline void deinit() + { + } + + inline uint get_num_threads() const + { + return 0; + } + inline uint get_num_outstanding_tasks() const + { + return 0; + } + + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL) + { + pFunc(data, pData_ptr); + return true; + } + + class executable_task { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; + + // It's the caller's responsibility to delete pObj within the execute_task() method, if 
needed! + inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL) + { + pObj->execute_task(data, pData_ptr); + return true; + } + + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL) + { + (pObject->*pObject_method)(data, pData_ptr); + return true; + } + + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL) + { + for (uint i = 0; i < num_tasks; i++) + { + (pObject->*pObject_method)(first_data + i, pData_ptr); + } + return true; + } + + inline void join() + { + } + }; } // namespace crnlib diff --git a/inc/crnlib.h b/inc/crnlib.h index bfbf9b0..d79e942 100644 --- a/inc/crnlib.h +++ b/inc/crnlib.h @@ -34,341 +34,341 @@ typedef unsigned int crn_bool; // crnlib can compress to these file types. enum crn_file_type { - // .CRN - cCRNFileTypeCRN = 0, + // .CRN + cCRNFileTypeCRN = 0, - // .DDS using regular DXT or clustered DXT - cCRNFileTypeDDS, + // .DDS using regular DXT or clustered DXT + cCRNFileTypeDDS, - cCRNFileTypeForceDWORD = 0xFFFFFFFF + cCRNFileTypeForceDWORD = 0xFFFFFFFF }; // Supported compressed pixel formats. // Basically all the standard DX9 formats, with some swizzled DXT5 formats // (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats. enum crn_format { - cCRNFmtInvalid = -1, + cCRNFmtInvalid = -1, - cCRNFmtDXT1 = 0, + cCRNFmtDXT1 = 0, - cCRNFmtFirstValid = cCRNFmtDXT1, + cCRNFmtFirstValid = cCRNFmtDXT1, - // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS. - cCRNFmtDXT3, + // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS. 
+ cCRNFmtDXT3, - cCRNFmtDXT5, + cCRNFmtDXT5, - // Various DXT5 derivatives - cCRNFmtDXT5_CCxY, // Luma-chroma - cCRNFmtDXT5_xGxR, // Swizzled 2-component - cCRNFmtDXT5_xGBR, // Swizzled 3-component - cCRNFmtDXT5_AGBR, // Swizzled 4-component + // Various DXT5 derivatives + cCRNFmtDXT5_CCxY, // Luma-chroma + cCRNFmtDXT5_xGxR, // Swizzled 2-component + cCRNFmtDXT5_xGBR, // Swizzled 3-component + cCRNFmtDXT5_AGBR, // Swizzled 4-component - // ATI 3DC and X360 DXN - cCRNFmtDXN_XY, - cCRNFmtDXN_YX, + // ATI 3DC and X360 DXN + cCRNFmtDXN_XY, + cCRNFmtDXN_YX, - // DXT5 alpha blocks only - cCRNFmtDXT5A, + // DXT5 alpha blocks only + cCRNFmtDXT5A, - cCRNFmtETC1, - cCRNFmtETC2, - cCRNFmtETC2A, - cCRNFmtETC1S, - cCRNFmtETC2AS, + cCRNFmtETC1, + cCRNFmtETC2, + cCRNFmtETC2A, + cCRNFmtETC1S, + cCRNFmtETC2AS, - cCRNFmtTotal, + cCRNFmtTotal, - cCRNFmtForceDWORD = 0xFFFFFFFF + cCRNFmtForceDWORD = 0xFFFFFFFF }; // Various library/file format limits. enum crn_limits { - // Max. mipmap level resolution on any axis. - cCRNMaxLevelResolution = 4096, + // Max. mipmap level resolution on any axis. + cCRNMaxLevelResolution = 4096, - cCRNMinPaletteSize = 8, - cCRNMaxPaletteSize = 8192, + cCRNMinPaletteSize = 8, + cCRNMaxPaletteSize = 8192, - cCRNMaxFaces = 6, - cCRNMaxLevels = 16, + cCRNMaxFaces = 6, + cCRNMaxLevels = 16, - cCRNMaxHelperThreads = 15, + cCRNMaxHelperThreads = 15, - cCRNMinQualityLevel = 0, - cCRNMaxQualityLevel = 255 + cCRNMinQualityLevel = 0, + cCRNMaxQualityLevel = 255 }; // CRN/DDS compression flags. // See the m_flags member in the crn_comp_params struct, below. enum crn_comp_flags { - // Enables perceptual colorspace distance metrics if set. - // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps! - // Default: Set - cCRNCompFlagPerceptual = 1, - - // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed. 
- // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine - // where large macroblocks can be used without artifacts isn't perfect. - // Default: Set. - cCRNCompFlagHierarchical = 2, - - // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews. - // Default: Not set. - cCRNCompFlagQuick = 4, - - // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency. - // DXT5: OK to use both DXT5 block types. - // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types. - // Default: Set. - cCRNCompFlagUseBothBlockTypes = 8, - - // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha). - // Currently only used when writing to .DDS files, .CRN never uses alpha blocks. - // Default: Not set. - cCRNCompFlagUseTransparentIndicesForBlack = 16, - - // Disables endpoint caching, for more deterministic output. - // Currently only used when writing to .DDS files. - // Default: Not set. - cCRNCompFlagDisableEndpointCaching = 32, - - // If enabled, use the cCRNColorEndpointPaletteSize, etc. params to control the CRN palette sizes. Only useful when writing to .CRN files. - // Default: Not set. - cCRNCompFlagManualPaletteSizes = 64, - - // If enabled, DXT1A alpha blocks are used to encode single bit transparency. - // Default: Not set. - cCRNCompFlagDXT1AForTransparency = 128, - - // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr). - // This increases quality when compressing grayscale images, because the compressor can spread the luma error amoung all three channels (i.e. it can generate blocks - // with some chroma present if doing so will ultimately lead to lower luma error). - // Only enable on grayscale source images. - // Default: Not set. 
- cCRNCompFlagGrayscaleSampling = 256, - - // If enabled, debug information will be output during compression. - // Default: Not set. - cCRNCompFlagDebugging = 0x80000000, - - cCRNCompFlagForceDWORD = 0xFFFFFFFF + // Enables perceptual colorspace distance metrics if set. + // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps! + // Default: Set + cCRNCompFlagPerceptual = 1, + + // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed. + // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine + // where large macroblocks can be used without artifacts isn't perfect. + // Default: Set. + cCRNCompFlagHierarchical = 2, + + // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews. + // Default: Not set. + cCRNCompFlagQuick = 4, + + // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency. + // DXT5: OK to use both DXT5 block types. + // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types. + // Default: Set. + cCRNCompFlagUseBothBlockTypes = 8, + + // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha). + // Currently only used when writing to .DDS files, .CRN never uses alpha blocks. + // Default: Not set. + cCRNCompFlagUseTransparentIndicesForBlack = 16, + + // Disables endpoint caching, for more deterministic output. + // Currently only used when writing to .DDS files. + // Default: Not set. + cCRNCompFlagDisableEndpointCaching = 32, + + // If enabled, use the cCRNColorEndpointPaletteSize, etc. params to control the CRN palette sizes. Only useful when writing to .CRN files. + // Default: Not set. + cCRNCompFlagManualPaletteSizes = 64, + + // If enabled, DXT1A alpha blocks are used to encode single bit transparency. + // Default: Not set. 
+ cCRNCompFlagDXT1AForTransparency = 128, + + // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr). + // This increases quality when compressing grayscale images, because the compressor can spread the luma error among all three channels (i.e. it can generate blocks + // with some chroma present if doing so will ultimately lead to lower luma error). + // Only enable on grayscale source images. + // Default: Not set. + cCRNCompFlagGrayscaleSampling = 256, + + // If enabled, debug information will be output during compression. + // Default: Not set. + cCRNCompFlagDebugging = 0x80000000, + + cCRNCompFlagForceDWORD = 0xFFFFFFFF }; // Controls DXTn quality vs. speed control - only used when compressing to .DDS. enum crn_dxt_quality { - cCRNDXTQualitySuperFast, - cCRNDXTQualityFast, - cCRNDXTQualityNormal, - cCRNDXTQualityBetter, - cCRNDXTQualityUber, + cCRNDXTQualitySuperFast, + cCRNDXTQualityFast, + cCRNDXTQualityNormal, + cCRNDXTQualityBetter, + cCRNDXTQualityUber, - cCRNDXTQualityTotal, + cCRNDXTQualityTotal, - cCRNDXTQualityForceDWORD = 0xFFFFFFFF + cCRNDXTQualityForceDWORD = 0xFFFFFFFF }; // Which DXTn compressor to use when compressing to plain (non-clustered) .DDS.
enum crn_dxt_compressor_type { - cCRNDXTCompressorCRN, // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot fasterw with similiar quality to Erricson's) - cCRNDXTCompressorCRNF, // Use crnlib's "fast" DXTc block compressor - cCRNDXTCompressorRYG, // Use RYG's DXTc block compressor (low quality, but very fast) + cCRNDXTCompressorCRN, // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot faster with similar quality to Ericsson's) + cCRNDXTCompressorCRNF, // Use crnlib's "fast" DXTc block compressor + cCRNDXTCompressorRYG, // Use RYG's DXTc block compressor (low quality, but very fast) #if CRNLIB_SUPPORT_ATI_COMPRESS - cCRNDXTCompressorATI, + cCRNDXTCompressorATI, #endif #if CRNLIB_SUPPORT_SQUISH - cCRNDXTCompressorSquish, + cCRNDXTCompressorSquish, #endif - cCRNTotalDXTCompressors, + cCRNTotalDXTCompressors, - cCRNDXTCompressorForceDWORD = 0xFFFFFFFF + cCRNDXTCompressorForceDWORD = 0xFFFFFFFF }; // Progress callback function. // Processing will stop prematurely (and fail) if the callback returns false. // phase_index, total_phases - high level progress // subphase_index, total_subphases - progress within current phase -typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr); +typedef crn_bool(*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr); // CRN/DDS compression parameters struct. struct crn_comp_params { - inline crn_comp_params() { clear(); } - - // Clear struct to default parameters.
- inline void clear() { - m_size_of_obj = sizeof(*this); - m_file_type = cCRNFileTypeCRN; - m_faces = 1; - m_width = 0; - m_height = 0; - m_levels = 1; - m_format = cCRNFmtDXT1; - m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes; - - for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) - for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) - m_pImages[f][l] = NULL; - - m_target_bitrate = 0.0f; - m_quality_level = cCRNMaxQualityLevel; - m_dxt1a_alpha_threshold = 128; - m_dxt_quality = cCRNDXTQualityUber; - m_dxt_compressor_type = cCRNDXTCompressorCRN; - m_alpha_component = 3; - - m_crn_adaptive_tile_color_psnr_derating = 2.0f; - m_crn_adaptive_tile_alpha_psnr_derating = 2.0f; - m_crn_color_endpoint_palette_size = 0; - m_crn_color_selector_palette_size = 0; - m_crn_alpha_endpoint_palette_size = 0; - m_crn_alpha_selector_palette_size = 0; - - m_num_helper_threads = 0; - m_userdata0 = 0; - m_userdata1 = 0; - m_pProgress_func = NULL; - m_pProgress_func_data = NULL; - } - - inline bool operator==(const crn_comp_params& rhs) const { + inline crn_comp_params() { clear(); } + + // Clear struct to default parameters. 
+ inline void clear() { + m_size_of_obj = sizeof(*this); + m_file_type = cCRNFileTypeCRN; + m_faces = 1; + m_width = 0; + m_height = 0; + m_levels = 1; + m_format = cCRNFmtDXT1; + m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes; + + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + m_pImages[f][l] = NULL; + + m_target_bitrate = 0.0f; + m_quality_level = cCRNMaxQualityLevel; + m_dxt1a_alpha_threshold = 128; + m_dxt_quality = cCRNDXTQualityUber; + m_dxt_compressor_type = cCRNDXTCompressorCRN; + m_alpha_component = 3; + + m_crn_adaptive_tile_color_psnr_derating = 2.0f; + m_crn_adaptive_tile_alpha_psnr_derating = 2.0f; + m_crn_color_endpoint_palette_size = 0; + m_crn_color_selector_palette_size = 0; + m_crn_alpha_endpoint_palette_size = 0; + m_crn_alpha_selector_palette_size = 0; + + m_num_helper_threads = 0; + m_userdata0 = 0; + m_userdata1 = 0; + m_pProgress_func = NULL; + m_pProgress_func_data = NULL; + } + + inline bool operator==(const crn_comp_params& rhs) const { #define CRNLIB_COMP(x) \ do { \ if ((x) != (rhs.x)) \ return false; \ } while (0) - CRNLIB_COMP(m_size_of_obj); - CRNLIB_COMP(m_file_type); - CRNLIB_COMP(m_faces); - CRNLIB_COMP(m_width); - CRNLIB_COMP(m_height); - CRNLIB_COMP(m_levels); - CRNLIB_COMP(m_format); - CRNLIB_COMP(m_flags); - CRNLIB_COMP(m_target_bitrate); - CRNLIB_COMP(m_quality_level); - CRNLIB_COMP(m_dxt1a_alpha_threshold); - CRNLIB_COMP(m_dxt_quality); - CRNLIB_COMP(m_dxt_compressor_type); - CRNLIB_COMP(m_alpha_component); - CRNLIB_COMP(m_crn_adaptive_tile_color_psnr_derating); - CRNLIB_COMP(m_crn_adaptive_tile_alpha_psnr_derating); - CRNLIB_COMP(m_crn_color_endpoint_palette_size); - CRNLIB_COMP(m_crn_color_selector_palette_size); - CRNLIB_COMP(m_crn_alpha_endpoint_palette_size); - CRNLIB_COMP(m_crn_alpha_selector_palette_size); - CRNLIB_COMP(m_num_helper_threads); - CRNLIB_COMP(m_userdata0); - CRNLIB_COMP(m_userdata1); - 
CRNLIB_COMP(m_pProgress_func); - CRNLIB_COMP(m_pProgress_func_data); - - for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) - for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) - CRNLIB_COMP(m_pImages[f][l]); + CRNLIB_COMP(m_size_of_obj); + CRNLIB_COMP(m_file_type); + CRNLIB_COMP(m_faces); + CRNLIB_COMP(m_width); + CRNLIB_COMP(m_height); + CRNLIB_COMP(m_levels); + CRNLIB_COMP(m_format); + CRNLIB_COMP(m_flags); + CRNLIB_COMP(m_target_bitrate); + CRNLIB_COMP(m_quality_level); + CRNLIB_COMP(m_dxt1a_alpha_threshold); + CRNLIB_COMP(m_dxt_quality); + CRNLIB_COMP(m_dxt_compressor_type); + CRNLIB_COMP(m_alpha_component); + CRNLIB_COMP(m_crn_adaptive_tile_color_psnr_derating); + CRNLIB_COMP(m_crn_adaptive_tile_alpha_psnr_derating); + CRNLIB_COMP(m_crn_color_endpoint_palette_size); + CRNLIB_COMP(m_crn_color_selector_palette_size); + CRNLIB_COMP(m_crn_alpha_endpoint_palette_size); + CRNLIB_COMP(m_crn_alpha_selector_palette_size); + CRNLIB_COMP(m_num_helper_threads); + CRNLIB_COMP(m_userdata0); + CRNLIB_COMP(m_userdata1); + CRNLIB_COMP(m_pProgress_func); + CRNLIB_COMP(m_pProgress_func_data); + + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + CRNLIB_COMP(m_pImages[f][l]); #undef CRNLIB_COMP - return true; - } - - // Returns true if the input parameters are reasonable. 
- inline bool check() const { - if ((m_file_type > cCRNFileTypeDDS) || - (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) || - (m_dxt1a_alpha_threshold > 255) || - ((m_faces != 1) && (m_faces != 6)) || - ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) || - ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) || - ((m_levels < 1) || (m_levels > cCRNMaxLevels)) || - ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) || - ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) || - ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) || - ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) || - ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) || - (m_alpha_component > 3) || - (m_num_helper_threads > cCRNMaxHelperThreads) || - (m_dxt_quality > cCRNDXTQualityUber) || - (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) { - return false; + return true; } - return true; - } - // Helper to set/get flags from m_flags member. - inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; } - inline void set_flag(crn_comp_flags flag, bool val) { - m_flags &= ~flag; - if (val) - m_flags |= flag; - } + // Returns true if the input parameters are reasonable. 
+ inline bool check() const { + if ((m_file_type > cCRNFileTypeDDS) || + (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) || + (m_dxt1a_alpha_threshold > 255) || + ((m_faces != 1) && (m_faces != 6)) || + ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) || + ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) || + ((m_levels < 1) || (m_levels > cCRNMaxLevels)) || + ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) || + ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) || + ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) || + ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) || + ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) || + (m_alpha_component > 3) || + (m_num_helper_threads > cCRNMaxHelperThreads) || + (m_dxt_quality > cCRNDXTQualityUber) || + (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) { + return false; + } + return true; + } + + // Helper to set/get flags from m_flags member. + inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; } + inline void set_flag(crn_comp_flags flag, bool val) { + m_flags &= ~flag; + if (val) + m_flags |= flag; + } - crn_uint32 m_size_of_obj; + crn_uint32 m_size_of_obj; - crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS. + crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS. 
- crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap) - crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK - crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK - crn_uint32 m_levels; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap) + crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_levels; // [1,cCRNMaxLevels] - crn_format m_format; // Output pixel format. + crn_format m_format; // Output pixel format. - crn_uint32 m_flags; // see crn_comp_flags enum + crn_uint32 m_flags; // see crn_comp_flags enum - // Array of pointers to 32bpp input images. - const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels]; + // Array of pointers to 32bpp input images. + const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels]; - // Target bitrate - if non-zero, the compressor will use an interpolative search to find the - // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll - // try disabling adaptive block sizes (cCRNCompFlagHierarchical flag) and redo the search. This process can be pretty slow. - float m_target_bitrate; + // Target bitrate - if non-zero, the compressor will use an interpolative search to find the + // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll + // try disabling adaptive block sizes (cCRNCompFlagHierarchical flag) and redo the search. This process can be pretty slow. + float m_target_bitrate; - // Desired quality level. - // Currently, CRN and DDS quality levels are not compatible with eachother from an image quality standpoint. - crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel] + // Desired quality level.
+ // Currently, CRN and DDS quality levels are not compatible with each other from an image quality standpoint. + crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel] - // DXTn compression parameters. - crn_uint32 m_dxt1a_alpha_threshold; - crn_dxt_quality m_dxt_quality; - crn_dxt_compressor_type m_dxt_compressor_type; + // DXTn compression parameters. + crn_uint32 m_dxt1a_alpha_threshold; + crn_dxt_quality m_dxt_quality; + crn_dxt_compressor_type m_dxt_compressor_type; - // Alpha channel's component. Defaults to 3. - crn_uint32 m_alpha_component; + // Alpha channel's component. Defaults to 3. + crn_uint32 m_alpha_component; - // Various low-level CRN specific parameters. - float m_crn_adaptive_tile_color_psnr_derating; - float m_crn_adaptive_tile_alpha_psnr_derating; + // Various low-level CRN specific parameters. + float m_crn_adaptive_tile_color_psnr_derating; + float m_crn_adaptive_tile_alpha_psnr_derating; - crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - // Number of helper threads to create during compression. 0=no threading. - crn_uint32 m_num_helper_threads; + // Number of helper threads to create during compression. 0=no threading. + crn_uint32 m_num_helper_threads; - // CRN userdata0 and userdata1 members, which are written directly to the header of the output file.
- crn_uint32 m_userdata0; - crn_uint32 m_userdata1; + // CRN userdata0 and userdata1 members, which are written directly to the header of the output file. + crn_uint32 m_userdata0; + crn_uint32 m_userdata1; - // User provided progress callback. - crn_progress_callback_func m_pProgress_func; - void* m_pProgress_func_data; + // User provided progress callback. + crn_progress_callback_func m_pProgress_func; + void* m_pProgress_func_data; }; // Mipmap generator's mode. enum crn_mip_mode { - cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps - cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps - cCRNMipModeGenerateMips, // Always generate new mipmaps - cCRNMipModeNoMips, // Output texture has no mipmaps + cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps + cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps + cCRNMipModeGenerateMips, // Always generate new mipmaps + cCRNMipModeNoMips, // Output texture has no mipmaps - cCRNMipModeTotal, + cCRNMipModeTotal, - cCRNModeForceDWORD = 0xFFFFFFFF + cCRNModeForceDWORD = 0xFFFFFFFF }; CRN_EXPORT const char* crn_get_mip_mode_desc(crn_mip_mode m); @@ -376,128 +376,128 @@ CRN_EXPORT const char* crn_get_mip_mode_name(crn_mip_mode m); // Mipmap generator's filter kernel. 
enum crn_mip_filter { - cCRNMipFilterBox, - cCRNMipFilterTent, - cCRNMipFilterLanczos4, - cCRNMipFilterMitchell, - cCRNMipFilterKaiser, // Kaiser=default mipmap filter + cCRNMipFilterBox, + cCRNMipFilterTent, + cCRNMipFilterLanczos4, + cCRNMipFilterMitchell, + cCRNMipFilterKaiser, // Kaiser=default mipmap filter - cCRNMipFilterTotal, + cCRNMipFilterTotal, - cCRNMipFilterForceDWORD = 0xFFFFFFFF + cCRNMipFilterForceDWORD = 0xFFFFFFFF }; CRN_EXPORT const char* crn_get_mip_filter_name(crn_mip_filter f); // Mipmap generator's scale mode. enum crn_scale_mode { - cCRNSMDisabled, - cCRNSMAbsolute, - cCRNSMRelative, - cCRNSMLowerPow2, - cCRNSMNearestPow2, - cCRNSMNextPow2, + cCRNSMDisabled, + cCRNSMAbsolute, + cCRNSMRelative, + cCRNSMLowerPow2, + cCRNSMNearestPow2, + cCRNSMNextPow2, - cCRNSMTotal, + cCRNSMTotal, - cCRNSMForceDWORD = 0xFFFFFFFF + cCRNSMForceDWORD = 0xFFFFFFFF }; CRN_EXPORT const char* crn_get_scale_mode_desc(crn_scale_mode sm); // Mipmap generator parameters. struct crn_mipmap_params { - inline crn_mipmap_params() { clear(); } - - inline void clear() { - m_size_of_obj = sizeof(*this); - m_mode = cCRNMipModeUseSourceOrGenerateMips; - m_filter = cCRNMipFilterKaiser; - m_gamma_filtering = true; - m_gamma = 2.2f; - // Default "blurriness" factor of .9 actually sharpens the output a little. 
- m_blurriness = .9f; - m_renormalize = false; - m_rtopmip = false; - m_tiled = false; - m_max_levels = cCRNMaxLevels; - m_min_mip_size = 1; - - m_scale_mode = cCRNSMDisabled; - m_scale_x = 1.0f; - m_scale_y = 1.0f; - - m_window_left = 0; - m_window_top = 0; - m_window_right = 0; - m_window_bottom = 0; - - m_clamp_scale = false; - m_clamp_width = 0; - m_clamp_height = 0; - } - - inline bool check() const { return true; } - - inline bool operator==(const crn_mipmap_params& rhs) const { + inline crn_mipmap_params() { clear(); } + + inline void clear() { + m_size_of_obj = sizeof(*this); + m_mode = cCRNMipModeUseSourceOrGenerateMips; + m_filter = cCRNMipFilterKaiser; + m_gamma_filtering = true; + m_gamma = 2.2f; + // Default "blurriness" factor of .9 actually sharpens the output a little. + m_blurriness = .9f; + m_renormalize = false; + m_rtopmip = false; + m_tiled = false; + m_max_levels = cCRNMaxLevels; + m_min_mip_size = 1; + + m_scale_mode = cCRNSMDisabled; + m_scale_x = 1.0f; + m_scale_y = 1.0f; + + m_window_left = 0; + m_window_top = 0; + m_window_right = 0; + m_window_bottom = 0; + + m_clamp_scale = false; + m_clamp_width = 0; + m_clamp_height = 0; + } + + inline bool check() const { return true; } + + inline bool operator==(const crn_mipmap_params& rhs) const { #define CRNLIB_COMP(x) \ do { \ if ((x) != (rhs.x)) \ return false; \ } while (0) - CRNLIB_COMP(m_size_of_obj); - CRNLIB_COMP(m_mode); - CRNLIB_COMP(m_filter); - CRNLIB_COMP(m_gamma_filtering); - CRNLIB_COMP(m_gamma); - CRNLIB_COMP(m_blurriness); - CRNLIB_COMP(m_renormalize); - CRNLIB_COMP(m_rtopmip); - CRNLIB_COMP(m_tiled); - CRNLIB_COMP(m_max_levels); - CRNLIB_COMP(m_min_mip_size); - CRNLIB_COMP(m_scale_mode); - CRNLIB_COMP(m_scale_x); - CRNLIB_COMP(m_scale_y); - CRNLIB_COMP(m_window_left); - CRNLIB_COMP(m_window_top); - CRNLIB_COMP(m_window_right); - CRNLIB_COMP(m_window_bottom); - CRNLIB_COMP(m_clamp_scale); - CRNLIB_COMP(m_clamp_width); - CRNLIB_COMP(m_clamp_height); - return true; + 
CRNLIB_COMP(m_size_of_obj); + CRNLIB_COMP(m_mode); + CRNLIB_COMP(m_filter); + CRNLIB_COMP(m_gamma_filtering); + CRNLIB_COMP(m_gamma); + CRNLIB_COMP(m_blurriness); + CRNLIB_COMP(m_renormalize); + CRNLIB_COMP(m_rtopmip); + CRNLIB_COMP(m_tiled); + CRNLIB_COMP(m_max_levels); + CRNLIB_COMP(m_min_mip_size); + CRNLIB_COMP(m_scale_mode); + CRNLIB_COMP(m_scale_x); + CRNLIB_COMP(m_scale_y); + CRNLIB_COMP(m_window_left); + CRNLIB_COMP(m_window_top); + CRNLIB_COMP(m_window_right); + CRNLIB_COMP(m_window_bottom); + CRNLIB_COMP(m_clamp_scale); + CRNLIB_COMP(m_clamp_width); + CRNLIB_COMP(m_clamp_height); + return true; #undef CRNLIB_COMP - } - crn_uint32 m_size_of_obj; + } + crn_uint32 m_size_of_obj; - crn_mip_mode m_mode; - crn_mip_filter m_filter; + crn_mip_mode m_mode; + crn_mip_filter m_filter; - crn_bool m_gamma_filtering; - float m_gamma; + crn_bool m_gamma_filtering; + float m_gamma; - float m_blurriness; + float m_blurriness; - crn_uint32 m_max_levels; - crn_uint32 m_min_mip_size; + crn_uint32 m_max_levels; + crn_uint32 m_min_mip_size; - crn_bool m_renormalize; - crn_bool m_rtopmip; - crn_bool m_tiled; + crn_bool m_renormalize; + crn_bool m_rtopmip; + crn_bool m_tiled; - crn_scale_mode m_scale_mode; - float m_scale_x; - float m_scale_y; + crn_scale_mode m_scale_mode; + float m_scale_x; + float m_scale_y; - crn_uint32 m_window_left; - crn_uint32 m_window_top; - crn_uint32 m_window_right; - crn_uint32 m_window_bottom; + crn_uint32 m_window_left; + crn_uint32 m_window_top; + crn_uint32 m_window_right; + crn_uint32 m_window_bottom; - crn_bool m_clamp_scale; - crn_uint32 m_clamp_width; - crn_uint32 m_clamp_height; + crn_bool m_clamp_scale; + crn_uint32 m_clamp_width; + crn_uint32 m_clamp_height; }; // -------- High-level helper function definitions for CDN/DDS compression. @@ -509,7 +509,7 @@ struct crn_mipmap_params { // Function to set an optional user provided memory allocation/reallocation/msize routines. // By default, crnlib just uses malloc(), free(), etc. 
for all allocations. typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); -typedef size_t (*crn_msize_func)(void* p, void* pUser_data); +typedef size_t(*crn_msize_func)(void* p, void* pUser_data); CRN_EXPORT void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); // Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). @@ -550,11 +550,11 @@ CRN_EXPORT void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint3 // See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats. // You are responsible for freeing each image block, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer. struct crn_texture_desc { - crn_uint32 m_faces; - crn_uint32 m_width; - crn_uint32 m_height; - crn_uint32 m_levels; - crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format + crn_uint32 m_faces; + crn_uint32 m_width; + crn_uint32 m_height; + crn_uint32 m_levels; + crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format }; CRN_EXPORT bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc); diff --git a/inc/dds_defs.h b/inc/dds_defs.h index c7fc43d..e58588c 100644 --- a/inc/dds_defs.h +++ b/inc/dds_defs.h @@ -1,5 +1,6 @@ // File: dds_defs.h // DX9 .DDS file header definitions. 
+ #ifndef CRNLIB_DDS_DEFS_H #define CRNLIB_DDS_DEFS_H @@ -7,142 +8,148 @@ #define CRNLIB_PIXEL_FMT_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) -namespace crnlib { -enum pixel_format { - PIXEL_FMT_INVALID = 0, - - PIXEL_FMT_DXT1 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '1'), - PIXEL_FMT_DXT2 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '2'), - PIXEL_FMT_DXT3 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), - PIXEL_FMT_DXT4 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), - PIXEL_FMT_DXT5 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), - PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX - PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY - PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf - - // Non-standard, crnlib-specific pixel formats (some of these are supported by ATI's Compressonator) - PIXEL_FMT_DXT5_CCxY = CRNLIB_PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), - PIXEL_FMT_DXT5_xGxR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'), - PIXEL_FMT_DXT5_xGBR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'), - PIXEL_FMT_DXT5_AGBR = CRNLIB_PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'), - - PIXEL_FMT_DXT1A = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', '1', 'A'), - PIXEL_FMT_ETC1 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', '1'), - PIXEL_FMT_ETC2 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', '2'), - PIXEL_FMT_ETC2A = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '2', 'A'), - PIXEL_FMT_ETC1S = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '1', 'S'), - PIXEL_FMT_ETC2AS = CRNLIB_PIXEL_FMT_FOURCC('E', '2', 'A', 'S'), - - PIXEL_FMT_R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'), - PIXEL_FMT_L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'), - PIXEL_FMT_A8 = CRNLIB_PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'), - PIXEL_FMT_A8L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'), - PIXEL_FMT_A8R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'A') -}; - -const crn_uint32 cDDSMaxImageDimensions = 
8192U; - -// Total size of header is sizeof(uint32)+cDDSSizeofDDSurfaceDesc2; -const crn_uint32 cDDSSizeofDDSurfaceDesc2 = 124; - -// "DDS " -const crn_uint32 cDDSFileSignature = 0x20534444; - -struct DDCOLORKEY { - crn_uint32 dwUnused0; - crn_uint32 dwUnused1; -}; - -struct DDPIXELFORMAT { - crn_uint32 dwSize; - crn_uint32 dwFlags; - crn_uint32 dwFourCC; - crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib will place a FOURCC code here for swizzled/cooked DXTn formats - crn_uint32 dwRBitMask; - crn_uint32 dwGBitMask; - crn_uint32 dwBBitMask; - crn_uint32 dwRGBAlphaBitMask; -}; - -struct DDSCAPS2 { - crn_uint32 dwCaps; - crn_uint32 dwCaps2; - crn_uint32 dwCaps3; - crn_uint32 dwCaps4; -}; - -struct DDSURFACEDESC2 { - crn_uint32 dwSize; - crn_uint32 dwFlags; - crn_uint32 dwHeight; - crn_uint32 dwWidth; - union { - crn_int32 lPitch; - crn_uint32 dwLinearSize; - }; - crn_uint32 dwBackBufferCount; - crn_uint32 dwMipMapCount; - crn_uint32 dwAlphaBitDepth; - crn_uint32 dwUnused0; - crn_uint32 lpSurface; - DDCOLORKEY unused0; - DDCOLORKEY unused1; - DDCOLORKEY unused2; - DDCOLORKEY unused3; - DDPIXELFORMAT ddpfPixelFormat; - DDSCAPS2 ddsCaps; - crn_uint32 dwUnused1; -}; - -const crn_uint32 DDSD_CAPS = 0x00000001; -const crn_uint32 DDSD_HEIGHT = 0x00000002; -const crn_uint32 DDSD_WIDTH = 0x00000004; -const crn_uint32 DDSD_PITCH = 0x00000008; - -const crn_uint32 DDSD_BACKBUFFERCOUNT = 0x00000020; -const crn_uint32 DDSD_ZBUFFERBITDEPTH = 0x00000040; -const crn_uint32 DDSD_ALPHABITDEPTH = 0x00000080; - -const crn_uint32 DDSD_LPSURFACE = 0x00000800; - -const crn_uint32 DDSD_PIXELFORMAT = 0x00001000; -const crn_uint32 DDSD_CKDESTOVERLAY = 0x00002000; -const crn_uint32 DDSD_CKDESTBLT = 0x00004000; -const crn_uint32 DDSD_CKSRCOVERLAY = 0x00008000; - -const crn_uint32 DDSD_CKSRCBLT = 0x00010000; -const crn_uint32 DDSD_MIPMAPCOUNT = 0x00020000; -const crn_uint32 DDSD_REFRESHRATE = 0x00040000; -const crn_uint32 DDSD_LINEARSIZE = 0x00080000; - -const crn_uint32 
DDSD_TEXTURESTAGE = 0x00100000; -const crn_uint32 DDSD_FVF = 0x00200000; -const crn_uint32 DDSD_SRCVBHANDLE = 0x00400000; -const crn_uint32 DDSD_DEPTH = 0x00800000; - -const crn_uint32 DDSD_ALL = 0x00fff9ee; - -const crn_uint32 DDPF_ALPHAPIXELS = 0x00000001; -const crn_uint32 DDPF_ALPHA = 0x00000002; -const crn_uint32 DDPF_FOURCC = 0x00000004; -const crn_uint32 DDPF_PALETTEINDEXED8 = 0x00000020; -const crn_uint32 DDPF_RGB = 0x00000040; -const crn_uint32 DDPF_LUMINANCE = 0x00020000; - -const crn_uint32 DDSCAPS_COMPLEX = 0x00000008; -const crn_uint32 DDSCAPS_TEXTURE = 0x00001000; -const crn_uint32 DDSCAPS_MIPMAP = 0x00400000; - -const crn_uint32 DDSCAPS2_CUBEMAP = 0x00000200; -const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400; -const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800; - -const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000; -const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000; -const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000; -const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; - -const crn_uint32 DDSCAPS2_VOLUME = 0x00200000; +namespace crnlib +{ + enum pixel_format + { + PIXEL_FMT_INVALID = 0, + + PIXEL_FMT_DXT1 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '1'), + PIXEL_FMT_DXT2 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '2'), + PIXEL_FMT_DXT3 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), + PIXEL_FMT_DXT4 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), + PIXEL_FMT_DXT5 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), + PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX + PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY + PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf + + // Non-standard, crnlib-specific pixel formats (some of these are supported by ATI's Compressonator) + PIXEL_FMT_DXT5_CCxY = CRNLIB_PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), + PIXEL_FMT_DXT5_xGxR = 
CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'), + PIXEL_FMT_DXT5_xGBR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'), + PIXEL_FMT_DXT5_AGBR = CRNLIB_PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'), + + PIXEL_FMT_DXT1A = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', '1', 'A'), + PIXEL_FMT_ETC1 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', '1'), + PIXEL_FMT_ETC2 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', '2'), + PIXEL_FMT_ETC2A = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '2', 'A'), + PIXEL_FMT_ETC1S = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '1', 'S'), + PIXEL_FMT_ETC2AS = CRNLIB_PIXEL_FMT_FOURCC('E', '2', 'A', 'S'), + + PIXEL_FMT_R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'), + PIXEL_FMT_L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'), + PIXEL_FMT_A8 = CRNLIB_PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'), + PIXEL_FMT_A8L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'), + PIXEL_FMT_A8R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'A') + }; + + const crn_uint32 cDDSMaxImageDimensions = 8192U; + + // Total size of header is sizeof(uint32)+cDDSSizeofDDSurfaceDesc2; + const crn_uint32 cDDSSizeofDDSurfaceDesc2 = 124; + + // "DDS " + const crn_uint32 cDDSFileSignature = 0x20534444; + + struct DDCOLORKEY + { + crn_uint32 dwUnused0; + crn_uint32 dwUnused1; + }; + + struct DDPIXELFORMAT + { + crn_uint32 dwSize; + crn_uint32 dwFlags; + crn_uint32 dwFourCC; + crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib will place a FOURCC code here for swizzled/cooked DXTn formats + crn_uint32 dwRBitMask; + crn_uint32 dwGBitMask; + crn_uint32 dwBBitMask; + crn_uint32 dwRGBAlphaBitMask; + }; + + struct DDSCAPS2 + { + crn_uint32 dwCaps; + crn_uint32 dwCaps2; + crn_uint32 dwCaps3; + crn_uint32 dwCaps4; + }; + + struct DDSURFACEDESC2 + { + crn_uint32 dwSize; + crn_uint32 dwFlags; + crn_uint32 dwHeight; + crn_uint32 dwWidth; + union { + crn_int32 lPitch; + crn_uint32 dwLinearSize; + }; + crn_uint32 dwBackBufferCount; + crn_uint32 dwMipMapCount; + crn_uint32 dwAlphaBitDepth; + crn_uint32 dwUnused0; + crn_uint32 lpSurface; + DDCOLORKEY 
unused0; + DDCOLORKEY unused1; + DDCOLORKEY unused2; + DDCOLORKEY unused3; + DDPIXELFORMAT ddpfPixelFormat; + DDSCAPS2 ddsCaps; + crn_uint32 dwUnused1; + }; + + const crn_uint32 DDSD_CAPS = 0x00000001; + const crn_uint32 DDSD_HEIGHT = 0x00000002; + const crn_uint32 DDSD_WIDTH = 0x00000004; + const crn_uint32 DDSD_PITCH = 0x00000008; + + const crn_uint32 DDSD_BACKBUFFERCOUNT = 0x00000020; + const crn_uint32 DDSD_ZBUFFERBITDEPTH = 0x00000040; + const crn_uint32 DDSD_ALPHABITDEPTH = 0x00000080; + + const crn_uint32 DDSD_LPSURFACE = 0x00000800; + + const crn_uint32 DDSD_PIXELFORMAT = 0x00001000; + const crn_uint32 DDSD_CKDESTOVERLAY = 0x00002000; + const crn_uint32 DDSD_CKDESTBLT = 0x00004000; + const crn_uint32 DDSD_CKSRCOVERLAY = 0x00008000; + + const crn_uint32 DDSD_CKSRCBLT = 0x00010000; + const crn_uint32 DDSD_MIPMAPCOUNT = 0x00020000; + const crn_uint32 DDSD_REFRESHRATE = 0x00040000; + const crn_uint32 DDSD_LINEARSIZE = 0x00080000; + + const crn_uint32 DDSD_TEXTURESTAGE = 0x00100000; + const crn_uint32 DDSD_FVF = 0x00200000; + const crn_uint32 DDSD_SRCVBHANDLE = 0x00400000; + const crn_uint32 DDSD_DEPTH = 0x00800000; + + const crn_uint32 DDSD_ALL = 0x00fff9ee; + + const crn_uint32 DDPF_ALPHAPIXELS = 0x00000001; + const crn_uint32 DDPF_ALPHA = 0x00000002; + const crn_uint32 DDPF_FOURCC = 0x00000004; + const crn_uint32 DDPF_PALETTEINDEXED8 = 0x00000020; + const crn_uint32 DDPF_RGB = 0x00000040; + const crn_uint32 DDPF_LUMINANCE = 0x00020000; + + const crn_uint32 DDSCAPS_COMPLEX = 0x00000008; + const crn_uint32 DDSCAPS_TEXTURE = 0x00001000; + const crn_uint32 DDSCAPS_MIPMAP = 0x00400000; + + const crn_uint32 DDSCAPS2_CUBEMAP = 0x00000200; + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400; + const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800; + + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000; + const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000; + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000; + const crn_uint32 
DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; + + const crn_uint32 DDSCAPS2_VOLUME = 0x00200000; } // namespace crnlib From 46c045a327fa5f24a3d39651815c8a67043a8f4f Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Mon, 7 Sep 2020 02:43:26 -0400 Subject: [PATCH 09/18] Format some files --- crnlib/crn_buffer_stream.h | 366 +++--- crnlib/crn_cfile_stream.h | 486 ++++---- crnlib/crn_clusterizer.h | 1207 +++++++++++--------- crnlib/crn_colorized_console.cpp | 204 ++-- crnlib/crn_colorized_console.h | 21 +- crnlib/crn_command_line_params.cpp | 805 ++++++++------ crnlib/crn_command_line_params.h | 122 +- crnlib/crn_data_stream.cpp | 1 + crnlib/crn_dds_comp.cpp | 467 ++++---- crnlib/crn_dds_comp.h | 86 +- crnlib/crn_decomp.cpp | 2 +- crnlib/crn_dxt5a.cpp | 359 +++--- crnlib/crn_dxt5a.h | 85 +- crnlib/crn_dxt_endpoint_refiner.cpp | 456 ++++---- crnlib/crn_dxt_endpoint_refiner.h | 107 +- crnlib/crn_dxt_fast.cpp | 1591 +++++++++++++++------------ crnlib/crn_dxt_fast.h | 22 +- crnlib/crn_dxt_hc_common.cpp | 1 - crnlib/crn_dxt_hc_common.h | 1 - crnlib/crn_dynamic_stream.h | 397 ++++--- crnlib/crn_intersect.h | 226 ++-- crnlib/crn_math.cpp | 120 +- crnlib/crn_math.h | 574 +++++----- crnlib/crn_packed_uint.h | 171 +-- crnlib/crn_vector2d.h | 324 +++--- 25 files changed, 4612 insertions(+), 3589 deletions(-) diff --git a/crnlib/crn_buffer_stream.h b/crnlib/crn_buffer_stream.h index 2d064fc..86332ca 100644 --- a/crnlib/crn_buffer_stream.h +++ b/crnlib/crn_buffer_stream.h @@ -3,176 +3,220 @@ #pragma once #include "crn_data_stream.h" -namespace crnlib { -class buffer_stream : public data_stream { - public: - buffer_stream() - : data_stream(), - m_pBuf(NULL), - m_size(0), - m_ofs(0) { - } +namespace crnlib +{ + class buffer_stream : public data_stream + { + public: + buffer_stream(): data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + } + + buffer_stream(void* p, uint size): data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + open(p, size); + } + + buffer_stream(const 
void* p, uint size): data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + open(p, size); + } + + virtual ~buffer_stream() + { + } + + bool open(const void* p, uint size) + { + CRNLIB_ASSERT(p); + + close(); + + if ((!p) || (!size)) + { + return false; + } + + m_opened = true; + m_pBuf = (uint8*)(p); + m_size = size; + m_ofs = 0; + m_attribs = cDataStreamSeekable | cDataStreamReadable; + return true; + } + + bool open(void* p, uint size) + { + CRNLIB_ASSERT(p); + + close(); + + if ((!p) || (!size)) + { + return false; + } + + m_opened = true; + m_pBuf = static_cast(p); + m_size = size; + m_ofs = 0; + m_attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable; + return true; + } + + virtual bool close() + { + if (m_opened) + { + m_opened = false; + m_pBuf = NULL; + m_size = 0; + m_ofs = 0; + return true; + } + + return false; + } + + const void* get_buf() const + { + return m_pBuf; + } + void* get_buf() + { + return m_pBuf; + } + + virtual const void* get_ptr() const + { + return m_pBuf; + } + + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_readable()) || (!len)) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_size); + + uint bytes_left = m_size - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + { + memcpy(pBuf, &m_pBuf[m_ofs], len); + } + + m_ofs += len; + + return len; + } + + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_writable()) || (!len)) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_size); + + uint bytes_left = m_size - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + { + memcpy(&m_pBuf[m_ofs], pBuf, len); + } + + m_ofs += len; + + return len; + } + + virtual bool flush() { + if (!m_opened) + { + return false; + } + + return true; + } + + virtual uint64 get_size() { + if (!m_opened) + { + return 0; + } + + return m_size; + } + + virtual uint64 
get_remaining() { + if (!m_opened) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_size); - buffer_stream(void* p, uint size) - : data_stream(), - m_pBuf(NULL), - m_size(0), - m_ofs(0) { - open(p, size); - } + return m_size - m_ofs; + } - buffer_stream(const void* p, uint size) - : data_stream(), - m_pBuf(NULL), - m_size(0), - m_ofs(0) { - open(p, size); - } + virtual uint64 get_ofs() { + if (!m_opened) + { + return 0; + } - virtual ~buffer_stream() { - } + return m_ofs; + } - bool open(const void* p, uint size) { - CRNLIB_ASSERT(p); + virtual bool seek(int64 ofs, bool relative) { + if ((!m_opened) || (!is_seekable())) + { + return false; + } - close(); + int64 new_ofs = relative ? (m_ofs + ofs) : ofs; - if ((!p) || (!size)) - return false; + if (new_ofs < 0) + { + return false; + } + else if (new_ofs > m_size) + { + return false; + } - m_opened = true; - m_pBuf = (uint8*)(p); - m_size = size; - m_ofs = 0; - m_attribs = cDataStreamSeekable | cDataStreamReadable; - return true; - } + m_ofs = static_cast(new_ofs); - bool open(void* p, uint size) { - CRNLIB_ASSERT(p); + post_seek(); - close(); + return true; + } - if ((!p) || (!size)) - return false; - - m_opened = true; - m_pBuf = static_cast(p); - m_size = size; - m_ofs = 0; - m_attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable; - return true; - } - - virtual bool close() { - if (m_opened) { - m_opened = false; - m_pBuf = NULL; - m_size = 0; - m_ofs = 0; - return true; - } - - return false; - } - - const void* get_buf() const { return m_pBuf; } - void* get_buf() { return m_pBuf; } - - virtual const void* get_ptr() const { return m_pBuf; } - - virtual uint read(void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if ((!m_opened) || (!is_readable()) || (!len)) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_size); - - uint bytes_left = m_size - m_ofs; - - len = math::minimum(len, bytes_left); - - if (len) - memcpy(pBuf, &m_pBuf[m_ofs], len); - - m_ofs += len; - - return len; - } 
- - virtual uint write(const void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if ((!m_opened) || (!is_writable()) || (!len)) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_size); - - uint bytes_left = m_size - m_ofs; - - len = math::minimum(len, bytes_left); - - if (len) - memcpy(&m_pBuf[m_ofs], pBuf, len); - - m_ofs += len; - - return len; - } - - virtual bool flush() { - if (!m_opened) - return false; - - return true; - } - - virtual uint64 get_size() { - if (!m_opened) - return 0; - - return m_size; - } - - virtual uint64 get_remaining() { - if (!m_opened) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_size); - - return m_size - m_ofs; - } - - virtual uint64 get_ofs() { - if (!m_opened) - return 0; - - return m_ofs; - } - - virtual bool seek(int64 ofs, bool relative) { - if ((!m_opened) || (!is_seekable())) - return false; - - int64 new_ofs = relative ? (m_ofs + ofs) : ofs; - - if (new_ofs < 0) - return false; - else if (new_ofs > m_size) - return false; - - m_ofs = static_cast(new_ofs); - - post_seek(); - - return true; - } - - private: - uint8* m_pBuf; - uint m_size; - uint m_ofs; -}; + private: + uint8* m_pBuf; + uint m_size; + uint m_ofs; + }; } // namespace crnlib diff --git a/crnlib/crn_cfile_stream.h b/crnlib/crn_cfile_stream.h index c17597a..0e2ae22 100644 --- a/crnlib/crn_cfile_stream.h +++ b/crnlib/crn_cfile_stream.h @@ -4,213 +4,281 @@ #include "crn_data_stream.h" -namespace crnlib { -class cfile_stream : public data_stream { - public: - cfile_stream() - : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) { - } - - cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) - : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) { - open(pFile, pFilename, attribs, has_ownership); - } - - cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) - : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), 
m_has_ownership(false) { - open(pFilename, attribs, open_existing); - } - - virtual ~cfile_stream() { - close(); - } - - virtual bool close() { - clear_error(); - - if (m_opened) { - bool status = true; - if (m_has_ownership) { - if (EOF == fclose(m_pFile)) - status = false; - } - - m_pFile = NULL; - m_opened = false; - m_size = 0; - m_ofs = 0; - m_has_ownership = false; - - return status; - } - - return false; - } - - bool open(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) { - CRNLIB_ASSERT(pFile); - CRNLIB_ASSERT(pFilename); - - close(); - - set_name(pFilename); - m_pFile = pFile; - m_has_ownership = has_ownership; - m_attribs = static_cast(attribs); - - m_ofs = crn_ftell(m_pFile); - crn_fseek(m_pFile, 0, SEEK_END); - m_size = crn_ftell(m_pFile); - crn_fseek(m_pFile, m_ofs, SEEK_SET); - - m_opened = true; - - return true; - } - - bool open(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) { - CRNLIB_ASSERT(pFilename); - - close(); - - m_attribs = static_cast(attribs); - - const char* pMode; - if ((is_readable()) && (is_writable())) - pMode = open_existing ? "r+b" : "w+b"; - else if (is_writable()) - pMode = open_existing ? "ab" : "wb"; - else if (is_readable()) - pMode = "rb"; - else { - set_error(); - return false; - } - - FILE* pFile = NULL; - crn_fopen(&pFile, pFilename, pMode); - m_has_ownership = true; - - if (!pFile) { - set_error(); - return false; - } - - // TODO: Change stream class to support UCS2 filenames. 
- - return open(pFile, pFilename, attribs, true); - } - - FILE* get_file() const { return m_pFile; } - - virtual uint read(void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if (!m_opened || (!is_readable()) || (!len)) - return 0; - - len = static_cast(math::minimum(len, get_remaining())); - - if (fread(pBuf, 1, len, m_pFile) != len) { - set_error(); - return 0; - } - - m_ofs += len; - return len; - } - - virtual uint write(const void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if (!m_opened || (!is_writable()) || (!len)) - return 0; - - if (fwrite(pBuf, 1, len, m_pFile) != len) { - set_error(); - return 0; - } - - m_ofs += len; - m_size = math::maximum(m_size, m_ofs); - - return len; - } - - virtual bool flush() { - if ((!m_opened) || (!is_writable())) - return false; - - if (EOF == fflush(m_pFile)) { - set_error(); - return false; - } - - return true; - } - - virtual uint64 get_size() { - if (!m_opened) - return 0; - - return m_size; - } - - virtual uint64 get_remaining() { - if (!m_opened) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_size); - return m_size - m_ofs; - } - - virtual uint64 get_ofs() { - if (!m_opened) - return 0; - - return m_ofs; - } - - virtual bool seek(int64 ofs, bool relative) { - if ((!m_opened) || (!is_seekable())) - return false; - - int64 new_ofs = relative ? 
(m_ofs + ofs) : ofs; - if (new_ofs < 0) - return false; - else if (static_cast(new_ofs) > m_size) - return false; - - if (static_cast(new_ofs) != m_ofs) { - if (crn_fseek(m_pFile, new_ofs, SEEK_SET) != 0) { - set_error(); - return false; - } - - m_ofs = new_ofs; - } - - return true; - } - - static bool read_file_into_array(const char* pFilename, vector& buf) { - cfile_stream in_stream(pFilename); - if (!in_stream.is_opened()) - return false; - return in_stream.read_array(buf); - } - - static bool write_array_to_file(const char* pFilename, const vector& buf) { - cfile_stream out_stream(pFilename, cDataStreamWritable | cDataStreamSeekable); - if (!out_stream.is_opened()) - return false; - return out_stream.write_array(buf); - } - - private: - FILE* m_pFile; - uint64 m_size, m_ofs; - bool m_has_ownership; -}; - +namespace crnlib +{ + class cfile_stream : public data_stream + { + public: + cfile_stream(): data_stream(), + m_pFile(NULL), + m_size(0), + m_ofs(0), + m_has_ownership(false) + { + } + + cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership): data_stream(), + m_pFile(NULL), + m_size(0), + m_ofs(0), + m_has_ownership(false) + { + open(pFile, pFilename, attribs, has_ownership); + } + + cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false): data_stream(), + m_pFile(NULL), + m_size(0), + m_ofs(0), + m_has_ownership(false) + { + open(pFilename, attribs, open_existing); + } + + virtual ~cfile_stream() + { + close(); + } + + virtual bool close() + { + clear_error(); + + if (m_opened) + { + bool status = true; + if (m_has_ownership) + { + if (EOF == fclose(m_pFile)) + { + status = false; + } + } + + m_pFile = NULL; + m_opened = false; + m_size = 0; + m_ofs = 0; + m_has_ownership = false; + + return status; + } + + return false; + } + + bool open(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) + { + CRNLIB_ASSERT(pFile); + CRNLIB_ASSERT(pFilename); + + 
close(); + + set_name(pFilename); + m_pFile = pFile; + m_has_ownership = has_ownership; + m_attribs = static_cast(attribs); + + m_ofs = crn_ftell(m_pFile); + crn_fseek(m_pFile, 0, SEEK_END); + m_size = crn_ftell(m_pFile); + crn_fseek(m_pFile, m_ofs, SEEK_SET); + + m_opened = true; + + return true; + } + + bool open(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) + { + CRNLIB_ASSERT(pFilename); + + close(); + + m_attribs = static_cast(attribs); + + const char* pMode; + if ((is_readable()) && (is_writable())) + { + pMode = open_existing ? "r+b" : "w+b"; + } + else if (is_writable()) + { + pMode = open_existing ? "ab" : "wb"; + } + else if (is_readable()) + { + pMode = "rb"; + } + else + { + set_error(); + return false; + } + + FILE* pFile = NULL; + crn_fopen(&pFile, pFilename, pMode); + m_has_ownership = true; + + if (!pFile) + { + set_error(); + return false; + } + + // TODO: Change stream class to support UCS2 filenames. + + return open(pFile, pFilename, attribs, true); + } + + FILE* get_file() const + { + return m_pFile; + } + + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if (!m_opened || (!is_readable()) || (!len)) + { + return 0; + } + + len = static_cast(math::minimum(len, get_remaining())); + + if (fread(pBuf, 1, len, m_pFile) != len) + { + set_error(); + return 0; + } + + m_ofs += len; + return len; + } + + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if (!m_opened || (!is_writable()) || (!len)) + { + return 0; + } + + if (fwrite(pBuf, 1, len, m_pFile) != len) + { + set_error(); + return 0; + } + + m_ofs += len; + m_size = math::maximum(m_size, m_ofs); + + return len; + } + + virtual bool flush() + { + if ((!m_opened) || (!is_writable())) + { + return false; + } + + if (EOF == fflush(m_pFile)) + { + set_error(); + return false; + } + + return true; + } + + virtual uint64 get_size() + { + if 
(!m_opened) + { + return 0; + } + + return m_size; + } + + virtual uint64 get_remaining() + { + if (!m_opened) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_size); + return m_size - m_ofs; + } + + virtual uint64 get_ofs() + { + if (!m_opened) + { + return 0; + } + + return m_ofs; + } + + virtual bool seek(int64 ofs, bool relative) + { + if ((!m_opened) || (!is_seekable())) + { + return false; + } + + int64 new_ofs = relative ? (m_ofs + ofs) : ofs; + if (new_ofs < 0) + { + return false; + } + else if (static_cast(new_ofs) > m_size) + { + return false; + } + + if (static_cast(new_ofs) != m_ofs) + { + if (crn_fseek(m_pFile, new_ofs, SEEK_SET) != 0) + { + set_error(); + return false; + } + + m_ofs = new_ofs; + } + + return true; + } + + static bool read_file_into_array(const char* pFilename, vector& buf) + { + cfile_stream in_stream(pFilename); + if (!in_stream.is_opened()) + { + return false; + } + return in_stream.read_array(buf); + } + + static bool write_array_to_file(const char* pFilename, const vector& buf) + { + cfile_stream out_stream(pFilename, cDataStreamWritable | cDataStreamSeekable); + if (!out_stream.is_opened()) + { + return false; + } + return out_stream.write_array(buf); + } + + private: + FILE* m_pFile; + uint64 m_size, m_ofs; + bool m_has_ownership; + }; } // namespace crnlib diff --git a/crnlib/crn_clusterizer.h b/crnlib/crn_clusterizer.h index 35b3f60..83407cc 100644 --- a/crnlib/crn_clusterizer.h +++ b/crnlib/crn_clusterizer.h @@ -3,698 +3,847 @@ #pragma once #include "crn_matrix.h" -namespace crnlib { -template -class clusterizer { - public: - clusterizer() - : m_overall_variance(0.0f), - m_split_index(0), - m_heap_size(0), - m_quick(false) { - } - - void clear() { - m_training_vecs.clear(); - m_codebook.clear(); - m_nodes.clear(); - m_overall_variance = 0.0f; - m_split_index = 0; - m_heap_size = 0; - m_quick = false; - } - - void reserve_training_vecs(uint num_expected) { - m_training_vecs.reserve(num_expected); - } - - void 
add_training_vec(const VectorType& v, uint weight) { - m_training_vecs.push_back(std::make_pair(v, weight)); - } - - typedef bool (*progress_callback_func_ptr)(uint percentage_completed, void* pData); - - bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = NULL, void* pProgress_data = NULL, bool quick = false) { - if (m_training_vecs.empty()) - return false; - - m_quick = quick; - - double ttsum = 0.0f; - - vq_node root; - root.m_vectors.reserve(m_training_vecs.size()); - - for (uint i = 0; i < m_training_vecs.size(); i++) { - const VectorType& v = m_training_vecs[i].first; - const uint weight = m_training_vecs[i].second; - - root.m_centroid += (v * (float)weight); - root.m_total_weight += weight; - root.m_vectors.push_back(i); - - ttsum += v.dot(v) * weight; - } - - root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); - - root.m_centroid *= (1.0f / root.m_total_weight); - - m_nodes.clear(); - m_nodes.reserve(max_size * 2 + 1); - - m_nodes.push_back(root); - - m_heap.resize(max_size + 1); - m_heap[1] = 0; - m_heap_size = 1; - - m_split_index = 0; - - uint total_leaves = 1; - - m_left_children.reserve(m_training_vecs.size() + 1); - m_right_children.reserve(m_training_vecs.size() + 1); - - int prev_percentage = -1; - while ((total_leaves < max_size) && (m_heap_size)) { - int worst_node_index = m_heap[1]; - - m_heap[1] = m_heap[m_heap_size]; - m_heap_size--; - if (m_heap_size) - down_heap(1); - - split_node(worst_node_index); - total_leaves++; - - if ((pProgress_callback) && ((total_leaves & 63) == 0) && (max_size)) { - int cur_percentage = (total_leaves * 100U + (max_size / 2U)) / max_size; - if (cur_percentage != prev_percentage) { - if (!(*pProgress_callback)(cur_percentage, pProgress_data)) - return false; - - prev_percentage = cur_percentage; +namespace crnlib +{ + template + class clusterizer + { + public: + clusterizer(): + m_overall_variance(0.0f), + m_split_index(0), + 
m_heap_size(0), + m_quick(false) + { } - } - } - m_codebook.clear(); + void clear() + { + m_training_vecs.clear(); + m_codebook.clear(); + m_nodes.clear(); + m_overall_variance = 0.0f; + m_split_index = 0; + m_heap_size = 0; + m_quick = false; + } - m_overall_variance = 0.0f; + void reserve_training_vecs(uint num_expected) + { + m_training_vecs.reserve(num_expected); + } - for (uint i = 0; i < m_nodes.size(); i++) { - vq_node& node = m_nodes[i]; - if (node.m_left != -1) { - CRNLIB_ASSERT(node.m_right != -1); - continue; - } + void add_training_vec(const VectorType& v, uint weight) + { + m_training_vecs.push_back(std::make_pair(v, weight)); + } - CRNLIB_ASSERT((node.m_left == -1) && (node.m_right == -1)); + typedef bool (*progress_callback_func_ptr)(uint percentage_completed, void* pData); - node.m_codebook_index = m_codebook.size(); - m_codebook.push_back(node.m_centroid); + bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = NULL, void* pProgress_data = NULL, bool quick = false) + { + if (m_training_vecs.empty()) + { + return false; + } - m_overall_variance += node.m_variance; - } + m_quick = quick; - m_heap.clear(); - m_left_children.clear(); - m_right_children.clear(); + double ttsum = 0.0f; - return true; - } + vq_node root; + root.m_vectors.reserve(m_training_vecs.size()); - inline uint get_num_training_vecs() const { return m_training_vecs.size(); } - const VectorType& get_training_vec(uint index) const { return m_training_vecs[index].first; } - uint get_training_vec_weight(uint index) const { return m_training_vecs[index].second; } + for (uint i = 0; i < m_training_vecs.size(); i++) + { + const VectorType& v = m_training_vecs[i].first; + const uint weight = m_training_vecs[i].second; - typedef crnlib::vector > training_vec_array; + root.m_centroid += (v * (float)weight); + root.m_total_weight += weight; + root.m_vectors.push_back(i); - const training_vec_array& get_training_vecs() const { return m_training_vecs; } - 
training_vec_array& get_training_vecs() { return m_training_vecs; } + ttsum += v.dot(v) * weight; + } - inline float get_overall_variance() const { return m_overall_variance; } + root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); - inline uint get_codebook_size() const { - return m_codebook.size(); - } + root.m_centroid *= (1.0f / root.m_total_weight); - inline const VectorType& get_codebook_entry(uint index) const { - return m_codebook[index]; - } + m_nodes.clear(); + m_nodes.reserve(max_size * 2 + 1); - VectorType& get_codebook_entry(uint index) { - return m_codebook[index]; - } + m_nodes.push_back(root); - typedef crnlib::vector vector_vec_type; - inline const vector_vec_type& get_codebook() const { - return m_codebook; - } + m_heap.resize(max_size + 1); + m_heap[1] = 0; + m_heap_size = 1; - uint find_best_codebook_entry(const VectorType& v) const { - uint cur_node_index = 0; + m_split_index = 0; - for (;;) { - const vq_node& cur_node = m_nodes[cur_node_index]; + uint total_leaves = 1; - if (cur_node.m_left == -1) - return cur_node.m_codebook_index; + m_left_children.reserve(m_training_vecs.size() + 1); + m_right_children.reserve(m_training_vecs.size() + 1); - const vq_node& left_node = m_nodes[cur_node.m_left]; - const vq_node& right_node = m_nodes[cur_node.m_right]; + int prev_percentage = -1; + while ((total_leaves < max_size) && (m_heap_size)) + { + int worst_node_index = m_heap[1]; + + m_heap[1] = m_heap[m_heap_size]; + m_heap_size--; + if (m_heap_size) + down_heap(1); + + split_node(worst_node_index); + total_leaves++; + + if ((pProgress_callback) && ((total_leaves & 63) == 0) && (max_size)) + { + int cur_percentage = (total_leaves * 100U + (max_size / 2U)) / max_size; + if (cur_percentage != prev_percentage) + { + if (!(*pProgress_callback)(cur_percentage, pProgress_data)) + { + return false; + } + + prev_percentage = cur_percentage; + } + } + } - float left_dist = left_node.m_centroid.squared_distance(v); - 
float right_dist = right_node.m_centroid.squared_distance(v); + m_codebook.clear(); - if (left_dist < right_dist) - cur_node_index = cur_node.m_left; - else - cur_node_index = cur_node.m_right; - } - } + m_overall_variance = 0.0f; - const VectorType& find_best_codebook_entry(const VectorType& v, uint max_codebook_size) const { - uint cur_node_index = 0; + for (uint i = 0; i < m_nodes.size(); i++) + { + vq_node& node = m_nodes[i]; + if (node.m_left != -1) + { + CRNLIB_ASSERT(node.m_right != -1); + continue; + } - for (;;) { - const vq_node& cur_node = m_nodes[cur_node_index]; + CRNLIB_ASSERT((node.m_left == -1) && (node.m_right == -1)); - if ((cur_node.m_left == -1) || ((cur_node.m_codebook_index + 1) >= (int)max_codebook_size)) - return cur_node.m_centroid; + node.m_codebook_index = m_codebook.size(); + m_codebook.push_back(node.m_centroid); - const vq_node& left_node = m_nodes[cur_node.m_left]; - const vq_node& right_node = m_nodes[cur_node.m_right]; + m_overall_variance += node.m_variance; + } - float left_dist = left_node.m_centroid.squared_distance(v); - float right_dist = right_node.m_centroid.squared_distance(v); + m_heap.clear(); + m_left_children.clear(); + m_right_children.clear(); - if (left_dist < right_dist) - cur_node_index = cur_node.m_left; - else - cur_node_index = cur_node.m_right; - } - } + return true; + } - uint find_best_codebook_entry_fs(const VectorType& v) const { - float best_dist = math::cNearlyInfinite; - uint best_index = 0; + inline uint get_num_training_vecs() const + { + return m_training_vecs.size(); + } + const VectorType& get_training_vec(uint index) const + { + return m_training_vecs[index].first; + } + uint get_training_vec_weight(uint index) const + { + return m_training_vecs[index].second; + } - for (uint i = 0; i < m_codebook.size(); i++) { - float dist = m_codebook[i].squared_distance(v); - if (dist < best_dist) { - best_dist = dist; - best_index = i; - if (best_dist == 0.0f) - break; - } - } + typedef crnlib::vector> 
training_vec_array; - return best_index; - } + const training_vec_array& get_training_vecs() const + { + return m_training_vecs; + } + training_vec_array& get_training_vecs() + { + return m_training_vecs; + } - void retrieve_clusters(uint max_clusters, crnlib::vector >& clusters) const { - clusters.resize(0); - clusters.reserve(max_clusters); + inline float get_overall_variance() const + { + return m_overall_variance; + } - crnlib::vector stack; - stack.reserve(512); + inline uint get_codebook_size() const + { + return m_codebook.size(); + } - uint cur_node_index = 0; + inline const VectorType& get_codebook_entry(uint index) const + { + return m_codebook[index]; + } - for (;;) { - const vq_node& cur_node = m_nodes[cur_node_index]; + VectorType& get_codebook_entry(uint index) + { + return m_codebook[index]; + } - if ((cur_node.is_leaf()) || ((cur_node.m_codebook_index + 2) > (int)max_clusters)) { - clusters.resize(clusters.size() + 1); - clusters.back() = cur_node.m_vectors; + typedef crnlib::vector vector_vec_type; + inline const vector_vec_type& get_codebook() const + { + return m_codebook; + } - if (stack.empty()) - break; - cur_node_index = stack.back(); - stack.pop_back(); - continue; - } + uint find_best_codebook_entry(const VectorType& v) const + { + uint cur_node_index = 0; - cur_node_index = cur_node.m_left; - stack.push_back(cur_node.m_right); - } - } + for (;;) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if (cur_node.m_left == -1) + { + return cur_node.m_codebook_index; + } + + const vq_node& left_node = m_nodes[cur_node.m_left]; + const vq_node& right_node = m_nodes[cur_node.m_right]; + + float left_dist = left_node.m_centroid.squared_distance(v); + float right_dist = right_node.m_centroid.squared_distance(v); + + if (left_dist < right_dist) + { + cur_node_index = cur_node.m_left; + } + else + { + cur_node_index = cur_node.m_right; + } + } + } - private: - training_vec_array m_training_vecs; + const VectorType& 
find_best_codebook_entry(const VectorType& v, uint max_codebook_size) const + { + uint cur_node_index = 0; - struct vq_node { - vq_node() - : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false) {} + for (;;) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if ((cur_node.m_left == -1) || ((cur_node.m_codebook_index + 1) >= (int)max_codebook_size)) + { + return cur_node.m_centroid; + } + + const vq_node& left_node = m_nodes[cur_node.m_left]; + const vq_node& right_node = m_nodes[cur_node.m_right]; + + float left_dist = left_node.m_centroid.squared_distance(v); + float right_dist = right_node.m_centroid.squared_distance(v); + + if (left_dist < right_dist) + { + cur_node_index = cur_node.m_left; + } + else + { + cur_node_index = cur_node.m_right; + } + } + } - VectorType m_centroid; - uint64 m_total_weight; + uint find_best_codebook_entry_fs(const VectorType& v) const { + float best_dist = math::cNearlyInfinite; + uint best_index = 0; - float m_variance; + for (uint i = 0; i < m_codebook.size(); i++) + { + float dist = m_codebook[i].squared_distance(v); + if (dist < best_dist) + { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + { + break; + } + } + } - crnlib::vector m_vectors; + return best_index; + } - int m_left; - int m_right; + void retrieve_clusters(uint max_clusters, crnlib::vector >& clusters) const + { + clusters.resize(0); + clusters.reserve(max_clusters); - int m_codebook_index; + crnlib::vector stack; + stack.reserve(512); - bool m_unsplittable; + uint cur_node_index = 0; - bool is_leaf() const { return m_left < 0; } - }; + for (;;) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if ((cur_node.is_leaf()) || ((cur_node.m_codebook_index + 2) > (int)max_clusters)) + { + clusters.resize(clusters.size() + 1); + clusters.back() = cur_node.m_vectors; + + if (stack.empty()) + { + break; + } + cur_node_index = stack.back(); + stack.pop_back(); + continue; + } + + 
cur_node_index = cur_node.m_left; + stack.push_back(cur_node.m_right); + } + } - typedef crnlib::vector node_vec_type; + private: + training_vec_array m_training_vecs; + + struct vq_node + { + vq_node(): + m_centroid(cClear), + m_total_weight(0), + m_left(-1), + m_right(-1), + m_codebook_index(-1), + m_unsplittable(false) + { + } - node_vec_type m_nodes; + VectorType m_centroid; + uint64 m_total_weight; - vector_vec_type m_codebook; + float m_variance; - float m_overall_variance; + crnlib::vector m_vectors; - uint m_split_index; + int m_left; + int m_right; - crnlib::vector m_heap; - uint m_heap_size; + int m_codebook_index; - bool m_quick; + bool m_unsplittable; - void insert_heap(uint node_index) { - const float variance = m_nodes[node_index].m_variance; - uint pos = ++m_heap_size; + bool is_leaf() const + { + return m_left < 0; + } + }; - if (m_heap_size >= m_heap.size()) - m_heap.resize(m_heap_size + 1); + typedef crnlib::vector node_vec_type; - for (;;) { - uint parent = pos >> 1; - if (!parent) - break; + node_vec_type m_nodes; - float parent_variance = m_nodes[m_heap[parent]].m_variance; - if (parent_variance > variance) - break; + vector_vec_type m_codebook; - m_heap[pos] = m_heap[parent]; + float m_overall_variance; - pos = parent; - } + uint m_split_index; - m_heap[pos] = node_index; - } + crnlib::vector m_heap; + uint m_heap_size; - void down_heap(uint pos) { - uint child; - uint orig = m_heap[pos]; + bool m_quick; - const float orig_variance = m_nodes[orig].m_variance; + void insert_heap(uint node_index) + { + const float variance = m_nodes[node_index].m_variance; + uint pos = ++m_heap_size; - while ((child = (pos << 1)) <= m_heap_size) { - if (child < m_heap_size) { - if (m_nodes[m_heap[child]].m_variance < m_nodes[m_heap[child + 1]].m_variance) - child++; - } + if (m_heap_size >= m_heap.size()) + { + m_heap.resize(m_heap_size + 1); + } - if (orig_variance > m_nodes[m_heap[child]].m_variance) - break; + for (;;) + { + uint parent = pos >> 1; + if 
(!parent) + { + break; + } - m_heap[pos] = m_heap[child]; + float parent_variance = m_nodes[m_heap[parent]].m_variance; + if (parent_variance > variance) + { + break; + } - pos = child; - } + m_heap[pos] = m_heap[parent]; - m_heap[pos] = orig; - } + pos = parent; + } - void compute_split_estimate(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) { - VectorType furthest(0); - double furthest_dist = -1.0f; + m_heap[pos] = node_index; + } - for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + void down_heap(uint pos) + { + uint child; + uint orig = m_heap[pos]; - double dist = v.squared_distance(parent_node.m_centroid); - if (dist > furthest_dist) { - furthest_dist = dist; - furthest = v; - } - } + const float orig_variance = m_nodes[orig].m_variance; - VectorType opposite(0); - double opposite_dist = -1.0f; + while ((child = (pos << 1)) <= m_heap_size) + { + if (child < m_heap_size) + { + if (m_nodes[m_heap[child]].m_variance < m_nodes[m_heap[child + 1]].m_variance) + { + child++; + } + } + + if (orig_variance > m_nodes[m_heap[child]].m_variance) + { + break; + } + + m_heap[pos] = m_heap[child]; + + pos = child; + } - for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + m_heap[pos] = orig; + } - double dist = v.squared_distance(furthest); - if (dist > opposite_dist) { - opposite_dist = dist; - opposite = v; - } - } + void compute_split_estimate(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + VectorType furthest(0); + double furthest_dist = -1.0f; - left_child_res = (furthest + parent_node.m_centroid) * .5f; - right_child_res = (opposite + parent_node.m_centroid) * .5f; - } + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = 
v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } - void compute_split_pca(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) { - if (parent_node.m_vectors.size() == 2) { - left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; - right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; - return; - } + VectorType opposite(0); + double opposite_dist = -1.0f; - const uint N = VectorType::num_elements; + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } - matrix covar; - covar.clear(); + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; + } - for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - const VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); - const VectorType w(v * (float)m_training_vecs[parent_node.m_vectors[i]].second); + void compute_split_pca(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + if (parent_node.m_vectors.size() == 2) + { + left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; + right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; + return; + } - for (uint x = 0; x < N; x++) - for (uint y = x; y < N; y++) - covar[x][y] = covar[x][y] + v[x] * w[y]; - } + const uint N = VectorType::num_elements; - float one_over_total_weight = 1.0f / parent_node.m_total_weight; + matrix covar; + covar.clear(); - for (uint x = 0; x < N; x++) - for (uint y = x; y < N; y++) - covar[x][y] *= one_over_total_weight; + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - 
parent_node.m_centroid); + const VectorType w(v * (float)m_training_vecs[parent_node.m_vectors[i]].second); + + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + } + } - for (uint x = 0; x < (N - 1); x++) - for (uint y = x + 1; y < N; y++) - covar[y][x] = covar[x][y]; + float one_over_total_weight = 1.0f / parent_node.m_total_weight; - VectorType axis; //(1.0f); - if (N == 1) - axis.set(1.0f); - else { - for (uint i = 0; i < N; i++) - axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / math::maximum(N - 1, 1))); - } + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] *= one_over_total_weight; + } + } - VectorType prev_axis(axis); + for (uint x = 0; x < (N - 1); x++) + { + for (uint y = x + 1; y < N; y++) + { + covar[y][x] = covar[x][y]; + } + } - for (uint iter = 0; iter < 10; iter++) { - VectorType x; + VectorType axis; //(1.0f); + if (N == 1) + { + axis.set(1.0f); + } + else { + for (uint i = 0; i < N; i++) + { + axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / math::maximum(N - 1, 1))); + } + } - double max_sum = 0; + VectorType prev_axis(axis); - for (uint i = 0; i < N; i++) { - double sum = 0; + for (uint iter = 0; iter < 10; iter++) + { + VectorType x; - for (uint j = 0; j < N; j++) - sum += axis[j] * covar[i][j]; + double max_sum = 0; - x[i] = static_cast(sum); + for (uint i = 0; i < N; i++) + { + double sum = 0; - max_sum = math::maximum(max_sum, fabs(sum)); - } + for (uint j = 0; j < N; j++) + { + sum += axis[j] * covar[i][j]; + } - if (max_sum != 0.0f) - x *= static_cast(1.0f / max_sum); + x[i] = static_cast(sum); - VectorType delta_axis(prev_axis - x); + max_sum = math::maximum(max_sum, fabs(sum)); + } - prev_axis = axis; - axis = x; + if (max_sum != 0.0f) + { + x *= static_cast(1.0f / max_sum); + } - if (delta_axis.norm() < .0025f) - break; - } + VectorType delta_axis(prev_axis - x); - axis.normalize(); + prev_axis = axis; + axis = x; - VectorType left_child(0.0f); 
- VectorType right_child(0.0f); + if (delta_axis.norm() < .0025f) + { + break; + } + } - double left_weight = 0.0f; - double right_weight = 0.0f; + axis.normalize(); - for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; + VectorType left_child(0.0f); + VectorType right_child(0.0f); - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + double left_weight = 0.0f; + double right_weight = 0.0f; - double t = (v - parent_node.m_centroid) * axis; - if (t < 0.0f) { - left_child += v * weight; - left_weight += weight; - } else { - right_child += v * weight; - right_weight += weight; - } - } + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; + + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double t = (v - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + left_child += v * weight; + left_weight += weight; + } + else + { + right_child += v * weight; + right_weight += weight; + } + } - if ((left_weight > 0.0f) && (right_weight > 0.0f)) { - left_child_res = left_child * (float)(1.0f / left_weight); - right_child_res = right_child * (float)(1.0f / right_weight); - } else { - compute_split_estimate(left_child_res, right_child_res, parent_node); - } - } + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child_res = left_child * (float)(1.0f / left_weight); + right_child_res = right_child * (float)(1.0f / right_weight); + } + else + { + compute_split_estimate(left_child_res, right_child_res, parent_node); + } + } #if 0 - void compute_split_pca2(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) - { - if (parent_node.m_vectors.size() == 2) - { - left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; - right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; - return; - } - - const uint N = 
VectorType::num_elements; + void compute_split_pca2(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + if (parent_node.m_vectors.size() == 2) + { + left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; + right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; + return; + } - VectorType furthest; - double furthest_dist = -1.0f; + const uint N = VectorType::num_elements; - for (uint i = 0; i < parent_node.m_vectors.size(); i++) - { - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + VectorType furthest; + double furthest_dist = -1.0f; - double dist = v.squared_distance(parent_node.m_centroid); - if (dist > furthest_dist) + for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - furthest_dist = dist; - furthest = v; + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } } - } - - VectorType opposite; - double opposite_dist = -1.0f; - for (uint i = 0; i < parent_node.m_vectors.size(); i++) - { - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + VectorType opposite; + double opposite_dist = -1.0f; - double dist = v.squared_distance(furthest); - if (dist > opposite_dist) + for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - opposite_dist = dist; - opposite = v; + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } } - } - VectorType axis(opposite - furthest); - if (axis.normalize() < .000125f) - { - left_child_res = (furthest + parent_node.m_centroid) * .5f; - right_child_res = (opposite + parent_node.m_centroid) * .5f; - return; - } - - for (uint iter = 0; iter < 2; iter++) - { - double next_axis[N]; - utils::zero_object(next_axis); + VectorType 
axis(opposite - furthest); + if (axis.normalize() < .000125f) + { + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; + return; + } - for (uint i = 0; i < parent_node.m_vectors.size(); i++) + for (uint iter = 0; iter < 2; iter++) { - const double weight = m_training_vecs[parent_node.m_vectors[i]].second; + double next_axis[N]; + utils::zero_object(next_axis); - VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const double weight = m_training_vecs[parent_node.m_vectors[i]].second; - double dot = (v * axis) * weight; + VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); - for (uint j = 0; j < N; j++) - next_axis[j] += dot * v[j]; - } + double dot = (v * axis) * weight; - double w = 0.0f; - for (uint j = 0; j < N; j++) - w += next_axis[j] * next_axis[j]; + for (uint j = 0; j < N; j++) + next_axis[j] += dot * v[j]; + } - if (w > 0.0f) - { - w = 1.0f / sqrt(w); - for (uint j = 0; j < N; j++) - axis[j] = static_cast(next_axis[j] * w); - } - else - break; - } + double w = 0.0f; + for (uint j = 0; j < N; j++) + w += next_axis[j] * next_axis[j]; - VectorType left_child(0.0f); - VectorType right_child(0.0f); + if (w > 0.0f) + { + w = 1.0f / sqrt(w); + for (uint j = 0; j < N; j++) + axis[j] = static_cast(next_axis[j] * w); + } + else + break; + } - double left_weight = 0.0f; - double right_weight = 0.0f; + VectorType left_child(0.0f); + VectorType right_child(0.0f); - for (uint i = 0; i < parent_node.m_vectors.size(); i++) - { - const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; + double left_weight = 0.0f; + double right_weight = 0.0f; - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const float weight = 
(float)m_training_vecs[parent_node.m_vectors[i]].second; + + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double t = (v - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + left_child += v * weight; + left_weight += weight; + } + else + { + right_child += v * weight; + right_weight += weight; + } + } - double t = (v - parent_node.m_centroid) * axis; - if (t < 0.0f) + if ((left_weight > 0.0f) && (right_weight > 0.0f)) { - left_child += v * weight; - left_weight += weight; + left_child_res = left_child * (float)(1.0f / left_weight); + right_child_res = right_child * (float)(1.0f / right_weight); } else { - right_child += v * weight; - right_weight += weight; + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; } - } - - if ((left_weight > 0.0f) && (right_weight > 0.0f)) - { - left_child_res = left_child * (float)(1.0f / left_weight); - right_child_res = right_child * (float)(1.0f / right_weight); - } - else - { - left_child_res = (furthest + parent_node.m_centroid) * .5f; - right_child_res = (opposite + parent_node.m_centroid) * .5f; - } - } + } #endif - // thread safety warning: shared state! - crnlib::vector m_left_children; - crnlib::vector m_right_children; - - void split_node(uint index) { - vq_node& parent_node = m_nodes[index]; + // thread safety warning: shared state! 
+ crnlib::vector m_left_children; + crnlib::vector m_right_children; - if (parent_node.m_vectors.size() == 1) - return; + void split_node(uint index) + { + vq_node& parent_node = m_nodes[index]; - VectorType left_child, right_child; - if (m_quick) - compute_split_estimate(left_child, right_child, parent_node); - else - compute_split_pca(left_child, right_child, parent_node); + if (parent_node.m_vectors.size() == 1) + { + return; + } - uint64 left_weight = 0; - uint64 right_weight = 0; + VectorType left_child, right_child; + if (m_quick) + { + compute_split_estimate(left_child, right_child, parent_node); + } + else + { + compute_split_pca(left_child, right_child, parent_node); + } - float prev_total_variance = 1e+10f; + uint64 left_weight = 0; + uint64 right_weight = 0; - float left_variance = 0.0f; - float right_variance = 0.0f; + float prev_total_variance = 1e+10f; - const uint cMaxLoops = m_quick ? 2 : 8; - for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { - m_left_children.resize(0); - m_right_children.resize(0); + float left_variance = 0.0f; + float right_variance = 0.0f; - VectorType new_left_child(cClear); - VectorType new_right_child(cClear); + const uint cMaxLoops = m_quick ? 
2 : 8; + for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) + { + m_left_children.resize(0); + m_right_children.resize(0); - double left_ttsum = 0.0f; - double right_ttsum = 0.0f; + VectorType new_left_child(cClear); + VectorType new_right_child(cClear); - left_weight = 0; - right_weight = 0; + double left_ttsum = 0.0f; + double right_ttsum = 0.0f; - for (uint i = 0; i < parent_node.m_vectors.size(); i++) { - const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; - const uint weight = m_training_vecs[parent_node.m_vectors[i]].second; + left_weight = 0; + right_weight = 0; - double left_dist2 = left_child.squared_distance(v); - double right_dist2 = right_child.squared_distance(v); + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + const uint weight = m_training_vecs[parent_node.m_vectors[i]].second; - if (left_dist2 < right_dist2) { - m_left_children.push_back(parent_node.m_vectors[i]); + double left_dist2 = left_child.squared_distance(v); + double right_dist2 = right_child.squared_distance(v); - new_left_child += (v * (float)weight); - left_weight += weight; + if (left_dist2 < right_dist2) + { + m_left_children.push_back(parent_node.m_vectors[i]); - left_ttsum += v.dot(v) * weight; - } else { - m_right_children.push_back(parent_node.m_vectors[i]); + new_left_child += (v * (float)weight); + left_weight += weight; - new_right_child += (v * (float)weight); - right_weight += weight; + left_ttsum += v.dot(v) * weight; + } + else + { + m_right_children.push_back(parent_node.m_vectors[i]); - right_ttsum += v.dot(v) * weight; - } - } + new_right_child += (v * (float)weight); + right_weight += weight; - if ((!left_weight) || (!right_weight)) { - parent_node.m_unsplittable = true; - return; - } + right_ttsum += v.dot(v) * weight; + } + } - left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); - right_variance = 
(float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); + if ((!left_weight) || (!right_weight)) + { + parent_node.m_unsplittable = true; + return; + } - new_left_child *= (1.0f / left_weight); - new_right_child *= (1.0f / right_weight); + left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); + right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); - left_child = new_left_child; - right_child = new_right_child; + new_left_child *= (1.0f / left_weight); + new_right_child *= (1.0f / right_weight); - float total_variance = left_variance + right_variance; - if (total_variance < .00001f) - break; + left_child = new_left_child; + right_child = new_right_child; - //const float variance_delta_thresh = .00001f; - const float variance_delta_thresh = .00125f; - if (((prev_total_variance - total_variance) / total_variance) < variance_delta_thresh) - break; + float total_variance = left_variance + right_variance; + if (total_variance < .00001f) + { + break; + } - prev_total_variance = total_variance; - } + //const float variance_delta_thresh = .00001f; + const float variance_delta_thresh = .00125f; + if (((prev_total_variance - total_variance) / total_variance) < variance_delta_thresh) + { + break; + } - const uint left_child_index = m_nodes.size(); - const uint right_child_index = m_nodes.size() + 1; + prev_total_variance = total_variance; + } - parent_node.m_left = m_nodes.size(); - parent_node.m_right = m_nodes.size() + 1; - parent_node.m_codebook_index = m_split_index; - m_split_index++; + const uint left_child_index = m_nodes.size(); + const uint right_child_index = m_nodes.size() + 1; - m_nodes.resize(m_nodes.size() + 2); + parent_node.m_left = m_nodes.size(); + parent_node.m_right = m_nodes.size() + 1; + parent_node.m_codebook_index = m_split_index; + m_split_index++; - // parent_node is invalid now, because m_nodes has been changed + m_nodes.resize(m_nodes.size() + 2); - 
vq_node& left_child_node = m_nodes[left_child_index]; - vq_node& right_child_node = m_nodes[right_child_index]; + // parent_node is invalid now, because m_nodes has been changed - left_child_node.m_centroid = left_child; - left_child_node.m_total_weight = left_weight; - left_child_node.m_vectors.swap(m_left_children); - left_child_node.m_variance = left_variance; - if ((left_child_node.m_vectors.size() > 1) && (left_child_node.m_variance > 0.0f)) - insert_heap(left_child_index); + vq_node& left_child_node = m_nodes[left_child_index]; + vq_node& right_child_node = m_nodes[right_child_index]; - right_child_node.m_centroid = right_child; - right_child_node.m_total_weight = right_weight; - right_child_node.m_vectors.swap(m_right_children); - right_child_node.m_variance = right_variance; - if ((right_child_node.m_vectors.size() > 1) && (right_child_node.m_variance > 0.0f)) - insert_heap(right_child_index); - } -}; + left_child_node.m_centroid = left_child; + left_child_node.m_total_weight = left_weight; + left_child_node.m_vectors.swap(m_left_children); + left_child_node.m_variance = left_variance; + if ((left_child_node.m_vectors.size() > 1) && (left_child_node.m_variance > 0.0f)) + { + insert_heap(left_child_index); + } + right_child_node.m_centroid = right_child; + right_child_node.m_total_weight = right_weight; + right_child_node.m_vectors.swap(m_right_children); + right_child_node.m_variance = right_variance; + if ((right_child_node.m_vectors.size() > 1) && (right_child_node.m_variance > 0.0f)) + { + insert_heap(right_child_index); + } + } + }; } // namespace crnlib diff --git a/crnlib/crn_colorized_console.cpp b/crnlib/crn_colorized_console.cpp index a984435..c25525d 100644 --- a/crnlib/crn_colorized_console.cpp +++ b/crnlib/crn_colorized_console.cpp @@ -1,109 +1,137 @@ // File: crn_colorized_console.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_colorized_console.h" #ifdef CRNLIB_USE_WIN32_API #include 
"crn_winhdr.h" #endif -namespace crnlib { -void colorized_console::init() { - console::init(); - console::add_console_output_func(console_output_func, NULL); -} +namespace crnlib +{ + void colorized_console::init() + { + console::init(); + console::add_console_output_func(console_output_func, NULL); + } -void colorized_console::deinit() { - console::remove_console_output_func(console_output_func); - console::deinit(); -} + void colorized_console::deinit() + { + console::remove_console_output_func(console_output_func); + console::deinit(); + } -void colorized_console::tick() { -} + void colorized_console::tick() + { + } #ifdef CRNLIB_USE_WIN32_API -bool colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) { - if (console::get_output_disabled()) - return true; - - HANDLE cons = GetStdHandle(STD_OUTPUT_HANDLE); - - DWORD attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE; - switch (type) { - case cDebugConsoleMessage: - attr = FOREGROUND_BLUE | FOREGROUND_INTENSITY; - break; - case cMessageConsoleMessage: - attr = FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY; - break; - case cWarningConsoleMessage: - attr = FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; - break; - case cErrorConsoleMessage: - attr = FOREGROUND_RED | FOREGROUND_INTENSITY; - break; - default: - break; - } - - if (INVALID_HANDLE_VALUE != cons) - SetConsoleTextAttribute(cons, (WORD)attr); - - if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) { - switch (type) { - case cDebugConsoleMessage: - printf("Debug: %s", pMsg); - break; - case cWarningConsoleMessage: - printf("Warning: %s", pMsg); - break; - case cErrorConsoleMessage: - printf("Error: %s", pMsg); - break; - default: - printf("%s", pMsg); - break; - } - } else { - printf("%s", pMsg); - } + bool colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) + { + if (console::get_output_disabled()) + { + return true; + } - if 
(console::get_crlf()) - printf("\n"); + HANDLE cons = GetStdHandle(STD_OUTPUT_HANDLE); - if (INVALID_HANDLE_VALUE != cons) - SetConsoleTextAttribute(cons, FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE); + DWORD attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE; + switch (type) + { + case cDebugConsoleMessage: + attr = FOREGROUND_BLUE | FOREGROUND_INTENSITY; + break; + case cMessageConsoleMessage: + attr = FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY; + break; + case cWarningConsoleMessage: + attr = FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; + break; + case cErrorConsoleMessage: + attr = FOREGROUND_RED | FOREGROUND_INTENSITY; + break; + default: + break; + } - return true; -} -#else -bool colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) { - if (console::get_output_disabled()) - return true; - - if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) { - switch (type) { - case cDebugConsoleMessage: - printf("Debug: %s", pMsg); - break; - case cWarningConsoleMessage: - printf("Warning: %s", pMsg); - break; - case cErrorConsoleMessage: - printf("Error: %s", pMsg); - break; - default: - printf("%s", pMsg); - break; + if (INVALID_HANDLE_VALUE != cons) + { + SetConsoleTextAttribute(cons, (WORD)attr); + } + + if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) + { + switch (type) + { + case cDebugConsoleMessage: + printf("Debug: %s", pMsg); + break; + case cWarningConsoleMessage: + printf("Warning: %s", pMsg); + break; + case cErrorConsoleMessage: + printf("Error: %s", pMsg); + break; + default: + printf("%s", pMsg); + break; + } + } + else + { + printf("%s", pMsg); + } + + if (console::get_crlf()) + { + printf("\n"); + } + + if (INVALID_HANDLE_VALUE != cons) + { + SetConsoleTextAttribute(cons, FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE); + } + + return true; } - } else { - printf("%s", pMsg); - } +#else + bool 
colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) + { + if (console::get_output_disabled()) + { + return true; + } + + if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) + { + switch (type) + { + case cDebugConsoleMessage: + printf("Debug: %s", pMsg); + break; + case cWarningConsoleMessage: + printf("Warning: %s", pMsg); + break; + case cErrorConsoleMessage: + printf("Error: %s", pMsg); + break; + default: + printf("%s", pMsg); + break; + } + } + else + { + printf("%s", pMsg); + } - if (console::get_crlf()) - printf("\n"); + if (console::get_crlf()) + { + printf("\n"); + } - return true; -} + return true; + } #endif } // namespace crnlib diff --git a/crnlib/crn_colorized_console.h b/crnlib/crn_colorized_console.h index 48ca123..61777c9 100644 --- a/crnlib/crn_colorized_console.h +++ b/crnlib/crn_colorized_console.h @@ -1,19 +1,22 @@ // File: crn_colorized_console.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_console.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT colorized_console { - public: - static void init(); - static void deinit(); - static void tick(); +namespace crnlib +{ + class CRN_EXPORT colorized_console + { + public: + static void init(); + static void deinit(); + static void tick(); - private: - static bool console_output_func(eConsoleMessageType type, const char* pMsg, void* pData); -}; + private: + static bool console_output_func(eConsoleMessageType type, const char* pMsg, void* pData); + }; } // namespace crnlib diff --git a/crnlib/crn_command_line_params.cpp b/crnlib/crn_command_line_params.cpp index be3817b..ebd3b3a 100644 --- a/crnlib/crn_command_line_params.cpp +++ b/crnlib/crn_command_line_params.cpp @@ -1,5 +1,6 @@ // File: crn_command_line_params.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_command_line_params.h" #include "crn_console.h" @@ -12,399 +13,513 @@ 
#if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif -namespace crnlib { -void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char* argv[]) { - argc, argv; + +namespace crnlib +{ + void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char* argv[]) + { + argc, argv; #if CRNLIB_USE_WIN32_API - cmd_line.set(GetCommandLineA()); + cmd_line.set(GetCommandLineA()); #else - cmd_line.clear(); - for (int i = 0; i < argc; i++) { - dynamic_string tmp(argv[i]); - if ((tmp.front() != '"') && (tmp.front() != '-') && (tmp.front() != '@')) - tmp = "\"" + tmp + "\""; - if (cmd_line.get_len()) - cmd_line += " "; - cmd_line += tmp; - } + cmd_line.clear(); + for (int i = 0; i < argc; i++) + { + dynamic_string tmp(argv[i]); + if ((tmp.front() != '"') && (tmp.front() != '-') && (tmp.front() != '@')) + { + tmp = "\"" + tmp + "\""; + } + if (cmd_line.get_len()) + { + cmd_line += " "; + } + cmd_line += tmp; + } #endif -} - -command_line_params::command_line_params() { -} - -void command_line_params::clear() { - m_params.clear(); + } - m_param_map.clear(); -} + command_line_params::command_line_params() + { + } -bool command_line_params::split_params(const char* p, dynamic_string_array& params) { - bool within_param = false; - bool within_quote = false; + void command_line_params::clear() + { + m_params.clear(); - uint ofs = 0; - dynamic_string str; + m_param_map.clear(); + } - while (p[ofs]) { - const char c = p[ofs]; + bool command_line_params::split_params(const char* p, dynamic_string_array& params) + { + bool within_param = false; + bool within_quote = false; + + uint ofs = 0; + dynamic_string str; + + while (p[ofs]) + { + const char c = p[ofs]; + + if (within_param) + { + if (within_quote) + { + if (c == '"') + { + within_quote = false; + } + str.append_char(c); + } + else if ((c == ' ') || (c == '\t')) + { + if (!str.is_empty()) + { + params.push_back(str); + str.clear(); + } + within_param = false; + } + else + { + if (c == '"') + { + 
within_quote = true; + } + str.append_char(c); + } + } + else if ((c != ' ') && (c != '\t')) + { + within_param = true; + + if (c == '"') + { + within_quote = true; + } + + str.append_char(c); + } + + ofs++; + } - if (within_param) { - if (within_quote) { - if (c == '"') - within_quote = false; + if (within_quote) + { + console::error("Unmatched quote in command line \"%s\"", p); + return false; + } - str.append_char(c); - } else if ((c == ' ') || (c == '\t')) { - if (!str.is_empty()) { - params.push_back(str); - str.clear(); + if (!str.is_empty()) + { + params.push_back(str); } - within_param = false; - } else { - if (c == '"') - within_quote = true; + return true; + } - str.append_char(c); - } - } else if ((c != ' ') && (c != '\t')) { - within_param = true; + bool command_line_params::load_string_file(const char* pFilename, dynamic_string_array& strings) + { + cfile_stream in_stream; + if (!in_stream.open(pFilename, cDataStreamReadable | cDataStreamSeekable)) + { + console::error("Unable to open file \"%s\" for reading!", pFilename); + return false; + } - if (c == '"') - within_quote = true; + dynamic_string ansi_str; + + for (;;) + { + if (!in_stream.read_line(ansi_str)) + { + break; + } + ansi_str.trim(); + if (ansi_str.is_empty()) + { + continue; + } + strings.push_back(dynamic_string(ansi_str.get_ptr())); + } - str.append_char(c); + return true; } - ofs++; - } - - if (within_quote) { - console::error("Unmatched quote in command line \"%s\"", p); - return false; - } + bool command_line_params::parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc) + { + CRNLIB_ASSERT(n && pParam_desc); - if (!str.is_empty()) - params.push_back(str); + m_params = params; - return true; -} + uint arg_index = 0; + while (arg_index < params.size()) + { + const uint cur_arg_index = arg_index; + const dynamic_string& src_param = params[arg_index++]; -bool command_line_params::load_string_file(const char* pFilename, dynamic_string_array& strings) { - 
cfile_stream in_stream; - if (!in_stream.open(pFilename, cDataStreamReadable | cDataStreamSeekable)) { - console::error("Unable to open file \"%s\" for reading!", pFilename); - return false; - } + if (src_param.is_empty()) + { + continue; + } +#if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS + if ((src_param[0] == '/') || (src_param[0] == '-')) +#else + if (src_param[0] == '-') +#endif + { + if (src_param.get_len() < 2) + { + console::error("Invalid command line parameter: \"%s\"", src_param.get_ptr()); + return false; + } + + dynamic_string key_str(src_param); + + key_str.right(1); + + int modifier = 0; + char c = key_str[key_str.get_len() - 1]; + if (c == '+') + { + modifier = 1; + } + else if (c == '-') + { + modifier = -1; + } + if (modifier) + { + key_str.left(key_str.get_len() - 1); + } + uint param_index; + for (param_index = 0; param_index < n; param_index++) + { + if (key_str == pParam_desc[param_index].m_pName) + { + break; + } + } + + if (param_index == n) + { + console::error("Unrecognized command line parameter: \"%s\"", src_param.get_ptr()); + return false; + } + + const param_desc& desc = pParam_desc[param_index]; + + const uint cMaxValues = 16; + dynamic_string val_str[cMaxValues]; + uint num_val_strs = 0; + if (desc.m_num_values) + { + CRNLIB_ASSERT(desc.m_num_values <= cMaxValues); + + if ((arg_index + desc.m_num_values) > params.size()) + { + console::error("Expected %u value(s) after command line parameter: \"%s\"", desc.m_num_values, src_param.get_ptr()); + return false; + } + + for (uint v = 0; v < desc.m_num_values; v++) + { + val_str[num_val_strs++] = params[arg_index++]; + } + } + + dynamic_string_array strings; + + if ((desc.m_support_listing_file) && (val_str[0].get_len() >= 2) && (val_str[0][0] == '@')) + { + dynamic_string filename(val_str[0]); + filename.right(1); + filename.unquote(); + + if (!load_string_file(filename.get_ptr(), strings)) + { + console::error("Failed loading listing file \"%s\"!", filename.get_ptr()); + return false; + } + } + 
else + { + for (uint v = 0; v < num_val_strs; v++) + { + val_str[v].unquote(); + strings.push_back(val_str[v]); + } + } + + param_value pv; + pv.m_values.swap(strings); + pv.m_index = cur_arg_index; + pv.m_modifier = (int8)modifier; + m_param_map.insert(std::make_pair(key_str, pv)); + } + else + { + param_value pv; + pv.m_values.push_back(src_param); + pv.m_values.back().unquote(); + pv.m_index = cur_arg_index; + m_param_map.insert(std::make_pair(g_empty_dynamic_string, pv)); + } + } - dynamic_string ansi_str; + return true; + } - for (;;) { - if (!in_stream.read_line(ansi_str)) - break; + bool command_line_params::parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param) + { + CRNLIB_ASSERT(n && pParam_desc); - ansi_str.trim(); - if (ansi_str.is_empty()) - continue; + dynamic_string_array p; + if (!split_params(pCmd_line, p)) + { + return 0; + } - strings.push_back(dynamic_string(ansi_str.get_ptr())); - } + if (p.empty()) + { + return 0; + } - return true; -} + if (skip_first_param) + { + p.erase(0U); + } -bool command_line_params::parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc) { - CRNLIB_ASSERT(n && pParam_desc); + return parse(p, n, pParam_desc); + } - m_params = params; + bool command_line_params::is_param(uint index) const + { + CRNLIB_ASSERT(index < m_params.size()); + if (index >= m_params.size()) + { + return false; + } - uint arg_index = 0; - while (arg_index < params.size()) { - const uint cur_arg_index = arg_index; - const dynamic_string& src_param = params[arg_index++]; + const dynamic_string& w = m_params[index]; + if (w.is_empty()) + { + return false; + } - if (src_param.is_empty()) - continue; #if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS - if ((src_param[0] == '/') || (src_param[0] == '-')) + return (w.get_len() >= 2) && ((w[0] == '-') || (w[0] == '/')); #else - if (src_param[0] == '-') + return (w.get_len() >= 2) && (w[0] == '-'); #endif + } + + uint command_line_params::find(uint 
num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const { - if (src_param.get_len() < 2) { - console::error("Invalid command line parameter: \"%s\"", src_param.get_ptr()); - return false; - } - - dynamic_string key_str(src_param); - - key_str.right(1); - - int modifier = 0; - char c = key_str[key_str.get_len() - 1]; - if (c == '+') - modifier = 1; - else if (c == '-') - modifier = -1; - - if (modifier) - key_str.left(key_str.get_len() - 1); - - uint param_index; - for (param_index = 0; param_index < n; param_index++) - if (key_str == pParam_desc[param_index].m_pName) - break; - - if (param_index == n) { - console::error("Unrecognized command line parameter: \"%s\"", src_param.get_ptr()); - return false; - } - - const param_desc& desc = pParam_desc[param_index]; - - const uint cMaxValues = 16; - dynamic_string val_str[cMaxValues]; - uint num_val_strs = 0; - if (desc.m_num_values) { - CRNLIB_ASSERT(desc.m_num_values <= cMaxValues); - - if ((arg_index + desc.m_num_values) > params.size()) { - console::error("Expected %u value(s) after command line parameter: \"%s\"", desc.m_num_values, src_param.get_ptr()); - return false; - } - - for (uint v = 0; v < desc.m_num_values; v++) - val_str[num_val_strs++] = params[arg_index++]; - } - - dynamic_string_array strings; - - if ((desc.m_support_listing_file) && (val_str[0].get_len() >= 2) && (val_str[0][0] == '@')) { - dynamic_string filename(val_str[0]); - filename.right(1); - filename.unquote(); - - if (!load_string_file(filename.get_ptr(), strings)) { - console::error("Failed loading listing file \"%s\"!", filename.get_ptr()); - return false; - } - } else { - for (uint v = 0; v < num_val_strs; v++) { - val_str[v].unquote(); - strings.push_back(val_str[v]); - } - } - - param_value pv; - pv.m_values.swap(strings); - pv.m_index = cur_arg_index; - pv.m_modifier = (int8)modifier; - m_param_map.insert(std::make_pair(key_str, pv)); - } else { - param_value pv; - 
pv.m_values.push_back(src_param); - pv.m_values.back().unquote(); - pv.m_index = cur_arg_index; - m_param_map.insert(std::make_pair(g_empty_dynamic_string, pv)); + CRNLIB_ASSERT(ppKeys); + + if (pUnmatched_indices) + { + pUnmatched_indices->resize(m_params.size()); + for (uint i = 0; i < m_params.size(); i++) + { + (*pUnmatched_indices)[i] = i; + } + } + + uint n = 0; + for (uint i = 0; i < num_keys; i++) + { + const char* pKey = ppKeys[i]; + + param_map_const_iterator begin, end; + find(pKey, begin, end); + + while (begin != end) + { + if (pIterators) + { + pIterators->push_back(begin); + } + + if (pUnmatched_indices) + { + int k = pUnmatched_indices->find(begin->second.m_index); + if (k >= 0) + { + pUnmatched_indices->erase_unordered(k); + } + } + + n++; + begin++; + } + } + + return n; } - } - return true; -} + void command_line_params::find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const + { + dynamic_string key(pKey); + begin = m_param_map.lower_bound(key); + end = m_param_map.upper_bound(key); + } -bool command_line_params::parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param) { - CRNLIB_ASSERT(n && pParam_desc); + uint command_line_params::get_count(const char* pKey) const + { + param_map_const_iterator begin, end; + find(pKey, begin, end); - dynamic_string_array p; - if (!split_params(pCmd_line, p)) - return 0; + uint n = 0; - if (p.empty()) - return 0; + while (begin != end) + { + n++; + begin++; + } - if (skip_first_param) - p.erase(0U); + return n; + } - return parse(p, n, pParam_desc); -} + command_line_params::param_map_const_iterator command_line_params::get_param(const char* pKey, uint index) const + { + param_map_const_iterator begin, end; + find(pKey, begin, end); -bool command_line_params::is_param(uint index) const { - CRNLIB_ASSERT(index < m_params.size()); - if (index >= m_params.size()) - return false; + if (begin == end) + { + return m_param_map.end(); + } - const 
dynamic_string& w = m_params[index]; - if (w.is_empty()) - return false; + uint n = 0; -#if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS - return (w.get_len() >= 2) && ((w[0] == '-') || (w[0] == '/')); -#else - return (w.get_len() >= 2) && (w[0] == '-'); -#endif -} + while ((begin != end) && (n != index)) + { + n++; + begin++; + } -uint command_line_params::find(uint num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const { - CRNLIB_ASSERT(ppKeys); + if (begin == end) + { + return m_param_map.end(); + } + + return begin; + } - if (pUnmatched_indices) { - pUnmatched_indices->resize(m_params.size()); - for (uint i = 0; i < m_params.size(); i++) - (*pUnmatched_indices)[i] = i; - } + bool command_line_params::has_value(const char* pKey, uint index) const + { + return get_num_values(pKey, index) != 0; + } - uint n = 0; - for (uint i = 0; i < num_keys; i++) { - const char* pKey = ppKeys[i]; + uint command_line_params::get_num_values(const char* pKey, uint index) const + { + param_map_const_iterator it = get_param(pKey, index); - param_map_const_iterator begin, end; - find(pKey, begin, end); + if (it == end()) + { + return 0; + } + return it->second.m_values.size(); + } - while (begin != end) { - if (pIterators) - pIterators->push_back(begin); + bool command_line_params::get_value_as_bool(const char* pKey, uint index, bool def) const + { + param_map_const_iterator it = get_param(pKey, index); + if (it == end()) + { + return def; + } + if (it->second.m_modifier) + { + return it->second.m_modifier > 0; + } + else + { + return true; + } + } - if (pUnmatched_indices) { - int k = pUnmatched_indices->find(begin->second.m_index); - if (k >= 0) - pUnmatched_indices->erase_unordered(k); - } + int command_line_params::get_value_as_int(const char* pKey, uint index, int def, int l, int h, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + { + 
return def; + } - n++; - begin++; + int val; + const char* p = it->second.m_values[value_index].get_ptr(); + if (!string_to_int(p, val)) + { + crnlib::console::warning("Invalid value specified for parameter \"%s\", using default value of %i", pKey, def); + return def; + } + + if (val < l) + { + crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, l); + val = l; + } + else if (val > h) + { + crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, h); + val = h; + } + + return val; } - } - - return n; -} - -void command_line_params::find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const { - dynamic_string key(pKey); - begin = m_param_map.lower_bound(key); - end = m_param_map.upper_bound(key); -} - -uint command_line_params::get_count(const char* pKey) const { - param_map_const_iterator begin, end; - find(pKey, begin, end); - - uint n = 0; - - while (begin != end) { - n++; - begin++; - } - - return n; -} - -command_line_params::param_map_const_iterator command_line_params::get_param(const char* pKey, uint index) const { - param_map_const_iterator begin, end; - find(pKey, begin, end); - - if (begin == end) - return m_param_map.end(); - - uint n = 0; - - while ((begin != end) && (n != index)) { - n++; - begin++; - } - - if (begin == end) - return m_param_map.end(); - - return begin; -} - -bool command_line_params::has_value(const char* pKey, uint index) const { - return get_num_values(pKey, index) != 0; -} - -uint command_line_params::get_num_values(const char* pKey, uint index) const { - param_map_const_iterator it = get_param(pKey, index); - - if (it == end()) - return 0; - - return it->second.m_values.size(); -} - -bool command_line_params::get_value_as_bool(const char* pKey, uint index, bool def) const { - param_map_const_iterator it = get_param(pKey, index); - if (it == end()) - return def; - - if (it->second.m_modifier) - return 
it->second.m_modifier > 0; - else - return true; -} - -int command_line_params::get_value_as_int(const char* pKey, uint index, int def, int l, int h, uint value_index) const { - param_map_const_iterator it = get_param(pKey, index); - if ((it == end()) || (value_index >= it->second.m_values.size())) - return def; - - int val; - const char* p = it->second.m_values[value_index].get_ptr(); - if (!string_to_int(p, val)) { - crnlib::console::warning("Invalid value specified for parameter \"%s\", using default value of %i", pKey, def); - return def; - } - - if (val < l) { - crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, l); - val = l; - } else if (val > h) { - crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, h); - val = h; - } - - return val; -} - -float command_line_params::get_value_as_float(const char* pKey, uint index, float def, float l, float h, uint value_index) const { - param_map_const_iterator it = get_param(pKey, index); - if ((it == end()) || (value_index >= it->second.m_values.size())) - return def; - - float val; - const char* p = it->second.m_values[value_index].get_ptr(); - if (!string_to_float(p, val)) { - crnlib::console::warning("Invalid value specified for float parameter \"%s\", using default value of %f", pKey, def); - return def; - } - - if (val < l) { - crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, l); - val = l; - } else if (val > h) { - crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, h); - val = h; - } - - return val; -} - -bool command_line_params::get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index) const { - param_map_const_iterator it = get_param(pKey, index); - if ((it == end()) || (value_index >= it->second.m_values.size())) { - value.empty(); - return false; - } - - value = 
it->second.m_values[value_index]; - return true; -} - -const dynamic_string& command_line_params::get_value_as_string_or_empty(const char* pKey, uint index, uint value_index) const { - param_map_const_iterator it = get_param(pKey, index); - if ((it == end()) || (value_index >= it->second.m_values.size())) - return g_empty_dynamic_string; - - return it->second.m_values[value_index]; -} + float command_line_params::get_value_as_float(const char* pKey, uint index, float def, float l, float h, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + { + return def; + } + float val; + const char* p = it->second.m_values[value_index].get_ptr(); + if (!string_to_float(p, val)) + { + crnlib::console::warning("Invalid value specified for float parameter \"%s\", using default value of %f", pKey, def); + return def; + } + + if (val < l) + { + crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, l); + val = l; + } + else if (val > h) + { + crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, h); + val = h; + } + + return val; + } + + bool command_line_params::get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + { + value.empty(); + return false; + } + + value = it->second.m_values[value_index]; + return true; + } + + const dynamic_string& command_line_params::get_value_as_string_or_empty(const char* pKey, uint index, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + { + return g_empty_dynamic_string; + } + return it->second.m_values[value_index]; + } } // namespace crnlib diff --git 
a/crnlib/crn_command_line_params.h b/crnlib/crn_command_line_params.h index 5408ee7..f3713db 100644 --- a/crnlib/crn_command_line_params.h +++ b/crnlib/crn_command_line_params.h @@ -5,80 +5,104 @@ #include #include "crn_export.h" -namespace crnlib { -// Returns the command line passed to the app as a string. -// On systems where this isn't trivial, this function combines together the separate arguments, quoting and adding spaces as needed. +namespace crnlib +{ + // Returns the command line passed to the app as a string. + // On systems where this isn't trivial, this function combines together the separate arguments, quoting and adding spaces as needed. CRN_EXPORT void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char* argv[]); -class CRN_EXPORT command_line_params { - public: - struct param_value { - inline param_value() - : m_index(0), m_modifier(0) {} + class CRN_EXPORT command_line_params + { + public: + struct param_value + { + inline param_value(): + m_index(0), + m_modifier(0) + { + } - dynamic_string_array m_values; - uint m_index; - int8 m_modifier; - }; + dynamic_string_array m_values; + uint m_index; + int8 m_modifier; + }; - typedef std::multimap param_map; - typedef param_map::const_iterator param_map_const_iterator; - typedef param_map::iterator param_map_iterator; + typedef std::multimap param_map; + typedef param_map::const_iterator param_map_const_iterator; + typedef param_map::iterator param_map_iterator; - command_line_params(); + command_line_params(); - void clear(); + void clear(); - static bool split_params(const char* p, dynamic_string_array& params); + static bool split_params(const char* p, dynamic_string_array& params); - struct param_desc { - const char* m_pName; - uint m_num_values; - bool m_support_listing_file; - }; + struct param_desc { + const char* m_pName; + uint m_num_values; + bool m_support_listing_file; + }; - bool parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc); - bool 
parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param = true); + bool parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc); + bool parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param = true); - const dynamic_string_array& get_array() const { return m_params; } + const dynamic_string_array& get_array() const + { + return m_params; + } - bool is_param(uint index) const; + bool is_param(uint index) const; - const param_map& get_map() const { return m_param_map; } + const param_map& get_map() const + { + return m_param_map; + } - uint get_num_params() const { return static_cast(m_param_map.size()); } + uint get_num_params() const + { + return static_cast(m_param_map.size()); + } - param_map_const_iterator begin() const { return m_param_map.begin(); } - param_map_const_iterator end() const { return m_param_map.end(); } + param_map_const_iterator begin() const + { + return m_param_map.begin(); + } + param_map_const_iterator end() const + { + return m_param_map.end(); + } - uint find(uint num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const; + uint find(uint num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const; - void find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const; + void find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const; - uint get_count(const char* pKey) const; + uint get_count(const char* pKey) const; - // Returns end() if param cannot be found, or index is out of range. - param_map_const_iterator get_param(const char* pKey, uint index) const; + // Returns end() if param cannot be found, or index is out of range. 
+ param_map_const_iterator get_param(const char* pKey, uint index) const; - bool has_key(const char* pKey) const { return get_param(pKey, 0) != end(); } + bool has_key(const char* pKey) const + { + return get_param(pKey, 0) != end(); + } - bool has_value(const char* pKey, uint index) const; - uint get_num_values(const char* pKey, uint index) const; + bool has_value(const char* pKey, uint index) const; + uint get_num_values(const char* pKey, uint index) const; - bool get_value_as_bool(const char* pKey, uint index = 0, bool def = false) const; + bool get_value_as_bool(const char* pKey, uint index = 0, bool def = false) const; - int get_value_as_int(const char* pKey, uint index, int def, int l = INT_MIN, int h = INT_MAX, uint value_index = 0) const; - float get_value_as_float(const char* pKey, uint index, float def = 0.0f, float l = -math::cNearlyInfinite, float h = math::cNearlyInfinite, uint value_index = 0) const; + int get_value_as_int(const char* pKey, uint index, int def, int l = INT_MIN, int h = INT_MAX, uint value_index = 0) const; + float get_value_as_float(const char* pKey, uint index, float def = 0.0f, float l = -math::cNearlyInfinite, float h = math::cNearlyInfinite, uint value_index = 0) const; - bool get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index = 0) const; - const dynamic_string& get_value_as_string_or_empty(const char* pKey, uint index = 0, uint value_index = 0) const; + bool get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index = 0) const; + const dynamic_string& get_value_as_string_or_empty(const char* pKey, uint index = 0, uint value_index = 0) const; - private: - dynamic_string_array m_params; + private: + dynamic_string_array m_params; - param_map m_param_map; + param_map m_param_map; - static bool load_string_file(const char* pFilename, dynamic_string_array& strings); -}; + static bool load_string_file(const char* pFilename, dynamic_string_array& strings); + }; } // 
namespace crnlib diff --git a/crnlib/crn_data_stream.cpp b/crnlib/crn_data_stream.cpp index f2a100e..518fa51 100644 --- a/crnlib/crn_data_stream.cpp +++ b/crnlib/crn_data_stream.cpp @@ -1,5 +1,6 @@ // File: crn_data_stream.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_data_stream.h" diff --git a/crnlib/crn_dds_comp.cpp b/crnlib/crn_dds_comp.cpp index 629f968..7139a87 100644 --- a/crnlib/crn_dds_comp.cpp +++ b/crnlib/crn_dds_comp.cpp @@ -1,227 +1,292 @@ // File: crn_dds_comp.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_dds_comp.h" #include "crn_dynamic_stream.h" #include "crn_lzma_codec.h" -namespace crnlib { -dds_comp::dds_comp() - : m_pParams(NULL), - m_pixel_fmt(PIXEL_FMT_INVALID), - m_pQDXT_state(NULL) { -} - -dds_comp::~dds_comp() { - crnlib_delete(m_pQDXT_state); -} - -void dds_comp::clear() { - m_src_tex.clear(); - m_packed_tex.clear(); - m_comp_data.clear(); - m_pParams = NULL; - m_pixel_fmt = PIXEL_FMT_INVALID; - m_task_pool.deinit(); - if (m_pQDXT_state) { - crnlib_delete(m_pQDXT_state); - m_pQDXT_state = NULL; - } -} - -bool dds_comp::create_dds_tex(mipmapped_texture& dds_tex) { - image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; - - bool has_alpha = false; - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - const uint width = math::maximum(1U, m_pParams->m_width >> level_index); - const uint height = math::maximum(1U, m_pParams->m_height >> level_index); - - if (!m_pParams->m_pImages[face_index][level_index]) - return false; - - images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); - if (!has_alpha) - has_alpha = image_utils::has_alpha(images[face_index][level_index]); +namespace crnlib +{ + dds_comp::dds_comp(): + m_pParams(NULL), + m_pixel_fmt(PIXEL_FMT_INVALID), + 
m_pQDXT_state(NULL) + { } - } - - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) - images[face_index][level_index].set_component_valid(3, has_alpha); - - image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); - if (conv_type != image_utils::cConversion_Invalid) { - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - image_u8 cooked_image(images[face_index][level_index]); - - image_utils::convert_image(cooked_image, conv_type); - images[face_index][level_index].swap(cooked_image); - } + dds_comp::~dds_comp() + { + crnlib_delete(m_pQDXT_state); } - } - face_vec faces(m_pParams->m_faces); - - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - mip_level* pMip = crnlib_new(); - - image_u8* pImage = crnlib_new(); - pImage->swap(images[face_index][level_index]); - pMip->assign(pImage); - - faces[face_index].push_back(pMip); + void dds_comp::clear() + { + m_src_tex.clear(); + m_packed_tex.clear(); + m_comp_data.clear(); + m_pParams = NULL; + m_pixel_fmt = PIXEL_FMT_INVALID; + m_task_pool.deinit(); + if (m_pQDXT_state) + { + crnlib_delete(m_pQDXT_state); + m_pQDXT_state = NULL; + } } - } - dds_tex.assign(faces); + bool dds_comp::create_dds_tex(mipmapped_texture& dds_tex) + { + image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; + + bool has_alpha = false; + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + const uint width = math::maximum(1U, m_pParams->m_width >> level_index); + const uint height = math::maximum(1U, m_pParams->m_height >> level_index); + + if 
(!m_pParams->m_pImages[face_index][level_index]) + { + return false; + } + + images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); + if (!has_alpha) + { + has_alpha = image_utils::has_alpha(images[face_index][level_index]); + } + } + } + + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + images[face_index][level_index].set_component_valid(3, has_alpha); + } + } + image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); + if (conv_type != image_utils::cConversion_Invalid) + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + image_u8 cooked_image(images[face_index][level_index]); + + image_utils::convert_image(cooked_image, conv_type); + + images[face_index][level_index].swap(cooked_image); + } + } + } + + face_vec faces(m_pParams->m_faces); + + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + mip_level* pMip = crnlib_new(); + + image_u8* pImage = crnlib_new(); + pImage->swap(images[face_index][level_index]); + pMip->assign(pImage); + + faces[face_index].push_back(pMip); + } + } + + dds_tex.assign(faces); #ifdef CRNLIB_BUILD_DEBUG - CRNLIB_ASSERT(dds_tex.check()); + CRNLIB_ASSERT(dds_tex.check()); #endif - return true; -} - -static bool progress_callback_func(uint percentage_complete, void* pUser_data_ptr) { - const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; - return params.m_pProgress_func(0, 1, percentage_complete, 100, params.m_pProgress_func_data) != 0; -} - -static bool progress_callback_func_phase_0(uint percentage_complete, void* pUser_data_ptr) { - const 
crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; - return params.m_pProgress_func(0, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; -} - -static bool progress_callback_func_phase_1(uint percentage_complete, void* pUser_data_ptr) { - const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; - return params.m_pProgress_func(1, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; -} - -bool dds_comp::convert_to_dxt(const crn_comp_params& params) { - if ((params.m_quality_level == cCRNMaxQualityLevel) || (params.m_format == cCRNFmtDXT3)) { - m_packed_tex = m_src_tex; - if (!m_packed_tex.convert(m_pixel_fmt, false, m_pack_params)) - return false; - } else { - const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; - - m_q1_params.m_quality_level = params.m_quality_level; - m_q1_params.m_hierarchical = hierarchical; - - m_q5_params.m_quality_level = params.m_quality_level; - m_q5_params.m_hierarchical = hierarchical; - - if (!m_pQDXT_state) { - m_pQDXT_state = crnlib_new(m_task_pool); - - if (params.m_pProgress_func) { - m_q1_params.m_pProgress_func = progress_callback_func_phase_0; - m_q1_params.m_pProgress_data = (void*)¶ms; - m_q5_params.m_pProgress_func = progress_callback_func_phase_0; - m_q5_params.m_pProgress_data = (void*)¶ms; - } - - if (!m_src_tex.qdxt_pack_init(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params, m_pixel_fmt, false)) - return false; - - if (params.m_pProgress_func) { - m_q1_params.m_pProgress_func = progress_callback_func_phase_1; - m_q5_params.m_pProgress_func = progress_callback_func_phase_1; - } - } else { - if (params.m_pProgress_func) { - m_q1_params.m_pProgress_func = progress_callback_func; - m_q1_params.m_pProgress_data = (void*)¶ms; - m_q5_params.m_pProgress_func = progress_callback_func; - m_q5_params.m_pProgress_data = (void*)¶ms; - } + return true; } - if (!m_src_tex.qdxt_pack(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params)) - return 
false; - } - - return true; -} - -bool dds_comp::compress_init(const crn_comp_params& params) { - clear(); - - m_pParams = ¶ms; - - if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) - return false; - - if (math::minimum(m_pParams->m_faces, m_pParams->m_levels) < 1) - return false; - - if (!create_dds_tex(m_src_tex)) - return false; - - m_pack_params.init(*m_pParams); - if (params.m_pProgress_func) { - m_pack_params.m_pProgress_callback = progress_callback_func; - m_pack_params.m_pProgress_callback_user_data_ptr = (void*)¶ms; - } - - m_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(static_cast(m_pParams->m_format)); - if (m_pixel_fmt == PIXEL_FMT_INVALID) - return false; - if ((m_pixel_fmt == PIXEL_FMT_DXT1) && (m_src_tex.has_alpha()) && (m_pack_params.m_use_both_block_types) && (m_pParams->m_flags & cCRNCompFlagDXT1AForTransparency)) - m_pixel_fmt = PIXEL_FMT_DXT1A; - - if (!m_task_pool.init(m_pParams->m_num_helper_threads)) - return false; - m_pack_params.m_pTask_pool = &m_task_pool; - - const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; - m_q1_params.init(m_pack_params, params.m_quality_level, hierarchical); - m_q5_params.init(m_pack_params, params.m_quality_level, hierarchical); - - return true; -} - -bool dds_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { - if (pEffective_bitrate) - *pEffective_bitrate = 0.0f; - - if (!m_pParams) - return false; - - if (!convert_to_dxt(params)) - return false; - - dynamic_stream out_stream; - out_stream.reserve(512 * 1024); - data_stream_serializer serializer(out_stream); - - if (!m_packed_tex.write_dds(serializer)) - return false; - out_stream.reserve(0); + static bool progress_callback_func(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return 
params.m_pProgress_func(0, 1, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } - m_comp_data.swap(out_stream.get_buf()); + static bool progress_callback_func_phase_0(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return params.m_pProgress_func(0, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } - if (pEffective_bitrate) { - lzma_codec lossless_codec; + static bool progress_callback_func_phase_1(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return params.m_pProgress_func(1, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } - crnlib::vector cmp_tex_bytes; - if (lossless_codec.pack(m_comp_data.get_ptr(), m_comp_data.size(), cmp_tex_bytes)) { - uint comp_size = cmp_tex_bytes.size(); - if (comp_size) { - *pEffective_bitrate = (comp_size * 8.0f) / m_src_tex.get_total_pixels_in_all_faces_and_mips(); - } + bool dds_comp::convert_to_dxt(const crn_comp_params& params) + { + if ((params.m_quality_level == cCRNMaxQualityLevel) || (params.m_format == cCRNFmtDXT3)) + { + m_packed_tex = m_src_tex; + if (!m_packed_tex.convert(m_pixel_fmt, false, m_pack_params)) + { + return false; + } + } + else + { + const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; + + m_q1_params.m_quality_level = params.m_quality_level; + m_q1_params.m_hierarchical = hierarchical; + + m_q5_params.m_quality_level = params.m_quality_level; + m_q5_params.m_hierarchical = hierarchical; + + if (!m_pQDXT_state) + { + m_pQDXT_state = crnlib_new(m_task_pool); + + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func_phase_0; + m_q1_params.m_pProgress_data = (void*)¶ms; + m_q5_params.m_pProgress_func = progress_callback_func_phase_0; + m_q5_params.m_pProgress_data = (void*)¶ms; + } + + if (!m_src_tex.qdxt_pack_init(*m_pQDXT_state, 
m_packed_tex, m_q1_params, m_q5_params, m_pixel_fmt, false)) + { + return false; + } + + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func_phase_1; + m_q5_params.m_pProgress_func = progress_callback_func_phase_1; + } + } + else + { + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func; + m_q1_params.m_pProgress_data = (void*)¶ms; + m_q5_params.m_pProgress_func = progress_callback_func; + m_q5_params.m_pProgress_data = (void*)¶ms; + } + } + + if (!m_src_tex.qdxt_pack(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params)) + { + return false; + } + } + + return true; } - } - return true; -} + bool dds_comp::compress_init(const crn_comp_params& params) + { + clear(); + + m_pParams = ¶ms; + + if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) + { + return false; + } + + if (math::minimum(m_pParams->m_faces, m_pParams->m_levels) < 1) + { + return false; + } + + if (!create_dds_tex(m_src_tex)) + { + return false; + } + + m_pack_params.init(*m_pParams); + if (params.m_pProgress_func) + { + m_pack_params.m_pProgress_callback = progress_callback_func; + m_pack_params.m_pProgress_callback_user_data_ptr = (void*)¶ms; + } + + m_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(static_cast(m_pParams->m_format)); + if (m_pixel_fmt == PIXEL_FMT_INVALID) + { + return false; + } + if ((m_pixel_fmt == PIXEL_FMT_DXT1) && (m_src_tex.has_alpha()) && (m_pack_params.m_use_both_block_types) && (m_pParams->m_flags & cCRNCompFlagDXT1AForTransparency)) + { + m_pixel_fmt = PIXEL_FMT_DXT1A; + } + + if (!m_task_pool.init(m_pParams->m_num_helper_threads)) + { + return false; + } + m_pack_params.m_pTask_pool = &m_task_pool; + + const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; + m_q1_params.init(m_pack_params, params.m_quality_level, hierarchical); + 
m_q5_params.init(m_pack_params, params.m_quality_level, hierarchical); + + return true; + } -void dds_comp::compress_deinit() { - clear(); -} + bool dds_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { + if (pEffective_bitrate) + { + *pEffective_bitrate = 0.0f; + } + + if (!m_pParams) + { + return false; + } + + if (!convert_to_dxt(params)) + { + return false; + } + + dynamic_stream out_stream; + out_stream.reserve(512 * 1024); + data_stream_serializer serializer(out_stream); + + if (!m_packed_tex.write_dds(serializer)) + { + return false; + } + out_stream.reserve(0); + + m_comp_data.swap(out_stream.get_buf()); + + if (pEffective_bitrate) + { + lzma_codec lossless_codec; + + crnlib::vector cmp_tex_bytes; + if (lossless_codec.pack(m_comp_data.get_ptr(), m_comp_data.size(), cmp_tex_bytes)) + { + uint comp_size = cmp_tex_bytes.size(); + if (comp_size) + { + *pEffective_bitrate = (comp_size * 8.0f) / m_src_tex.get_total_pixels_in_all_faces_and_mips(); + } + } + } + + return true; + } + void dds_comp::compress_deinit() + { + clear(); + } } // namespace crnlib diff --git a/crnlib/crn_dds_comp.h b/crnlib/crn_dds_comp.h index 3426b19..150d3fb 100644 --- a/crnlib/crn_dds_comp.h +++ b/crnlib/crn_dds_comp.h @@ -1,47 +1,59 @@ // File: crn_comp.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_comp.h" #include "crn_mipmapped_texture.h" #include "crn_texture_comp.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT dds_comp : public itexture_comp { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(dds_comp); - - public: - dds_comp(); - virtual ~dds_comp(); - - virtual const char* get_ext() const { return "DDS"; } - - virtual bool compress_init(const crn_comp_params& params); - virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); - virtual void compress_deinit(); - - virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } - virtual crnlib::vector& 
get_comp_data() { return m_comp_data; } - - private: - mipmapped_texture m_src_tex; - mipmapped_texture m_packed_tex; - - crnlib::vector m_comp_data; - - const crn_comp_params* m_pParams; - - pixel_format m_pixel_fmt; - dxt_image::pack_params m_pack_params; - - task_pool m_task_pool; - qdxt1_params m_q1_params; - qdxt5_params m_q5_params; - mipmapped_texture::qdxt_state* m_pQDXT_state; - - void clear(); - bool create_dds_tex(mipmapped_texture& dds_tex); - bool convert_to_dxt(const crn_comp_params& params); -}; +namespace crnlib +{ + class CRN_EXPORT dds_comp : public itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(dds_comp); + public: + dds_comp(); + virtual ~dds_comp(); + + virtual const char* get_ext() const + { + return "DDS"; + } + + virtual bool compress_init(const crn_comp_params& params); + virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); + virtual void compress_deinit(); + + virtual const crnlib::vector& get_comp_data() const + { + return m_comp_data; + } + virtual crnlib::vector& get_comp_data() + { + return m_comp_data; + } + + private: + mipmapped_texture m_src_tex; + mipmapped_texture m_packed_tex; + + crnlib::vector m_comp_data; + + const crn_comp_params* m_pParams; + + pixel_format m_pixel_fmt; + dxt_image::pack_params m_pack_params; + + task_pool m_task_pool; + qdxt1_params m_q1_params; + qdxt5_params m_q5_params; + mipmapped_texture::qdxt_state* m_pQDXT_state; + + void clear(); + bool create_dds_tex(mipmapped_texture& dds_tex); + bool convert_to_dxt(const crn_comp_params& params); + }; } // namespace crnlib diff --git a/crnlib/crn_decomp.cpp b/crnlib/crn_decomp.cpp index bd2c31c..1d9faf7 100644 --- a/crnlib/crn_decomp.cpp +++ b/crnlib/crn_decomp.cpp @@ -3,4 +3,4 @@ #include "crn_core.h" // Include the single-file header library with no defines, which brings in the full CRN decompressor. 
-#include "../inc/crn_decomp.h" +#include "crn_decomp.h" diff --git a/crnlib/crn_dxt5a.cpp b/crnlib/crn_dxt5a.cpp index f164640..b2dd881 100644 --- a/crnlib/crn_dxt5a.cpp +++ b/crnlib/crn_dxt5a.cpp @@ -6,184 +6,237 @@ #include "crn_dxt_fast.h" #include "crn_intersect.h" -namespace crnlib { -dxt5_endpoint_optimizer::dxt5_endpoint_optimizer() - : m_pParams(NULL), - m_pResults(NULL) { - m_unique_values.reserve(16); - m_unique_value_weights.reserve(16); -} - -bool dxt5_endpoint_optimizer::compute(const params& p, results& r) { - m_pParams = &p; - m_pResults = &r; - - if ((!p.m_num_pixels) || (!p.m_pPixels)) - return false; - - m_unique_values.resize(0); - m_unique_value_weights.resize(0); - - for (uint i = 0; i < 256; i++) - m_unique_value_map[i] = -1; - - for (uint i = 0; i < p.m_num_pixels; i++) { - uint alpha = p.m_pPixels[i][p.m_comp_index]; - - int index = m_unique_value_map[alpha]; +namespace crnlib +{ + dxt5_endpoint_optimizer::dxt5_endpoint_optimizer(): + m_pParams(NULL), + m_pResults(NULL) { + m_unique_values.reserve(16); + m_unique_value_weights.reserve(16); + } - if (index == -1) { - index = m_unique_values.size(); + bool dxt5_endpoint_optimizer::compute(const params& p, results& r) + { + m_pParams = &p; + m_pResults = &r; - m_unique_value_map[alpha] = index; + if ((!p.m_num_pixels) || (!p.m_pPixels)) + { + return false; + } - m_unique_values.push_back(static_cast(alpha)); - m_unique_value_weights.push_back(0); - } + m_unique_values.resize(0); + m_unique_value_weights.resize(0); - m_unique_value_weights[index]++; - } + for (uint i = 0; i < 256; i++) + { + m_unique_value_map[i] = -1; + } - if (m_unique_values.size() == 1) { - r.m_block_type = 0; - r.m_reordered = false; - r.m_error = 0; - r.m_first_endpoint = m_unique_values[0]; - r.m_second_endpoint = m_unique_values[0]; - memset(r.m_pSelectors, 0, p.m_num_pixels); - return true; - } + for (uint i = 0; i < p.m_num_pixels; i++) + { + uint alpha = p.m_pPixels[i][p.m_comp_index]; - 
m_trial_selectors.resize(m_unique_values.size()); - m_best_selectors.resize(m_unique_values.size()); + int index = m_unique_value_map[alpha]; - r.m_error = cUINT64_MAX; + if (index == -1) + { + index = m_unique_values.size(); - for (uint i = 0; i < m_unique_values.size() - 1; i++) { - const uint low_endpoint = m_unique_values[i]; + m_unique_value_map[alpha] = index; - for (uint j = i + 1; j < m_unique_values.size(); j++) { - const uint high_endpoint = m_unique_values[j]; + m_unique_values.push_back(static_cast(alpha)); + m_unique_value_weights.push_back(0); + } - evaluate_solution(low_endpoint, high_endpoint); - } - } - - if ((m_pParams->m_quality >= cCRNDXTQualityBetter) && (m_pResults->m_error)) { - m_flags.resize(256 * 256); - m_flags.clear_all_bits(); - - const int cProbeAmount = (m_pParams->m_quality == cCRNDXTQualityUber) ? 16 : 8; - - for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) { - const int l = m_pResults->m_first_endpoint + l_delta; - if (l < 0) - continue; - else if (l > 255) - break; - - const uint bit_index = l * 256; - - for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) { - const int h = m_pResults->m_second_endpoint + h_delta; - if (h < 0) - continue; - else if (h > 255) - break; - - //if (m_flags.get_bit(bit_index + h)) - // continue; - if ((m_flags.get_bit(bit_index + h)) || (m_flags.get_bit(h * 256 + l))) - continue; - m_flags.set_bit(bit_index + h); - - evaluate_solution(static_cast(l), static_cast(h)); - } - } - } - - m_pResults->m_reordered = false; - if (m_pResults->m_first_endpoint == m_pResults->m_second_endpoint) { - for (uint i = 0; i < m_best_selectors.size(); i++) - m_best_selectors[i] = 0; - } else if (m_pResults->m_block_type) { - //if (l > h) - // eight alpha - // else - // six alpha - - if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) { - utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); - m_pResults->m_reordered = true; - for (uint i = 0; i < 
m_best_selectors.size(); i++) - m_best_selectors[i] = g_six_alpha_invert_table[m_best_selectors[i]]; - } - } else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) { - utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); - m_pResults->m_reordered = true; - for (uint i = 0; i < m_best_selectors.size(); i++) - m_best_selectors[i] = g_eight_alpha_invert_table[m_best_selectors[i]]; - } + m_unique_value_weights[index]++; + } - for (uint i = 0; i < m_pParams->m_num_pixels; i++) { - uint alpha = m_pParams->m_pPixels[i][m_pParams->m_comp_index]; + if (m_unique_values.size() == 1) + { + r.m_block_type = 0; + r.m_reordered = false; + r.m_error = 0; + r.m_first_endpoint = m_unique_values[0]; + r.m_second_endpoint = m_unique_values[0]; + memset(r.m_pSelectors, 0, p.m_num_pixels); + return true; + } - int index = m_unique_value_map[alpha]; + m_trial_selectors.resize(m_unique_values.size()); + m_best_selectors.resize(m_unique_values.size()); - m_pResults->m_pSelectors[i] = m_best_selectors[index]; - } + r.m_error = cUINT64_MAX; - return true; -} + for (uint i = 0; i < m_unique_values.size() - 1; i++) + { + const uint low_endpoint = m_unique_values[i]; -void dxt5_endpoint_optimizer::evaluate_solution(uint low_endpoint, uint high_endpoint) { - for (uint block_type = 0; block_type < (m_pParams->m_use_both_block_types ? 
2U : 1U); block_type++) { - uint selector_values[8]; + for (uint j = i + 1; j < m_unique_values.size(); j++) + { + const uint high_endpoint = m_unique_values[j]; - if (!block_type) - dxt5_block::get_block_values8(selector_values, low_endpoint, high_endpoint); - else - dxt5_block::get_block_values6(selector_values, low_endpoint, high_endpoint); + evaluate_solution(low_endpoint, high_endpoint); + } + } - uint64 trial_error = 0; + if ((m_pParams->m_quality >= cCRNDXTQualityBetter) && (m_pResults->m_error)) + { + m_flags.resize(256 * 256); + m_flags.clear_all_bits(); + + const int cProbeAmount = (m_pParams->m_quality == cCRNDXTQualityUber) ? 16 : 8; + + for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) + { + const int l = m_pResults->m_first_endpoint + l_delta; + if (l < 0) + { + continue; + } + else if (l > 255) + { + break; + } + const uint bit_index = l * 256; + + for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) + { + const int h = m_pResults->m_second_endpoint + h_delta; + if (h < 0) + { + continue; + } + else if (h > 255) + { + break; + } + + //if (m_flags.get_bit(bit_index + h)) + // continue; + if ((m_flags.get_bit(bit_index + h)) || (m_flags.get_bit(h * 256 + l))) + { + continue; + } + + m_flags.set_bit(bit_index + h); + + evaluate_solution(static_cast(l), static_cast(h)); + } + } + } - for (uint i = 0; i < m_unique_values.size(); i++) { - const uint val = m_unique_values[i]; - const uint weight = m_unique_value_weights[i]; + m_pResults->m_reordered = false; + if (m_pResults->m_first_endpoint == m_pResults->m_second_endpoint) + { + for (uint i = 0; i < m_best_selectors.size(); i++) + { + m_best_selectors[i] = 0; + } + } + else if (m_pResults->m_block_type) + { + //if (l > h) + // eight alpha + // else + // six alpha + + if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) + { + utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + m_pResults->m_reordered = true; + for (uint i = 0; i < 
m_best_selectors.size(); i++) + { + m_best_selectors[i] = g_six_alpha_invert_table[m_best_selectors[i]]; + } + } + } + else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) + { + utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + m_pResults->m_reordered = true; + for (uint i = 0; i < m_best_selectors.size(); i++) + { + m_best_selectors[i] = g_eight_alpha_invert_table[m_best_selectors[i]]; + } + } - uint best_selector_error = UINT_MAX; - uint best_selector = 0; + for (uint i = 0; i < m_pParams->m_num_pixels; i++) + { + uint alpha = m_pParams->m_pPixels[i][m_pParams->m_comp_index]; - for (uint j = 0; j < 8; j++) { - int selector_error = val - selector_values[j]; - selector_error = selector_error * selector_error * (int)weight; + int index = m_unique_value_map[alpha]; - if (static_cast(selector_error) < best_selector_error) { - best_selector_error = selector_error; - best_selector = j; - if (!best_selector_error) - break; + m_pResults->m_pSelectors[i] = m_best_selectors[index]; } - } - - m_trial_selectors[i] = static_cast(best_selector); - trial_error += best_selector_error; - if (trial_error > m_pResults->m_error) - break; + return true; } - if (trial_error < m_pResults->m_error) { - m_pResults->m_error = trial_error; - m_pResults->m_first_endpoint = static_cast(low_endpoint); - m_pResults->m_second_endpoint = static_cast(high_endpoint); - m_pResults->m_block_type = static_cast(block_type); - m_best_selectors.swap(m_trial_selectors); - - if (!trial_error) - break; + void dxt5_endpoint_optimizer::evaluate_solution(uint low_endpoint, uint high_endpoint) + { + for (uint block_type = 0; block_type < (m_pParams->m_use_both_block_types ? 
2U : 1U); block_type++) + { + uint selector_values[8]; + + if (!block_type) + { + dxt5_block::get_block_values8(selector_values, low_endpoint, high_endpoint); + } + else + { + dxt5_block::get_block_values6(selector_values, low_endpoint, high_endpoint); + } + + uint64 trial_error = 0; + + for (uint i = 0; i < m_unique_values.size(); i++) + { + const uint val = m_unique_values[i]; + const uint weight = m_unique_value_weights[i]; + + uint best_selector_error = UINT_MAX; + uint best_selector = 0; + + for (uint j = 0; j < 8; j++) + { + int selector_error = val - selector_values[j]; + selector_error = selector_error * selector_error * (int)weight; + + if (static_cast(selector_error) < best_selector_error) + { + best_selector_error = selector_error; + best_selector = j; + if (!best_selector_error) + { + break; + } + } + } + + m_trial_selectors[i] = static_cast(best_selector); + trial_error += best_selector_error; + + if (trial_error > m_pResults->m_error) + { + break; + } + } + + if (trial_error < m_pResults->m_error) + { + m_pResults->m_error = trial_error; + m_pResults->m_first_endpoint = static_cast(low_endpoint); + m_pResults->m_second_endpoint = static_cast(high_endpoint); + m_pResults->m_block_type = static_cast(block_type); + m_best_selectors.swap(m_trial_selectors); + + if (!trial_error) + { + break; + } + } + } } - } -} } // namespace crnlib diff --git a/crnlib/crn_dxt5a.h b/crnlib/crn_dxt5a.h index b6ed233..dd67388 100644 --- a/crnlib/crn_dxt5a.h +++ b/crnlib/crn_dxt5a.h @@ -5,60 +5,65 @@ #include "crn_dxt.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT dxt5_endpoint_optimizer { - public: - dxt5_endpoint_optimizer(); +namespace crnlib +{ + class CRN_EXPORT dxt5_endpoint_optimizer + { + public: + dxt5_endpoint_optimizer(); - struct params { - params() - : m_block_index(0), - m_pPixels(NULL), - m_num_pixels(0), - m_comp_index(3), - m_quality(cCRNDXTQualityUber), - m_use_both_block_types(true) { - } + struct params + { + params(): + 
m_block_index(0), + m_pPixels(NULL), + m_num_pixels(0), + m_comp_index(3), + m_quality(cCRNDXTQualityUber), + m_use_both_block_types(true) + { + } - uint m_block_index; + uint m_block_index; - const color_quad_u8* m_pPixels; - uint m_num_pixels; - uint m_comp_index; + const color_quad_u8* m_pPixels; + uint m_num_pixels; + uint m_comp_index; - crn_dxt_quality m_quality; + crn_dxt_quality m_quality; - bool m_use_both_block_types; - }; + bool m_use_both_block_types; + }; - struct results { - uint8* m_pSelectors; + struct results + { + uint8* m_pSelectors; - uint64 m_error; + uint64 m_error; - uint8 m_first_endpoint; - uint8 m_second_endpoint; + uint8 m_first_endpoint; + uint8 m_second_endpoint; - uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha - bool m_reordered; - }; + uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha + bool m_reordered; + }; - bool compute(const params& p, results& r); + bool compute(const params& p, results& r); - private: - const params* m_pParams; - results* m_pResults; + private: + const params* m_pParams; + results* m_pResults; - crnlib::vector m_unique_values; - crnlib::vector m_unique_value_weights; + crnlib::vector m_unique_values; + crnlib::vector m_unique_value_weights; - crnlib::vector m_trial_selectors; - crnlib::vector m_best_selectors; - int m_unique_value_map[256]; + crnlib::vector m_trial_selectors; + crnlib::vector m_best_selectors; + int m_unique_value_map[256]; - sparse_bit_array m_flags; + sparse_bit_array m_flags; - void evaluate_solution(uint low_endpoint, uint high_endpoint); -}; + void evaluate_solution(uint low_endpoint, uint high_endpoint); + }; } // namespace crnlib diff --git a/crnlib/crn_dxt_endpoint_refiner.cpp b/crnlib/crn_dxt_endpoint_refiner.cpp index 2d8d7d9..d830a24 100644 --- a/crnlib/crn_dxt_endpoint_refiner.cpp +++ b/crnlib/crn_dxt_endpoint_refiner.cpp @@ -1,209 +1,273 @@ // File: crn_dxt_endpoint_refiner.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" 
#include "crn_dxt_endpoint_refiner.h" #include "crn_dxt1.h" -namespace crnlib { -dxt_endpoint_refiner::dxt_endpoint_refiner() - : m_pParams(NULL), - m_pResults(NULL) { -} - -bool dxt_endpoint_refiner::refine(const params& p, results& r) { - if (!p.m_num_pixels) - return false; - - m_pParams = &p; - m_pResults = &r; - - r.m_error = cUINT64_MAX; - r.m_low_color = 0; - r.m_high_color = 0; - - double alpha2_sum = 0.0f; - double beta2_sum = 0.0f; - double alphabeta_sum = 0.0f; - - vec<3, double> alphax_sum(0.0f); - vec<3, double> betax_sum(0.0f); - - vec<3, double> first_color(0.0f); - - // This linear solver is from Squish. - for (uint i = 0; i < p.m_num_pixels; ++i) { - uint8 c = p.m_pSelectors[i]; - - double k; - if (p.m_dxt1_selectors) - k = g_dxt1_to_linear[c] * 1.0f / 3.0f; - else - k = g_dxt5_to_linear[c] * 1.0f / 7.0f; - - double alpha = 1.0f - k; - double beta = k; - - vec<3, double> x; - - if (p.m_dxt1_selectors) - x.set(p.m_pPixels[i][0] * 1.0f / 255.0f, p.m_pPixels[i][1] * 1.0f / 255.0f, p.m_pPixels[i][2] * 1.0f / 255.0f); - else - x.set(p.m_pPixels[i][p.m_alpha_comp_index] / 255.0f); - - if (!i) - first_color = x; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - // zero where non-determinate - vec<3, double> a, b; - if (beta2_sum == 0.0f) { - a = alphax_sum / alpha2_sum; - b.clear(); - } else if (alpha2_sum == 0.0f) { - a.clear(); - b = betax_sum / beta2_sum; - } else { - double factor = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; - if (factor != 0.0f) { - a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) / factor; - b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) / factor; - } else { - a = first_color; - b = first_color; - } - } - - vec3F l(0.0f), h(0.0f); - l = a; - h = b; - - l.clamp(0.0f, 1.0f); - h.clamp(0.0f, 1.0f); - - if (p.m_dxt1_selectors) - optimize_dxt1(l, h); - else - optimize_dxt5(l, h); - - return r.m_error < 
p.m_error_to_beat; -} - -void dxt_endpoint_refiner::optimize_dxt5(vec3F low_color, vec3F high_color) { - uint8 L0 = math::clamp(low_color[0] * 256.0f, 0, 255); - uint8 H0 = math::clamp(high_color[0] * 256.0f, 0, 255); - - uint64 hist[8] = {}, D2[8] = {}, DD[8] = {}; - for (uint c = m_pParams->m_alpha_comp_index, i = 0; i < m_pParams->m_num_pixels; i++) { - uint8 a = m_pParams->m_pPixels[i][c]; - uint8 s = m_pParams->m_pSelectors[i]; - hist[s]++; - D2[s] += a * 2; - DD[s] += a * a; - } - - uint16 solutions[529]; - uint solutions_count = 0; - solutions[solutions_count++] = L0 == H0 ? H0 ? H0 - 1 << 8 | L0 : 1 : L0 > H0 ? H0 << 8 | L0 : L0 << 8 | H0; - uint8 minL = L0 <= 11 ? 0 : L0 - 11, maxL = L0 >= 244 ? 255 : L0 + 11; - uint8 minH = H0 <= 11 ? 0 : H0 - 11, maxH = H0 >= 244 ? 255 : H0 + 11; - for (uint16 L = minL; L <= maxL; L++) { - for (uint16 H = minH; H <= maxH; H++) { - if ((maxH < L || L <= H || H < minL) && (L != L0 || H != H0) && (L != H0 || H != L0)) - solutions[solutions_count++] = L == H ? H ? H - 1 << 8 | L : 1 : L > H ? 
H << 8 | L : L << 8 | H; - } - } - - for (uint i = 0; i < solutions_count; i++) { - uint8 L = solutions[i] & 0xFF; - uint8 H = solutions[i] >> 8; - uint values[8]; - dxt5_block::get_block_values8(values, L, H); - uint64 error = 0; - for (uint64 s = 0; s < 8; s++) - error += hist[s] * values[s] * values[s] - D2[s] * values[s] + DD[s]; - if (error < m_pResults->m_error) { - m_pResults->m_low_color = L; - m_pResults->m_high_color = H; - m_pResults->m_error = error; - if (!m_pResults->m_error) - return; +namespace crnlib +{ + dxt_endpoint_refiner::dxt_endpoint_refiner() : + m_pParams(NULL), + m_pResults(NULL) + { } - } -} - -void dxt_endpoint_refiner::optimize_dxt1(vec3F low_color, vec3F high_color) { - uint16 L0 = math::clamp(low_color[0] * 32.0f, 0, 31) << 11 | math::clamp(low_color[1] * 64.0f, 0, 63) << 5 | math::clamp(low_color[2] * 32.0f, 0, 31); - uint16 H0 = math::clamp(high_color[0] * 32.0f, 0, 31) << 11 | math::clamp(high_color[1] * 64.0f, 0, 63) << 5 | math::clamp(high_color[2] * 32.0f, 0, 31); - - uint64 hist[4] = {}, D2[4][3] = {}, DD[4][3] = {}; - for (uint i = 0; i < m_pParams->m_num_pixels; i++) { - const color_quad_u8& pixel = m_pParams->m_pPixels[i]; - uint8 s = m_pParams->m_pSelectors[i]; - hist[s]++; - for (uint c = 0; c < 3; c++) { - D2[s][c] += pixel[c] * 2; - DD[s][c] += pixel[c] * pixel[c]; - } - } - crnlib::vector solutions(54); - bool preserveL = hist[0] + hist[2] > hist[1] + hist[3]; - bool improved = true; - - for (uint iterations = 8; improved && iterations; iterations--) { - improved = false; - uint solutions_count = 0; - for (uint16 b0 = L0 & 31, g0 = L0 >> 5 & 63, r0 = L0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { - for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { - for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { - uint16 L = r << 11 | g << 5 | b; - if (L != L0) - solutions[solutions_count++] = L > H0 ? 
L | H0 << 16 : H0 | L << 16; + + bool dxt_endpoint_refiner::refine(const params& p, results& r) + { + if (!p.m_num_pixels) + { + return false; } - } - } - for (uint16 b0 = H0 & 31, g0 = H0 >> 5 & 63, r0 = H0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { - for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { - for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { - uint16 H = r << 11 | g << 5 | b; - if (H != H0) - solutions[solutions_count++] = H > L0 ? H | L0 << 16 : L0 | H << 16; + + m_pParams = &p; + m_pResults = &r; + + r.m_error = cUINT64_MAX; + r.m_low_color = 0; + r.m_high_color = 0; + + double alpha2_sum = 0.0f; + double beta2_sum = 0.0f; + double alphabeta_sum = 0.0f; + + vec<3, double> alphax_sum(0.0f); + vec<3, double> betax_sum(0.0f); + + vec<3, double> first_color(0.0f); + + // This linear solver is from Squish. + for (uint i = 0; i < p.m_num_pixels; ++i) + { + uint8 c = p.m_pSelectors[i]; + + double k; + if (p.m_dxt1_selectors) + { + k = g_dxt1_to_linear[c] * 1.0f / 3.0f; + } + else + { + k = g_dxt5_to_linear[c] * 1.0f / 7.0f; + } + + double alpha = 1.0f - k; + double beta = k; + + vec<3, double> x; + + if (p.m_dxt1_selectors) + { + x.set(p.m_pPixels[i][0] * 1.0f / 255.0f, p.m_pPixels[i][1] * 1.0f / 255.0f, p.m_pPixels[i][2] * 1.0f / 255.0f); + } + else + { + x.set(p.m_pPixels[i][p.m_alpha_comp_index] / 255.0f); + } + + if (!i) + { + first_color = x; + } + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * x; + betax_sum += beta * x; + } + + // zero where non-determinate + vec<3, double> a, b; + if (beta2_sum == 0.0f) + { + a = alphax_sum / alpha2_sum; + b.clear(); } - } + else if (alpha2_sum == 0.0f) + { + a.clear(); + b = betax_sum / beta2_sum; + } + else + { + double factor = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (factor != 0.0f) + { + a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) / factor; + b = (betax_sum * 
alpha2_sum - alphax_sum * alphabeta_sum) / factor; + } + else + { + a = first_color; + b = first_color; + } + } + + vec3F l(0.0f), h(0.0f); + l = a; + h = b; + + l.clamp(0.0f, 1.0f); + h.clamp(0.0f, 1.0f); + + if (p.m_dxt1_selectors) + { + optimize_dxt1(l, h); + } + else + { + optimize_dxt5(l, h); + } + + return r.m_error < p.m_error_to_beat; } - std::sort(solutions.begin(), solutions.begin() + solutions_count); - for (uint i = 0; i < solutions_count; i++) { - if (i && solutions[i] == solutions[i - 1]) - continue; - uint16 L = solutions[i] & 0xFFFF; - uint16 H = solutions[i] >> 16; - if (L == H) { - L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 : ~L & 0x7E0 ? 0x20 : 0 : !L ? 0x1 : 0; - H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 0x800 : H & 0x7E0 ? 0x20 : 0 : H == 0xFFFF ? 0x1 : 0; - } - color_quad_u8 block_colors[4]; - dxt1_block::get_block_colors4(block_colors, L, H); - uint64 error = 0; - for (uint64 s = 0, d[3]; s < 4; s++) { - for (uint c = 0; c < 3; c++) - d[c] = hist[s] * block_colors[s][c] * block_colors[s][c] - D2[s][c] * block_colors[s][c] + DD[s][c]; - error += m_pParams->m_perceptual ? d[0] * 8 + d[1] * 25 + d[2] : d[0] + d[1] + d[2]; - } - if (error < m_pResults->m_error) { - m_pResults->m_low_color = L0 = L; - m_pResults->m_high_color = H0 = H; - m_pResults->m_error = error; - if (!m_pResults->m_error) - return; - improved = true; - } + + void dxt_endpoint_refiner::optimize_dxt5(vec3F low_color, vec3F high_color) + { + uint8 L0 = math::clamp(low_color[0] * 256.0f, 0, 255); + uint8 H0 = math::clamp(high_color[0] * 256.0f, 0, 255); + + uint64 hist[8] = {}, D2[8] = {}, DD[8] = {}; + for (uint c = m_pParams->m_alpha_comp_index, i = 0; i < m_pParams->m_num_pixels; i++) + { + uint8 a = m_pParams->m_pPixels[i][c]; + uint8 s = m_pParams->m_pSelectors[i]; + hist[s]++; + D2[s] += a * 2; + DD[s] += a * a; + } + + uint16 solutions[529]; + uint solutions_count = 0; + solutions[solutions_count++] = L0 == H0 ? H0 ? H0 - 1 << 8 | L0 : 1 : L0 > H0 ? 
H0 << 8 | L0 : L0 << 8 | H0; + uint8 minL = L0 <= 11 ? 0 : L0 - 11, maxL = L0 >= 244 ? 255 : L0 + 11; + uint8 minH = H0 <= 11 ? 0 : H0 - 11, maxH = H0 >= 244 ? 255 : H0 + 11; + for (uint16 L = minL; L <= maxL; L++) + { + for (uint16 H = minH; H <= maxH; H++) + { + if ((maxH < L || L <= H || H < minL) && (L != L0 || H != H0) && (L != H0 || H != L0)) + { + solutions[solutions_count++] = L == H ? H ? H - 1 << 8 | L : 1 : L > H ? H << 8 | L : L << 8 | H; + } + } + } + + for (uint i = 0; i < solutions_count; i++) + { + uint8 L = solutions[i] & 0xFF; + uint8 H = solutions[i] >> 8; + uint values[8]; + dxt5_block::get_block_values8(values, L, H); + uint64 error = 0; + for (uint64 s = 0; s < 8; s++) + { + error += hist[s] * values[s] * values[s] - D2[s] * values[s] + DD[s]; + } + if (error < m_pResults->m_error) + { + m_pResults->m_low_color = L; + m_pResults->m_high_color = H; + m_pResults->m_error = error; + if (!m_pResults->m_error) + { + return; + } + } + } } - } -} + void dxt_endpoint_refiner::optimize_dxt1(vec3F low_color, vec3F high_color) + { + uint16 L0 = math::clamp(low_color[0] * 32.0f, 0, 31) << 11 | math::clamp(low_color[1] * 64.0f, 0, 63) << 5 | math::clamp(low_color[2] * 32.0f, 0, 31); + uint16 H0 = math::clamp(high_color[0] * 32.0f, 0, 31) << 11 | math::clamp(high_color[1] * 64.0f, 0, 63) << 5 | math::clamp(high_color[2] * 32.0f, 0, 31); + + uint64 hist[4] = {}, D2[4][3] = {}, DD[4][3] = {}; + for (uint i = 0; i < m_pParams->m_num_pixels; i++) + { + const color_quad_u8& pixel = m_pParams->m_pPixels[i]; + uint8 s = m_pParams->m_pSelectors[i]; + hist[s]++; + for (uint c = 0; c < 3; c++) + { + D2[s][c] += pixel[c] * 2; + DD[s][c] += pixel[c] * pixel[c]; + } + } + crnlib::vector solutions(54); + bool preserveL = hist[0] + hist[2] > hist[1] + hist[3]; + bool improved = true; + + for (uint iterations = 8; improved && iterations; iterations--) + { + improved = false; + uint solutions_count = 0; + for (uint16 b0 = L0 & 31, g0 = L0 >> 5 & 63, r0 = L0 >> 11 & 31, b = 
b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) + { + for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) + { + for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) + { + uint16 L = r << 11 | g << 5 | b; + if (L != L0) + { + solutions[solutions_count++] = L > H0 ? L | H0 << 16 : H0 | L << 16; + } + } + } + } + for (uint16 b0 = H0 & 31, g0 = H0 >> 5 & 63, r0 = H0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) + { + for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) + { + for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) + { + uint16 H = r << 11 | g << 5 | b; + if (H != H0) + { + solutions[solutions_count++] = H > L0 ? H | L0 << 16 : L0 | H << 16; + } + } + } + } + std::sort(solutions.begin(), solutions.begin() + solutions_count); + for (uint i = 0; i < solutions_count; i++) + { + if (i && solutions[i] == solutions[i - 1]) + { + continue; + } + uint16 L = solutions[i] & 0xFFFF; + uint16 H = solutions[i] >> 16; + if (L == H) + { + L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 : ~L & 0x7E0 ? 0x20 : 0 : !L ? 0x1 : 0; + H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 0x800 : H & 0x7E0 ? 0x20 : 0 : H == 0xFFFF ? 0x1 : 0; + } + color_quad_u8 block_colors[4]; + dxt1_block::get_block_colors4(block_colors, L, H); + uint64 error = 0; + for (uint64 s = 0, d[3]; s < 4; s++) + { + for (uint c = 0; c < 3; c++) + { + d[c] = hist[s] * block_colors[s][c] * block_colors[s][c] - D2[s][c] * block_colors[s][c] + DD[s][c]; + } + error += m_pParams->m_perceptual ? 
d[0] * 8 + d[1] * 25 + d[2] : d[0] + d[1] + d[2]; + } + if (error < m_pResults->m_error) + { + m_pResults->m_low_color = L0 = L; + m_pResults->m_high_color = H0 = H; + m_pResults->m_error = error; + if (!m_pResults->m_error) + { + return; + } + improved = true; + } + } + } + } } // namespace crnlib diff --git a/crnlib/crn_dxt_endpoint_refiner.h b/crnlib/crn_dxt_endpoint_refiner.h index 11a3f88..abb9da3 100644 --- a/crnlib/crn_dxt_endpoint_refiner.h +++ b/crnlib/crn_dxt_endpoint_refiner.h @@ -1,57 +1,62 @@ // File: crn_dxt_endpoint_refiner.h // See Copyright Notice and license at the end of inc/crnlib.h -#pragma once -#include "crn_dxt.h" - -namespace crnlib { -// TODO: Experimental/Not fully implemented -class dxt_endpoint_refiner { - public: - dxt_endpoint_refiner(); - - struct params { - params() - : m_block_index(0), - m_pPixels(NULL), - m_num_pixels(0), - m_pSelectors(NULL), - m_alpha_comp_index(0), - m_error_to_beat(cUINT64_MAX), - m_dxt1_selectors(true), - m_perceptual(true), - m_highest_quality(true) { - } - - uint m_block_index; - - const color_quad_u8* m_pPixels; - uint m_num_pixels; - - const uint8* m_pSelectors; - uint m_alpha_comp_index; - - uint64 m_error_to_beat; - - bool m_dxt1_selectors; - bool m_perceptual; - bool m_highest_quality; - }; - - struct results { - uint16 m_low_color; - uint16 m_high_color; - uint64 m_error; - }; - - bool refine(const params& p, results& r); - - private: - const params* m_pParams; - results* m_pResults; +#pragma once - void optimize_dxt1(vec3F low_color, vec3F high_color); - void optimize_dxt5(vec3F low_color, vec3F high_color); -}; +#include "crn_dxt.h" +namespace crnlib +{ + // TODO: Experimental/Not fully implemented + class dxt_endpoint_refiner + { + public: + dxt_endpoint_refiner(); + + struct params { + params(): + m_block_index(0), + m_pPixels(NULL), + m_num_pixels(0), + m_pSelectors(NULL), + m_alpha_comp_index(0), + m_error_to_beat(cUINT64_MAX), + m_dxt1_selectors(true), + m_perceptual(true), + 
m_highest_quality(true) + { + } + + uint m_block_index; + + const color_quad_u8* m_pPixels; + uint m_num_pixels; + + const uint8* m_pSelectors; + + uint m_alpha_comp_index; + + uint64 m_error_to_beat; + + bool m_dxt1_selectors; + bool m_perceptual; + bool m_highest_quality; + }; + + struct results + { + uint16 m_low_color; + uint16 m_high_color; + uint64 m_error; + }; + + bool refine(const params& p, results& r); + + private: + const params* m_pParams; + results* m_pResults; + + void optimize_dxt1(vec3F low_color, vec3F high_color); + void optimize_dxt5(vec3F low_color, vec3F high_color); + }; } // namespace crnlib diff --git a/crnlib/crn_dxt_fast.cpp b/crnlib/crn_dxt_fast.cpp index 3f74686..da588f6 100644 --- a/crnlib/crn_dxt_fast.cpp +++ b/crnlib/crn_dxt_fast.cpp @@ -1,836 +1,971 @@ // File: crn_dxt_fast.cpp // See Copyright Notice and license at the end of inc/crnlib.h // Parts of this module are derived from RYG's excellent public domain DXTx compressor. + #include "crn_core.h" #include "crn_dxt_fast.h" #include "crn_ryg_dxt.hpp" -namespace crnlib { -namespace dxt_fast { -static inline int mul_8bit(int a, int b) { - int t = a * b + 128; - return (t + (t >> 8)) >> 8; -} +namespace crnlib +{ + namespace dxt_fast + { + static inline int mul_8bit(int a, int b) + { + int t = a * b + 128; + return (t + (t >> 8)) >> 8; + } -static inline color_quad_u8& unpack_color(color_quad_u8& c, uint v) { - uint rv = (v & 0xf800) >> 11; - uint gv = (v & 0x07e0) >> 5; - uint bv = (v & 0x001f) >> 0; + static inline color_quad_u8& unpack_color(color_quad_u8& c, uint v) + { + uint rv = (v & 0xf800) >> 11; + uint gv = (v & 0x07e0) >> 5; + uint bv = (v & 0x001f) >> 0; - c.r = ryg_dxt::Expand5[rv]; - c.g = ryg_dxt::Expand6[gv]; - c.b = ryg_dxt::Expand5[bv]; - c.a = 0; + c.r = ryg_dxt::Expand5[rv]; + c.g = ryg_dxt::Expand6[gv]; + c.b = ryg_dxt::Expand5[bv]; + c.a = 0; - return c; -} + return c; + } -static inline uint pack_color(const color_quad_u8& c) { - return (mul_8bit(c.r, 31) << 11) 
+ (mul_8bit(c.g, 63) << 5) + mul_8bit(c.b, 31); -} + static inline uint pack_color(const color_quad_u8& c) + { + return (mul_8bit(c.r, 31) << 11) + (mul_8bit(c.g, 63) << 5) + mul_8bit(c.b, 31); + } -static inline void lerp_color(color_quad_u8& result, const color_quad_u8& p1, const color_quad_u8& p2, uint f) { - CRNLIB_ASSERT(f <= 255); + static inline void lerp_color(color_quad_u8& result, const color_quad_u8& p1, const color_quad_u8& p2, uint f) + { + CRNLIB_ASSERT(f <= 255); - result.r = static_cast(p1.r + mul_8bit(p2.r - p1.r, f)); - result.g = static_cast(p1.g + mul_8bit(p2.g - p1.g, f)); - result.b = static_cast(p1.b + mul_8bit(p2.b - p1.b, f)); -} + result.r = static_cast(p1.r + mul_8bit(p2.r - p1.r, f)); + result.g = static_cast(p1.g + mul_8bit(p2.g - p1.g, f)); + result.b = static_cast(p1.b + mul_8bit(p2.b - p1.b, f)); + } -static inline void eval_colors(color_quad_u8* pColors, uint c0, uint c1) { - unpack_color(pColors[0], c0); - unpack_color(pColors[1], c1); + static inline void eval_colors(color_quad_u8* pColors, uint c0, uint c1) + { + unpack_color(pColors[0], c0); + unpack_color(pColors[1], c1); #if 0 - lerp_color(pColors[2], pColors[0], pColors[1], 0x55); - lerp_color(pColors[3], pColors[0], pColors[1], 0xAA); + lerp_color(pColors[2], pColors[0], pColors[1], 0x55); + lerp_color(pColors[3], pColors[0], pColors[1], 0xAA); #else - pColors[2].r = (pColors[0].r * 2 + pColors[1].r) / 3; - pColors[2].g = (pColors[0].g * 2 + pColors[1].g) / 3; - pColors[2].b = (pColors[0].b * 2 + pColors[1].b) / 3; + pColors[2].r = (pColors[0].r * 2 + pColors[1].r) / 3; + pColors[2].g = (pColors[0].g * 2 + pColors[1].g) / 3; + pColors[2].b = (pColors[0].b * 2 + pColors[1].b) / 3; - pColors[3].r = (pColors[1].r * 2 + pColors[0].r) / 3; - pColors[3].g = (pColors[1].g * 2 + pColors[0].g) / 3; - pColors[3].b = (pColors[1].b * 2 + pColors[0].b) / 3; + pColors[3].r = (pColors[1].r * 2 + pColors[0].r) / 3; + pColors[3].g = (pColors[1].g * 2 + pColors[0].g) / 3; + pColors[3].b = 
(pColors[1].b * 2 + pColors[0].b) / 3; #endif -} - -// false if all selectors equal -static bool match_block_colors(uint n, const color_quad_u8* pBlock, const color_quad_u8* pColors, uint8* pSelectors) { - int dirr = pColors[0].r - pColors[1].r; - int dirg = pColors[0].g - pColors[1].g; - int dirb = pColors[0].b - pColors[1].b; - - int stops[4]; - for (int i = 0; i < 4; i++) - stops[i] = pColors[i].r * dirr + pColors[i].g * dirg + pColors[i].b * dirb; - - // 0 2 3 1 - int c0Point = stops[1] + stops[3]; - int halfPoint = stops[3] + stops[2]; - int c3Point = stops[2] + stops[0]; - - //dirr *= 2; - //dirg *= 2; - //dirb *= 2; - c0Point >>= 1; - halfPoint >>= 1; - c3Point >>= 1; - - bool status = false; - for (uint i = 0; i < n; i++) { - int dot = pBlock[i].r * dirr + pBlock[i].g * dirg + pBlock[i].b * dirb; - - uint8 s; - if (dot < halfPoint) - s = (dot < c0Point) ? 1 : 3; - else - s = (dot < c3Point) ? 2 : 0; - - pSelectors[i] = s; - - if (s != pSelectors[0]) - status = true; - } - - return status; -} - -static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) { - int min[3], max[3]; - - for (uint ch = 0; ch < 3; ch++) { - const uint8* bp = ((const uint8*)block) + ch; - int minv, maxv; - - int64 muv = bp[0]; - minv = maxv = bp[0]; - - const uint l = n << 2; - for (uint i = 4; i < l; i += 4) { - muv += bp[i]; - minv = math::minimum(minv, bp[i]); - maxv = math::maximum(maxv, bp[i]); - } - - ave_color[ch] = static_cast((muv + (n / 2)) / n); - min[ch] = minv; - max[ch] = maxv; - } - - if ((min[0] == max[0]) && (min[1] == max[1]) && (min[2] == max[2])) - return false; - - // determine covariance matrix - double cov[6]; - for (int i = 0; i < 6; i++) - cov[i] = 0; - - for (uint i = 0; i < n; i++) { - double r = (int)block[i].r - (int)ave_color[0]; - double g = (int)block[i].g - (int)ave_color[1]; - double b = (int)block[i].b - (int)ave_color[2]; - - cov[0] += r * r; - cov[1] += r * g; - cov[2] += r * 
b; - cov[3] += g * g; - cov[4] += g * b; - cov[5] += b * b; - } - - double covf[6], vfr, vfg, vfb; - for (int i = 0; i < 6; i++) - covf[i] = cov[i] * (1.0f / 255.0f); - - vfr = max[0] - min[0]; - vfg = max[1] - min[1]; - vfb = max[2] - min[2]; - - static const uint nIterPower = 4; - for (uint iter = 0; iter < nIterPower; iter++) { - double r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2]; - double g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4]; - double b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5]; - - vfr = r; - vfg = g; - vfb = b; - } - - double magn = math::maximum(math::maximum(fabs(vfr), fabs(vfg)), fabs(vfb)); - int v_r, v_g, v_b; - - if (magn < 4.0f) // too small, default to luminance - { - v_r = 148; - v_g = 300; - v_b = 58; - - axis[0] = (float)v_r; - axis[1] = (float)v_g; - axis[2] = (float)v_b; - } else { - magn = 512.0f / magn; - vfr *= magn; - vfg *= magn; - vfb *= magn; - v_r = static_cast(vfr); - v_g = static_cast(vfg); - v_b = static_cast(vfb); - - axis[0] = (float)vfr; - axis[1] = (float)vfg; - axis[2] = (float)vfb; - } - - int mind = block[0].r * v_r + block[0].g * v_g + block[0].b * v_b; - int maxd = mind; - color_quad_u8 minp(block[0]); - color_quad_u8 maxp(block[0]); - - for (uint i = 1; i < n; i++) { - int dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b; - - if (dot < mind) { - mind = dot; - minp = block[i]; - } - - if (dot > maxd) { - maxd = dot; - maxp = block[i]; - } - } - - max16 = pack_color(maxp); - min16 = pack_color(minp); - - return true; -} - -// The refinement function. (Clever code, part 2) -// Tries to optimize colors to suit block contents better. 
-// (By solving a least squares system via normal equations+Cramer's rule) -static bool refine_block(uint n, const color_quad_u8* block, uint& max16, uint& min16, const uint8* pSelectors) { - static const int w1Tab[4] = {3, 0, 2, 1}; - - static const int prods_0[4] = {0x00, 0x00, 0x02, 0x02}; - static const int prods_1[4] = {0x00, 0x09, 0x01, 0x04}; - static const int prods_2[4] = {0x09, 0x00, 0x04, 0x01}; - - double akku_0 = 0; - double akku_1 = 0; - double akku_2 = 0; - double At1_r, At1_g, At1_b; - double At2_r, At2_g, At2_b; - - At1_r = At1_g = At1_b = 0; - At2_r = At2_g = At2_b = 0; - for (uint i = 0; i < n; i++) { - double r = block[i].r; - double g = block[i].g; - double b = block[i].b; - int step = pSelectors[i]; - - int w1 = w1Tab[step]; - - akku_0 += prods_0[step]; - akku_1 += prods_1[step]; - akku_2 += prods_2[step]; - At1_r += w1 * r; - At1_g += w1 * g; - At1_b += w1 * b; - At2_r += r; - At2_g += g; - At2_b += b; - } - - At2_r = 3 * At2_r - At1_r; - At2_g = 3 * At2_g - At1_g; - At2_b = 3 * At2_b - At1_b; - - double xx = akku_2; - double yy = akku_1; - double xy = akku_0; - - double t = xx * yy - xy * xy; - if (!yy || !xx || (fabs(t) < .0000125f)) - return false; - - double frb = (3.0f * 31.0f / 255.0f) / t; - double fg = frb * (63.0f / 31.0f); - - uint oldMin = min16; - uint oldMax = max16; - - // solve. 
- max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; - max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; - max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; + } - min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; - min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; - min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; + // false if all selectors equal + static bool match_block_colors(uint n, const color_quad_u8* pBlock, const color_quad_u8* pColors, uint8* pSelectors) + { + int dirr = pColors[0].r - pColors[1].r; + int dirg = pColors[0].g - pColors[1].g; + int dirb = pColors[0].b - pColors[1].b; + + int stops[4]; + for (int i = 0; i < 4; i++) + { + stops[i] = pColors[i].r * dirr + pColors[i].g * dirg + pColors[i].b * dirb; + } - return (oldMin != min16) || (oldMax != max16); -} + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; + + //dirr *= 2; + //dirg *= 2; + //dirb *= 2; + c0Point >>= 1; + halfPoint >>= 1; + c3Point >>= 1; + + bool status = false; + for (uint i = 0; i < n; i++) + { + int dot = pBlock[i].r * dirr + pBlock[i].g * dirg + pBlock[i].b * dirb; + + uint8 s; + if (dot < halfPoint) + { + s = (dot < c0Point) ? 1 : 3; + } + else + { + s = (dot < c3Point) ? 
2 : 0; + } + + pSelectors[i] = s; + + if (s != pSelectors[0]) + { + status = true; + } + } -// false if all selectors equal -static bool determine_selectors(uint n, const color_quad_u8* block, uint min16, uint max16, uint8* pSelectors) { - color_quad_u8 color[4]; + return status; + } - if (max16 != min16) { - eval_colors(color, min16, max16); + static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) { + int min[3], max[3]; - return match_block_colors(n, block, color, pSelectors); - } + for (uint ch = 0; ch < 3; ch++) + { + const uint8* bp = ((const uint8*)block) + ch; + int minv, maxv; - memset(pSelectors, 0, n); - return false; -} + int64 muv = bp[0]; + minv = maxv = bp[0]; -static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) { - color_quad_u8 color[4]; + const uint l = n << 2; + for (uint i = 4; i < l; i += 4) + { + muv += bp[i]; + minv = math::minimum(minv, bp[i]); + maxv = math::maximum(maxv, bp[i]); + } - eval_colors(color, min16, max16); + ave_color[ch] = static_cast((muv + (n / 2)) / n); + min[ch] = minv; + max[ch] = maxv; + } - int dirr = color[0].r - color[1].r; - int dirg = color[0].g - color[1].g; - int dirb = color[0].b - color[1].b; + if ((min[0] == max[0]) && (min[1] == max[1]) && (min[2] == max[2])) + { + return false; + } - int stops[4]; - for (int i = 0; i < 4; i++) - stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb; + // determine covariance matrix + double cov[6]; + for (int i = 0; i < 6; i++) + { + cov[i] = 0; + } - // 0 2 3 1 - int c0Point = stops[1] + stops[3]; - int halfPoint = stops[3] + stops[2]; - int c3Point = stops[2] + stops[0]; + for (uint i = 0; i < n; i++) + { + double r = (int)block[i].r - (int)ave_color[0]; + double g = (int)block[i].g - (int)ave_color[1]; + double b = (int)block[i].b - (int)ave_color[2]; + + cov[0] += r * r; + cov[1] += r * g; + cov[2] += r * b; + cov[3] += g * 
g; + cov[4] += g * b; + cov[5] += b * b; + } - c0Point >>= 1; - halfPoint >>= 1; - c3Point >>= 1; + double covf[6], vfr, vfg, vfb; + for (int i = 0; i < 6; i++) + { + covf[i] = cov[i] * (1.0f / 255.0f); + } - uint64 total_error = 0; + vfr = max[0] - min[0]; + vfg = max[1] - min[1]; + vfb = max[2] - min[2]; - for (uint i = 0; i < n; i++) { - const color_quad_u8& a = block[i]; + static const uint nIterPower = 4; + for (uint iter = 0; iter < nIterPower; iter++) + { + double r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2]; + double g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4]; + double b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5]; - uint s = 0; - if (min16 != max16) { - int dot = a.r * dirr + a.g * dirg + a.b * dirb; + vfr = r; + vfg = g; + vfb = b; + } - if (dot < halfPoint) - s = (dot < c0Point) ? 1 : 3; - else - s = (dot < c3Point) ? 2 : 0; - } + double magn = math::maximum(math::maximum(fabs(vfr), fabs(vfg)), fabs(vfb)); + int v_r, v_g, v_b; - const color_quad_u8& b = color[s]; + if (magn < 4.0f) // too small, default to luminance + { + v_r = 148; + v_g = 300; + v_b = 58; - int e = a[0] - b[0]; - total_error += e * e; + axis[0] = (float)v_r; + axis[1] = (float)v_g; + axis[2] = (float)v_b; + } + else + { + magn = 512.0f / magn; + vfr *= magn; + vfg *= magn; + vfb *= magn; + v_r = static_cast(vfr); + v_g = static_cast(vfg); + v_b = static_cast(vfb); + + axis[0] = (float)vfr; + axis[1] = (float)vfg; + axis[2] = (float)vfb; + } - e = a[1] - b[1]; - total_error += e * e; + int mind = block[0].r * v_r + block[0].g * v_g + block[0].b * v_b; + int maxd = mind; + color_quad_u8 minp(block[0]); + color_quad_u8 maxp(block[0]); + + for (uint i = 1; i < n; i++) + { + int dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b; + + if (dot < mind) + { + mind = dot; + minp = block[i]; + } + + if (dot > maxd) + { + maxd = dot; + maxp = block[i]; + } + } - e = a[2] - b[2]; - total_error += e * e; + max16 = pack_color(maxp); + min16 = pack_color(minp); - if 
(total_error >= early_out_error) - break; - } + return true; + } - return total_error; -} + // The refinement function. (Clever code, part 2) + // Tries to optimize colors to suit block contents better. + // (By solving a least squares system via normal equations+Cramer's rule) + static bool refine_block(uint n, const color_quad_u8* block, uint& max16, uint& min16, const uint8* pSelectors) + { + static const int w1Tab[4] = { 3, 0, 2, 1 }; + + static const int prods_0[4] = { 0x00, 0x00, 0x02, 0x02 }; + static const int prods_1[4] = { 0x00, 0x09, 0x01, 0x04 }; + static const int prods_2[4] = { 0x09, 0x00, 0x04, 0x01 }; + + double akku_0 = 0; + double akku_1 = 0; + double akku_2 = 0; + double At1_r, At1_g, At1_b; + double At2_r, At2_g, At2_b; + + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for (uint i = 0; i < n; i++) + { + double r = block[i].r; + double g = block[i].g; + double b = block[i].b; + int step = pSelectors[i]; + + int w1 = w1Tab[step]; + + akku_0 += prods_0[step]; + akku_1 += prods_1[step]; + akku_2 += prods_2[step]; + At1_r += w1 * r; + At1_g += w1 * g; + At1_b += w1 * b; + At2_r += r; + At2_g += g; + At2_b += b; + } -static bool refine_endpoints(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors) { - bool optimized = false; + At2_r = 3 * At2_r - At1_r; + At2_g = 3 * At2_g - At1_g; + At2_b = 3 * At2_b - At1_b; - const int limits[3] = {31, 63, 31}; + double xx = akku_2; + double yy = akku_1; + double xy = akku_0; - for (uint trial = 0; trial < 2; trial++) { - color_quad_u8 color[4]; - eval_colors(color, low16, high16); + double t = xx * yy - xy * xy; + if (!yy || !xx || (fabs(t) < .0000125f)) + { + return false; + } - uint64 total_error[3] = {0, 0, 0}; + double frb = (3.0f * 31.0f / 255.0f) / t; + double fg = frb * (63.0f / 31.0f); - for (uint i = 0; i < n; i++) { - const color_quad_u8& a = pBlock[i]; + uint oldMin = min16; + uint oldMax = max16; - const uint s = pSelectors[i]; - const color_quad_u8& b = 
color[s]; + // solve. + max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; + max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; + max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; - int e = a[0] - b[0]; - total_error[0] += e * e; + min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; + min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; + min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; - e = a[1] - b[1]; - total_error[1] += e * e; + return (oldMin != min16) || (oldMax != max16); + } - e = a[2] - b[2]; - total_error[2] += e * e; - } + // false if all selectors equal + static bool determine_selectors(uint n, const color_quad_u8* block, uint min16, uint max16, uint8* pSelectors) + { + color_quad_u8 color[4]; - color_quad_u8 endpoints[2]; - endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); - endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + if (max16 != min16) + { + eval_colors(color, min16, max16); - color_quad_u8 expanded_endpoints[2]; - expanded_endpoints[0] = dxt1_block::unpack_color((uint16)low16, true); - expanded_endpoints[1] = dxt1_block::unpack_color((uint16)high16, true); + return match_block_colors(n, block, color, pSelectors); + } - bool trial_optimized = false; + memset(pSelectors, 0, n); + return false; + } - for (uint axis = 0; axis < 3; axis++) { - if (!total_error[axis]) - continue; + static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) { + color_quad_u8 color[4]; - const sU8* const pExpand = (axis == 1) ? 
ryg_dxt::Expand6 : ryg_dxt::Expand5; + eval_colors(color, min16, max16); - for (uint e = 0; e < 2; e++) { - uint v[4]; - v[e ^ 1] = expanded_endpoints[e ^ 1][axis]; + int dirr = color[0].r - color[1].r; + int dirg = color[0].g - color[1].g; + int dirb = color[0].b - color[1].b; - for (int t = -1; t <= 1; t += 2) { - int a = endpoints[e][axis] + t; - if ((a < 0) || (a > limits[axis])) - continue; + int stops[4]; + for (int i = 0; i < 4; i++) + { + stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb; + } - v[e] = pExpand[a]; + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; - //int delta = v[1] - v[0]; - //v[2] = v[0] + mul_8bit(delta, 0x55); - //v[3] = v[0] + mul_8bit(delta, 0xAA); + c0Point >>= 1; + halfPoint >>= 1; + c3Point >>= 1; - v[2] = (v[0] * 2 + v[1]) / 3; - v[3] = (v[0] + v[1] * 2) / 3; + uint64 total_error = 0; - uint64 axis_error = 0; + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = block[i]; - for (uint i = 0; i < n; i++) { - const color_quad_u8& p = pBlock[i]; + uint s = 0; + if (min16 != max16) + { + int dot = a.r * dirr + a.g * dirg + a.b * dirb; - int e = v[pSelectors[i]] - p[axis]; + if (dot < halfPoint) + { + s = (dot < c0Point) ? 1 : 3; + } + else + { + s = (dot < c3Point) ? 
2 : 0; + } + } - axis_error += e * e; + const color_quad_u8& b = color[s]; - if (axis_error >= total_error[axis]) - break; - } + int e = a[0] - b[0]; + total_error += e * e; - if (axis_error < total_error[axis]) { - //total_error[axis] = axis_error; + e = a[1] - b[1]; + total_error += e * e; - endpoints[e][axis] = (uint8)a; - expanded_endpoints[e][axis] = (uint8)v[e]; + e = a[2] - b[2]; + total_error += e * e; - if (e) - high16 = dxt1_block::pack_color(endpoints[1], false); - else - low16 = dxt1_block::pack_color(endpoints[0], false); + if (total_error >= early_out_error) + { + break; + } + } - determine_selectors(n, pBlock, low16, high16, pSelectors); + return total_error; + } - eval_colors(color, low16, high16); + static bool refine_endpoints(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors) + { + bool optimized = false; - utils::zero_object(total_error); + const int limits[3] = { 31, 63, 31 }; - for (uint i = 0; i < n; i++) { - const color_quad_u8& a = pBlock[i]; + for (uint trial = 0; trial < 2; trial++) + { + color_quad_u8 color[4]; + eval_colors(color, low16, high16); - const uint s = pSelectors[i]; - const color_quad_u8& b = color[s]; + uint64 total_error[3] = { 0, 0, 0 }; - int e = a[0] - b[0]; - total_error[0] += e * e; + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = pBlock[i]; - e = a[1] - b[1]; - total_error[1] += e * e; + const uint s = pSelectors[i]; + const color_quad_u8& b = color[s]; - e = a[2] - b[2]; - total_error[2] += e * e; - } + int e = a[0] - b[0]; + total_error[0] += e * e; + + e = a[1] - b[1]; + total_error[1] += e * e; + + e = a[2] - b[2]; + total_error[2] += e * e; + } + + color_quad_u8 endpoints[2]; + endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); + endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + + color_quad_u8 expanded_endpoints[2]; + expanded_endpoints[0] = dxt1_block::unpack_color((uint16)low16, true); + expanded_endpoints[1] = 
dxt1_block::unpack_color((uint16)high16, true); + + bool trial_optimized = false; + + for (uint axis = 0; axis < 3; axis++) + { + if (!total_error[axis]) + { + continue; + } + + const sU8* const pExpand = (axis == 1) ? ryg_dxt::Expand6 : ryg_dxt::Expand5; + + for (uint e = 0; e < 2; e++) + { + uint v[4]; + v[e ^ 1] = expanded_endpoints[e ^ 1][axis]; + + for (int t = -1; t <= 1; t += 2) + { + int a = endpoints[e][axis] + t; + if ((a < 0) || (a > limits[axis])) + { + continue; + } + + v[e] = pExpand[a]; + + //int delta = v[1] - v[0]; + //v[2] = v[0] + mul_8bit(delta, 0x55); + //v[3] = v[0] + mul_8bit(delta, 0xAA); + + v[2] = (v[0] * 2 + v[1]) / 3; + v[3] = (v[0] + v[1] * 2) / 3; + + uint64 axis_error = 0; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& p = pBlock[i]; + + int e = v[pSelectors[i]] - p[axis]; + + axis_error += e * e; + + if (axis_error >= total_error[axis]) + { + break; + } + } - trial_optimized = true; - } + if (axis_error < total_error[axis]) + { + //total_error[axis] = axis_error; - } // t + endpoints[e][axis] = (uint8)a; + expanded_endpoints[e][axis] = (uint8)v[e]; - } // e - } // axis + if (e) + { + high16 = dxt1_block::pack_color(endpoints[1], false); + } + else + { + low16 = dxt1_block::pack_color(endpoints[0], false); + } - if (!trial_optimized) - break; + determine_selectors(n, pBlock, low16, high16, pSelectors); - optimized = true; + eval_colors(color, low16, high16); - } // for ( ; ; ) + utils::zero_object(total_error); - return optimized; -} + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = pBlock[i]; -static void refine_endpoints2(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors, float axis[3]) { - uint64 orig_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); - if (!orig_error) - return; + const uint s = pSelectors[i]; + const color_quad_u8& b = color[s]; - float l = 1.0f / sqrt(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); - vec3F principle_axis(axis[0] * 
l, axis[1] * l, axis[2] * l); + int e = a[0] - b[0]; + total_error[0] += e * e; - const float dist_per_trial = 0.027063293f; + e = a[1] - b[1]; + total_error[1] += e * e; - const uint cMaxProbeRange = 8; - uint probe_low[cMaxProbeRange * 2 + 1]; - uint probe_high[cMaxProbeRange * 2 + 1]; + e = a[2] - b[2]; + total_error[2] += e * e; + } - int probe_range = 8; - uint num_iters = 4; + trial_optimized = true; + } - const uint num_trials = probe_range * 2 + 1; + } // t - vec3F scaled_principle_axis(principle_axis * dist_per_trial); - scaled_principle_axis[0] *= 31.0f; - scaled_principle_axis[1] *= 63.0f; - scaled_principle_axis[2] *= 31.0f; - vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); - initial_ofs[0] += .5f; - initial_ofs[1] += .5f; - initial_ofs[2] += .5f; + } // e + } // axis - uint64 cur_error = orig_error; + if (!trial_optimized) + { + break; + } - for (uint iter = 0; iter < num_iters; iter++) { - color_quad_u8 endpoints[2]; + optimized = true; - endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); - endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + } // for ( ; ; ) - vec3F low_color(endpoints[0][0], endpoints[0][1], endpoints[0][2]); - vec3F high_color(endpoints[1][0], endpoints[1][1], endpoints[1][2]); + return optimized; + } + + static void refine_endpoints2(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors, float axis[3]) + { + uint64 orig_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); + if (!orig_error) + { + return; + } + + float l = 1.0f / sqrt(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); + vec3F principle_axis(axis[0] * l, axis[1] * l, axis[2] * l); + + const float dist_per_trial = 0.027063293f; + + const uint cMaxProbeRange = 8; + uint probe_low[cMaxProbeRange * 2 + 1]; + uint probe_high[cMaxProbeRange * 2 + 1]; + + int probe_range = 8; + uint num_iters = 4; + + const uint num_trials = probe_range * 2 + 1; + + vec3F 
scaled_principle_axis(principle_axis * dist_per_trial); + scaled_principle_axis[0] *= 31.0f; + scaled_principle_axis[1] *= 63.0f; + scaled_principle_axis[2] *= 31.0f; + vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); + initial_ofs[0] += .5f; + initial_ofs[1] += .5f; + initial_ofs[2] += .5f; + + uint64 cur_error = orig_error; + + for (uint iter = 0; iter < num_iters; iter++) + { + color_quad_u8 endpoints[2]; + + endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); + endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + + vec3F low_color(endpoints[0][0], endpoints[0][1], endpoints[0][2]); + vec3F high_color(endpoints[1][0], endpoints[1][1], endpoints[1][2]); + + vec3F probe_low_color(low_color + initial_ofs); + for (uint i = 0; i < num_trials; i++) + { + int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); + probe_low[i] = b | (g << 5U) | (r << 11U); + + probe_low_color += scaled_principle_axis; + } + + vec3F probe_high_color(high_color + initial_ofs); + for (uint i = 0; i < num_trials; i++) + { + int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); + probe_high[i] = b | (g << 5U) | (r << 11U); + + probe_high_color += scaled_principle_axis; + } + + uint best_l = low16; + uint best_h = high16; + + enum { cMaxHash = 4 }; + uint64 hash[cMaxHash]; + for (uint i = 0; i < cMaxHash; i++) + { + hash[i] = 0; + } + + uint c = best_l | (best_h << 16); + c = fast_hash(&c, sizeof(c)); + hash[(c >> 6) & 3] = 1ULL << (c & 63); + + for (uint i = 0; i < num_trials; i++) + { + for (uint j = 0; j < num_trials; j++) + { + uint l = probe_low[i]; + uint h = probe_high[j]; + if (l < h) + { + utils::swap(l, h); + } + + uint c = l | (h << 16); + c = fast_hash(&c, sizeof(c)); + uint64 mask = 
1ULL << (c & 63); + uint ofs = (c >> 6) & 3; + if (hash[ofs] & mask) + { + continue; + } + + hash[ofs] |= mask; + + uint64 new_error = determine_error(n, pBlock, l, h, cur_error); + if (new_error < cur_error) + { + best_l = l; + best_h = h; + cur_error = new_error; + } + } + } + + bool improved = false; + + if ((best_l != low16) || (best_h != high16)) + { + low16 = best_l; + high16 = best_h; + + determine_selectors(n, pBlock, low16, high16, pSelectors); + improved = true; + } + + if (refine_endpoints(n, pBlock, low16, high16, pSelectors)) + { + improved = true; + + uint64 cur_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); + if (!cur_error) + { + return; + } + } + + if (!improved) + { + break; + } + + } // iter + + //uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); + //if (end_error > orig_error) DebugBreak(); + } + + static void compress_solid_block(uint n, uint ave_color[3], uint& low16, uint& high16, uint8* pSelectors) + { + uint r = ave_color[0]; + uint g = ave_color[1]; + uint b = ave_color[2]; + + memset(pSelectors, 2, n); + + low16 = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; + high16 = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; + } + + void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine) + { + CRNLIB_ASSERT((n & 15) == 0); + + uint ave_color[3]; + float axis[3]; + + if (!optimize_block_colors(n, block, low16, high16, ave_color, axis)) + { + compress_solid_block(n, ave_color, low16, high16, pSelectors); + } + else + { + if (!determine_selectors(n, block, low16, high16, pSelectors)) + { + compress_solid_block(n, ave_color, low16, high16, pSelectors); + } + else + { + if (refine_block(n, block, low16, high16, pSelectors)) + { + determine_selectors(n, block, low16, high16, pSelectors); + } + + if (refine) + { + refine_endpoints2(n, block, low16, high16, 
pSelectors, axis); + } + } + } + + if (low16 < high16) + { + utils::swap(low16, high16); + for (uint i = 0; i < n; i++) + { + pSelectors[i] ^= 1; + } + } + } - vec3F probe_low_color(low_color + initial_ofs); - for (uint i = 0; i < num_trials; i++) { - int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); - int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); - int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); - probe_low[i] = b | (g << 5U) | (r << 11U); + void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine) + { + uint8 color_selectors[16]; + uint low16, high16; + dxt_fast::compress_color_block(16, pBlock, low16, high16, color_selectors, refine); - probe_low_color += scaled_principle_axis; - } + pDXT1_block->set_low_color(static_cast(low16)); + pDXT1_block->set_high_color(static_cast(high16)); - vec3F probe_high_color(high_color + initial_ofs); - for (uint i = 0; i < num_trials; i++) { - int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); - int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); - int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); - probe_high[i] = b | (g << 5U) | (r << 11U); + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + mask |= color_selectors[i]; + } + + pDXT1_block->m_selectors[0] = (uint8)(mask & 0xFF); + pDXT1_block->m_selectors[1] = (uint8)((mask >> 8) & 0xFF); + pDXT1_block->m_selectors[2] = (uint8)((mask >> 16) & 0xFF); + pDXT1_block->m_selectors[3] = (uint8)((mask >> 24) & 0xFF); + } - probe_high_color += scaled_principle_axis; - } + void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index) + { + int min, max; + min = max = block[0][comp_index]; - uint best_l = low16; - uint best_h = high16; + for (uint i = 1; i < n; i++) + { + min = math::minimum(min, block[i][comp_index]); + max = math::maximum(max, block[i][comp_index]); + } - enum { cMaxHash = 4 }; - uint64 
hash[cMaxHash]; - for (uint i = 0; i < cMaxHash; i++) - hash[i] = 0; + low8 = max; + high8 = min; - uint c = best_l | (best_h << 16); - c = fast_hash(&c, sizeof(c)); - hash[(c >> 6) & 3] = 1ULL << (c & 63); + int dist = max - min; + int bias = min * 7 - (dist >> 1); + int dist4 = dist * 4; + int dist2 = dist * 2; - for (uint i = 0; i < num_trials; i++) { - for (uint j = 0; j < num_trials; j++) { - uint l = probe_low[i]; - uint h = probe_high[j]; - if (l < h) - utils::swap(l, h); + for (uint i = 0; i < n; i++) + { + int a = block[i][comp_index] * 7 - bias; + int ind, t; - uint c = l | (h << 16); - c = fast_hash(&c, sizeof(c)); - uint64 mask = 1ULL << (c & 63); - uint ofs = (c >> 6) & 3; - if (hash[ofs] & mask) - continue; + t = (dist4 - a) >> 31; + ind = t & 4; + a -= dist4 & t; + t = (dist2 - a) >> 31; + ind += t & 2; + a -= dist2 & t; + t = (dist - a) >> 31; + ind += t & 1; - hash[ofs] |= mask; + ind = -ind & 7; + ind ^= (2 > ind); - uint64 new_error = determine_error(n, pBlock, l, h, cur_error); - if (new_error < cur_error) { - best_l = l; - best_h = h; - cur_error = new_error; + pSelectors[i] = static_cast(ind); + } } - } - } - - bool improved = false; - - if ((best_l != low16) || (best_h != high16)) { - low16 = best_l; - high16 = best_h; - - determine_selectors(n, pBlock, low16, high16, pSelectors); - improved = true; - } - - if (refine_endpoints(n, pBlock, low16, high16, pSelectors)) { - improved = true; - - uint64 cur_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); - if (!cur_error) - return; - } - - if (!improved) - break; - - } // iter - - //uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); - //if (end_error > orig_error) DebugBreak(); -} - -static void compress_solid_block(uint n, uint ave_color[3], uint& low16, uint& high16, uint8* pSelectors) { - uint r = ave_color[0]; - uint g = ave_color[1]; - uint b = ave_color[2]; - - memset(pSelectors, 2, n); - - low16 = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] 
<< 5) | ryg_dxt::OMatch5[b][0]; - high16 = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; -} - -void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine) { - CRNLIB_ASSERT((n & 15) == 0); - - uint ave_color[3]; - float axis[3]; - - if (!optimize_block_colors(n, block, low16, high16, ave_color, axis)) { - compress_solid_block(n, ave_color, low16, high16, pSelectors); - } else { - if (!determine_selectors(n, block, low16, high16, pSelectors)) - compress_solid_block(n, ave_color, low16, high16, pSelectors); - else { - if (refine_block(n, block, low16, high16, pSelectors)) - determine_selectors(n, block, low16, high16, pSelectors); - - if (refine) - refine_endpoints2(n, block, low16, high16, pSelectors, axis); - } - } - - if (low16 < high16) { - utils::swap(low16, high16); - for (uint i = 0; i < n; i++) - pSelectors[i] ^= 1; - } -} - -void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine) { - uint8 color_selectors[16]; - uint low16, high16; - dxt_fast::compress_color_block(16, pBlock, low16, high16, color_selectors, refine); - - pDXT1_block->set_low_color(static_cast(low16)); - pDXT1_block->set_high_color(static_cast(high16)); - - uint mask = 0; - for (int i = 15; i >= 0; i--) { - mask <<= 2; - mask |= color_selectors[i]; - } - - pDXT1_block->m_selectors[0] = (uint8)(mask & 0xFF); - pDXT1_block->m_selectors[1] = (uint8)((mask >> 8) & 0xFF); - pDXT1_block->m_selectors[2] = (uint8)((mask >> 16) & 0xFF); - pDXT1_block->m_selectors[3] = (uint8)((mask >> 24) & 0xFF); -} - -void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index) { - int min, max; - min = max = block[0][comp_index]; - - for (uint i = 1; i < n; i++) { - min = math::minimum(min, block[i][comp_index]); - max = math::maximum(max, block[i][comp_index]); - } - - low8 = max; - high8 = min; - - int dist = max - 
min; - int bias = min * 7 - (dist >> 1); - int dist4 = dist * 4; - int dist2 = dist * 2; - - for (uint i = 0; i < n; i++) { - int a = block[i][comp_index] * 7 - bias; - int ind, t; - - t = (dist4 - a) >> 31; - ind = t & 4; - a -= dist4 & t; - t = (dist2 - a) >> 31; - ind += t & 2; - a -= dist2 & t; - t = (dist - a) >> 31; - ind += t & 1; - - ind = -ind & 7; - ind ^= (2 > ind); - - pSelectors[i] = static_cast(ind); - } -} - -void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index) { - uint8 selectors[16]; - uint low8, high8; - - compress_alpha_block(16, pBlock, low8, high8, selectors, comp_index); - - pDXT5_block->set_low_alpha(low8); - pDXT5_block->set_high_alpha(high8); - - uint mask = 0; - uint bits = 0; - uint8* pDst = pDXT5_block->m_selectors; - - for (uint i = 0; i < 16; i++) { - mask |= (selectors[i] << bits); - - if ((bits += 3) >= 8) { - *pDst++ = static_cast(mask); - mask >>= 8; - bits -= 8; - } - } -} - -void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi) { - uint64 ave64[3]; - ave64[0] = 0; - ave64[1] = 0; - ave64[2] = 0; - - for (uint i = 0; i < n; i++) { - ave64[0] += pBlock[i].r; - ave64[1] += pBlock[i].g; - ave64[2] += pBlock[i].b; - } - - uint ave[3]; - ave[0] = static_cast((ave64[0] + (n / 2)) / n); - ave[1] = static_cast((ave64[1] + (n / 2)) / n); - ave[2] = static_cast((ave64[2] + (n / 2)) / n); - - int furthest_dist = -1; - uint furthest_index = 0; - for (uint i = 0; i < n; i++) { - int r = pBlock[i].r - ave[0]; - int g = pBlock[i].g - ave[1]; - int b = pBlock[i].b - ave[2]; - int dist = r * r + g * g + b * b; - if (dist > furthest_dist) { - furthest_dist = dist; - furthest_index = i; - } - } - - color_quad_u8 lo_color(pBlock[furthest_index]); - - int opp_dist = -1; - uint opp_index = 0; - for (uint i = 0; i < n; i++) { - int r = pBlock[i].r - lo_color.r; - int g = pBlock[i].g - lo_color.g; - int b = pBlock[i].b - lo_color.b; - int dist = r * r + g * g 
+ b * b; - if (dist > opp_dist) { - opp_dist = dist; - opp_index = i; - } - } - - color_quad_u8 hi_color(pBlock[opp_index]); - - for (uint i = 0; i < 3; i++) { - lo_color[i] = static_cast((lo_color[i] + ave[i]) >> 1); - hi_color[i] = static_cast((hi_color[i] + ave[i]) >> 1); - } - - const uint cMaxIters = 4; - for (uint iter_index = 0; iter_index < cMaxIters; iter_index++) { - if ((lo_color[0] == hi_color[0]) && (lo_color[1] == hi_color[1]) && (lo_color[2] == hi_color[2])) - break; - - uint64 new_color[2][3]; - uint weight[2]; - - utils::zero_object(new_color); - utils::zero_object(weight); - - int vec_r = hi_color[0] - lo_color[0]; - int vec_g = hi_color[1] - lo_color[1]; - int vec_b = hi_color[2] - lo_color[2]; - - int lo_dot = vec_r * lo_color[0] + vec_g * lo_color[1] + vec_b * lo_color[2]; - int hi_dot = vec_r * hi_color[0] + vec_g * hi_color[1] + vec_b * hi_color[2]; - int mid_dot = lo_dot + hi_dot; - - vec_r *= 2; - vec_g *= 2; - vec_b *= 2; - - for (uint i = 0; i < n; i++) { - const color_quad_u8& c = pBlock[i]; - - const int dot = c[0] * vec_r + c[1] * vec_g + c[2] * vec_b; - const uint match_index = (dot > mid_dot); - - new_color[match_index][0] += c.r; - new_color[match_index][1] += c.g; - new_color[match_index][2] += c.b; - weight[match_index]++; - } - - if ((!weight[0]) || (!weight[1])) - break; - - uint8 new_color8[2][3]; - - for (uint j = 0; j < 2; j++) - for (uint i = 0; i < 3; i++) - new_color8[j][i] = static_cast((new_color[j][i] + (weight[j] / 2)) / weight[j]); - - if ((new_color8[0][0] == lo_color[0]) && (new_color8[0][1] == lo_color[1]) && (new_color8[0][2] == lo_color[2]) && - (new_color8[1][0] == hi_color[0]) && (new_color8[1][1] == hi_color[1]) && (new_color8[1][2] == hi_color[2])) - break; - - for (uint i = 0; i < 3; i++) { - lo_color[i] = new_color8[0][i]; - hi_color[i] = new_color8[1][i]; - } - } - - uint energy[2] = {0, 0}; - for (uint i = 0; i < 3; i++) { - energy[0] += lo_color[i] * lo_color[i]; - energy[1] += hi_color[i] * hi_color[i]; 
- } - - if (energy[0] > energy[1]) - utils::swap(lo_color, hi_color); - - lo = lo_color; - hi = hi_color; -} - -} // namespace dxt_fast + void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index) + { + uint8 selectors[16]; + uint low8, high8; + + compress_alpha_block(16, pBlock, low8, high8, selectors, comp_index); + + pDXT5_block->set_low_alpha(low8); + pDXT5_block->set_high_alpha(high8); + + uint mask = 0; + uint bits = 0; + uint8* pDst = pDXT5_block->m_selectors; + + for (uint i = 0; i < 16; i++) + { + mask |= (selectors[i] << bits); + + if ((bits += 3) >= 8) + { + *pDst++ = static_cast(mask); + mask >>= 8; + bits -= 8; + } + } + } + + void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi) + { + uint64 ave64[3]; + ave64[0] = 0; + ave64[1] = 0; + ave64[2] = 0; + + for (uint i = 0; i < n; i++) + { + ave64[0] += pBlock[i].r; + ave64[1] += pBlock[i].g; + ave64[2] += pBlock[i].b; + } + + uint ave[3]; + ave[0] = static_cast((ave64[0] + (n / 2)) / n); + ave[1] = static_cast((ave64[1] + (n / 2)) / n); + ave[2] = static_cast((ave64[2] + (n / 2)) / n); + + int furthest_dist = -1; + uint furthest_index = 0; + for (uint i = 0; i < n; i++) + { + int r = pBlock[i].r - ave[0]; + int g = pBlock[i].g - ave[1]; + int b = pBlock[i].b - ave[2]; + int dist = r * r + g * g + b * b; + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest_index = i; + } + } + + color_quad_u8 lo_color(pBlock[furthest_index]); + + int opp_dist = -1; + uint opp_index = 0; + for (uint i = 0; i < n; i++) + { + int r = pBlock[i].r - lo_color.r; + int g = pBlock[i].g - lo_color.g; + int b = pBlock[i].b - lo_color.b; + int dist = r * r + g * g + b * b; + if (dist > opp_dist) + { + opp_dist = dist; + opp_index = i; + } + } + + color_quad_u8 hi_color(pBlock[opp_index]); + + for (uint i = 0; i < 3; i++) + { + lo_color[i] = static_cast((lo_color[i] + ave[i]) >> 1); + hi_color[i] = static_cast((hi_color[i] + 
ave[i]) >> 1); + } + + const uint cMaxIters = 4; + for (uint iter_index = 0; iter_index < cMaxIters; iter_index++) + { + if ((lo_color[0] == hi_color[0]) && (lo_color[1] == hi_color[1]) && (lo_color[2] == hi_color[2])) + { + break; + } + + uint64 new_color[2][3]; + uint weight[2]; + + utils::zero_object(new_color); + utils::zero_object(weight); + + int vec_r = hi_color[0] - lo_color[0]; + int vec_g = hi_color[1] - lo_color[1]; + int vec_b = hi_color[2] - lo_color[2]; + + int lo_dot = vec_r * lo_color[0] + vec_g * lo_color[1] + vec_b * lo_color[2]; + int hi_dot = vec_r * hi_color[0] + vec_g * hi_color[1] + vec_b * hi_color[2]; + int mid_dot = lo_dot + hi_dot; + + vec_r *= 2; + vec_g *= 2; + vec_b *= 2; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = pBlock[i]; + + const int dot = c[0] * vec_r + c[1] * vec_g + c[2] * vec_b; + const uint match_index = (dot > mid_dot); + + new_color[match_index][0] += c.r; + new_color[match_index][1] += c.g; + new_color[match_index][2] += c.b; + weight[match_index]++; + } + + if ((!weight[0]) || (!weight[1])) + { + break; + } + + uint8 new_color8[2][3]; + + for (uint j = 0; j < 2; j++) + { + for (uint i = 0; i < 3; i++) + { + new_color8[j][i] = static_cast((new_color[j][i] + (weight[j] / 2)) / weight[j]); + } + } + + if ((new_color8[0][0] == lo_color[0]) && (new_color8[0][1] == lo_color[1]) && (new_color8[0][2] == lo_color[2]) && + (new_color8[1][0] == hi_color[0]) && (new_color8[1][1] == hi_color[1]) && (new_color8[1][2] == hi_color[2])) + { + break; + } + + for (uint i = 0; i < 3; i++) + { + lo_color[i] = new_color8[0][i]; + hi_color[i] = new_color8[1][i]; + } + } + + uint energy[2] = { 0, 0 }; + for (uint i = 0; i < 3; i++) + { + energy[0] += lo_color[i] * lo_color[i]; + energy[1] += hi_color[i] * hi_color[i]; + } + + if (energy[0] > energy[1]) + { + utils::swap(lo_color, hi_color); + } + + lo = lo_color; + hi = hi_color; + } + } // namespace dxt_fast } // namespace crnlib diff --git a/crnlib/crn_dxt_fast.h 
b/crnlib/crn_dxt_fast.h index 8da2e8a..46e972b 100644 --- a/crnlib/crn_dxt_fast.h +++ b/crnlib/crn_dxt_fast.h @@ -1,20 +1,22 @@ // File: crn_dxt_fast.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_color.h" #include "crn_dxt.h" #include "crn_export.h" -namespace crnlib { -namespace dxt_fast { - CRN_EXPORT void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine = false); - CRN_EXPORT void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine = false); - - CRN_EXPORT void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index); - CRN_EXPORT void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index); - - CRN_EXPORT void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi); +namespace crnlib +{ + namespace dxt_fast + { + CRN_EXPORT void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine = false); + CRN_EXPORT void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine = false); -} // namespace dxt_fast + CRN_EXPORT void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index); + CRN_EXPORT void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index); + CRN_EXPORT void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi); + } // namespace dxt_fast } // namespace crnlib diff --git a/crnlib/crn_dxt_hc_common.cpp b/crnlib/crn_dxt_hc_common.cpp index d43bb9f..e7f19bc 100644 --- a/crnlib/crn_dxt_hc_common.cpp +++ b/crnlib/crn_dxt_hc_common.cpp @@ -41,5 +41,4 @@ namespace crnlib {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8} }; - } // namespace crnlib diff --git 
a/crnlib/crn_dxt_hc_common.h b/crnlib/crn_dxt_hc_common.h index 33e4fd2..6e46ecd 100644 --- a/crnlib/crn_dxt_hc_common.h +++ b/crnlib/crn_dxt_hc_common.h @@ -42,5 +42,4 @@ namespace crnlib const uint cNumChunkTileLayouts = 9; const uint cFirst4x4ChunkTileLayout = 5; CRN_EXPORT extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; - } // namespace crnlib diff --git a/crnlib/crn_dynamic_stream.h b/crnlib/crn_dynamic_stream.h index 5a3c832..577dbbe 100644 --- a/crnlib/crn_dynamic_stream.h +++ b/crnlib/crn_dynamic_stream.h @@ -1,184 +1,235 @@ // File: crn_dynamic_stream.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once -#include "crn_data_stream.h" +#include "crn_data_stream.h" #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT dynamic_stream : public data_stream { - public: - dynamic_stream(uint initial_size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) - : data_stream(pName, attribs), - m_ofs(0) { - open(initial_size, pName, attribs); - } - - dynamic_stream(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) - : data_stream(pName, attribs), - m_ofs(0) { - open(pBuf, size, pName, attribs); - } - - dynamic_stream() - : data_stream(), - m_ofs(0) { - open(); - } - - virtual ~dynamic_stream() { - } - - bool open(uint initial_size = 0, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) { - close(); - - m_opened = true; - m_buf.clear(); - m_buf.resize(initial_size); - m_ofs = 0; - m_name.set(pName ? pName : "dynamic_stream"); - m_attribs = static_cast(attribs); - return true; - } - - bool reopen(const char* pName, uint attribs) { - if (!m_opened) { - return open(0, pName, attribs); - } - - m_name.set(pName ? 
pName : "dynamic_stream"); - m_attribs = static_cast(attribs); - return true; - } - - bool open(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) { - if (!m_opened) { - m_opened = true; - m_buf.resize(size); - if (size) { - CRNLIB_ASSERT(pBuf); - memcpy(&m_buf[0], pBuf, size); - } - m_ofs = 0; - m_name.set(pName ? pName : "dynamic_stream"); - m_attribs = static_cast(attribs); - return true; - } - - return false; - } - - virtual bool close() { - if (m_opened) { - m_opened = false; - m_buf.clear(); - m_ofs = 0; - return true; - } - - return false; - } - - const crnlib::vector& get_buf() const { return m_buf; } - crnlib::vector& get_buf() { return m_buf; } - - void reserve(uint size) { - if (m_opened) { - m_buf.reserve(size); - } - } - - virtual const void* get_ptr() const { return m_buf.empty() ? NULL : &m_buf[0]; } - - virtual uint read(void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if ((!m_opened) || (!is_readable()) || (!len)) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_buf.size()); - - uint bytes_left = m_buf.size() - m_ofs; - - len = math::minimum(len, bytes_left); - - if (len) - memcpy(pBuf, &m_buf[m_ofs], len); - - m_ofs += len; - - return len; - } - - virtual uint write(const void* pBuf, uint len) { - CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); - - if ((!m_opened) || (!is_writable()) || (!len)) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_buf.size()); - - uint new_ofs = m_ofs + len; - if (new_ofs > m_buf.size()) - m_buf.resize(new_ofs); - - memcpy(&m_buf[m_ofs], pBuf, len); - m_ofs = new_ofs; - - return len; - } - - virtual bool flush() { - if (!m_opened) - return false; - - return true; - } - - virtual uint64 get_size() { - if (!m_opened) - return 0; - - return m_buf.size(); - } - - virtual uint64 get_remaining() { - if (!m_opened) - return 0; - - CRNLIB_ASSERT(m_ofs <= m_buf.size()); - - return m_buf.size() - m_ofs; - } - - virtual uint64 
get_ofs() { - if (!m_opened) - return 0; - - return m_ofs; - } - - virtual bool seek(int64 ofs, bool relative) { - if ((!m_opened) || (!is_seekable())) - return false; - - int64 new_ofs = relative ? (m_ofs + ofs) : ofs; - - if (new_ofs < 0) - return false; - else if (new_ofs > m_buf.size()) - return false; - - m_ofs = static_cast(new_ofs); +namespace crnlib +{ + class CRN_EXPORT dynamic_stream : public data_stream + { + public: + dynamic_stream(uint initial_size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable): data_stream(pName, attribs), + m_ofs(0) + { + open(initial_size, pName, attribs); + } + + dynamic_stream(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : data_stream(pName, attribs), + m_ofs(0) + { + open(pBuf, size, pName, attribs); + } + + dynamic_stream() : data_stream(), + m_ofs(0) + { + open(); + } + + virtual ~dynamic_stream() + { + } + + bool open(uint initial_size = 0, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) + { + close(); + + m_opened = true; + m_buf.clear(); + m_buf.resize(initial_size); + m_ofs = 0; + m_name.set(pName ? pName : "dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + bool reopen(const char* pName, uint attribs) + { + if (!m_opened) + { + return open(0, pName, attribs); + } + + m_name.set(pName ? pName : "dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + bool open(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) + { + if (!m_opened) + { + m_opened = true; + m_buf.resize(size); + if (size) + { + CRNLIB_ASSERT(pBuf); + memcpy(&m_buf[0], pBuf, size); + } + m_ofs = 0; + m_name.set(pName ? 
pName : "dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + return false; + } + + virtual bool close() + { + if (m_opened) + { + m_opened = false; + m_buf.clear(); + m_ofs = 0; + return true; + } + + return false; + } + + const crnlib::vector& get_buf() const + { + return m_buf; + } + crnlib::vector& get_buf() + { + return m_buf; + } + + void reserve(uint size) + { + if (m_opened) + { + m_buf.reserve(size); + } + } + + virtual const void* get_ptr() const + { + return m_buf.empty() ? NULL : &m_buf[0]; + } + + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_readable()) || (!len)) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + uint bytes_left = m_buf.size() - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + { + memcpy(pBuf, &m_buf[m_ofs], len); + } + + m_ofs += len; + + return len; + } + + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_writable()) || (!len)) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + uint new_ofs = m_ofs + len; + if (new_ofs > m_buf.size()) + { + m_buf.resize(new_ofs); + } + + memcpy(&m_buf[m_ofs], pBuf, len); + m_ofs = new_ofs; + + return len; + } + + virtual bool flush() + { + if (!m_opened) + { + return false; + } + + return true; + } + + virtual uint64 get_size() + { + if (!m_opened) + { + return 0; + } + + return m_buf.size(); + } + + virtual uint64 get_remaining() + { + if (!m_opened) + { + return 0; + } + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + return m_buf.size() - m_ofs; + } + + virtual uint64 get_ofs() + { + if (!m_opened) + { + return 0; + } + + return m_ofs; + } + + virtual bool seek(int64 ofs, bool relative) + { + if ((!m_opened) || (!is_seekable())) + { + return false; + } - post_seek(); + int64 new_ofs = relative ? 
(m_ofs + ofs) : ofs; - return true; - } + if (new_ofs < 0) + { + return false; + } + else if (new_ofs > m_buf.size()) + { + return false; + } - private: - crnlib::vector m_buf; - uint m_ofs; -}; + m_ofs = static_cast(new_ofs); + post_seek(); + + return true; + } + + private: + crnlib::vector m_buf; + uint m_ofs; + }; } // namespace crnlib diff --git a/crnlib/crn_intersect.h b/crnlib/crn_intersect.h index f4d3718..74c3617 100644 --- a/crnlib/crn_intersect.h +++ b/crnlib/crn_intersect.h @@ -1,104 +1,136 @@ // File: crn_intersect.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_ray.h" -namespace crnlib { -namespace intersection { -enum result { - cBackfacing = -1, - cFailure = 0, - cSuccess, - cParallel, - cInside, -}; - -// Returns cInside, cSuccess, or cFailure. -// Algorithm: Graphics Gems 1 -template -result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { - enum { - cNumDim = vector_type::num_elements, - cRight = 0, - cLeft = 1, - cMiddle = 2 - }; - - bool inside = true; - int quadrant[cNumDim]; - scalar_type candidate_plane[cNumDim]; - - for (int i = 0; i < cNumDim; i++) { - if (ray.get_origin()[i] < box[0][i]) { - quadrant[i] = cLeft; - candidate_plane[i] = box[0][i]; - inside = false; - } else if (ray.get_origin()[i] > box[1][i]) { - quadrant[i] = cRight; - candidate_plane[i] = box[1][i]; - inside = false; - } else { - quadrant[i] = cMiddle; +namespace crnlib +{ + namespace intersection + { + enum result { + cBackfacing = -1, + cFailure = 0, + cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. 
+ // Algorithm: Graphics Gems 1 + template + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + { + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + } + else + { + max_t[i] = -1.0f; + } + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + { + if (max_t[which_plane] < max_t[i]) + { + which_plane = i; + } + } + if (max_t[which_plane] < 0.0f) + { + return cFailure; + } + + for (int i = 0; i < cNumDim; i++) + { + if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) + { + return cFailure; + } + } + else + { + coord[i] = candidate_plane[i]; + } + + CRNLIB_ASSERT(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + float diag_dist = box.diagonal_length() * 1.5f; + ray_type outside_ray(ray.eval(diag_dist), 
-ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + { + return res; + } + + t = math::maximum(0.0f, diag_dist - t); + return cSuccess; + } } - } - - if (inside) { - coord = ray.get_origin(); - t = 0.0f; - return cInside; - } - - scalar_type max_t[cNumDim]; - for (int i = 0; i < cNumDim; i++) { - if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) - max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; - else - max_t[i] = -1.0f; - } - - int which_plane = 0; - for (int i = 1; i < cNumDim; i++) - if (max_t[which_plane] < max_t[i]) - which_plane = i; - - if (max_t[which_plane] < 0.0f) - return cFailure; - - for (int i = 0; i < cNumDim; i++) { - if (i != which_plane) { - coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; - - if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) - return cFailure; - } else { - coord[i] = candidate_plane[i]; - } - - CRNLIB_ASSERT(coord[i] >= box[0][i] && coord[i] <= box[1][i]); - } - - t = max_t[which_plane]; - return cSuccess; -} - -template -result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { - if (!box.contains(ray.get_origin())) { - started_within = false; - return ray_aabb(coord, t, ray, box); - } - - started_within = true; - - float diag_dist = box.diagonal_length() * 1.5f; - ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); - - result res(ray_aabb(coord, t, outside_ray, box)); - if (res != cSuccess) - return res; - - t = math::maximum(0.0f, diag_dist - t); - return cSuccess; -} -} } diff --git a/crnlib/crn_math.cpp b/crnlib/crn_math.cpp index 585d98f..36d2603 100644 --- a/crnlib/crn_math.cpp +++ b/crnlib/crn_math.cpp @@ -1,67 +1,87 @@ // File: crn_math.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" -namespace crnlib { -namespace math { -uint g_bitmasks[32] = +namespace crnlib +{ + namespace 
math { - 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, - 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, - 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, - 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, - 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, - 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, - 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, - 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U}; - -double compute_entropy(const uint8* p, uint n) { - uint hist[256]; - utils::zero_object(hist); + uint g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U + }; - for (uint i = 0; i < n; i++) - hist[*p++]++; + double compute_entropy(const uint8* p, uint n) + { + uint hist[256]; + utils::zero_object(hist); - double entropy = 0.0f; + for (uint i = 0; i < n; i++) + { + hist[*p++]++; + } - const double invln2 = 1.0f / log(2.0f); - for (uint i = 0; i < 256; i++) { - if (!hist[i]) - continue; + double entropy = 0.0f; - double prob = static_cast(hist[i]) / n; - entropy += (-log(prob) * invln2) * hist[i]; - } + const double invln2 = 1.0f / log(2.0f); + for (uint i = 0; i < 256; i++) + { + if (!hist[i]) + { + continue; + } - return entropy; -} + double prob = static_cast(hist[i]) / n; + entropy += (-log(prob) * invln2) * hist[i]; + } -void compute_lower_pow2_dim(int& width, int& height) { - const int tex_width = width; - const int tex_height = height; + return entropy; + } - width = 1; - for (;;) { - if ((width * 2) > tex_width) - break; - width *= 2; - } + void compute_lower_pow2_dim(int& width, int& height) + { + const int tex_width = width; + const int tex_height = height; - height = 1; - for (;;) { - if ((height * 2) > tex_height) - break; - height *= 2; - } -} + width = 1; + for (;;) { + 
if ((width * 2) > tex_width) + { + break; + } + width *= 2; + } -void compute_upper_pow2_dim(int& width, int& height) { - if (!math::is_power_of_2((uint32)width)) - width = math::next_pow2((uint32)width); + height = 1; + for (;;) + { + if ((height * 2) > tex_height) + { + break; + } + height *= 2; + } + } - if (!math::is_power_of_2((uint32)height)) - height = math::next_pow2((uint32)height); -} + void compute_upper_pow2_dim(int& width, int& height) + { + if (!math::is_power_of_2((uint32)width)) + { + width = math::next_pow2((uint32)width); + } -} // namespace math + if (!math::is_power_of_2((uint32)height)) + { + height = math::next_pow2((uint32)height); + } + } + } // namespace math } // namespace crnlib diff --git a/crnlib/crn_math.h b/crnlib/crn_math.h index 8f06856..9f3faec 100644 --- a/crnlib/crn_math.h +++ b/crnlib/crn_math.h @@ -10,273 +10,325 @@ unsigned __int64 __emulu(unsigned int a, unsigned int b); #endif -namespace crnlib { -namespace math { -const float cNearlyInfinite = 1.0e+37f; - -const float cDegToRad = 0.01745329252f; -const float cRadToDeg = 57.29577951f; - -extern uint g_bitmasks[32]; - -template -inline bool within_closed_range(T a, T b, T c) { - return (a >= b) && (a <= c); -} - -template -inline bool within_open_range(T a, T b, T c) { - return (a >= b) && (a < c); -} - -// Yes I know these should probably be pass by ref, not val: -// http://www.stepanovpapers.com/notes.pdf -// Just don't use them on non-simple (non built-in) types! -template -inline T minimum(T a, T b) { - return (a < b) ? a : b; -} - -template -inline T minimum(T a, T b, T c) { - return minimum(minimum(a, b), c); -} - -template -inline T maximum(T a, T b) { - return (a > b) ? a : b; -} - -template -inline T maximum(T a, T b, T c) { - return maximum(maximum(a, b), c); -} - -template -inline T lerp(T a, T b, U c) { - return a + (b - a) * c; -} - -template -inline T clamp(T value, T low, T high) { - return (value < low) ? low : ((value > high) ? 
high : value); -} - -template -inline T saturate(T value) { - return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value); -} - -inline int float_to_int(float f) { - return static_cast(f); -} - -inline uint float_to_uint(float f) { - return static_cast(f); -} - -inline int float_to_int(double f) { - return static_cast(f); -} - -inline uint float_to_uint(double f) { - return static_cast(f); -} - -inline int float_to_int_round(float f) { - return static_cast((f < 0.0f) ? -floor(-f + .5f) : floor(f + .5f)); -} - -inline uint float_to_uint_round(float f) { - return static_cast((f < 0.0f) ? 0.0f : floor(f + .5f)); -} - -template -inline int sign(T value) { - return (value < 0) ? -1 : ((value > 0) ? 1 : 0); -} - -template -inline T square(T value) { - return value * value; -} - -inline bool is_power_of_2(uint32 x) { - return x && ((x & (x - 1U)) == 0U); -} -inline bool is_power_of_2(uint64 x) { - return x && ((x & (x - 1U)) == 0U); -} - -template -inline T align_up_value(T x, uint alignment) { - CRNLIB_ASSERT(is_power_of_2(alignment)); - uint q = static_cast(x); - q = (q + alignment - 1) & (~(alignment - 1)); - return static_cast(q); -} - -template -inline T align_down_value(T x, uint alignment) { - CRNLIB_ASSERT(is_power_of_2(alignment)); - uint q = static_cast(x); - q = q & (~(alignment - 1)); - return static_cast(q); -} - -template -inline T get_align_up_value_delta(T x, uint alignment) { - return align_up_value(x, alignment) - x; -} - -// From "Hackers Delight" -inline uint32 next_pow2(uint32 val) { - val--; - val |= val >> 16; - val |= val >> 8; - val |= val >> 4; - val |= val >> 2; - val |= val >> 1; - return val + 1; -} - -inline uint64 next_pow2(uint64 val) { - val--; - val |= val >> 32; - val |= val >> 16; - val |= val >> 8; - val |= val >> 4; - val |= val >> 2; - val |= val >> 1; - return val + 1; -} - -inline uint floor_log2i(uint v) { - uint l = 0; - while (v > 1U) { - v >>= 1; - l++; - } - return l; -} - -inline uint ceil_log2i(uint v) { - uint l = 
floor_log2i(v); - if ((l != cIntBits) && (v > (1U << l))) - l++; - return l; -} - -// Returns the total number of bits needed to encode v. -inline uint total_bits(uint v) { - uint l = 0; - while (v > 0U) { - v >>= 1; - l++; - } - return l; -} - -// Actually counts the number of set bits, but hey -inline uint bitmask_size(uint mask) { - uint size = 0; - while (mask) { - mask &= (mask - 1U); - size++; - } - return size; -} - -inline uint bitmask_ofs(uint mask) { - if (!mask) - return 0; - uint ofs = 0; - while ((mask & 1U) == 0) { - mask >>= 1U; - ofs++; - } - return ofs; -} - -// See Bit Twiddling Hacks (public domain) -// http://www-graphics.stanford.edu/~seander/bithacks.html -inline uint count_trailing_zero_bits(uint v) { - uint c = 32; // c will be the number of zero bits on the right - - static const unsigned int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF}; - static const unsigned int S[] = {1, 2, 4, 8, 16}; // Our Magic Binary Numbers - - for (int i = 4; i >= 0; --i) // unroll for more speed - { - if (v & B[i]) { - v <<= S[i]; - c -= S[i]; - } - } - - if (v) { - c--; - } - - return c; -} - -inline uint count_leading_zero_bits(uint v) { - uint temp; - uint result = 32U; - - temp = (v >> 16U); - if (temp) { - result -= 16U; - v = temp; - } - temp = (v >> 8U); - if (temp) { - result -= 8U; - v = temp; - } - temp = (v >> 4U); - if (temp) { - result -= 4U; - v = temp; - } - temp = (v >> 2U); - if (temp) { - result -= 2U; - v = temp; - } - temp = (v >> 1U); - if (temp) { - result -= 1U; - v = temp; - } - - if (v & 1U) - result--; - - return result; -} - -inline uint64 emulu(uint32 a, uint32 b) { +namespace crnlib +{ + namespace math + { + const float cNearlyInfinite = 1.0e+37f; + + const float cDegToRad = 0.01745329252f; + const float cRadToDeg = 57.29577951f; + + extern uint g_bitmasks[32]; + + template + inline bool within_closed_range(T a, T b, T c) + { + return (a >= b) && (a <= c); + } + + template + inline bool within_open_range(T a, T 
b, T c) + { + return (a >= b) && (a < c); + } + + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template + inline T minimum(T a, T b) + { + return (a < b) ? a : b; + } + + template + inline T minimum(T a, T b, T c) + { + return minimum(minimum(a, b), c); + } + + template + inline T maximum(T a, T b) + { + return (a > b) ? a : b; + } + + template + inline T maximum(T a, T b, T c) + { + return maximum(maximum(a, b), c); + } + + template + inline T lerp(T a, T b, U c) + { + return a + (b - a) * c; + } + + template + inline T clamp(T value, T low, T high) + { + return (value < low) ? low : ((value > high) ? high : value); + } + + template + inline T saturate(T value) + { + return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value); + } + + inline int float_to_int(float f) + { + return static_cast(f); + } + + inline uint float_to_uint(float f) + { + return static_cast(f); + } + + inline int float_to_int(double f) + { + return static_cast(f); + } + + inline uint float_to_uint(double f) + { + return static_cast(f); + } + + inline int float_to_int_round(float f) + { + return static_cast((f < 0.0f) ? -floor(-f + .5f) : floor(f + .5f)); + } + + inline uint float_to_uint_round(float f) + { + return static_cast((f < 0.0f) ? 0.0f : floor(f + .5f)); + } + + template + inline int sign(T value) + { + return (value < 0) ? -1 : ((value > 0) ? 
1 : 0); + } + + template + inline T square(T value) + { + return value * value; + } + + inline bool is_power_of_2(uint32 x) + { + return x && ((x & (x - 1U)) == 0U); + } + inline bool is_power_of_2(uint64 x) + { + return x && ((x & (x - 1U)) == 0U); + } + + template + inline T align_up_value(T x, uint alignment) + { + CRNLIB_ASSERT(is_power_of_2(alignment)); + uint q = static_cast(x); + q = (q + alignment - 1) & (~(alignment - 1)); + return static_cast(q); + } + + template + inline T align_down_value(T x, uint alignment) + { + CRNLIB_ASSERT(is_power_of_2(alignment)); + uint q = static_cast(x); + q = q & (~(alignment - 1)); + return static_cast(q); + } + + template + inline T get_align_up_value_delta(T x, uint alignment) + { + return align_up_value(x, alignment) - x; + } + + // From "Hackers Delight" + inline uint32 next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64 next_pow2(uint64 val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + { + l++; + } + return l; + } + + // Returns the total number of bits needed to encode v. 
+ inline uint total_bits(uint v) + { + uint l = 0; + while (v > 0U) + { + v >>= 1; + l++; + } + return l; + } + + // Actually counts the number of set bits, but hey + inline uint bitmask_size(uint mask) + { + uint size = 0; + while (mask) + { + mask &= (mask - 1U); + size++; + } + return size; + } + + inline uint bitmask_ofs(uint mask) + { + if (!mask) + { + return 0; + } + uint ofs = 0; + while ((mask & 1U) == 0) + { + mask >>= 1U; + ofs++; + } + return ofs; + } + + // See Bit Twiddling Hacks (public domain) + // http://www-graphics.stanford.edu/~seander/bithacks.html + inline uint count_trailing_zero_bits(uint v) + { + uint c = 32; // c will be the number of zero bits on the right + + static const unsigned int B[] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF }; + static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers + + for (int i = 4; i >= 0; --i) // unroll for more speed + { + if (v & B[i]) + { + v <<= S[i]; + c -= S[i]; + } + } + + if (v) { + c--; + } + + return c; + } + + inline uint count_leading_zero_bits(uint v) + { + uint temp; + uint result = 32U; + + temp = (v >> 16U); + if (temp) + { + result -= 16U; + v = temp; + } + temp = (v >> 8U); + if (temp) + { + result -= 8U; + v = temp; + } + temp = (v >> 4U); + if (temp) + { + result -= 4U; + v = temp; + } + temp = (v >> 2U); + if (temp) + { + result -= 2U; + v = temp; + } + temp = (v >> 1U); + if (temp) + { + result -= 1U; + v = temp; + } + + if (v & 1U) + { + result--; + } + + return result; + } + + inline uint64 emulu(uint32 a, uint32 b) + { #if defined(_M_IX86) && defined(_MSC_VER) - return __emulu(a, b); + return __emulu(a, b); #else - return static_cast(a) * static_cast(b); + return static_cast(a) * static_cast(b); #endif -} + } -CRN_EXPORT double compute_entropy(const uint8* p, uint n); + CRN_EXPORT double compute_entropy(const uint8* p, uint n); -CRN_EXPORT void compute_lower_pow2_dim(int& width, int& height); -CRN_EXPORT void compute_upper_pow2_dim(int& 
width, int& height); + CRN_EXPORT void compute_lower_pow2_dim(int& width, int& height); + CRN_EXPORT void compute_upper_pow2_dim(int& width, int& height); -inline bool equal_tol(float a, float b, float t) { - return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); -} + inline bool equal_tol(float a, float b, float t) + { + return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); + } -inline bool equal_tol(double a, double b, double t) { - return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); -} -} + inline bool equal_tol(double a, double b, double t) + { + return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); + } + } } // namespace crnlib diff --git a/crnlib/crn_packed_uint.h b/crnlib/crn_packed_uint.h index 0f3427a..8a055bf 100644 --- a/crnlib/crn_packed_uint.h +++ b/crnlib/crn_packed_uint.h @@ -1,79 +1,116 @@ // File: crn_packed_uint // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once -namespace crnlib { -template -struct packed_uint { - inline packed_uint() {} +namespace crnlib +{ + template + struct packed_uint + { + inline packed_uint() + { + } - inline packed_uint(unsigned int val) { *this = val; } + inline packed_uint(unsigned int val) + { + *this = val; + } - inline packed_uint(const packed_uint& other) { *this = other; } + inline packed_uint(const packed_uint& other) + { + *this = other; + } - inline packed_uint& operator=(const packed_uint& rhs) { - if (this != &rhs) - memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); - return *this; - } + inline packed_uint& operator=(const packed_uint& rhs) + { + if (this != &rhs) + { + memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); + } + return *this; + } - inline packed_uint& operator=(unsigned int val) { + inline packed_uint& operator=(unsigned int val) + { #ifdef CRNLIB_BUILD_DEBUG - if (N == 1) { - CRNLIB_ASSERT(val <= 0xFFU); - } else if (N == 2) { - CRNLIB_ASSERT(val <= 0xFFFFU); - } else if (N == 3) { - CRNLIB_ASSERT(val <= 0xFFFFFFU); - } + if (N == 1) + { + 
CRNLIB_ASSERT(val <= 0xFFU); + } + else if (N == 2) + { + CRNLIB_ASSERT(val <= 0xFFFFU); + } + else if (N == 3) + { + CRNLIB_ASSERT(val <= 0xFFFFFFU); + } #endif - val <<= (8U * (4U - N)); - - for (unsigned int i = 0; i < N; i++) { - m_buf[i] = static_cast(val >> 24U); - val <<= 8U; - } - - return *this; - } - - inline operator unsigned int() const { - switch (N) { - case 1: - return m_buf[0]; - case 2: - return (m_buf[0] << 8U) | m_buf[1]; - case 3: - return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); - default: - return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); - } - } - - unsigned char m_buf[N]; -}; -template -class packed_value { - public: - packed_value() {} - packed_value(T val) { *this = val; } - - inline operator T() const { - T result = 0; - for (int i = sizeof(T) - 1; i >= 0; i--) - result = static_cast((result << 8) | m_bytes[i]); - return result; - } - packed_value& operator=(T val) { - for (int i = 0; i < sizeof(T); i++) { - m_bytes[i] = static_cast(val); - val >>= 8; - } - return *this; - } - - private: - uint8 m_bytes[sizeof(T)]; -}; + val <<= (8U * (4U - N)); + + for (unsigned int i = 0; i < N; i++) + { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const + { + switch (N) + { + case 1: + return m_buf[0]; + case 2: + return (m_buf[0] << 8U) | m_buf[1]; + case 3: + return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: + return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; + }; + + template + class packed_value + { + public: + packed_value() + { + } + + packed_value(T val) + { + *this = val; + } + + inline operator T() const + { + T result = 0; + for (int i = sizeof(T) - 1; i >= 0; i--) + { + result = static_cast((result << 8) | m_bytes[i]); + } + return result; + } + + packed_value& operator=(T val) + { + for (int i = 0; i < sizeof(T); i++) + { + m_bytes[i] = static_cast(val); 
+ val >>= 8; + } + return *this; + } + + private: + uint8 m_bytes[sizeof(T)]; + }; } // namespace crnlib diff --git a/crnlib/crn_vector2d.h b/crnlib/crn_vector2d.h index 4da69f7..558e2a4 100644 --- a/crnlib/crn_vector2d.h +++ b/crnlib/crn_vector2d.h @@ -1,137 +1,197 @@ // File: crn_vector2d.h #pragma once -namespace crnlib { -template -class vector2D { - public: - typedef crnlib::vector vector_type; - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - - inline vector2D(uint width = 0, uint height = 0, const T& def = T()) - : m_width(width), - m_height(height), - m_vec(width * height), - m_def(def) { - } - - inline vector2D(const vector2D& other) - : m_width(other.m_width), - m_height(other.m_height), - m_vec(other.m_vec), - m_def(other.m_def) { - } - - inline vector2D& operator=(const vector2D& rhs) { - if (this == &rhs) - return *this; - - m_width = rhs.m_width; - m_height = rhs.m_height; - m_vec = rhs.m_vec; - - return *this; - } - - bool try_resize(uint width, uint height, bool preserve = true) { - if ((width == m_width) && (height == m_height)) - return true; - - vector_type new_vec; - if (!new_vec.try_resize(width * height)) - return false; - - if (preserve) { - const uint nx = math::minimum(width, m_width); - const uint ny = math::minimum(height, m_height); - - for (uint y = 0; y < ny; y++) { - const T* pSrc = &m_vec[y * m_width]; - T* pDst = &new_vec[y * width]; - if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) - memcpy(pDst, pSrc, nx * sizeof(T)); - else { - for (uint x = 0; x < nx; x++) - *pDst++ = *pSrc++; - } - } - } - - m_width = width; - m_height = height; - m_vec.swap(new_vec); - - return true; - } - - void resize(uint width, uint height, bool preserve = true) { - if (!try_resize(width, height, preserve)) { - CRNLIB_FAIL("vector2D::resize: Out of memory"); - } - } - - inline void clear() { - m_vec.clear(); - m_width = 0; - m_height = 0; - } - - inline uint width() 
const { return m_width; } - inline uint height() const { return m_height; } - inline uint size() const { return m_vec.size(); } - - inline uint size_in_bytes() const { return m_vec.size() * sizeof(T); } - - const vector_type& get_vec() const { return m_vec; } - vector_type& get_vec() { return m_vec; } - - inline const T* get_ptr() const { return m_vec.get_ptr(); } - inline T* get_ptr() { return m_vec.get_ptr(); } - - inline const T& operator[](uint i) const { return m_vec[i]; } - inline T& operator[](uint i) { return m_vec[i]; } - - inline const T& operator()(uint x, uint y) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - return m_vec[x + y * m_width]; - } - - inline T& operator()(uint x, uint y) { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - return m_vec[x + y * m_width]; - } - - inline const T& at(uint x, uint y) const { - if ((x >= m_width) || (y >= m_height)) - return m_def; - return m_vec[x + y * m_width]; - } - - inline T& at(uint x, uint y) { - if ((x >= m_width) || (y >= m_height)) - return m_def; - return m_vec[x + y * m_width]; - } - - inline void swap(vector2D& other) { - m_vec.swap(other.m_vec); - anvil::swap(m_width, other.m_width); - anvil::swap(m_height, other.m_height); - } - - inline void set_all(const T& x) { - m_vec.set_all(x); - } - - private: - vector_type m_vec; - uint m_width; - uint m_height; - T m_def; -}; +namespace crnlib +{ + template + class vector2D + { + public: + typedef crnlib::vector vector_type; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector2D(uint width = 0, uint height = 0, const T& def = T()): + m_width(width), + m_height(height), + m_vec(width* height), + m_def(def) + { + } + + inline vector2D(const vector2D& other): + m_width(other.m_width), + m_height(other.m_height), + m_vec(other.m_vec), + m_def(other.m_def) + { + } + + inline vector2D& operator=(const vector2D& rhs) + { + if (this == &rhs) + { 
+ return *this; + } + m_width = rhs.m_width; + m_height = rhs.m_height; + m_vec = rhs.m_vec; + + return *this; + } + + bool try_resize(uint width, uint height, bool preserve = true) + { + if ((width == m_width) && (height == m_height)) + { + return true; + } + + vector_type new_vec; + if (!new_vec.try_resize(width * height)) + { + return false; + } + + if (preserve) + { + const uint nx = math::minimum(width, m_width); + const uint ny = math::minimum(height, m_height); + + for (uint y = 0; y < ny; y++) + { + const T* pSrc = &m_vec[y * m_width]; + T* pDst = &new_vec[y * width]; + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + memcpy(pDst, pSrc, nx * sizeof(T)); + } + else + { + for (uint x = 0; x < nx; x++) + { + *pDst++ = *pSrc++; + } + } + } + } + + m_width = width; + m_height = height; + m_vec.swap(new_vec); + + return true; + } + + void resize(uint width, uint height, bool preserve = true) + { + if (!try_resize(width, height, preserve)) + { + CRNLIB_FAIL("vector2D::resize: Out of memory"); + } + } + + inline void clear() + { + m_vec.clear(); + m_width = 0; + m_height = 0; + } + + inline uint width() const + { + return m_width; + } + inline uint height() const + { + return m_height; + } + inline uint size() const + { + return m_vec.size(); + } + + inline uint size_in_bytes() const + { + return m_vec.size() * sizeof(T); + } + + const vector_type& get_vec() const + { + return m_vec; + } + vector_type& get_vec() + { + return m_vec; + } + + inline const T* get_ptr() const + { + return m_vec.get_ptr(); + } + inline T* get_ptr() + { + return m_vec.get_ptr(); + } + + inline const T& operator[](uint i) const + { + return m_vec[i]; + } + inline T& operator[](uint i) + { + return m_vec[i]; + } + + inline const T& operator()(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_vec[x + y * m_width]; + } + + inline T& operator()(uint x, uint y) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_vec[x + y * m_width]; + } + + 
inline const T& at(uint x, uint y) const + { + if ((x >= m_width) || (y >= m_height)) + { + return m_def; + } + return m_vec[x + y * m_width]; + } + + inline T& at(uint x, uint y) + { + if ((x >= m_width) || (y >= m_height)) + { + return m_def; + } + return m_vec[x + y * m_width]; + } + + inline void swap(vector2D& other) + { + m_vec.swap(other.m_vec); + anvil::swap(m_width, other.m_width); + anvil::swap(m_height, other.m_height); + } + + inline void set_all(const T& x) + { + m_vec.set_all(x); + } + private: + vector_type m_vec; + uint m_width; + uint m_height; + T m_def; + }; } // namespace anvil From c7b0631d79163f8aee09bc465bc22eb245a85136 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Mon, 7 Sep 2020 03:00:40 -0400 Subject: [PATCH 10/18] Force C++11 --- CMakeLists.txt | 2 +- crnlib/CMakeLists.txt | 1 + crunch/CMakeLists.txt | 1 + examples/example1/CMakeLists.txt | 2 +- examples/example2/CMakeLists.txt | 3 +-- examples/example3/CMakeLists.txt | 3 +-- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be076b4..d81f53b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.4) project(Crunch2 VERSION "1.2.0") if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/conanbuildinfo.cmake") diff --git a/crnlib/CMakeLists.txt b/crnlib/CMakeLists.txt index 19dc3ec..966bc84 100644 --- a/crnlib/CMakeLists.txt +++ b/crnlib/CMakeLists.txt @@ -159,6 +159,7 @@ else() endif() add_library(crn ${CRNLIB_SRCS}) +set_property(TARGET crn PROPERTY CXX_STANDARD 11) target_include_directories(crn PUBLIC ${CMAKE_CURRENT_BINARY_DIR} diff --git a/crunch/CMakeLists.txt b/crunch/CMakeLists.txt index 8566d52..a7e9e32 100644 --- a/crunch/CMakeLists.txt +++ b/crunch/CMakeLists.txt @@ -7,6 +7,7 @@ set(CRUNCH_SRCS ) add_executable(crunch ${CRUNCH_SRCS}) +set_property(TARGET crunch PROPERTY CXX_STANDARD 11) target_link_libraries(crunch crn) install(TARGETS crunch) diff --git 
a/examples/example1/CMakeLists.txt b/examples/example1/CMakeLists.txt index c93a0c1..789a449 100644 --- a/examples/example1/CMakeLists.txt +++ b/examples/example1/CMakeLists.txt @@ -4,4 +4,4 @@ set(EXAMPLE1_SRCS add_executable(example1 ${EXAMPLE1_SRCS}) target_link_libraries(example1 crn stb) -set_target_properties(example1 PROPERTIES FOLDER "examples") +set_target_properties(example1 PROPERTIES CXX_STANDARD 11 FOLDER "examples") diff --git a/examples/example2/CMakeLists.txt b/examples/example2/CMakeLists.txt index d1174d6..64cce2c 100644 --- a/examples/example2/CMakeLists.txt +++ b/examples/example2/CMakeLists.txt @@ -6,5 +6,4 @@ set(EXAMPLE2_SRCS add_executable(example2 ${EXAMPLE2_SRCS}) target_link_libraries(example2 crn) - -set_target_properties(example2 PROPERTIES FOLDER "examples") +set_target_properties(example2 PROPERTIES CXX_STANDARD 11 FOLDER "examples") diff --git a/examples/example3/CMakeLists.txt b/examples/example3/CMakeLists.txt index 375f822..0a92c7e 100644 --- a/examples/example3/CMakeLists.txt +++ b/examples/example3/CMakeLists.txt @@ -3,5 +3,4 @@ set(EXAMPLE3_SRCS ) add_executable(example3 ${EXAMPLE3_SRCS}) target_link_libraries(example3 crn stb) - -set_target_properties(example3 PROPERTIES FOLDER "examples") +set_target_properties(example3 PROPERTIES CXX_STANDARD 11 FOLDER "examples") From 9015f64c69af72333da884d9b8955684c7557510 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Mon, 7 Sep 2020 14:27:39 -0400 Subject: [PATCH 11/18] Format some files --- crnlib/CMakeLists.txt | 4 +- crnlib/crn_comp.h | 1 + crnlib/crn_data_stream_serializer.h | 1095 +++++++++------ crnlib/crn_dxt1.h | 581 ++++---- crnlib/crn_dynamic_string.cpp | 1069 ++++++++------ crnlib/crn_dynamic_string.h | 450 +++--- crnlib/crn_helpers.h | 110 +- crnlib/crn_huffman_codes.cpp | 683 +++++---- crnlib/crn_huffman_codes.h | 13 +- crnlib/crn_image_utils.h | 350 ++--- crnlib/crn_prefix_coding.cpp | 643 +++++---- crnlib/crn_prefix_coding.h | 229 +-- crnlib/crn_radix_sort.h | 598 
++++---- crnlib/crn_strutils.h | 1 + crnlib/crn_tree_clusterizer.h | 1284 +++++++++-------- crnlib/crn_types.h | 93 +- crnlib/crn_value.cpp | 8 +- crnlib/crn_value.h | 2012 +++++++++++++++------------ crnlib/crn_vector.h | 1400 +++++++++++-------- crnlib/crnlib.cpp | 674 +++++---- 20 files changed, 6358 insertions(+), 4940 deletions(-) diff --git a/crnlib/CMakeLists.txt b/crnlib/CMakeLists.txt index 966bc84..1a7d9d8 100644 --- a/crnlib/CMakeLists.txt +++ b/crnlib/CMakeLists.txt @@ -173,8 +173,8 @@ include(GenerateExportHeader) generate_export_header(crn) if(NOT WIN32) - find_package(Threads) - target_link_libraries(crn PUBLIC Threads::Threads) + find_package(Threads) + target_link_libraries(crn PUBLIC Threads::Threads) endif() install(TARGETS crn) diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h index b71f6ea..35f4c0a 100644 --- a/crnlib/crn_comp.h +++ b/crnlib/crn_comp.h @@ -1,5 +1,6 @@ // File: crn_comp.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_defs.h" diff --git a/crnlib/crn_data_stream_serializer.h b/crnlib/crn_data_stream_serializer.h index f7fbdb0..eec77a7 100644 --- a/crnlib/crn_data_stream_serializer.h +++ b/crnlib/crn_data_stream_serializer.h @@ -1,496 +1,675 @@ // File: data_stream_serializer.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once + #include "crn_data_stream.h" #include "crn_export.h" -namespace crnlib { -// Defaults to little endian mode. 
-class CRN_EXPORT data_stream_serializer { - public: - data_stream_serializer() - : m_pStream(NULL), m_little_endian(true) {} - data_stream_serializer(data_stream* pStream) - : m_pStream(pStream), m_little_endian(true) {} - data_stream_serializer(data_stream& stream) - : m_pStream(&stream), m_little_endian(true) {} - data_stream_serializer(const data_stream_serializer& other) - : m_pStream(other.m_pStream), m_little_endian(other.m_little_endian) {} - - data_stream_serializer& operator=(const data_stream_serializer& rhs) { - m_pStream = rhs.m_pStream; - m_little_endian = rhs.m_little_endian; - return *this; - } - - data_stream* get_stream() const { return m_pStream; } - void set_stream(data_stream* pStream) { m_pStream = pStream; } - - const dynamic_string& get_name() const { return m_pStream ? m_pStream->get_name() : g_empty_dynamic_string; } - - bool get_error() { return m_pStream ? m_pStream->get_error() : false; } - - bool get_little_endian() const { return m_little_endian; } - void set_little_endian(bool little_endian) { m_little_endian = little_endian; } - - bool write(const void* pBuf, uint len) { - return m_pStream->write(pBuf, len) == len; - } - - bool read(void* pBuf, uint len) { - return m_pStream->read(pBuf, len) == len; - } - - // size = size of each element, count = number of elements, returns actual count of elements written - uint write(const void* pBuf, uint size, uint count) { - uint actual_size = size * count; - if (!actual_size) - return 0; - uint n = m_pStream->write(pBuf, actual_size); - if (n == actual_size) - return count; - return n / size; - } - - // size = size of each element, count = number of elements, returns actual count of elements read - uint read(void* pBuf, uint size, uint count) { - uint actual_size = size * count; - if (!actual_size) - return 0; - uint n = m_pStream->read(pBuf, actual_size); - if (n == actual_size) - return count; - return n / size; - } - - bool write_chars(const char* pBuf, uint len) { - return write(pBuf, 
len); - } - - bool read_chars(char* pBuf, uint len) { - return read(pBuf, len); - } - - bool skip(uint len) { - return m_pStream->skip(len) == len; - } - - template - bool write_object(const T& obj) { - if (m_little_endian == c_crnlib_little_endian_platform) - return write(&obj, sizeof(obj)); - else { - uint8 buf[sizeof(T)]; - uint buf_size = sizeof(T); - void* pBuf = buf; - utils::write_obj(obj, pBuf, buf_size, m_little_endian); - - return write(buf, sizeof(T)); - } - } - - template - bool read_object(T& obj) { - if (m_little_endian == c_crnlib_little_endian_platform) - return read(&obj, sizeof(obj)); - else { - uint8 buf[sizeof(T)]; - if (!read(buf, sizeof(T))) - return false; - - uint buf_size = sizeof(T); - const void* pBuf = buf; - utils::read_obj(obj, pBuf, buf_size, m_little_endian); - - return true; - } - } - - template - bool write_value(T value) { - return write_object(value); - } - - template - T read_value(const T& on_error_value = T()) { - T result; - if (!read_object(result)) - result = on_error_value; - return result; - } - - template - bool write_enum(T e) { - int val = static_cast(e); - return write_object(val); - } - - template - T read_enum() { - return static_cast(read_value()); - } - - // Writes uint using a simple variable length code (VLC). - bool write_uint_vlc(uint val) { - do { - uint8 c = static_cast(val) & 0x7F; - if (val <= 0x7F) - c |= 0x80; - - if (!write_value(c)) - return false; - - val >>= 7; - } while (val); - - return true; - } - - // Reads uint using a simple variable length code (VLC). - bool read_uint_vlc(uint& val) { - val = 0; - uint shift = 0; +namespace crnlib +{ + // Defaults to little endian mode. 
+ class CRN_EXPORT data_stream_serializer + { + public: + data_stream_serializer(): + m_pStream(NULL), + m_little_endian(true) + { + } + data_stream_serializer(data_stream* pStream) : + m_pStream(pStream), + m_little_endian(true) + { + } + data_stream_serializer(data_stream& stream): + m_pStream(&stream), + m_little_endian(true) + { + } + data_stream_serializer(const data_stream_serializer& other): + m_pStream(other.m_pStream), + m_little_endian(other.m_little_endian) + { + } - for (;;) { - if (shift >= 32) - return false; + data_stream_serializer& operator=(const data_stream_serializer& rhs) + { + m_pStream = rhs.m_pStream; + m_little_endian = rhs.m_little_endian; + return *this; + } - uint8 c; - if (!read_object(c)) - return false; + data_stream* get_stream() const + { + return m_pStream; + } + void set_stream(data_stream* pStream) + { + m_pStream = pStream; + } - val |= ((c & 0x7F) << shift); - shift += 7; + const dynamic_string& get_name() const + { + return m_pStream ? m_pStream->get_name() : g_empty_dynamic_string; + } - if (c & 0x80) - break; - } + bool get_error() + { + return m_pStream ? 
m_pStream->get_error() : false; + } + + bool get_little_endian() const + { + return m_little_endian; + } + void set_little_endian(bool little_endian) + { + m_little_endian = little_endian; + } + + bool write(const void* pBuf, uint len) + { + return m_pStream->write(pBuf, len) == len; + } + + bool read(void* pBuf, uint len) + { + return m_pStream->read(pBuf, len) == len; + } + + // size = size of each element, count = number of elements, returns actual count of elements written + uint write(const void* pBuf, uint size, uint count) + { + uint actual_size = size * count; + if (!actual_size) + { + return 0; + } + uint n = m_pStream->write(pBuf, actual_size); + if (n == actual_size) + { + return count; + } + return n / size; + } - return true; - } + // size = size of each element, count = number of elements, returns actual count of elements read + uint read(void* pBuf, uint size, uint count) + { + uint actual_size = size * count; + if (!actual_size) + { + return 0; + } + uint n = m_pStream->read(pBuf, actual_size); + if (n == actual_size) + { + return count; + } + return n / size; + } - bool write_c_str(const char* p) { - uint len = static_cast(strlen(p)); - if (!write_uint_vlc(len)) - return false; + bool write_chars(const char* pBuf, uint len) + { + return write(pBuf, len); + } - return write_chars(p, len); - } + bool read_chars(char* pBuf, uint len) + { + return read(pBuf, len); + } - bool read_c_str(char* pBuf, uint buf_size) { - uint len; - if (!read_uint_vlc(len)) - return false; - if ((len + 1) > buf_size) - return false; + bool skip(uint len) + { + return m_pStream->skip(len) == len; + } - pBuf[len] = '\0'; + template + bool write_object(const T& obj) + { + if (m_little_endian == c_crnlib_little_endian_platform) + { + return write(&obj, sizeof(obj)); + } + else + { + uint8 buf[sizeof(T)]; + uint buf_size = sizeof(T); + void* pBuf = buf; + utils::write_obj(obj, pBuf, buf_size, m_little_endian); + + return write(buf, sizeof(T)); + } + } - return read_chars(pBuf, 
len); - } + template + bool read_object(T& obj) + { + if (m_little_endian == c_crnlib_little_endian_platform) + { + return read(&obj, sizeof(obj)); + } + else + { + uint8 buf[sizeof(T)]; + if (!read(buf, sizeof(T))) + { + return false; + } + + uint buf_size = sizeof(T); + const void* pBuf = buf; + utils::read_obj(obj, pBuf, buf_size, m_little_endian); + + return true; + } + } - bool write_string(const dynamic_string& str) { - if (!write_uint_vlc(str.get_len())) - return false; + template + bool write_value(T value) + { + return write_object(value); + } - return write_chars(str.get_ptr(), str.get_len()); - } + template + T read_value(const T& on_error_value = T()) + { + T result; + if (!read_object(result)) + { + result = on_error_value; + } + return result; + } - bool read_string(dynamic_string& str) { - uint len; - if (!read_uint_vlc(len)) - return false; + template + bool write_enum(T e) + { + int val = static_cast(e); + return write_object(val); + } - if (!str.set_len(len)) - return false; + template + T read_enum() + { + return static_cast(read_value()); + } - if (len) { - if (!read_chars(str.get_ptr_raw(), len)) - return false; + // Writes uint using a simple variable length code (VLC). + bool write_uint_vlc(uint val) + { + do + { + uint8 c = static_cast(val) & 0x7F; + if (val <= 0x7F) + { + c |= 0x80; + } + + if (!write_value(c)) + { + return false; + } + + val >>= 7; + } + while (val); + + return true; + } - if (memchr(str.get_ptr(), 0, len) != NULL) { - str.truncate(0); - return false; - } - } + // Reads uint using a simple variable length code (VLC). 
+ bool read_uint_vlc(uint& val) + { + val = 0; + uint shift = 0; + + for (;;) + { + if (shift >= 32) + { + return false; + } + + uint8 c; + if (!read_object(c)) + { + return false; + } + + val |= ((c & 0x7F) << shift); + shift += 7; + + if (c & 0x80) + { + break; + } + } + + return true; + } - return true; - } + bool write_c_str(const char* p) + { + uint len = static_cast(strlen(p)); + if (!write_uint_vlc(len)) + { + return false; + } - template - bool write_vector(const T& vec) { - if (!write_uint_vlc(vec.size())) - return false; + return write_chars(p, len); + } - for (uint i = 0; i < vec.size(); i++) { - *this << vec[i]; - if (get_error()) - return false; - } + bool read_c_str(char* pBuf, uint buf_size) + { + uint len; + if (!read_uint_vlc(len)) + { + return false; + } + if ((len + 1) > buf_size) + { + return false; + } + + pBuf[len] = '\0'; + + return read_chars(pBuf, len); + } + + bool write_string(const dynamic_string& str) + { + if (!write_uint_vlc(str.get_len())) + { + return false; + } + + return write_chars(str.get_ptr(), str.get_len()); + } + + bool read_string(dynamic_string& str) + { + uint len; + if (!read_uint_vlc(len)) + { + return false; + } + + if (!str.set_len(len)) + { + return false; + } + + if (len) { + if (!read_chars(str.get_ptr_raw(), len)) + { + return false; + } + + if (memchr(str.get_ptr(), 0, len) != NULL) + { + str.truncate(0); + return false; + } + } + + return true; + } - return true; - }; + template + bool write_vector(const T& vec) + { + if (!write_uint_vlc(vec.size())) + { + return false; + } + + for (uint i = 0; i < vec.size(); i++) + { + *this << vec[i]; + if (get_error()) + { + return false; + } + } + + return true; + }; + + template + bool read_vector(T& vec, uint num_expected = UINT_MAX) + { + uint size; + if (!read_uint_vlc(size)) + { + return false; + } + + if ((size * sizeof(T::value_type)) >= 2U * 1024U * 1024U * 1024U) + { + return false; + } + + if ((num_expected != UINT_MAX) && (size != num_expected)) + { + return 
false; + } + + vec.resize(size); + for (uint i = 0; i < vec.size(); i++) + { + *this >> vec[i]; + + if (get_error()) + { + return false; + } + } + + return true; + } + + bool read_entire_file(crnlib::vector& buf) + { + return m_pStream->read_array(buf); + } - template - bool read_vector(T& vec, uint num_expected = UINT_MAX) { - uint size; - if (!read_uint_vlc(size)) - return false; + bool write_entire_file(const crnlib::vector& buf) + { + return m_pStream->write_array(buf); + } + + // Got this idea from the Molly Rocket forums. + // fmt may contain the characters "1", "2", or "4". + bool writef(char* fmt, ...) + { + va_list v; + va_start(v, fmt); + + while (*fmt) + { + switch (*fmt++) + { + case '1': + { + const uint8 x = static_cast(va_arg(v, uint)); + if (!write_value(x)) + { + return false; + } + } + case '2': + { + const uint16 x = static_cast(va_arg(v, uint)); + if (!write_value(x)) + { + return false; + } + } + case '4': + { + const uint32 x = static_cast(va_arg(v, uint)); + if (!write_value(x)) + { + return false; + } + } + case ' ': + case ',': + { + break; + } + default: + { + CRNLIB_ASSERT(0); + return false; + } + } + } + + va_end(v); + return true; + } - if ((size * sizeof(T::value_type)) >= 2U * 1024U * 1024U * 1024U) - return false; + // Got this idea from the Molly Rocket forums. + // fmt may contain the characters "1", "2", or "4". + bool readf(char* fmt, ...) 
+ { + va_list v; + va_start(v, fmt); + + while (*fmt) + { + switch (*fmt++) + { + case '1': + { + uint8* x = va_arg(v, uint8*); + CRNLIB_ASSERT(x); + if (!read_object(*x)) + { + return false; + } + } + case '2': + { + uint16* x = va_arg(v, uint16*); + CRNLIB_ASSERT(x); + if (!read_object(*x)) + { + return false; + } + } + case '4': + { + uint32* x = va_arg(v, uint32*); + CRNLIB_ASSERT(x); + if (!read_object(*x)) + { + return false; + } + } + case ' ': + case ',': + { + break; + } + default: + { + CRNLIB_ASSERT(0); + return false; + } + } + } + + va_end(v); + return true; + } + + private: + data_stream* m_pStream; + + bool m_little_endian; + }; + + // Write operators + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, bool val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int8 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint8 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int16 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint16 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int32 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint32 val) + { + serializer.write_uint_vlc(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int64 val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint64 val) + { + 
serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, long val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, unsigned long val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, float val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, double val) + { + serializer.write_value(val); + return serializer; + } + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const char* p) + { + serializer.write_c_str(p); + return serializer; + } - if ((num_expected != UINT_MAX) && (size != num_expected)) - return false; + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const dynamic_string& str) + { + serializer.write_string(str); + return serializer; + } - vec.resize(size); - for (uint i = 0; i < vec.size(); i++) { - *this >> vec[i]; + template + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const crnlib::vector& vec) + { + serializer.write_vector(vec); + return serializer; + } - if (get_error()) - return false; + template + inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const T* p) + { + serializer.write_object(*p); + return serializer; } - return true; - } + // Read operators + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, bool& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int8& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint8& val) + { + serializer.read_object(val); + return 
serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int16& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint16& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int32& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint32& val) + { + serializer.read_uint_vlc(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int64& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint64& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, long& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, unsigned long& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, float& val) + { + serializer.read_object(val); + return serializer; + } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, double& val) + { + serializer.read_object(val); + return serializer; + } - bool read_entire_file(crnlib::vector& buf) { - return m_pStream->read_array(buf); - } + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, dynamic_string& str) + { + serializer.read_string(str); + return serializer; + } - bool write_entire_file(const crnlib::vector& buf) { - return m_pStream->write_array(buf); - } + template + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, 
crnlib::vector& vec) + { + serializer.read_vector(vec); + return serializer; + } - // Got this idea from the Molly Rocket forums. - // fmt may contain the characters "1", "2", or "4". - bool writef(char* fmt, ...) { - va_list v; - va_start(v, fmt); - - while (*fmt) { - switch (*fmt++) { - case '1': { - const uint8 x = static_cast(va_arg(v, uint)); - if (!write_value(x)) - return false; - } - case '2': { - const uint16 x = static_cast(va_arg(v, uint)); - if (!write_value(x)) - return false; - } - case '4': { - const uint32 x = static_cast(va_arg(v, uint)); - if (!write_value(x)) - return false; - } - case ' ': - case ',': { - break; - } - default: { - CRNLIB_ASSERT(0); - return false; - } - } - } - - va_end(v); - return true; - } - - // Got this idea from the Molly Rocket forums. - // fmt may contain the characters "1", "2", or "4". - bool readf(char* fmt, ...) { - va_list v; - va_start(v, fmt); - - while (*fmt) { - switch (*fmt++) { - case '1': { - uint8* x = va_arg(v, uint8*); - CRNLIB_ASSERT(x); - if (!read_object(*x)) - return false; - } - case '2': { - uint16* x = va_arg(v, uint16*); - CRNLIB_ASSERT(x); - if (!read_object(*x)) - return false; - } - case '4': { - uint32* x = va_arg(v, uint32*); - CRNLIB_ASSERT(x); - if (!read_object(*x)) - return false; - } - case ' ': - case ',': { - break; - } - default: { - CRNLIB_ASSERT(0); - return false; - } - } - } - - va_end(v); - return true; - } - - private: - data_stream* m_pStream; - - bool m_little_endian; -}; - -// Write operators -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, bool val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int8 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint8 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& 
operator<<(data_stream_serializer& serializer, int16 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint16 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int32 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint32 val) { - serializer.write_uint_vlc(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int64 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint64 val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, long val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, unsigned long val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, float val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, double val) { - serializer.write_value(val); - return serializer; -} -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const char* p) { - serializer.write_c_str(p); - return serializer; -} - -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const dynamic_string& str) { - serializer.write_string(str); - return serializer; -} - -template -inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const crnlib::vector& vec) { - serializer.write_vector(vec); - return serializer; -} - -template -inline data_stream_serializer& 
operator<<(data_stream_serializer& serializer, const T* p) { - serializer.write_object(*p); - return serializer; -} - -// Read operators -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, bool& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int8& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint8& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int16& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint16& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int32& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint32& val) { - serializer.read_uint_vlc(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int64& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint64& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, long& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, unsigned long& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, float& val) { - serializer.read_object(val); - return serializer; -} -inline data_stream_serializer& operator>>(data_stream_serializer& 
serializer, double& val) { - serializer.read_object(val); - return serializer; -} - -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, dynamic_string& str) { - serializer.read_string(str); - return serializer; -} - -template -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, crnlib::vector& vec) { - serializer.read_vector(vec); - return serializer; -} - -template -inline data_stream_serializer& operator>>(data_stream_serializer& serializer, T* p) { - serializer.read_object(*p); - return serializer; -} + template + inline data_stream_serializer& operator>>(data_stream_serializer& serializer, T* p) + { + serializer.read_object(*p); + return serializer; + } } // namespace crnlib diff --git a/crnlib/crn_dxt1.h b/crnlib/crn_dxt1.h index 15a4265..79b81ed 100644 --- a/crnlib/crn_dxt1.h +++ b/crnlib/crn_dxt1.h @@ -5,274 +5,323 @@ #include "crn_dxt.h" #include "crn_export.h" -namespace crnlib { -struct CRN_EXPORT dxt1_solution_coordinates { - inline dxt1_solution_coordinates() - : m_low_color(0), m_high_color(0) {} - - inline dxt1_solution_coordinates(uint16 l, uint16 h) - : m_low_color(l), m_high_color(h) {} - - inline dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true) - : m_low_color(dxt1_block::pack_color(l, scaled)), - m_high_color(dxt1_block::pack_color(h, scaled)) { - } - - inline dxt1_solution_coordinates(vec3F nl, vec3F nh) { +namespace crnlib +{ + struct CRN_EXPORT dxt1_solution_coordinates + { + inline dxt1_solution_coordinates(): + m_low_color(0), + m_high_color(0) + { + } + + inline dxt1_solution_coordinates(uint16 l, uint16 h): + m_low_color(l), + m_high_color(h) + { + } + + inline dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true): + m_low_color(dxt1_block::pack_color(l, scaled)), + m_high_color(dxt1_block::pack_color(h, scaled)) + { + } + + inline dxt1_solution_coordinates(vec3F nl, vec3F nh) + { #if 
CRNLIB_DXT_ALT_ROUNDING - // Umm, wtf? - nl.clamp(0.0f, .999f); - nh.clamp(0.0f, .999f); - color_quad_u8 l((int)floor(nl[0] * 32.0f), (int)floor(nl[1] * 64.0f), (int)floor(nl[2] * 32.0f), 255); - color_quad_u8 h((int)floor(nh[0] * 32.0f), (int)floor(nh[1] * 64.0f), (int)floor(nh[2] * 32.0f), 255); + // Umm, wtf? + nl.clamp(0.0f, .999f); + nh.clamp(0.0f, .999f); + color_quad_u8 l((int)floor(nl[0] * 32.0f), (int)floor(nl[1] * 64.0f), (int)floor(nl[2] * 32.0f), 255); + color_quad_u8 h((int)floor(nh[0] * 32.0f), (int)floor(nh[1] * 64.0f), (int)floor(nh[2] * 32.0f), 255); #else - // Fixes the bins - color_quad_u8 l((int)floor(.5f + nl[0] * 31.0f), (int)floor(.5f + nl[1] * 63.0f), (int)floor(.5f + nl[2] * 31.0f), 255); - color_quad_u8 h((int)floor(.5f + nh[0] * 31.0f), (int)floor(.5f + nh[1] * 63.0f), (int)floor(.5f + nh[2] * 31.0f), 255); + // Fixes the bins + color_quad_u8 l((int)floor(.5f + nl[0] * 31.0f), (int)floor(.5f + nl[1] * 63.0f), (int)floor(.5f + nl[2] * 31.0f), 255); + color_quad_u8 h((int)floor(.5f + nh[0] * 31.0f), (int)floor(.5f + nh[1] * 63.0f), (int)floor(.5f + nh[2] * 31.0f), 255); #endif - m_low_color = dxt1_block::pack_color(l, false); - m_high_color = dxt1_block::pack_color(h, false); - } - - uint16 m_low_color; - uint16 m_high_color; - - inline void clear() { - m_low_color = 0; - m_high_color = 0; - } - - inline dxt1_solution_coordinates& canonicalize() { - if (m_low_color < m_high_color) - utils::swap(m_low_color, m_high_color); - return *this; - } - - inline operator size_t() const { return fast_hash(this, sizeof(*this)); } - - inline bool operator==(const dxt1_solution_coordinates& other) const { - uint16 l0 = math::minimum(m_low_color, m_high_color); - uint16 h0 = math::maximum(m_low_color, m_high_color); - - uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); - uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); - - return (l0 == l1) && (h0 == h1); - } - - inline bool operator!=(const dxt1_solution_coordinates& 
other) const { - return !(*this == other); - } - - inline bool operator<(const dxt1_solution_coordinates& other) const { - uint16 l0 = math::minimum(m_low_color, m_high_color); - uint16 h0 = math::maximum(m_low_color, m_high_color); - - uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); - uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); - - if (l0 < l1) - return true; - else if (l0 == l1) { - if (h0 < h1) - return true; - } - - return false; - } -}; - -typedef crnlib::vector dxt1_solution_coordinates_vec; - -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_solution_coordinates); - -struct CRN_EXPORT unique_color { - inline unique_color() {} - inline unique_color(const color_quad_u8& color, uint weight) - : m_color(color), m_weight(weight) {} - - color_quad_u8 m_color; - uint m_weight; - - inline bool operator<(const unique_color& c) const { - return *reinterpret_cast(&m_color) < *reinterpret_cast(&c.m_color); - } - - inline bool operator==(const unique_color& c) const { - return *reinterpret_cast(&m_color) == *reinterpret_cast(&c.m_color); - } -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(unique_color); - -class CRN_EXPORT dxt1_endpoint_optimizer { - public: - dxt1_endpoint_optimizer(); - - struct params { - params() - : m_block_index(0), - m_pPixels(NULL), - m_num_pixels(0), - m_dxt1a_alpha_threshold(128U), - m_quality(cCRNDXTQualityUber), - m_pixels_have_alpha(false), - m_use_alpha_blocks(true), - m_perceptual(true), - m_grayscale_sampling(false), - m_endpoint_caching(true), - m_use_transparent_indices_for_black(false), - m_force_alpha_blocks(false) { - } - - uint m_block_index; - - const color_quad_u8* m_pPixels; - uint m_num_pixels; - uint m_dxt1a_alpha_threshold; - - crn_dxt_quality m_quality; - - bool m_pixels_have_alpha; - bool m_use_alpha_blocks; - bool m_perceptual; - bool m_grayscale_sampling; - bool m_endpoint_caching; - bool m_use_transparent_indices_for_black; - bool m_force_alpha_blocks; - }; - - struct results { - inline results() - : 
m_pSelectors(NULL) {} - - uint64 m_error; - - uint16 m_low_color; - uint16 m_high_color; - - uint8* m_pSelectors; - bool m_alpha_block; - bool m_reordered; - bool m_alternate_rounding; - bool m_enforce_selector; - uint8 m_enforced_selector; - }; - - bool compute(const params& p, results& r); - - private: - const params* m_pParams; - results* m_pResults; - - bool m_perceptual; - bool m_evaluate_hc; - - typedef crnlib::vector unique_color_vec; - - //typedef crnlib::hash_map > unique_color_hash_map; - typedef crnlib::hash_map unique_color_hash_map; - unique_color_hash_map m_unique_color_hash_map; - - unique_color_vec m_unique_colors; // excludes transparent colors! - unique_color_vec m_evaluated_colors; - unique_color_vec m_temp_unique_colors; - - struct { - uint64 low, high; - } m_rDist[32], m_gDist[64], m_bDist[32]; - - uint m_total_unique_color_weight; - - bool m_has_transparent_pixels; - - vec3F_array m_norm_unique_colors; - vec3F m_mean_norm_color; - - vec3F_array m_norm_unique_colors_weighted; - vec3F m_mean_norm_color_weighted; - - vec3F m_principle_axis; - - crnlib::vector m_unique_packed_colors; - crnlib::vector m_trial_selectors; - - crnlib::vector m_low_coords; - crnlib::vector m_high_coords; - - enum { cMaxPrevResults = 4 }; - dxt1_solution_coordinates m_prev_results[cMaxPrevResults]; - uint m_num_prev_results; - - crnlib::vector m_lo_cells; - crnlib::vector m_hi_cells; - - struct potential_solution { - potential_solution() - : m_coords(), m_error(cUINT64_MAX), m_alpha_block(false) { - } - - dxt1_solution_coordinates m_coords; - crnlib::vector m_selectors; - uint64 m_error; - bool m_alpha_block; - bool m_alternate_rounding; - bool m_enforce_selector; - uint8 m_enforced_selector; - - void clear() { - m_coords.clear(); - m_selectors.resize(0); - m_error = cUINT64_MAX; - m_alpha_block = false; - } - - bool are_selectors_all_equal() const { - if (m_selectors.empty()) - return false; - const uint s = m_selectors[0]; - for (uint i = 1; i < m_selectors.size(); 
i++) - if (m_selectors[i] != s) - return false; - return true; - } - }; - - potential_solution m_trial_solution; - potential_solution m_best_solution; - - typedef crnlib::hash_map solution_hash_map; - solution_hash_map m_solutions_tried; - - bool refine_solution(int refinement_level = 0); - - bool evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding = false); - bool evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding); - bool evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding); - bool evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding); - bool evaluate_solution_hc_uniform(const dxt1_solution_coordinates& coords, bool alternate_rounding); - void compute_selectors(); - void compute_selectors_hc(); - - void find_unique_colors(); - void handle_multicolor_block(); - void compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def); - void compute_vectors(const vec3F& perceptual_weights); - void return_solution(); - void try_combinatorial_encoding(); - void compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]); - void optimize_endpoint_comps(); - void optimize_endpoints(vec3F& low_color, vec3F& high_color); - bool try_alpha_as_black_optimization(); - bool try_average_block_as_solid(); - bool try_median4(const vec3F& low_color, const vec3F& high_color); - - void compute_internal(const params& p, results& r); - - unique_color lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding = 1); - - inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha); -}; - + m_low_color = dxt1_block::pack_color(l, false); + m_high_color = dxt1_block::pack_color(h, false); + } + + uint16 m_low_color; + uint16 m_high_color; + + inline void clear() + { + m_low_color = 0; + m_high_color = 0; + } + + inline 
dxt1_solution_coordinates& canonicalize() + { + if (m_low_color < m_high_color) + { + utils::swap(m_low_color, m_high_color); + } + return *this; + } + + inline operator size_t() const + { + return fast_hash(this, sizeof(*this)); + } + + inline bool operator==(const dxt1_solution_coordinates& other) const + { + uint16 l0 = math::minimum(m_low_color, m_high_color); + uint16 h0 = math::maximum(m_low_color, m_high_color); + + uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); + uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); + + return (l0 == l1) && (h0 == h1); + } + + inline bool operator!=(const dxt1_solution_coordinates& other) const + { + return !(*this == other); + } + + inline bool operator<(const dxt1_solution_coordinates& other) const + { + uint16 l0 = math::minimum(m_low_color, m_high_color); + uint16 h0 = math::maximum(m_low_color, m_high_color); + + uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); + uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); + + if (l0 < l1) + { + return true; + } + else if (l0 == l1) + { + if (h0 < h1) + { + return true; + } + } + + return false; + } + }; + + typedef crnlib::vector dxt1_solution_coordinates_vec; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_solution_coordinates); + + struct CRN_EXPORT unique_color + { + inline unique_color() + { + } + inline unique_color(const color_quad_u8& color, uint weight): + m_color(color), + m_weight(weight) + { + } + + color_quad_u8 m_color; + uint m_weight; + + inline bool operator<(const unique_color& c) const + { + return *reinterpret_cast(&m_color) < *reinterpret_cast(&c.m_color); + } + + inline bool operator==(const unique_color& c) const + { + return *reinterpret_cast(&m_color) == *reinterpret_cast(&c.m_color); + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(unique_color); + + class CRN_EXPORT dxt1_endpoint_optimizer + { + public: + dxt1_endpoint_optimizer(); + + struct params + { + params(): + m_block_index(0), + m_pPixels(NULL), 
+ m_num_pixels(0), + m_dxt1a_alpha_threshold(128U), + m_quality(cCRNDXTQualityUber), + m_pixels_have_alpha(false), + m_use_alpha_blocks(true), + m_perceptual(true), + m_grayscale_sampling(false), + m_endpoint_caching(true), + m_use_transparent_indices_for_black(false), + m_force_alpha_blocks(false) + { + } + + uint m_block_index; + + const color_quad_u8* m_pPixels; + uint m_num_pixels; + uint m_dxt1a_alpha_threshold; + + crn_dxt_quality m_quality; + + bool m_pixels_have_alpha; + bool m_use_alpha_blocks; + bool m_perceptual; + bool m_grayscale_sampling; + bool m_endpoint_caching; + bool m_use_transparent_indices_for_black; + bool m_force_alpha_blocks; + }; + + struct results + { + inline results(): m_pSelectors(NULL) + { + } + + uint64 m_error; + + uint16 m_low_color; + uint16 m_high_color; + + uint8* m_pSelectors; + bool m_alpha_block; + bool m_reordered; + bool m_alternate_rounding; + bool m_enforce_selector; + uint8 m_enforced_selector; + }; + + bool compute(const params& p, results& r); + + private: + const params* m_pParams; + results* m_pResults; + + bool m_perceptual; + bool m_evaluate_hc; + + typedef crnlib::vector unique_color_vec; + + //typedef crnlib::hash_map > unique_color_hash_map; + typedef crnlib::hash_map unique_color_hash_map; + unique_color_hash_map m_unique_color_hash_map; + + unique_color_vec m_unique_colors; // excludes transparent colors! 
+ unique_color_vec m_evaluated_colors; + unique_color_vec m_temp_unique_colors; + + struct { + uint64 low, high; + } m_rDist[32], m_gDist[64], m_bDist[32]; + + uint m_total_unique_color_weight; + + bool m_has_transparent_pixels; + + vec3F_array m_norm_unique_colors; + vec3F m_mean_norm_color; + + vec3F_array m_norm_unique_colors_weighted; + vec3F m_mean_norm_color_weighted; + + vec3F m_principle_axis; + + crnlib::vector m_unique_packed_colors; + crnlib::vector m_trial_selectors; + + crnlib::vector m_low_coords; + crnlib::vector m_high_coords; + + enum { cMaxPrevResults = 4 }; + dxt1_solution_coordinates m_prev_results[cMaxPrevResults]; + uint m_num_prev_results; + + crnlib::vector m_lo_cells; + crnlib::vector m_hi_cells; + + struct potential_solution + { + potential_solution(): + m_coords(), + m_error(cUINT64_MAX), + m_alpha_block(false) + { + } + + dxt1_solution_coordinates m_coords; + crnlib::vector m_selectors; + uint64 m_error; + bool m_alpha_block; + bool m_alternate_rounding; + bool m_enforce_selector; + uint8 m_enforced_selector; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = cUINT64_MAX; + m_alpha_block = false; + } + + bool are_selectors_all_equal() const + { + if (m_selectors.empty()) + { + return false; + } + const uint s = m_selectors[0]; + for (uint i = 1; i < m_selectors.size(); i++) + { + if (m_selectors[i] != s) + { + return false; + } + } + return true; + } + }; + + potential_solution m_trial_solution; + potential_solution m_best_solution; + + typedef crnlib::hash_map solution_hash_map; + solution_hash_map m_solutions_tried; + + bool refine_solution(int refinement_level = 0); + + bool evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding = false); + bool evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding); + bool evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding); + bool evaluate_solution_hc_perceptual(const 
dxt1_solution_coordinates& coords, bool alternate_rounding); + bool evaluate_solution_hc_uniform(const dxt1_solution_coordinates& coords, bool alternate_rounding); + void compute_selectors(); + void compute_selectors_hc(); + + void find_unique_colors(); + void handle_multicolor_block(); + void compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def); + void compute_vectors(const vec3F& perceptual_weights); + void return_solution(); + void try_combinatorial_encoding(); + void compute_endpoint_component_errors(uint comp_index, uint64(&error)[4][256], uint64(&best_remaining_error)[4]); + void optimize_endpoint_comps(); + void optimize_endpoints(vec3F& low_color, vec3F& high_color); + bool try_alpha_as_black_optimization(); + bool try_average_block_as_solid(); + bool try_median4(const vec3F& low_color, const vec3F& high_color); + + void compute_internal(const params& p, results& r); + + unique_color lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding = 1); + + inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha); + }; } // namespace crnlib diff --git a/crnlib/crn_dynamic_string.cpp b/crnlib/crn_dynamic_string.cpp index 5464d78..1ba8621 100644 --- a/crnlib/crn_dynamic_string.cpp +++ b/crnlib/crn_dynamic_string.cpp @@ -1,583 +1,752 @@ // File: crn_dynamic_string.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_strutils.h" -namespace crnlib { -dynamic_string g_empty_dynamic_string; - -dynamic_string::dynamic_string(eVarArg, const char* p, ...) 
- : m_buf_size(0), m_len(0), m_pStr(NULL) { - CRNLIB_ASSERT(p); - - va_list args; - va_start(args, p); - format_args(p, args); - va_end(args); -} - -dynamic_string::dynamic_string(const char* p) - : m_buf_size(0), m_len(0), m_pStr(NULL) { - CRNLIB_ASSERT(p); - set(p); -} - -dynamic_string::dynamic_string(const char* p, uint len) - : m_buf_size(0), m_len(0), m_pStr(NULL) { - CRNLIB_ASSERT(p); - set_from_buf(p, len); -} +namespace crnlib +{ + dynamic_string g_empty_dynamic_string; + + dynamic_string::dynamic_string(eVarArg, const char* p, ...): + m_buf_size(0), + m_len(0), + m_pStr(NULL) + { + CRNLIB_ASSERT(p); + + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + } -dynamic_string::dynamic_string(const dynamic_string& other) - : m_buf_size(0), m_len(0), m_pStr(NULL) { - set(other); -} + dynamic_string::dynamic_string(const char* p): + m_buf_size(0), + m_len(0), + m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set(p); + } -void dynamic_string::clear() { - check(); - - if (m_pStr) { - crnlib_delete_array(m_pStr); - m_pStr = NULL; + dynamic_string::dynamic_string(const char* p, uint len): + m_buf_size(0), + m_len(0), + m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set_from_buf(p, len); + } - m_len = 0; - m_buf_size = 0; - } -} + dynamic_string::dynamic_string(const dynamic_string& other): + m_buf_size(0), + m_len(0), + m_pStr(NULL) + { + set(other); + } -void dynamic_string::empty() { - truncate(0); -} + void dynamic_string::clear() + { + check(); -void dynamic_string::optimize() { - if (!m_len) - clear(); - else { - uint min_buf_size = math::next_pow2((uint)m_len + 1); - if (m_buf_size > min_buf_size) { - char* p = crnlib_new_array(min_buf_size); - memcpy(p, m_pStr, m_len + 1); + if (m_pStr) + { + crnlib_delete_array(m_pStr); + m_pStr = NULL; - crnlib_delete_array(m_pStr); - m_pStr = p; + m_len = 0; + m_buf_size = 0; + } + } - m_buf_size = static_cast(min_buf_size); + void dynamic_string::empty() + { + truncate(0); + } - check(); + void 
dynamic_string::optimize() + { + if (!m_len) + { + clear(); + } + else + { + uint min_buf_size = math::next_pow2((uint)m_len + 1); + if (m_buf_size > min_buf_size) + { + char* p = crnlib_new_array(min_buf_size); + memcpy(p, m_pStr, m_len + 1); + + crnlib_delete_array(m_pStr); + m_pStr = p; + + m_buf_size = static_cast(min_buf_size); + + check(); + } + } } - } -} -int dynamic_string::compare(const char* p, bool case_sensitive) const { - CRNLIB_ASSERT(p); + int dynamic_string::compare(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); - const int result = (case_sensitive ? strcmp : crn_stricmp)(get_ptr_priv(), p); + const int result = (case_sensitive ? strcmp : crn_stricmp)(get_ptr_priv(), p); - if (result < 0) - return -1; - else if (result > 0) - return 1; + if (result < 0) + { + return -1; + } + else if (result > 0) + { + return 1; + } - return 0; -} + return 0; + } -int dynamic_string::compare(const dynamic_string& rhs, bool case_sensitive) const { - return compare(rhs.get_ptr_priv(), case_sensitive); -} + int dynamic_string::compare(const dynamic_string& rhs, bool case_sensitive) const + { + return compare(rhs.get_ptr_priv(), case_sensitive); + } -dynamic_string& dynamic_string::set(const char* p, uint max_len) { - CRNLIB_ASSERT(p); + dynamic_string& dynamic_string::set(const char* p, uint max_len) + { + CRNLIB_ASSERT(p); + + const uint len = math::minimum(max_len, static_cast(strlen(p))); + CRNLIB_ASSERT(len < cUINT16_MAX); + + if ((!len) || (len >= cUINT16_MAX)) + { + clear(); + } + else if ((m_pStr) && (p >= m_pStr) && (p < (m_pStr + m_buf_size))) + { + if (m_pStr != p) + { + memmove(m_pStr, p, len); + } + m_pStr[len] = '\0'; + m_len = static_cast(len); + } + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, p, m_len + 1); + } + + check(); + + return *this; + } - const uint len = math::minimum(max_len, static_cast(strlen(p))); - CRNLIB_ASSERT(len < cUINT16_MAX); + dynamic_string& dynamic_string::set(const 
dynamic_string& other, uint max_len) + { + if (this == &other) + { + if (max_len < m_len) + { + m_pStr[max_len] = '\0'; + m_len = static_cast(max_len); + } + } + else + { + const uint len = math::minimum(max_len, other.m_len); + + if (!len) + { + clear(); + } + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, other.get_ptr_priv(), m_len); + m_pStr[len] = '\0'; + } + } + + check(); + + return *this; + } - if ((!len) || (len >= cUINT16_MAX)) - clear(); - else if ((m_pStr) && (p >= m_pStr) && (p < (m_pStr + m_buf_size))) { - if (m_pStr != p) - memmove(m_pStr, p, len); - m_pStr[len] = '\0'; - m_len = static_cast(len); - } else if (ensure_buf(len, false)) { - m_len = static_cast(len); - memcpy(m_pStr, p, m_len + 1); - } + bool dynamic_string::set_len(uint new_len, char fill_char) + { + if ((new_len >= cUINT16_MAX) || (!fill_char)) + { + CRNLIB_ASSERT(0); + return false; + } - check(); + uint cur_len = m_len; - return *this; -} + if (ensure_buf(new_len, true)) + { + if (new_len > cur_len) + { + memset(m_pStr + cur_len, fill_char, new_len - cur_len); + } -dynamic_string& dynamic_string::set(const dynamic_string& other, uint max_len) { - if (this == &other) { - if (max_len < m_len) { - m_pStr[max_len] = '\0'; - m_len = static_cast(max_len); - } - } else { - const uint len = math::minimum(max_len, other.m_len); + m_pStr[new_len] = 0; - if (!len) - clear(); - else if (ensure_buf(len, false)) { - m_len = static_cast(len); - memcpy(m_pStr, other.get_ptr_priv(), m_len); - m_pStr[len] = '\0'; - } - } + m_len = static_cast(new_len); - check(); + check(); + } - return *this; -} + return true; + } -bool dynamic_string::set_len(uint new_len, char fill_char) { - if ((new_len >= cUINT16_MAX) || (!fill_char)) { - CRNLIB_ASSERT(0); - return false; - } + dynamic_string& dynamic_string::set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars) + { + CRNLIB_ASSERT(buf_size_in_chars <= cUINT16_MAX); + 
CRNLIB_ASSERT(math::is_power_of_2(buf_size_in_chars) || (buf_size_in_chars == cUINT16_MAX)); + CRNLIB_ASSERT((len_in_chars + 1) <= buf_size_in_chars); - uint cur_len = m_len; + clear(); - if (ensure_buf(new_len, true)) { - if (new_len > cur_len) - memset(m_pStr + cur_len, fill_char, new_len - cur_len); + m_pStr = pBuf; + m_buf_size = static_cast(buf_size_in_chars); + m_len = static_cast(len_in_chars); - m_pStr[new_len] = 0; + check(); - m_len = static_cast(new_len); + return *this; + } - check(); - } + dynamic_string& dynamic_string::set_from_buf(const void* pBuf, uint buf_size) + { + CRNLIB_ASSERT(pBuf); - return true; -} + if (buf_size >= cUINT16_MAX) + { + clear(); + return *this; + } -dynamic_string& dynamic_string::set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars) { - CRNLIB_ASSERT(buf_size_in_chars <= cUINT16_MAX); - CRNLIB_ASSERT(math::is_power_of_2(buf_size_in_chars) || (buf_size_in_chars == cUINT16_MAX)); - CRNLIB_ASSERT((len_in_chars + 1) <= buf_size_in_chars); +#ifdef CRNLIB_BUILD_DEBUG + if ((buf_size) && (memchr(pBuf, 0, buf_size) != NULL)) + { + CRNLIB_ASSERT(0); + clear(); + return *this; + } +#endif - clear(); + if (ensure_buf(buf_size, false)) + { + if (buf_size) + { + memcpy(m_pStr, pBuf, buf_size); + } - m_pStr = pBuf; - m_buf_size = static_cast(buf_size_in_chars); - m_len = static_cast(len_in_chars); + m_pStr[buf_size] = 0; - check(); + m_len = static_cast(buf_size); - return *this; -} + check(); + } -dynamic_string& dynamic_string::set_from_buf(const void* pBuf, uint buf_size) { - CRNLIB_ASSERT(pBuf); + return *this; + } - if (buf_size >= cUINT16_MAX) { - clear(); - return *this; - } + dynamic_string& dynamic_string::set_char(uint index, char c) + { + CRNLIB_ASSERT(index <= m_len); + + if (!c) + { + truncate(index); + } + else if (index < m_len) + { + m_pStr[index] = c; + + check(); + } + else if (index == m_len) + { + append_char(c); + } + + return *this; + } -#ifdef CRNLIB_BUILD_DEBUG - if 
((buf_size) && (memchr(pBuf, 0, buf_size) != NULL)) { - CRNLIB_ASSERT(0); - clear(); - return *this; - } -#endif + dynamic_string& dynamic_string::append_char(char c) + { + if (ensure_buf(m_len + 1)) + { + m_pStr[m_len] = c; + m_pStr[m_len + 1] = '\0'; + m_len++; + check(); + } + + return *this; + } - if (ensure_buf(buf_size, false)) { - if (buf_size) - memcpy(m_pStr, pBuf, buf_size); + dynamic_string& dynamic_string::truncate(uint new_len) + { + if (new_len < m_len) + { + m_pStr[new_len] = '\0'; + m_len = static_cast(new_len); + check(); + } + return *this; + } - m_pStr[buf_size] = 0; + dynamic_string& dynamic_string::tolower() + { + if (m_len) + { +#if defined(CRN_CC_MSVC) + _strlwr_s(get_ptr_priv(), m_buf_size); +#else + strlwr(get_ptr_priv()); +#endif + } + return *this; + } - m_len = static_cast(buf_size); + dynamic_string& dynamic_string::toupper() + { + if (m_len) + { +#if defined(CRN_CC_MSVC) + _strupr_s(get_ptr_priv(), m_buf_size); +#else + strupr(get_ptr_priv()); +#endif + } + return *this; + } - check(); - } + dynamic_string& dynamic_string::append(const char* p) + { + CRNLIB_ASSERT(p); - return *this; -} + uint len = static_cast(strlen(p)); + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + memcpy(m_pStr + m_len, p, len + 1); + m_len = static_cast(m_len + len); + check(); + } -dynamic_string& dynamic_string::set_char(uint index, char c) { - CRNLIB_ASSERT(index <= m_len); + return *this; + } - if (!c) - truncate(index); - else if (index < m_len) { - m_pStr[index] = c; + dynamic_string& dynamic_string::append(const dynamic_string& other) + { + uint len = other.m_len; + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + memcpy(m_pStr + m_len, other.get_ptr_priv(), len + 1); + m_len = static_cast(m_len + len); + check(); + } + + return *this; + } - check(); - } else if (index == m_len) - append_char(c); + dynamic_string operator+(const char* p, const dynamic_string& a) + { + 
return dynamic_string(p).append(a); + } - return *this; -} + dynamic_string operator+(const dynamic_string& a, const char* p) + { + return dynamic_string(a).append(p); + } -dynamic_string& dynamic_string::append_char(char c) { - if (ensure_buf(m_len + 1)) { - m_pStr[m_len] = c; - m_pStr[m_len + 1] = '\0'; - m_len++; - check(); - } + dynamic_string operator+(const dynamic_string& a, const dynamic_string& b) + { + return dynamic_string(a).append(b); + } - return *this; -} + dynamic_string& dynamic_string::format_args(const char* p, va_list args) + { + CRNLIB_ASSERT(p); -dynamic_string& dynamic_string::truncate(uint new_len) { - if (new_len < m_len) { - m_pStr[new_len] = '\0'; - m_len = static_cast(new_len); - check(); - } - return *this; -} + const uint cBufSize = 4096; + char buf[cBufSize]; -dynamic_string& dynamic_string::tolower() { - if (m_len) { -#ifdef _MSC_VER - _strlwr_s(get_ptr_priv(), m_buf_size); -#else - strlwr(get_ptr_priv()); -#endif - } - return *this; -} - -dynamic_string& dynamic_string::toupper() { - if (m_len) { -#ifdef _MSC_VER - _strupr_s(get_ptr_priv(), m_buf_size); +#if defined(CRN_CC_MSVC) + int l = vsnprintf_s(buf, cBufSize, _TRUNCATE, p, args); #else - strupr(get_ptr_priv()); + int l = vsnprintf(buf, cBufSize, p, args); #endif - } - return *this; -} - -dynamic_string& dynamic_string::append(const char* p) { - CRNLIB_ASSERT(p); - - uint len = static_cast(strlen(p)); - uint new_total_len = m_len + len; - if ((new_total_len) && ensure_buf(new_total_len)) { - memcpy(m_pStr + m_len, p, len + 1); - m_len = static_cast(m_len + len); - check(); - } - - return *this; -} - -dynamic_string& dynamic_string::append(const dynamic_string& other) { - uint len = other.m_len; - uint new_total_len = m_len + len; - if ((new_total_len) && ensure_buf(new_total_len)) { - memcpy(m_pStr + m_len, other.get_ptr_priv(), len + 1); - m_len = static_cast(m_len + len); - check(); - } - - return *this; -} - -dynamic_string operator+(const char* p, const dynamic_string& a) { 
- return dynamic_string(p).append(a); -} - -dynamic_string operator+(const dynamic_string& a, const char* p) { - return dynamic_string(a).append(p); -} - -dynamic_string operator+(const dynamic_string& a, const dynamic_string& b) { - return dynamic_string(a).append(b); -} - -dynamic_string& dynamic_string::format_args(const char* p, va_list args) { - CRNLIB_ASSERT(p); - - const uint cBufSize = 4096; - char buf[cBufSize]; - -#ifdef _MSC_VER - int l = vsnprintf_s(buf, cBufSize, _TRUNCATE, p, args); -#else - int l = vsnprintf(buf, cBufSize, p, args); -#endif - if (l <= 0) - clear(); - else if (ensure_buf(l, false)) { - memcpy(m_pStr, buf, l + 1); + if (l <= 0) + { + clear(); + } + else if (ensure_buf(l, false)) + { + memcpy(m_pStr, buf, l + 1); - m_len = static_cast(l); + m_len = static_cast(l); - check(); - } + check(); + } - return *this; -} + return *this; + } -dynamic_string& dynamic_string::format(const char* p, ...) { - CRNLIB_ASSERT(p); + dynamic_string& dynamic_string::format(const char* p, ...) 
+ { + CRNLIB_ASSERT(p); - va_list args; - va_start(args, p); - format_args(p, args); - va_end(args); - return *this; -} + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + return *this; + } -dynamic_string& dynamic_string::crop(uint start, uint len) { - if (start >= m_len) { - clear(); - return *this; - } + dynamic_string& dynamic_string::crop(uint start, uint len) + { + if (start >= m_len) + { + clear(); + return *this; + } - len = math::minimum(len, m_len - start); + len = math::minimum(len, m_len - start); - if (start) - memmove(get_ptr_priv(), get_ptr_priv() + start, len); + if (start) + { + memmove(get_ptr_priv(), get_ptr_priv() + start, len); + } - m_pStr[len] = '\0'; + m_pStr[len] = '\0'; - m_len = static_cast(len); + m_len = static_cast(len); - check(); + check(); - return *this; -} + return *this; + } -dynamic_string& dynamic_string::substring(uint start, uint end) { - CRNLIB_ASSERT(start <= end); - if (start > end) - return *this; - return crop(start, end - start); -} + dynamic_string& dynamic_string::substring(uint start, uint end) + { + CRNLIB_ASSERT(start <= end); + if (start > end) + { + return *this; + } + return crop(start, end - start); + } -dynamic_string& dynamic_string::left(uint len) { - return substring(0, len); -} + dynamic_string& dynamic_string::left(uint len) + { + return substring(0, len); + } -dynamic_string& dynamic_string::mid(uint start, uint len) { - return crop(start, len); -} + dynamic_string& dynamic_string::mid(uint start, uint len) + { + return crop(start, len); + } -dynamic_string& dynamic_string::right(uint start) { - return substring(start, get_len()); -} + dynamic_string& dynamic_string::right(uint start) + { + return substring(start, get_len()); + } -dynamic_string& dynamic_string::tail(uint num) { - return substring(math::maximum(static_cast(get_len()) - static_cast(num), 0), get_len()); -} + dynamic_string& dynamic_string::tail(uint num) + { + return 
substring(math::maximum(static_cast(get_len()) - static_cast(num), 0), get_len()); + } -dynamic_string& dynamic_string::unquote() { - if (m_len >= 2) { - if (((*this)[0] == '\"') && ((*this)[m_len - 1] == '\"')) { - return mid(1, m_len - 2); + dynamic_string& dynamic_string::unquote() + { + if (m_len >= 2) + { + if (((*this)[0] == '\"') && ((*this)[m_len - 1] == '\"')) + { + return mid(1, m_len - 2); + } + } + + return *this; } - } - return *this; -} + int dynamic_string::find_left(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); -int dynamic_string::find_left(const char* p, bool case_sensitive) const { - CRNLIB_ASSERT(p); + const int p_len = (int)strlen(p); - const int p_len = (int)strlen(p); + for (int i = 0; i <= (m_len - p_len); i++) + { + if ((case_sensitive ? strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) + { + return i; + } + } - for (int i = 0; i <= (m_len - p_len); i++) - if ((case_sensitive ? strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) - return i; + return -1; + } - return -1; -} + bool dynamic_string::contains(const char* p, bool case_sensitive) const + { + return find_left(p, case_sensitive) >= 0; + } -bool dynamic_string::contains(const char* p, bool case_sensitive) const { - return find_left(p, case_sensitive) >= 0; -} + uint dynamic_string::count_char(char c) const + { + uint count = 0; + for (uint i = 0; i < m_len; i++) + { + if (m_pStr[i] == c) + { + count++; + } + } + return count; + } -uint dynamic_string::count_char(char c) const { - uint count = 0; - for (uint i = 0; i < m_len; i++) - if (m_pStr[i] == c) - count++; - return count; -} + int dynamic_string::find_left(char c) const + { + for (uint i = 0; i < m_len; i++) + { + if (m_pStr[i] == c) + { + return i; + } + } + return -1; + } -int dynamic_string::find_left(char c) const { - for (uint i = 0; i < m_len; i++) - if (m_pStr[i] == c) - return i; - return -1; -} + int dynamic_string::find_right(char c) const + { + for (int i = (int)m_len - 1; i >= 0; i--) + { + if 
(m_pStr[i] == c) + { + return i; + } + } + return -1; + } -int dynamic_string::find_right(char c) const { - for (int i = (int)m_len - 1; i >= 0; i--) - if (m_pStr[i] == c) - return i; - return -1; -} + int dynamic_string::find_right(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + const int p_len = (int)strlen(p); -int dynamic_string::find_right(const char* p, bool case_sensitive) const { - CRNLIB_ASSERT(p); - const int p_len = (int)strlen(p); + for (int i = m_len - p_len; i >= 0; i--) + { + if ((case_sensitive ? strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) + { + return i; + } + } - for (int i = m_len - p_len; i >= 0; i--) - if ((case_sensitive ? strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) - return i; + return -1; + } - return -1; -} + dynamic_string& dynamic_string::trim() { + int s, e; + for (s = 0; s < (int)m_len; s++) + { + if (!isspace(m_pStr[s])) + { + break; + } + } + + for (e = m_len - 1; e > s; e--) + { + if (!isspace(m_pStr[e])) + { + break; + } + } + + return crop(s, e - s + 1); + } -dynamic_string& dynamic_string::trim() { - int s, e; - for (s = 0; s < (int)m_len; s++) - if (!isspace(m_pStr[s])) - break; + dynamic_string& dynamic_string::trim_crlf() { + int s = 0, e; - for (e = m_len - 1; e > s; e--) - if (!isspace(m_pStr[e])) - break; + for (e = m_len - 1; e > s; e--) + { + if ((m_pStr[e] != 13) && (m_pStr[e] != 10)) + { + break; + } + } - return crop(s, e - s + 1); -} + return crop(s, e - s + 1); + } -dynamic_string& dynamic_string::trim_crlf() { - int s = 0, e; - - for (e = m_len - 1; e > s; e--) - if ((m_pStr[e] != 13) && (m_pStr[e] != 10)) - break; - - return crop(s, e - s + 1); -} - -dynamic_string& dynamic_string::remap(int from_char, int to_char) { - for (uint i = 0; i < m_len; i++) - if (m_pStr[i] == from_char) - m_pStr[i] = (char)to_char; - return *this; -} + dynamic_string& dynamic_string::remap(int from_char, int to_char) { + for (uint i = 0; i < m_len; i++) + { + if (m_pStr[i] == from_char) + { + m_pStr[i] = 
(char)to_char; + } + } + return *this; + } #ifdef CRNLIB_BUILD_DEBUG -void dynamic_string::check() const { - if (!m_pStr) { - CRNLIB_ASSERT(!m_buf_size && !m_len); - } else { - CRNLIB_ASSERT(m_buf_size); - CRNLIB_ASSERT((m_buf_size == cUINT16_MAX) || math::is_power_of_2((uint32)m_buf_size)); - CRNLIB_ASSERT(m_len < m_buf_size); - CRNLIB_ASSERT(!m_pStr[m_len]); + void dynamic_string::check() const + { + if (!m_pStr) + { + CRNLIB_ASSERT(!m_buf_size && !m_len); + } + else + { + CRNLIB_ASSERT(m_buf_size); + CRNLIB_ASSERT((m_buf_size == cUINT16_MAX) || math::is_power_of_2((uint32)m_buf_size)); + CRNLIB_ASSERT(m_len < m_buf_size); + CRNLIB_ASSERT(!m_pStr[m_len]); #if CRNLIB_SLOW_STRING_LEN_CHECKS - CRNLIB_ASSERT(strlen(m_pStr) == m_len); + CRNLIB_ASSERT(strlen(m_pStr) == m_len); #endif - } -} + } + } #endif -bool dynamic_string::ensure_buf(uint len, bool preserve_contents) { - uint buf_size_needed = len + 1; + bool dynamic_string::ensure_buf(uint len, bool preserve_contents) + { + uint buf_size_needed = len + 1; - CRNLIB_ASSERT(buf_size_needed <= cUINT16_MAX); + CRNLIB_ASSERT(buf_size_needed <= cUINT16_MAX); - if (buf_size_needed <= cUINT16_MAX) { - if (buf_size_needed > m_buf_size) - expand_buf(buf_size_needed, preserve_contents); - } + if (buf_size_needed <= cUINT16_MAX) + { + if (buf_size_needed > m_buf_size) + expand_buf(buf_size_needed, preserve_contents); + } - return m_buf_size >= buf_size_needed; -} - -bool dynamic_string::expand_buf(uint new_buf_size, bool preserve_contents) { - new_buf_size = math::minimum(cUINT16_MAX, math::next_pow2(math::maximum(m_buf_size, new_buf_size))); + return m_buf_size >= buf_size_needed; + } - if (new_buf_size != m_buf_size) { - char* p = crnlib_new_array(new_buf_size); + bool dynamic_string::expand_buf(uint new_buf_size, bool preserve_contents) + { + new_buf_size = math::minimum(cUINT16_MAX, math::next_pow2(math::maximum(m_buf_size, new_buf_size))); - if (preserve_contents) - memcpy(p, get_ptr_priv(), m_len + 1); + if (new_buf_size 
!= m_buf_size) + { + char* p = crnlib_new_array(new_buf_size); - crnlib_delete_array(m_pStr); - m_pStr = p; + if (preserve_contents) + { + memcpy(p, get_ptr_priv(), m_len + 1); + } - m_buf_size = static_cast(new_buf_size); + crnlib_delete_array(m_pStr); + m_pStr = p; - if (preserve_contents) - check(); - } + m_buf_size = static_cast(new_buf_size); - return m_buf_size >= new_buf_size; -} + if (preserve_contents) + { + check(); + } + } -void dynamic_string::swap(dynamic_string& other) { - utils::swap(other.m_buf_size, m_buf_size); - utils::swap(other.m_len, m_len); - utils::swap(other.m_pStr, m_pStr); -} + return m_buf_size >= new_buf_size; + } -int dynamic_string::serialize(void* pBuf, uint buf_size, bool little_endian) const { - uint buf_left = buf_size; + void dynamic_string::swap(dynamic_string& other) + { + utils::swap(other.m_buf_size, m_buf_size); + utils::swap(other.m_len, m_len); + utils::swap(other.m_pStr, m_pStr); + } - //if (m_len > cUINT16_MAX) - // return -1; - CRNLIB_ASSUME(sizeof(m_len) == sizeof(uint16)); + int dynamic_string::serialize(void* pBuf, uint buf_size, bool little_endian) const + { + uint buf_left = buf_size; - if (!utils::write_val((uint16)m_len, pBuf, buf_left, little_endian)) - return -1; + //if (m_len > cUINT16_MAX) + // return -1; + CRNLIB_ASSUME(sizeof(m_len) == sizeof(uint16)); - if (buf_left < m_len) - return -1; + if (!utils::write_val((uint16)m_len, pBuf, buf_left, little_endian)) + { + return -1; + } - memcpy(pBuf, get_ptr(), m_len); + if (buf_left < m_len) + { + return -1; + } - buf_left -= m_len; + memcpy(pBuf, get_ptr(), m_len); - return buf_size - buf_left; -} + buf_left -= m_len; -int dynamic_string::deserialize(const void* pBuf, uint buf_size, bool little_endian) { - uint buf_left = buf_size; + return buf_size - buf_left; + } - if (buf_left < sizeof(uint16)) - return -1; + int dynamic_string::deserialize(const void* pBuf, uint buf_size, bool little_endian) + { + uint buf_left = buf_size; - uint16 l; - if 
(!utils::read_obj(l, pBuf, buf_left, little_endian)) - return -1; + if (buf_left < sizeof(uint16)) + { + return -1; + } - if (buf_left < l) - return -1; + uint16 l; + if (!utils::read_obj(l, pBuf, buf_left, little_endian)) + { + return -1; + } - set_from_buf(pBuf, l); + if (buf_left < l) + { + return -1; + } - buf_left -= l; + set_from_buf(pBuf, l); - return buf_size - buf_left; -} + buf_left -= l; -void dynamic_string::translate_lf_to_crlf() { - if (find_left(0x0A) < 0) - return; + return buf_size - buf_left; + } - dynamic_string tmp; - tmp.ensure_buf(m_len + 2); + void dynamic_string::translate_lf_to_crlf() + { + if (find_left(0x0A) < 0) + { + return; + } - // normal sequence is 0x0D 0x0A (CR LF, \r\n) + dynamic_string tmp; + tmp.ensure_buf(m_len + 2); - int prev_char = -1; - for (uint i = 0; i < get_len(); i++) { - const int cur_char = (*this)[i]; + // normal sequence is 0x0D 0x0A (CR LF, \r\n) - if ((cur_char == 0x0A) && (prev_char != 0x0D)) - tmp.append_char(0x0D); + int prev_char = -1; + for (uint i = 0; i < get_len(); i++) + { + const int cur_char = (*this)[i]; - tmp.append_char(cur_char); + if ((cur_char == 0x0A) && (prev_char != 0x0D)) + { + tmp.append_char(0x0D); + } - prev_char = cur_char; - } + tmp.append_char(cur_char); - swap(tmp); -} + prev_char = cur_char; + } + swap(tmp); + } } // namespace crnlib diff --git a/crnlib/crn_dynamic_string.h b/crnlib/crn_dynamic_string.h index f437455..7598b40 100644 --- a/crnlib/crn_dynamic_string.h +++ b/crnlib/crn_dynamic_string.h @@ -1,187 +1,287 @@ // File: crn_dynamic_string.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -enum { cMaxDynamicStringLen = cUINT16_MAX - 1 }; -class CRN_EXPORT dynamic_string { - public: - inline dynamic_string() - : m_buf_size(0), m_len(0), m_pStr(NULL) {} - dynamic_string(eVarArg dummy, const char* p, ...); - dynamic_string(const char* p); - dynamic_string(const char* p, uint len); - dynamic_string(const 
dynamic_string& other); - - inline ~dynamic_string() { - if (m_pStr) - crnlib_delete_array(m_pStr); - } - - // Truncates the string to 0 chars and frees the buffer. - void clear(); - void optimize(); - - // Truncates the string to 0 chars, but does not free the buffer. - void empty(); - inline const char* assume_ownership() { - const char* p = m_pStr; - m_pStr = NULL; - m_len = 0; - m_buf_size = 0; - return p; - } - - inline uint get_len() const { return m_len; } - inline bool is_empty() const { return !m_len; } - - inline const char* get_ptr() const { return m_pStr ? m_pStr : ""; } - inline const char* c_str() const { return get_ptr(); } - - inline const char* get_ptr_raw() const { return m_pStr; } - inline char* get_ptr_raw() { return m_pStr; } - - inline char front() const { return m_len ? m_pStr[0] : '\0'; } - inline char back() const { return m_len ? m_pStr[m_len - 1] : '\0'; } - - inline char operator[](uint i) const { - CRNLIB_ASSERT(i <= m_len); - return get_ptr()[i]; - } - - inline operator size_t() const { return fast_hash(get_ptr(), m_len) ^ fast_hash(&m_len, sizeof(m_len)); } - - int compare(const char* p, bool case_sensitive = false) const; - int compare(const dynamic_string& rhs, bool case_sensitive = false) const; - - inline bool operator==(const dynamic_string& rhs) const { return compare(rhs) == 0; } - inline bool operator==(const char* p) const { return compare(p) == 0; } - - inline bool operator!=(const dynamic_string& rhs) const { return compare(rhs) != 0; } - inline bool operator!=(const char* p) const { return compare(p) != 0; } - - inline bool operator<(const dynamic_string& rhs) const { return compare(rhs) < 0; } - inline bool operator<(const char* p) const { return compare(p) < 0; } - - inline bool operator>(const dynamic_string& rhs) const { return compare(rhs) > 0; } - inline bool operator>(const char* p) const { return compare(p) > 0; } - - inline bool operator<=(const dynamic_string& rhs) const { return compare(rhs) <= 0; } - inline 
bool operator<=(const char* p) const { return compare(p) <= 0; } - - inline bool operator>=(const dynamic_string& rhs) const { return compare(rhs) >= 0; } - inline bool operator>=(const char* p) const { return compare(p) >= 0; } - - friend inline bool operator==(const char* p, const dynamic_string& rhs) { return rhs.compare(p) == 0; } - - dynamic_string& set(const char* p, uint max_len = UINT_MAX); - dynamic_string& set(const dynamic_string& other, uint max_len = UINT_MAX); - - bool set_len(uint new_len, char fill_char = ' '); - - // Set from non-zero terminated buffer. - dynamic_string& set_from_buf(const void* pBuf, uint buf_size); - - dynamic_string& operator=(const dynamic_string& rhs) { return set(rhs); } - dynamic_string& operator=(const char* p) { return set(p); } - - dynamic_string& set_char(uint index, char c); - dynamic_string& append_char(char c); - dynamic_string& append_char(int c) { - CRNLIB_ASSERT((c >= 0) && (c <= 255)); - return append_char(static_cast(c)); - } - dynamic_string& truncate(uint new_len); - dynamic_string& tolower(); - dynamic_string& toupper(); - - dynamic_string& append(const char* p); - dynamic_string& append(const dynamic_string& other); - dynamic_string& operator+=(const char* p) { return append(p); } - dynamic_string& operator+=(const dynamic_string& other) { return append(other); } - - CRN_EXPORT friend dynamic_string operator+(const char* p, const dynamic_string& a); - CRN_EXPORT friend dynamic_string operator+(const dynamic_string& a, const char* p); - CRN_EXPORT friend dynamic_string operator+(const dynamic_string& a, const dynamic_string& b); - - dynamic_string& format_args(const char* p, va_list args); - dynamic_string& format(const char* p, ...); - - dynamic_string& crop(uint start, uint len); - dynamic_string& substring(uint start, uint end); - dynamic_string& left(uint len); - dynamic_string& mid(uint start, uint len); - dynamic_string& right(uint start); - dynamic_string& tail(uint num); - - dynamic_string& unquote(); 
- - uint count_char(char c) const; - - int find_left(const char* p, bool case_sensitive = false) const; - int find_left(char c) const; - - int find_right(char c) const; - int find_right(const char* p, bool case_sensitive = false) const; - - bool contains(const char* p, bool case_sensitive = false) const; - - dynamic_string& trim(); - dynamic_string& trim_crlf(); - - dynamic_string& remap(int from_char, int to_char); - - void swap(dynamic_string& other); - - // Returns -1 on failure, or the number of bytes written. - int serialize(void* pBuf, uint buf_size, bool little_endian) const; - - // Returns -1 on failure, or the number of bytes read. - int deserialize(const void* pBuf, uint buf_size, bool little_endian); - - void translate_lf_to_crlf(); - - static inline char* create_raw_buffer(uint& buf_size_in_chars); - static inline void free_raw_buffer(char* p) { crnlib_delete_array(p); } - dynamic_string& set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars); - - private: - uint16 m_buf_size; - uint16 m_len; - char* m_pStr; +namespace crnlib +{ + enum { cMaxDynamicStringLen = cUINT16_MAX - 1 }; + + class CRN_EXPORT dynamic_string + { + public: + inline dynamic_string(): + m_buf_size(0), + m_len(0), + m_pStr(NULL) + { + } + dynamic_string(eVarArg dummy, const char* p, ...); + dynamic_string(const char* p); + dynamic_string(const char* p, uint len); + dynamic_string(const dynamic_string& other); + + inline ~dynamic_string() + { + if (m_pStr) + { + crnlib_delete_array(m_pStr); + } + } + + // Truncates the string to 0 chars and frees the buffer. + void clear(); + void optimize(); + + // Truncates the string to 0 chars, but does not free the buffer. 
+ void empty(); + inline const char* assume_ownership() + { + const char* p = m_pStr; + m_pStr = NULL; + m_len = 0; + m_buf_size = 0; + return p; + } + + inline uint get_len() const + { + return m_len; + } + inline bool is_empty() const + { + return !m_len; + } + + inline const char* get_ptr() const + { + return m_pStr ? m_pStr : ""; + } + inline const char* c_str() const + { + return get_ptr(); + } + + inline const char* get_ptr_raw() const + { + return m_pStr; + } + inline char* get_ptr_raw() + { + return m_pStr; + } + + inline char front() const + { + return m_len ? m_pStr[0] : '\0'; + } + inline char back() const + { + return m_len ? m_pStr[m_len - 1] : '\0'; + } + + inline char operator[](uint i) const + { + CRNLIB_ASSERT(i <= m_len); + return get_ptr()[i]; + } + + inline operator size_t() const + { + return fast_hash(get_ptr(), m_len) ^ fast_hash(&m_len, sizeof(m_len)); + } + + int compare(const char* p, bool case_sensitive = false) const; + int compare(const dynamic_string& rhs, bool case_sensitive = false) const; + + inline bool operator==(const dynamic_string& rhs) const + { + return compare(rhs) == 0; + } + inline bool operator==(const char* p) const + { + return compare(p) == 0; + } + + inline bool operator!=(const dynamic_string& rhs) const + { + return compare(rhs) != 0; + } + inline bool operator!=(const char* p) const { + return compare(p) != 0; + } + + inline bool operator<(const dynamic_string& rhs) const + { + return compare(rhs) < 0; + } + inline bool operator<(const char* p) const + { + return compare(p) < 0; + } + + inline bool operator>(const dynamic_string& rhs) const + { + return compare(rhs) > 0; + } + inline bool operator>(const char* p) const + { + return compare(p) > 0; + } + + inline bool operator<=(const dynamic_string& rhs) const + { + return compare(rhs) <= 0; + } + inline bool operator<=(const char* p) const + { + return compare(p) <= 0; + } + + inline bool operator>=(const dynamic_string& rhs) const + { + return compare(rhs) >= 0; 
+ } + inline bool operator>=(const char* p) const + { + return compare(p) >= 0; + } + + friend inline bool operator==(const char* p, const dynamic_string& rhs) + { + return rhs.compare(p) == 0; + } + + dynamic_string& set(const char* p, uint max_len = UINT_MAX); + dynamic_string& set(const dynamic_string& other, uint max_len = UINT_MAX); + + bool set_len(uint new_len, char fill_char = ' '); + + // Set from non-zero terminated buffer. + dynamic_string& set_from_buf(const void* pBuf, uint buf_size); + + dynamic_string& operator=(const dynamic_string& rhs) + { + return set(rhs); + } + dynamic_string& operator=(const char* p) + { + return set(p); + } + + dynamic_string& set_char(uint index, char c); + dynamic_string& append_char(char c); + dynamic_string& append_char(int c) + { + CRNLIB_ASSERT((c >= 0) && (c <= 255)); + return append_char(static_cast(c)); + } + dynamic_string& truncate(uint new_len); + dynamic_string& tolower(); + dynamic_string& toupper(); + + dynamic_string& append(const char* p); + dynamic_string& append(const dynamic_string& other); + dynamic_string& operator+=(const char* p) + { + return append(p); + } + dynamic_string& operator+=(const dynamic_string& other) + { + return append(other); + } + + CRN_EXPORT friend dynamic_string operator+(const char* p, const dynamic_string& a); + CRN_EXPORT friend dynamic_string operator+(const dynamic_string& a, const char* p); + CRN_EXPORT friend dynamic_string operator+(const dynamic_string& a, const dynamic_string& b); + + dynamic_string& format_args(const char* p, va_list args); + dynamic_string& format(const char* p, ...); + + dynamic_string& crop(uint start, uint len); + dynamic_string& substring(uint start, uint end); + dynamic_string& left(uint len); + dynamic_string& mid(uint start, uint len); + dynamic_string& right(uint start); + dynamic_string& tail(uint num); + + dynamic_string& unquote(); + + uint count_char(char c) const; + + int find_left(const char* p, bool case_sensitive = false) const; + int 
find_left(char c) const; + + int find_right(char c) const; + int find_right(const char* p, bool case_sensitive = false) const; + + bool contains(const char* p, bool case_sensitive = false) const; + + dynamic_string& trim(); + dynamic_string& trim_crlf(); + + dynamic_string& remap(int from_char, int to_char); + + void swap(dynamic_string& other); + + // Returns -1 on failure, or the number of bytes written. + int serialize(void* pBuf, uint buf_size, bool little_endian) const; + + // Returns -1 on failure, or the number of bytes read. + int deserialize(const void* pBuf, uint buf_size, bool little_endian); + + void translate_lf_to_crlf(); + + static inline char* create_raw_buffer(uint& buf_size_in_chars); + static inline void free_raw_buffer(char* p) { crnlib_delete_array(p); } + dynamic_string& set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars); + + private: + uint16 m_buf_size; + uint16 m_len; + char* m_pStr; #ifdef CRNLIB_BUILD_DEBUG - void check() const; + void check() const; #else - inline void check() const {} + inline void check() const {} #endif - bool expand_buf(uint new_buf_size, bool preserve_contents); - - const char* get_ptr_priv() const { return m_pStr ? m_pStr : ""; } - char* get_ptr_priv() { return (char*)(m_pStr ? 
m_pStr : ""); } - - bool ensure_buf(uint len, bool preserve_contents = true); -}; - -typedef crnlib::vector dynamic_string_array; - -CRN_EXPORT extern dynamic_string g_empty_dynamic_string; - -CRNLIB_DEFINE_BITWISE_MOVABLE(dynamic_string); - -inline void swap(dynamic_string& a, dynamic_string& b) { - a.swap(b); -} - -inline char* dynamic_string::create_raw_buffer(uint& buf_size_in_chars) { - if (buf_size_in_chars > cUINT16_MAX) { - CRNLIB_ASSERT(0); - return NULL; - } - buf_size_in_chars = math::minimum(cUINT16_MAX, math::next_pow2(buf_size_in_chars)); - return crnlib_new_array(buf_size_in_chars); -} + bool expand_buf(uint new_buf_size, bool preserve_contents); + + const char* get_ptr_priv() const + { + return m_pStr ? m_pStr : ""; + } + char* get_ptr_priv() + { + return (char*)(m_pStr ? m_pStr : ""); + } + + bool ensure_buf(uint len, bool preserve_contents = true); + }; + + typedef crnlib::vector dynamic_string_array; + + CRN_EXPORT extern dynamic_string g_empty_dynamic_string; + + CRNLIB_DEFINE_BITWISE_MOVABLE(dynamic_string); + + inline void swap(dynamic_string& a, dynamic_string& b) + { + a.swap(b); + } + + inline char* dynamic_string::create_raw_buffer(uint& buf_size_in_chars) + { + if (buf_size_in_chars > cUINT16_MAX) + { + CRNLIB_ASSERT(0); + return NULL; + } + buf_size_in_chars = math::minimum(cUINT16_MAX, math::next_pow2(buf_size_in_chars)); + return crnlib_new_array(buf_size_in_chars); + } } // namespace crnlib diff --git a/crnlib/crn_helpers.h b/crnlib/crn_helpers.h index ea45e40..23cbb44 100644 --- a/crnlib/crn_helpers.h +++ b/crnlib/crn_helpers.h @@ -1,5 +1,6 @@ // File: crn_helpers.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #define CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(c) \ @@ -10,53 +11,66 @@ static void* operator new(size_t); \ static void* operator new[](size_t); -namespace crnlib { -namespace helpers { -template -struct rel_ops { - friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } - friend bool 
operator>(const T& x, const T& y) { return (y < x); } - friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } - friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } -}; - -template -inline T* construct(T* p) { - return new (static_cast(p)) T; -} - -template -inline T* construct(T* p, const U& init) { - return new (static_cast(p)) T(init); -} - -template -inline void construct_array(T* p, uint n) { - T* q = p + n; - for (; p != q; ++p) - new (static_cast(p)) T; -} - -template -inline void construct_array(T* p, uint n, const U& init) { - T* q = p + n; - for (; p != q; ++p) - new (static_cast(p)) T(init); -} - -template -inline void destruct(T* p) { - (void)p; - p->~T(); -} - -template -inline void destruct_array(T* p, uint n) { - T* q = p + n; - for (; p != q; ++p) - p->~T(); -} - -} // namespace helpers +namespace crnlib +{ + namespace helpers + { + template + struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator>(const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + inline void construct_array(T* p, uint n) + { + T* q = p + n; + for (; p != q; ++p) + { + new (static_cast(p)) T; + } + } + + template + inline void construct_array(T* p, uint n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + { + new (static_cast(p)) T(init); + } + } + + template + inline void destruct(T* p) + { + (void)p; + p->~T(); + } + + template + inline void destruct_array(T* p, uint n) { + T* q = p + n; + for (; p != q; ++p) + { + p->~T(); + } + } + } // namespace helpers } // namespace crnlib diff --git a/crnlib/crn_huffman_codes.cpp 
b/crnlib/crn_huffman_codes.cpp index e98eddc..cd48f8a 100644 --- a/crnlib/crn_huffman_codes.cpp +++ b/crnlib/crn_huffman_codes.cpp @@ -1,366 +1,431 @@ // File: crn_huffman_codes.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_huffman_codes.h" -namespace crnlib { -struct sym_freq { - uint m_freq; - uint16 m_left; - uint16 m_right; - - inline bool operator<(const sym_freq& other) const { - return m_freq > other.m_freq; - } -}; +namespace crnlib +{ + struct sym_freq + { + uint m_freq; + uint16 m_left; + uint16 m_right; -static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) { - const uint cMaxPasses = 2; - uint hist[256 * cMaxPasses]; + inline bool operator<(const sym_freq& other) const + { + return m_freq > other.m_freq; + } + }; - memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); + static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) + { + const uint cMaxPasses = 2; + uint hist[256 * cMaxPasses]; - sym_freq* p = syms0; - sym_freq* q = syms0 + (num_syms >> 1) * 2; + memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); - for (; p != q; p += 2) { - const uint freq0 = p[0].m_freq; - const uint freq1 = p[1].m_freq; + sym_freq* p = syms0; + sym_freq* q = syms0 + (num_syms >> 1) * 2; - hist[freq0 & 0xFF]++; - hist[256 + ((freq0 >> 8) & 0xFF)]++; + for (; p != q; p += 2) + { + const uint freq0 = p[0].m_freq; + const uint freq1 = p[1].m_freq; - hist[freq1 & 0xFF]++; - hist[256 + ((freq1 >> 8) & 0xFF)]++; - } + hist[freq0 & 0xFF]++; + hist[256 + ((freq0 >> 8) & 0xFF)]++; - if (num_syms & 1) { - const uint freq = p->m_freq; + hist[freq1 & 0xFF]++; + hist[256 + ((freq1 >> 8) & 0xFF)]++; + } - hist[freq & 0xFF]++; - hist[256 + ((freq >> 8) & 0xFF)]++; - } + if (num_syms & 1) + { + const uint freq = p->m_freq; - sym_freq* pCur_syms = syms0; - sym_freq* pNew_syms = syms1; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } - for (uint pass 
= 0; pass < cMaxPasses; pass++) { - const uint* pHist = &hist[pass << 8]; + sym_freq* pCur_syms = syms0; + sym_freq* pNew_syms = syms1; - uint offsets[256]; + for (uint pass = 0; pass < cMaxPasses; pass++) + { + const uint* pHist = &hist[pass << 8]; - uint cur_ofs = 0; - for (uint i = 0; i < 256; i += 2) { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; + uint offsets[256]; - offsets[i + 1] = cur_ofs; - cur_ofs += pHist[i + 1]; - } + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; - const uint pass_shift = pass << 3; + offsets[i + 1] = cur_ofs; + cur_ofs += pHist[i + 1]; + } - sym_freq* p = pCur_syms; - sym_freq* q = pCur_syms + (num_syms >> 1) * 2; + const uint pass_shift = pass << 3; - for (; p != q; p += 2) { - uint c0 = p[0].m_freq; - uint c1 = p[1].m_freq; + sym_freq* p = pCur_syms; + sym_freq* q = pCur_syms + (num_syms >> 1) * 2; - if (pass) { - c0 >>= 8; - c1 >>= 8; - } + for (; p != q; p += 2) + { + uint c0 = p[0].m_freq; + uint c1 = p[1].m_freq; - c0 &= 0xFF; - c1 &= 0xFF; + if (pass) + { + c0 >>= 8; + c1 >>= 8; + } - if (c0 == c1) { - uint dst_offset0 = offsets[c0]; + c0 &= 0xFF; + c1 &= 0xFF; - offsets[c0] = dst_offset0 + 2; + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; - pNew_syms[dst_offset0] = p[0]; - pNew_syms[dst_offset0 + 1] = p[1]; - } else { - uint dst_offset0 = offsets[c0]++; - uint dst_offset1 = offsets[c1]++; + offsets[c0] = dst_offset0 + 2; - pNew_syms[dst_offset0] = p[0]; - pNew_syms[dst_offset1] = p[1]; - } - } + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; - if (num_syms & 1) { - uint c = ((p->m_freq) >> pass_shift) & 0xFF; + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset1] = p[1]; + } + } + + if (num_syms & 1) + { + uint c = ((p->m_freq) >> pass_shift) & 0xFF; - uint dst_offset = offsets[c]; - offsets[c] = dst_offset + 1; + uint dst_offset = offsets[c]; + 
offsets[c] = dst_offset + 1; - pNew_syms[dst_offset] = *p; - } + pNew_syms[dst_offset] = *p; + } - sym_freq* t = pCur_syms; - pCur_syms = pNew_syms; - pNew_syms = t; - } + sym_freq* t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } #ifdef CRNLIB_ASSERTS_ENABLED - uint prev_freq = 0; - for (uint i = 0; i < num_syms; i++) { - CRNLIB_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); - prev_freq = pCur_syms[i].m_freq; - } + uint prev_freq = 0; + for (uint i = 0; i < num_syms; i++) + { + CRNLIB_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); + prev_freq = pCur_syms[i].m_freq; + } #endif - return pCur_syms; -} + return pCur_syms; + } -struct huffman_work_tables { - enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; + struct huffman_work_tables + { + enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; - sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; - sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; - uint16 queue[cMaxInternalNodes]; -}; + uint16 queue[cMaxInternalNodes]; + }; -void* create_generate_huffman_codes_tables() { - return crnlib_new(); -} + void* create_generate_huffman_codes_tables() + { + return crnlib_new(); + } -void free_generate_huffman_codes_tables(void* p) { - crnlib_delete(static_cast(p)); -} + void free_generate_huffman_codes_tables(void* p) + { + crnlib_delete(static_cast(p)); + } #if USE_CALCULATE_MINIMUM_REDUNDANCY -/* calculate_minimum_redundancy() written by - Alistair Moffat, alistair@cs.mu.oz.au, - Jyrki Katajainen, jyrki@diku.dk - November 1996. 
- */ -static void calculate_minimum_redundancy(int A[], int n) { - int root; /* next root node to be used */ - int leaf; /* next leaf to be used */ - int next; /* next value to be assigned */ - int avbl; /* number of available nodes */ - int used; /* number of internal nodes */ - int dpth; /* current depth of leaves */ - - /* check for pathological cases */ - if (n == 0) { - return; - } - if (n == 1) { - A[0] = 0; - return; - } - - /* first pass, left to right, setting parent pointers */ - A[0] += A[1]; - root = 0; - leaf = 2; - for (next = 1; next < n - 1; next++) { - /* select first item for a pairing */ - if (leaf >= n || A[root] < A[leaf]) { - A[next] = A[root]; - A[root++] = next; - } else - A[next] = A[leaf++]; - - /* add on the second item */ - if (leaf >= n || (root < next && A[root] < A[leaf])) { - A[next] += A[root]; - A[root++] = next; - } else - A[next] += A[leaf++]; - } - - /* second pass, right to left, setting internal depths */ - A[n - 2] = 0; - for (next = n - 3; next >= 0; next--) - A[next] = A[A[next]] + 1; - - /* third pass, right to left, setting leaf depths */ - avbl = 1; - used = dpth = 0; - root = n - 2; - next = n - 1; - while (avbl > 0) { - while (root >= 0 && A[root] == dpth) { - used++; - root--; - } - while (avbl > used) { - A[next--] = dpth; - avbl--; + /* calculate_minimum_redundancy() written by + Alistair Moffat, alistair@cs.mu.oz.au, + Jyrki Katajainen, jyrki@diku.dk + November 1996. 
+ */ + static void calculate_minimum_redundancy(int A[], int n) + { + int root; /* next root node to be used */ + int leaf; /* next leaf to be used */ + int next; /* next value to be assigned */ + int avbl; /* number of available nodes */ + int used; /* number of internal nodes */ + int dpth; /* current depth of leaves */ + + /* check for pathological cases */ + if (n == 0) + { + return; + } + if (n == 1) + { + A[0] = 0; + return; + } + + /* first pass, left to right, setting parent pointers */ + A[0] += A[1]; + root = 0; + leaf = 2; + for (next = 1; next < n - 1; next++) + { + /* select first item for a pairing */ + if (leaf >= n || A[root] < A[leaf]) { + A[next] = A[root]; + A[root++] = next; + } + else + { + A[next] = A[leaf++]; + } + + /* add on the second item */ + if (leaf >= n || (root < next && A[root] < A[leaf])) + { + A[next] += A[root]; + A[root++] = next; + } + else + { + A[next] += A[leaf++]; + } + } + + /* second pass, right to left, setting internal depths */ + A[n - 2] = 0; + for (next = n - 3; next >= 0; next--) + { + A[next] = A[A[next]] + 1; + } + + /* third pass, right to left, setting leaf depths */ + avbl = 1; + used = dpth = 0; + root = n - 2; + next = n - 1; + while (avbl > 0) + { + while (root >= 0 && A[root] == dpth) + { + used++; + root--; + } + while (avbl > used) + { + A[next--] = dpth; + avbl--; + } + avbl = 2 * used; + dpth++; + used = 0; + } } - avbl = 2 * used; - dpth++; - used = 0; - } -} #endif -bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret) { - if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms)) - return false; - - huffman_work_tables& state = *static_cast(pContext); - ; - - uint max_freq = 0; - uint total_freq = 0; - - uint num_used_syms = 0; - for (uint i = 0; i < num_syms; i++) { - uint freq = pFreq[i]; - - if (!freq) - pCodesizes[i] = 0; - else { - total_freq += freq; - max_freq = math::maximum(max_freq, freq); - - sym_freq& 
sf = state.syms0[num_used_syms]; - sf.m_left = (uint16)i; - sf.m_right = cUINT16_MAX; - sf.m_freq = freq; - num_used_syms++; - } - } - - total_freq_ret = total_freq; - - if (num_used_syms == 1) { - pCodesizes[state.syms0[0].m_left] = 1; - return true; - } - - sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1); + bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret) + { + if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms)) + { + return false; + } + + huffman_work_tables& state = *static_cast(pContext); + + uint max_freq = 0; + uint total_freq = 0; + + uint num_used_syms = 0; + for (uint i = 0; i < num_syms; i++) + { + uint freq = pFreq[i]; + + if (!freq) + { + pCodesizes[i] = 0; + } + else + { + total_freq += freq; + max_freq = math::maximum(max_freq, freq); + + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_left = (uint16)i; + sf.m_right = cUINT16_MAX; + sf.m_freq = freq; + num_used_syms++; + } + } + + total_freq_ret = total_freq; + + if (num_used_syms == 1) + { + pCodesizes[state.syms0[0].m_left] = 1; + return true; + } + + sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1); #if USE_CALCULATE_MINIMUM_REDUNDANCY - int x[cHuffmanMaxSupportedSyms]; - for (uint i = 0; i < num_used_syms; i++) - x[i] = state.syms0[i].m_freq; - - calculate_minimum_redundancy(x, num_used_syms); - - uint max_len = 0; - for (uint i = 0; i < num_used_syms; i++) { - uint len = x[i]; - max_len = math::maximum(len, max_len); - pCodesizes[state.syms0[i].m_left] = static_cast(len); - } - - return true; + int x[cHuffmanMaxSupportedSyms]; + for (uint i = 0; i < num_used_syms; i++) + { + x[i] = state.syms0[i].m_freq; + } + + calculate_minimum_redundancy(x, num_used_syms); + + uint max_len = 0; + for (uint i = 0; i < num_used_syms; i++) + { + uint len = x[i]; + max_len = math::maximum(len, max_len); + pCodesizes[state.syms0[i].m_left] = static_cast(len); + } + 
+ return true; #else - // Dummy node - sym_freq& sf = state.syms0[num_used_syms]; - sf.m_left = cUINT16_MAX; - sf.m_right = cUINT16_MAX; - sf.m_freq = UINT_MAX; - - uint next_internal_node = num_used_syms + 1; - - uint queue_front = 0; - uint queue_end = 0; - - uint next_lowest_sym = 0; + // Dummy node + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_left = cUINT16_MAX; + sf.m_right = cUINT16_MAX; + sf.m_freq = UINT_MAX; - uint num_nodes_remaining = num_used_syms; - do { - uint left_freq = syms[next_lowest_sym].m_freq; - uint left_child = next_lowest_sym; + uint next_internal_node = num_used_syms + 1; + + uint queue_front = 0; + uint queue_end = 0; - if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq)) { - left_child = state.queue[queue_front]; - left_freq = syms[left_child].m_freq; + uint next_lowest_sym = 0; - queue_front++; - } else - next_lowest_sym++; + uint num_nodes_remaining = num_used_syms; + do + { + uint left_freq = syms[next_lowest_sym].m_freq; + uint left_child = next_lowest_sym; - uint right_freq = syms[next_lowest_sym].m_freq; - uint right_child = next_lowest_sym; + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq)) + { + left_child = state.queue[queue_front]; + left_freq = syms[left_child].m_freq; + + queue_front++; + } + else + { + next_lowest_sym++; + } + + uint right_freq = syms[next_lowest_sym].m_freq; + uint right_child = next_lowest_sym; + + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq)) + { + right_child = state.queue[queue_front]; + right_freq = syms[right_child].m_freq; + + queue_front++; + } + else + { + next_lowest_sym++; + } + + const uint internal_node_index = next_internal_node; + next_internal_node++; + + CRNLIB_ASSERT(next_internal_node < CRNLIB_ARRAYSIZE(state.syms0)); + + syms[internal_node_index].m_freq = left_freq + right_freq; + syms[internal_node_index].m_left = static_cast(left_child); + 
syms[internal_node_index].m_right = static_cast(right_child); + + CRNLIB_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes); + state.queue[queue_end] = static_cast(internal_node_index); + queue_end++; + + num_nodes_remaining--; + + } + while (num_nodes_remaining > 1); + + CRNLIB_ASSERT(next_lowest_sym == num_used_syms); + CRNLIB_ASSERT((queue_end - queue_front) == 1); + + uint cur_node_index = state.queue[queue_front]; + + uint32* pStack = (syms == state.syms0) ? (uint32*)state.syms1 : (uint32*)state.syms0; + uint32* pStack_top = pStack; + + uint max_level = 0; + + for (;;) + { + uint level = cur_node_index >> 16; + uint node_index = cur_node_index & 0xFFFF; + + uint left_child = syms[node_index].m_left; + uint right_child = syms[node_index].m_right; + + uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000; + + if (left_child < num_used_syms) + { + max_level = math::maximum(max_level, level); + + pCodesizes[syms[left_child].m_left] = static_cast(level + 1); + + if (right_child < num_used_syms) + { + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + if (pStack == pStack_top) + { + break; + } + cur_node_index = *--pStack; + } + else + { + cur_node_index = next_level | right_child; + } + } + else + { + if (right_child < num_used_syms) + { + max_level = math::maximum(max_level, level); - if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq)) { - right_child = state.queue[queue_front]; - right_freq = syms[right_child].m_freq; + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + cur_node_index = next_level | left_child; + } + else + { + *pStack++ = next_level | left_child; - queue_front++; - } else - next_lowest_sym++; + cur_node_index = next_level | right_child; + } + } + } - const uint internal_node_index = next_internal_node; - next_internal_node++; - - CRNLIB_ASSERT(next_internal_node < CRNLIB_ARRAYSIZE(state.syms0)); - - syms[internal_node_index].m_freq = left_freq + right_freq; - 
syms[internal_node_index].m_left = static_cast(left_child); - syms[internal_node_index].m_right = static_cast(right_child); - - CRNLIB_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes); - state.queue[queue_end] = static_cast(internal_node_index); - queue_end++; - - num_nodes_remaining--; - - } while (num_nodes_remaining > 1); - - CRNLIB_ASSERT(next_lowest_sym == num_used_syms); - CRNLIB_ASSERT((queue_end - queue_front) == 1); - - uint cur_node_index = state.queue[queue_front]; - - uint32* pStack = (syms == state.syms0) ? (uint32*)state.syms1 : (uint32*)state.syms0; - uint32* pStack_top = pStack; - - uint max_level = 0; - - for (;;) { - uint level = cur_node_index >> 16; - uint node_index = cur_node_index & 0xFFFF; - - uint left_child = syms[node_index].m_left; - uint right_child = syms[node_index].m_right; - - uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000; - - if (left_child < num_used_syms) { - max_level = math::maximum(max_level, level); - - pCodesizes[syms[left_child].m_left] = static_cast(level + 1); - - if (right_child < num_used_syms) { - pCodesizes[syms[right_child].m_left] = static_cast(level + 1); - - if (pStack == pStack_top) - break; - cur_node_index = *--pStack; - } else { - cur_node_index = next_level | right_child; - } - } else { - if (right_child < num_used_syms) { - max_level = math::maximum(max_level, level); - - pCodesizes[syms[right_child].m_left] = static_cast(level + 1); - - cur_node_index = next_level | left_child; - } else { - *pStack++ = next_level | left_child; - - cur_node_index = next_level | right_child; - } - } - } - - max_code_size = max_level + 1; + max_code_size = max_level + 1; #endif - - return true; -} - + return true; + } } // namespace crnlib diff --git a/crnlib/crn_huffman_codes.h b/crnlib/crn_huffman_codes.h index e618666..41964d8 100644 --- a/crnlib/crn_huffman_codes.h +++ b/crnlib/crn_huffman_codes.h @@ -1,15 +1,16 @@ // File: crn_huffman_codes.h // See Copyright Notice and license at the end of 
inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -const uint cHuffmanMaxSupportedSyms = 8192; - -CRN_EXPORT void* create_generate_huffman_codes_tables(); -CRN_EXPORT void free_generate_huffman_codes_tables(void* p); +namespace crnlib +{ + const uint cHuffmanMaxSupportedSyms = 8192; -CRN_EXPORT bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); + CRN_EXPORT void* create_generate_huffman_codes_tables(); + CRN_EXPORT void free_generate_huffman_codes_tables(void* p); + CRN_EXPORT bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); } // namespace crnlib diff --git a/crnlib/crn_image_utils.h b/crnlib/crn_image_utils.h index e29d4c7..3c57c8d 100644 --- a/crnlib/crn_image_utils.h +++ b/crnlib/crn_image_utils.h @@ -7,180 +7,198 @@ #include "crn_export.h" -namespace crnlib { -enum pixel_format; - -namespace image_utils { -enum read_flags_t { - cReadFlagForceSTB = 1, - - cReadFlagsAllFlags = 1 -}; - -CRN_EXPORT bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img); -CRN_EXPORT bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img); -CRN_EXPORT bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags = 0); -CRN_EXPORT bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags = 0); - -// Reads texture from memory, results returned stb_image.c style. -// *pActual_comps is set to 1, 3, or 4. req_comps must range from 1-4. 
-CRN_EXPORT uint8* read_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); - -enum { - cWriteFlagIgnoreAlpha = 0x00000001, - cWriteFlagGrayscale = 0x00000002, - - cWriteFlagJPEGH1V1 = 0x00010000, - cWriteFlagJPEGH2V1 = 0x00020000, - cWriteFlagJPEGH2V2 = 0x00040000, - cWriteFlagJPEGTwoPass = 0x00080000, - cWriteFlagJPEGNoChromaDiscrim = 0x00100000, - cWriteFlagJPEGQualityLevelMask = 0xFF000000, - cWriteFlagJPEGQualityLevelShift = 24, -}; - -const int cLumaComponentIndex = -1; - -inline uint create_jpeg_write_flags(uint base_flags, uint quality_level) { - CRNLIB_ASSERT(quality_level <= 100); - return base_flags | ((quality_level << cWriteFlagJPEGQualityLevelShift) & cWriteFlagJPEGQualityLevelMask); -} +namespace crnlib +{ + enum pixel_format; + + namespace image_utils + { + enum read_flags_t + { + cReadFlagForceSTB = 1, + + cReadFlagsAllFlags = 1 + }; + + CRN_EXPORT bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img); + CRN_EXPORT bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img); + CRN_EXPORT bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags = 0); + CRN_EXPORT bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags = 0); + + // Reads texture from memory, results returned stb_image.c style. + // *pActual_comps is set to 1, 3, or 4. req_comps must range from 1-4. 
+ CRN_EXPORT uint8* read_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); + + enum { + cWriteFlagIgnoreAlpha = 0x00000001, + cWriteFlagGrayscale = 0x00000002, + + cWriteFlagJPEGH1V1 = 0x00010000, + cWriteFlagJPEGH2V1 = 0x00020000, + cWriteFlagJPEGH2V2 = 0x00040000, + cWriteFlagJPEGTwoPass = 0x00080000, + cWriteFlagJPEGNoChromaDiscrim = 0x00100000, + cWriteFlagJPEGQualityLevelMask = 0xFF000000, + cWriteFlagJPEGQualityLevelShift = 24, + }; + + const int cLumaComponentIndex = -1; + + inline uint create_jpeg_write_flags(uint base_flags, uint quality_level) + { + CRNLIB_ASSERT(quality_level <= 100); + return base_flags | ((quality_level << cWriteFlagJPEGQualityLevelShift) & cWriteFlagJPEGQualityLevelMask); + } + + CRN_EXPORT bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags = 0, int grayscale_comp_index = cLumaComponentIndex); + + CRN_EXPORT bool has_alpha(const image_u8& img); + CRN_EXPORT bool is_normal_map(const image_u8& img, const char* pFilename = NULL); + CRN_EXPORT void renorm_normal_map(image_u8& img); + + struct resample_params + { + resample_params(): + m_dst_width(0), + m_dst_height(0), + m_pFilter("lanczos4"), + m_filter_scale(1.0f), + m_srgb(true), + m_wrapping(false), + m_first_comp(0), + m_num_comps(4), + m_source_gamma(2.2f), // 1.75f + m_multithreaded(true) + { + } + + uint m_dst_width; + uint m_dst_height; + const char* m_pFilter; + float m_filter_scale; + bool m_srgb; + bool m_wrapping; + uint m_first_comp; + uint m_num_comps; + float m_source_gamma; + bool m_multithreaded; + }; + + CRN_EXPORT bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params); + CRN_EXPORT bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params); + CRN_EXPORT bool resample(const image_u8& src, image_u8& dst, const resample_params& params); + + CRN_EXPORT bool compute_delta(image_u8& dest, 
image_u8& a, image_u8& b, uint scale = 2); + + class CRN_EXPORT error_metrics + { + public: + error_metrics() + { + utils::zero_this(this); + } + + void print(const char* pName) const; + + // If num_channels==0, luma error is computed. + // If pHist != NULL, it must point to a 256 entry array. + bool compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error = true); + + uint mMax; + double mMean; + double mMeanSquared; + double mRootMeanSquared; + double mPeakSNR; + + inline bool operator==(const error_metrics& other) const + { + return mPeakSNR == other.mPeakSNR; + } + + inline bool operator<(const error_metrics& other) const + { + return mPeakSNR < other.mPeakSNR; + } + + inline bool operator>(const error_metrics& other) const + { + return mPeakSNR > other.mPeakSNR; + } + }; + + CRN_EXPORT void print_image_metrics(const image_u8& src_img, const image_u8& dst_img); + + CRN_EXPORT double compute_block_ssim(uint n, const uint8* pX, const uint8* pY); + CRN_EXPORT double compute_ssim(const image_u8& a, const image_u8& b, int channel_index); + CRN_EXPORT void print_ssim(const image_u8& src_img, const image_u8& dst_img); + + enum conversion_type + { + cConversion_Invalid = -1, + + cConversion_To_CCxY, + cConversion_From_CCxY, + + cConversion_To_xGxR, + cConversion_From_xGxR, + + cConversion_To_xGBR, + cConversion_From_xGBR, + + cConversion_To_AGBR, + cConversion_From_AGBR, + + cConversion_XY_to_XYZ, + + cConversion_Y_To_A, + + cConversion_A_To_RGBA, + cConversion_Y_To_RGB, + + cConversion_To_Y, + + cConversionTotal + }; + + CRN_EXPORT void convert_image(image_u8& img, conversion_type conv_type); + + template + inline uint8* pack_image(const image_type& img, const pixel_packer& packer, uint& n) + { + n = 0; -CRN_EXPORT bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags = 0, int grayscale_comp_index = cLumaComponentIndex); + if (!packer.is_valid()) + { + return NULL; + } -CRN_EXPORT bool 
has_alpha(const image_u8& img); -CRN_EXPORT bool is_normal_map(const image_u8& img, const char* pFilename = NULL); -CRN_EXPORT void renorm_normal_map(image_u8& img); + const uint width = img.get_width(), height = img.get_height(); + uint dst_pixel_stride = packer.get_pixel_stride(); + uint dst_pitch = width * dst_pixel_stride; -struct resample_params { - resample_params() - : m_dst_width(0), - m_dst_height(0), - m_pFilter("lanczos4"), - m_filter_scale(1.0f), - m_srgb(true), - m_wrapping(false), - m_first_comp(0), - m_num_comps(4), - m_source_gamma(2.2f), // 1.75f - m_multithreaded(true) { - } + n = dst_pitch * height; - uint m_dst_width; - uint m_dst_height; - const char* m_pFilter; - float m_filter_scale; - bool m_srgb; - bool m_wrapping; - uint m_first_comp; - uint m_num_comps; - float m_source_gamma; - bool m_multithreaded; -}; + uint8* pImage = static_cast(crnlib_malloc(n)); -CRN_EXPORT bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params); -CRN_EXPORT bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params); -CRN_EXPORT bool resample(const image_u8& src, image_u8& dst, const resample_params& params); + uint8* pDst = pImage; + for (uint y = 0; y < height; y++) + { + const typename image_type::color_t* pSrc = img.get_scanline(y); + for (uint x = 0; x < width; x++) + { + pDst = (uint8*)packer.pack(*pSrc++, pDst); + } + } -CRN_EXPORT bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale = 2); + return pImage; + } -class CRN_EXPORT error_metrics { - public: - error_metrics() { utils::zero_this(this); } + CRN_EXPORT image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt); - void print(const char* pName) const; + CRN_EXPORT image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt); - // If num_channels==0, luma error is computed. - // If pHist != NULL, it must point to a 256 entry array. 
- bool compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error = true); - - uint mMax; - double mMean; - double mMeanSquared; - double mRootMeanSquared; - double mPeakSNR; - - inline bool operator==(const error_metrics& other) const { - return mPeakSNR == other.mPeakSNR; - } - - inline bool operator<(const error_metrics& other) const { - return mPeakSNR < other.mPeakSNR; - } - - inline bool operator>(const error_metrics& other) const { - return mPeakSNR > other.mPeakSNR; - } -}; - -CRN_EXPORT void print_image_metrics(const image_u8& src_img, const image_u8& dst_img); - -CRN_EXPORT double compute_block_ssim(uint n, const uint8* pX, const uint8* pY); -CRN_EXPORT double compute_ssim(const image_u8& a, const image_u8& b, int channel_index); -CRN_EXPORT void print_ssim(const image_u8& src_img, const image_u8& dst_img); - -enum conversion_type { - cConversion_Invalid = -1, - - cConversion_To_CCxY, - cConversion_From_CCxY, - - cConversion_To_xGxR, - cConversion_From_xGxR, - - cConversion_To_xGBR, - cConversion_From_xGBR, - - cConversion_To_AGBR, - cConversion_From_AGBR, - - cConversion_XY_to_XYZ, - - cConversion_Y_To_A, - - cConversion_A_To_RGBA, - cConversion_Y_To_RGB, - - cConversion_To_Y, - - cConversionTotal -}; - -CRN_EXPORT void convert_image(image_u8& img, conversion_type conv_type); - -template -inline uint8* pack_image(const image_type& img, const pixel_packer& packer, uint& n) { - n = 0; - - if (!packer.is_valid()) - return NULL; - - const uint width = img.get_width(), height = img.get_height(); - uint dst_pixel_stride = packer.get_pixel_stride(); - uint dst_pitch = width * dst_pixel_stride; - - n = dst_pitch * height; - - uint8* pImage = static_cast(crnlib_malloc(n)); - - uint8* pDst = pImage; - for (uint y = 0; y < height; y++) { - const typename image_type::color_t* pSrc = img.get_scanline(y); - for (uint x = 0; x < width; x++) - pDst = (uint8*)packer.pack(*pSrc++, pDst); - } - - return pImage; -} - 
-CRN_EXPORT image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt); - -CRN_EXPORT image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt); - -CRN_EXPORT double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); - -CRN_EXPORT uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); - -} // namespace image_utils + CRN_EXPORT double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); + CRN_EXPORT uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); + } // namespace image_utils } // namespace crnlib diff --git a/crnlib/crn_prefix_coding.cpp b/crnlib/crn_prefix_coding.cpp index ee86b4e..b07f0dc 100644 --- a/crnlib/crn_prefix_coding.cpp +++ b/crnlib/crn_prefix_coding.cpp @@ -1,5 +1,6 @@ // File: crn_prefix_coding.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_prefix_coding.h" //#include "rand.h" @@ -8,306 +9,380 @@ //#define TEST_DECODER_TABLES #endif -namespace crnlib { - -namespace prefix_coding { -bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size) { - const uint cMaxEverCodeSize = 34; - - if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize)) - return false; - - uint num_codes[cMaxEverCodeSize + 1]; - utils::zero_object(num_codes); - - bool should_limit = false; - - for (uint i = 0; i < num_syms; i++) { - uint c = pCodesizes[i]; - if (c) { - CRNLIB_ASSERT(c <= cMaxEverCodeSize); - - num_codes[c]++; - if (c > max_code_size) - should_limit = true; - } - } - - if (!should_limit) - return true; - - uint ofs = 0; - uint next_sorted_ofs[cMaxEverCodeSize + 1]; - for (uint i = 1; i <= cMaxEverCodeSize; i++) { - 
next_sorted_ofs[i] = ofs; - ofs += num_codes[i]; - } - - if ((ofs < 2) || (ofs > cMaxSupportedSyms)) - return true; - - if (ofs > (1U << max_code_size)) - return false; - - for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++) - num_codes[max_code_size] += num_codes[i]; - - // Technique of adjusting tree to enforce maximum code size from LHArc. - - uint total = 0; - for (uint i = max_code_size; i; --i) - total += (num_codes[i] << (max_code_size - i)); - - if (total == (1U << max_code_size)) - return true; - - do { - num_codes[max_code_size]--; - - uint i; - for (i = max_code_size - 1; i; --i) { - if (!num_codes[i]) - continue; - num_codes[i]--; - num_codes[i + 1] += 2; - break; - } - if (!i) - return false; - - total--; - } while (total != (1U << max_code_size)); - - uint8 new_codesizes[cMaxSupportedSyms]; - uint8* p = new_codesizes; - for (uint i = 1; i <= max_code_size; i++) { - uint n = num_codes[i]; - if (n) { - memset(p, i, n); - p += n; - } - } - - for (uint i = 0; i < num_syms; i++) { - const uint c = pCodesizes[i]; - if (c) { - uint ofs = next_sorted_ofs[c]; - next_sorted_ofs[c] = ofs + 1; - - pCodesizes[i] = static_cast(new_codesizes[ofs]); - } - } - - return true; -} - -bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes) { - uint num_codes[cMaxExpectedCodeSize + 1]; - utils::zero_object(num_codes); - - for (uint i = 0; i < num_syms; i++) { - uint c = pCodesizes[i]; - if (c) { - CRNLIB_ASSERT(c <= cMaxExpectedCodeSize); - num_codes[c]++; - } - } - - uint code = 0; - - uint next_code[cMaxExpectedCodeSize + 1]; - next_code[0] = 0; - - for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { - next_code[i] = code; - - code = (code + num_codes[i]) << 1; - } - - if (code != (1 << (cMaxExpectedCodeSize + 1))) { - uint t = 0; - for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { - t += num_codes[i]; - if (t > 1) - return false; - } - } - - for (uint i = 0; i < num_syms; i++) { - uint c = pCodesizes[i]; - if (c) { - 
CRNLIB_ASSERT(next_code[c] <= cUINT16_MAX); - pCodes[i] = static_cast(next_code[c]++); - - CRNLIB_ASSERT(math::total_bits(pCodes[i]) <= pCodesizes[i]); - } - } - - return true; -} - -bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits) { - uint min_codes[cMaxExpectedCodeSize]; - - if ((!num_syms) || (table_bits > cMaxTableBits)) - return false; - - pTables->m_num_syms = num_syms; - - uint num_codes[cMaxExpectedCodeSize + 1]; - utils::zero_object(num_codes); - - for (uint i = 0; i < num_syms; i++) { - uint c = pCodesizes[i]; - if (c) - num_codes[c]++; - } - - uint sorted_positions[cMaxExpectedCodeSize + 1]; - - uint code = 0; - - uint total_used_syms = 0; - uint max_code_size = 0; - uint min_code_size = UINT_MAX; - for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { - const uint n = num_codes[i]; - - if (!n) - pTables->m_max_codes[i - 1] = 0; //UINT_MAX; - else { - min_code_size = math::minimum(min_code_size, i); - max_code_size = math::maximum(max_code_size, i); - - min_codes[i - 1] = code; - - pTables->m_max_codes[i - 1] = code + n - 1; - pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); - - pTables->m_val_ptrs[i - 1] = total_used_syms; - - sorted_positions[i] = total_used_syms; - - code += n; - total_used_syms += n; - } - - code <<= 1; - } - - pTables->m_total_used_syms = total_used_syms; - - if (total_used_syms > pTables->m_cur_sorted_symbol_order_size) { - pTables->m_cur_sorted_symbol_order_size = total_used_syms; - - if (!math::is_power_of_2(total_used_syms)) - pTables->m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); - - if (pTables->m_sorted_symbol_order) { - crnlib_delete_array(pTables->m_sorted_symbol_order); - pTables->m_sorted_symbol_order = NULL; - } - - pTables->m_sorted_symbol_order = crnlib_new_array(pTables->m_cur_sorted_symbol_order_size); - } - - pTables->m_min_code_size = 
static_cast(min_code_size); - pTables->m_max_code_size = static_cast(max_code_size); +namespace crnlib +{ + namespace prefix_coding + { + bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size) + { + const uint cMaxEverCodeSize = 34; + + if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize)) + { + return false; + } + + uint num_codes[cMaxEverCodeSize + 1]; + utils::zero_object(num_codes); + + bool should_limit = false; + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(c <= cMaxEverCodeSize); + + num_codes[c]++; + if (c > max_code_size) + { + should_limit = true; + } + } + } + + if (!should_limit) + { + return true; + } + + uint ofs = 0; + uint next_sorted_ofs[cMaxEverCodeSize + 1]; + for (uint i = 1; i <= cMaxEverCodeSize; i++) + { + next_sorted_ofs[i] = ofs; + ofs += num_codes[i]; + } + + if ((ofs < 2) || (ofs > cMaxSupportedSyms)) + { + return true; + } + + if (ofs > (1U << max_code_size)) + { + return false; + } + + for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++) + { + num_codes[max_code_size] += num_codes[i]; + } + + // Technique of adjusting tree to enforce maximum code size from LHArc. 
+ + uint total = 0; + for (uint i = max_code_size; i; --i) + { + total += (num_codes[i] << (max_code_size - i)); + } + + if (total == (1U << max_code_size)) + { + return true; + } + + do + { + num_codes[max_code_size]--; + + uint i; + for (i = max_code_size - 1; i; --i) + { + if (!num_codes[i]) + { + continue; + } + num_codes[i]--; + num_codes[i + 1] += 2; + break; + } + if (!i) + { + return false; + } + + total--; + } + while (total != (1U << max_code_size)); + + uint8 new_codesizes[cMaxSupportedSyms]; + uint8* p = new_codesizes; + for (uint i = 1; i <= max_code_size; i++) + { + uint n = num_codes[i]; + if (n) + { + memset(p, i, n); + p += n; + } + } + + for (uint i = 0; i < num_syms; i++) + { + const uint c = pCodesizes[i]; + if (c) + { + uint ofs = next_sorted_ofs[c]; + next_sorted_ofs[c] = ofs + 1; + + pCodesizes[i] = static_cast(new_codesizes[ofs]); + } + } + + return true; + } - for (uint i = 0; i < num_syms; i++) { - uint c = pCodesizes[i]; - if (c) { - CRNLIB_ASSERT(num_codes[c]); + bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes) + { + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(c <= cMaxExpectedCodeSize); + num_codes[c]++; + } + } + + uint code = 0; + + uint next_code[cMaxExpectedCodeSize + 1]; + next_code[0] = 0; + + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + next_code[i] = code; + + code = (code + num_codes[i]) << 1; + } + + if (code != (1 << (cMaxExpectedCodeSize + 1))) + { + uint t = 0; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + t += num_codes[i]; + if (t > 1) + { + return false; + } + } + } + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(next_code[c] <= cUINT16_MAX); + pCodes[i] = static_cast(next_code[c]++); + + CRNLIB_ASSERT(math::total_bits(pCodes[i]) <= pCodesizes[i]); + } + } + + return true; + } - uint sorted_pos = 
sorted_positions[c]++; + bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits) + { + uint min_codes[cMaxExpectedCodeSize]; - CRNLIB_ASSERT(sorted_pos < total_used_syms); + if ((!num_syms) || (table_bits > cMaxTableBits)) + { + return false; + } - pTables->m_sorted_symbol_order[sorted_pos] = static_cast(i); - } - } + pTables->m_num_syms = num_syms; - if (table_bits <= pTables->m_min_code_size) - table_bits = 0; - pTables->m_table_bits = table_bits; + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); - if (table_bits) { - uint table_size = 1 << table_bits; - if (table_size > pTables->m_cur_lookup_size) { - pTables->m_cur_lookup_size = table_size; + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + num_codes[c]++; + } + } - if (pTables->m_lookup) { - crnlib_delete_array(pTables->m_lookup); - pTables->m_lookup = NULL; - } + uint sorted_positions[cMaxExpectedCodeSize + 1]; - pTables->m_lookup = crnlib_new_array(table_size); - } + uint code = 0; + + uint total_used_syms = 0; + uint max_code_size = 0; + uint min_code_size = UINT_MAX; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint n = num_codes[i]; - memset(pTables->m_lookup, 0xFF, static_cast(sizeof(pTables->m_lookup[0])) * (1UL << table_bits)); + if (!n) + { + pTables->m_max_codes[i - 1] = 0; //UINT_MAX; + } + else + { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = code; - for (uint codesize = 1; codesize <= table_bits; codesize++) { - if (!num_codes[codesize]) - continue; + pTables->m_max_codes[i - 1] = code + n - 1; + pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); - const uint fillsize = table_bits - codesize; - const uint fillnum = 1 << fillsize; + pTables->m_val_ptrs[i - 1] = total_used_syms; - const uint min_code = min_codes[codesize - 1]; - const uint 
max_code = pTables->get_unshifted_max_code(codesize); - const uint val_ptr = pTables->m_val_ptrs[codesize - 1]; + sorted_positions[i] = total_used_syms; - for (uint code = min_code; code <= max_code; code++) { - const uint sym_index = pTables->m_sorted_symbol_order[val_ptr + code - min_code]; - CRNLIB_ASSERT(pCodesizes[sym_index] == codesize); + code += n; + total_used_syms += n; + } - for (uint j = 0; j < fillnum; j++) { - const uint t = j + (code << fillsize); + code <<= 1; + } - CRNLIB_ASSERT(t < (1U << table_bits)); + pTables->m_total_used_syms = total_used_syms; - CRNLIB_ASSERT(pTables->m_lookup[t] == cUINT32_MAX); + if (total_used_syms > pTables->m_cur_sorted_symbol_order_size) + { + pTables->m_cur_sorted_symbol_order_size = total_used_syms; - pTables->m_lookup[t] = sym_index | (codesize << 16U); - } - } - } - } - - for (uint i = 0; i < cMaxExpectedCodeSize; i++) - pTables->m_val_ptrs[i] -= min_codes[i]; - - pTables->m_table_max_code = 0; - pTables->m_decode_start_code_size = pTables->m_min_code_size; - - if (table_bits) { - uint i; - for (i = table_bits; i >= 1; i--) { - if (num_codes[i]) { - pTables->m_table_max_code = pTables->m_max_codes[i - 1]; - break; - } - } - if (i >= 1) { - pTables->m_decode_start_code_size = table_bits + 1; - for (uint i = table_bits + 1; i <= max_code_size; i++) { - if (num_codes[i]) { - pTables->m_decode_start_code_size = i; - break; + if (!math::is_power_of_2(total_used_syms)) + { + pTables->m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + } + + if (pTables->m_sorted_symbol_order) + { + crnlib_delete_array(pTables->m_sorted_symbol_order); + pTables->m_sorted_symbol_order = NULL; + } + + pTables->m_sorted_symbol_order = crnlib_new_array(pTables->m_cur_sorted_symbol_order_size); + } + + pTables->m_min_code_size = static_cast(min_code_size); + pTables->m_max_code_size = static_cast(max_code_size); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + 
CRNLIB_ASSERT(num_codes[c]); + + uint sorted_pos = sorted_positions[c]++; + + CRNLIB_ASSERT(sorted_pos < total_used_syms); + + pTables->m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } + + if (table_bits <= pTables->m_min_code_size) + { + table_bits = 0; + } + pTables->m_table_bits = table_bits; + + if (table_bits) + { + uint table_size = 1 << table_bits; + if (table_size > pTables->m_cur_lookup_size) + { + pTables->m_cur_lookup_size = table_size; + + if (pTables->m_lookup) + { + crnlib_delete_array(pTables->m_lookup); + pTables->m_lookup = NULL; + } + + pTables->m_lookup = crnlib_new_array(table_size); + } + + memset(pTables->m_lookup, 0xFF, static_cast(sizeof(pTables->m_lookup[0])) * (1UL << table_bits)); + + for (uint codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + { + continue; + } + + const uint fillsize = table_bits - codesize; + const uint fillnum = 1 << fillsize; + + const uint min_code = min_codes[codesize - 1]; + const uint max_code = pTables->get_unshifted_max_code(codesize); + const uint val_ptr = pTables->m_val_ptrs[codesize - 1]; + + for (uint code = min_code; code <= max_code; code++) + { + const uint sym_index = pTables->m_sorted_symbol_order[val_ptr + code - min_code]; + CRNLIB_ASSERT(pCodesizes[sym_index] == codesize); + + for (uint j = 0; j < fillnum; j++) + { + const uint t = j + (code << fillsize); + + CRNLIB_ASSERT(t < (1U << table_bits)); + + CRNLIB_ASSERT(pTables->m_lookup[t] == cUINT32_MAX); + + pTables->m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint i = 0; i < cMaxExpectedCodeSize; i++) + { + pTables->m_val_ptrs[i] -= min_codes[i]; + } + + pTables->m_table_max_code = 0; + pTables->m_decode_start_code_size = pTables->m_min_code_size; + + if (table_bits) + { + uint i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + pTables->m_table_max_code = pTables->m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + pTables->m_decode_start_code_size = 
table_bits + 1; + for (uint i = table_bits + 1; i <= max_code_size; i++) + { + if (num_codes[i]) + { + pTables->m_decode_start_code_size = i; + break; + } + } + } + } + + // sentinels + pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX; + pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + pTables->m_table_shift = 32 - pTables->m_table_bits; + + return true; } - } - } - } - - // sentinels - pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX; - pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; - - pTables->m_table_shift = 32 - pTables->m_table_bits; - - return true; -} - -} // namespace prefix_codig - + } // namespace prefix_codig } // namespace crnlib diff --git a/crnlib/crn_prefix_coding.h b/crnlib/crn_prefix_coding.h index c1dbe1b..51adc16 100644 --- a/crnlib/crn_prefix_coding.h +++ b/crnlib/crn_prefix_coding.h @@ -1,105 +1,138 @@ // File: crn_prefix_coding.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" -namespace crnlib { -namespace prefix_coding { -const uint cMaxExpectedCodeSize = 16; -const uint cMaxSupportedSyms = 8192; -const uint cMaxTableBits = 11; - -CRN_EXPORT bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size); - -CRN_EXPORT bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes); - -class decoder_tables { - public: - inline decoder_tables() - : m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { - } - - inline decoder_tables(const decoder_tables& other) - : m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { - *this = other; - } - - decoder_tables& operator=(const decoder_tables& other) { - if (this == &other) - return *this; - - clear(); - - memcpy(this, &other, sizeof(*this)); - - if 
(other.m_lookup) { - m_lookup = crnlib_new_array(m_cur_lookup_size); - memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); - } - - if (other.m_sorted_symbol_order) { - m_sorted_symbol_order = crnlib_new_array(m_cur_sorted_symbol_order_size); - memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); - } - - return *this; - } - - inline void clear() { - if (m_lookup) { - crnlib_delete_array(m_lookup); - m_lookup = 0; - m_cur_lookup_size = 0; - } - - if (m_sorted_symbol_order) { - crnlib_delete_array(m_sorted_symbol_order); - m_sorted_symbol_order = NULL; - m_cur_sorted_symbol_order_size = 0; - } - } - - inline ~decoder_tables() { - if (m_lookup) - crnlib_delete_array(m_lookup); - - if (m_sorted_symbol_order) - crnlib_delete_array(m_sorted_symbol_order); - } - - // DO NOT use any complex classes here - it is bitwise copied. - - uint m_num_syms; - uint m_total_used_syms; - uint m_table_bits; - uint m_table_shift; - uint m_table_max_code; - uint m_decode_start_code_size; - - uint8 m_min_code_size; - uint8 m_max_code_size; - - uint m_max_codes[cMaxExpectedCodeSize + 1]; - int m_val_ptrs[cMaxExpectedCodeSize + 1]; - - uint m_cur_lookup_size; - uint32* m_lookup; - - uint m_cur_sorted_symbol_order_size; - uint16* m_sorted_symbol_order; - - inline uint get_unshifted_max_code(uint len) const { - CRNLIB_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); - uint k = m_max_codes[len - 1]; - if (!k) - return UINT_MAX; - return (k - 1) >> (16 - len); - } -}; - -CRN_EXPORT bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits); - -} // namespace prefix_coding - +namespace crnlib +{ + namespace prefix_coding + { + const uint cMaxExpectedCodeSize = 16; + const uint cMaxSupportedSyms = 8192; + const uint cMaxTableBits = 11; + + CRN_EXPORT bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size); + + CRN_EXPORT bool 
generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes); + + class decoder_tables + { + public: + inline decoder_tables(): + m_table_shift(0), + m_table_max_code(0), + m_decode_start_code_size(0), + m_cur_lookup_size(0), + m_lookup(NULL), + m_cur_sorted_symbol_order_size(0), + m_sorted_symbol_order(NULL) + { + } + + inline decoder_tables(const decoder_tables& other): + m_table_shift(0), + m_table_max_code(0), + m_decode_start_code_size(0), + m_cur_lookup_size(0), + m_lookup(NULL), + m_cur_sorted_symbol_order_size(0), + m_sorted_symbol_order(NULL) + { + *this = other; + } + + decoder_tables& operator=(const decoder_tables& other) + { + if (this == &other) + { + return *this; + } + + clear(); + + memcpy(this, &other, sizeof(*this)); + + if (other.m_lookup) + { + m_lookup = crnlib_new_array(m_cur_lookup_size); + memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + if (other.m_sorted_symbol_order) + { + m_sorted_symbol_order = crnlib_new_array(m_cur_sorted_symbol_order_size); + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + + return *this; + } + + inline void clear() + { + if (m_lookup) + { + crnlib_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + crnlib_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() + { + if (m_lookup) + { + crnlib_delete_array(m_lookup); + } + + if (m_sorted_symbol_order) + { + crnlib_delete_array(m_sorted_symbol_order); + } + } + + // DO NOT use any complex classes here - it is bitwise copied. 
+ + uint m_num_syms; + uint m_total_used_syms; + uint m_table_bits; + uint m_table_shift; + uint m_table_max_code; + uint m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint m_max_codes[cMaxExpectedCodeSize + 1]; + int m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint m_cur_lookup_size; + uint32* m_lookup; + + uint m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint get_unshifted_max_code(uint len) const + { + CRNLIB_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); + uint k = m_max_codes[len - 1]; + if (!k) + { + return UINT_MAX; + } + return (k - 1) >> (16 - len); + } + }; + + CRN_EXPORT bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits); + + } // namespace prefix_coding } // namespace crnlib diff --git a/crnlib/crn_radix_sort.h b/crnlib/crn_radix_sort.h index 093ae67..15d4dbe 100644 --- a/crnlib/crn_radix_sort.h +++ b/crnlib/crn_radix_sort.h @@ -1,299 +1,349 @@ // File: crn_radix_sort.h +// File: crn_radix_sort.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once -namespace crnlib { -// Returns pointer to sorted array. -template -T* radix_sort(uint num_vals, T* pBuf0, T* pBuf1, uint key_ofs, uint key_size) { - CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); - CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); +namespace crnlib +{ + // Returns pointer to sorted array. 
+ template + T* radix_sort(uint num_vals, T* pBuf0, T* pBuf1, uint key_ofs, uint key_size) + { + CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); + CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); - uint hist[256 * 4]; + uint hist[256 * 4]; - memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); #define CRNLIB_GET_KEY(p) (*(uint*)((uint8*)(p) + key_ofs)) - if (key_size == 4) { - T* p = pBuf0; - T* q = pBuf0 + num_vals; - for (; p != q; p++) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - hist[768 + ((key >> 24) & 0xFF)]++; - } - } else if (key_size == 3) { - T* p = pBuf0; - T* q = pBuf0 + num_vals; - for (; p != q; p++) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - } - } else if (key_size == 2) { - T* p = pBuf0; - T* q = pBuf0 + (num_vals >> 1) * 2; - - for (; p != q; p += 2) { - const uint key0 = CRNLIB_GET_KEY(p); - const uint key1 = CRNLIB_GET_KEY(p + 1); - - hist[key0 & 0xFF]++; - hist[256 + ((key0 >> 8) & 0xFF)]++; - - hist[key1 & 0xFF]++; - hist[256 + ((key1 >> 8) & 0xFF)]++; - } - - if (num_vals & 1) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - } - } else { - CRNLIB_ASSERT(key_size == 1); - if (key_size != 1) - return NULL; - - T* p = pBuf0; - T* q = pBuf0 + (num_vals >> 1) * 2; - - for (; p != q; p += 2) { - const uint key0 = CRNLIB_GET_KEY(p); - const uint key1 = CRNLIB_GET_KEY(p + 1); - - hist[key0 & 0xFF]++; - hist[key1 & 0xFF]++; - } - - if (num_vals & 1) { - const uint key = CRNLIB_GET_KEY(p); - hist[key & 0xFF]++; - } - } - - T* pCur = pBuf0; - T* pNew = pBuf1; - - for (uint pass = 0; pass < key_size; pass++) { - const uint* pHist = &hist[pass << 8]; - - uint offsets[256]; - - uint cur_ofs = 0; - for (uint i = 0; i < 256; i += 2) { - offsets[i] = cur_ofs; - cur_ofs 
+= pHist[i]; - - offsets[i + 1] = cur_ofs; - cur_ofs += pHist[i + 1]; + if (key_size == 4) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_vals & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + CRNLIB_ASSERT(key_size == 1); + if (key_size != 1) + { + return NULL; + } + + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_vals & 1) + { + const uint key = CRNLIB_GET_KEY(p); + hist[key & 0xFF]++; + } + } + + T* pCur = pBuf0; + T* pNew = pBuf1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i + 1] = cur_ofs; + cur_ofs += pHist[i + 1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + uint c0 = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; + uint c1 = (CRNLIB_GET_KEY(p + 1) >> 
pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset1] = p[1]; + } + } + + if (num_vals & 1) + { + uint c = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = *p; + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; } - const uint pass_shift = pass << 3; - - T* p = pCur; - T* q = pCur + (num_vals >> 1) * 2; - - for (; p != q; p += 2) { - uint c0 = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; - uint c1 = (CRNLIB_GET_KEY(p + 1) >> pass_shift) & 0xFF; - - if (c0 == c1) { - uint dst_offset0 = offsets[c0]; - - offsets[c0] = dst_offset0 + 2; - - pNew[dst_offset0] = p[0]; - pNew[dst_offset0 + 1] = p[1]; - } else { - uint dst_offset0 = offsets[c0]++; - uint dst_offset1 = offsets[c1]++; - - pNew[dst_offset0] = p[0]; - pNew[dst_offset1] = p[1]; - } - } - - if (num_vals & 1) { - uint c = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; - - uint dst_offset = offsets[c]; - offsets[c] = dst_offset + 1; - - pNew[dst_offset] = *p; - } - - T* t = pCur; - pCur = pNew; - pNew = t; - } - - return pCur; -} - #undef CRNLIB_GET_KEY -// Returns pointer to sorted array. -template -T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) { - CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); - CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); - - if (init_indices) { - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - uint i; - for (i = 0; p != q; p += 2, i += 2) { - p[0] = static_cast(i); - p[1] = static_cast(i + 1); - } + // Returns pointer to sorted array. 
+ template + T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) + { + CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); + CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); + + if (init_indices) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + uint i; + for (i = 0; p != q; p += 2, i += 2) + { + p[0] = static_cast(i); + p[1] = static_cast(i + 1); + } - if (num_indices & 1) - *p = static_cast(i); - } + if (num_indices & 1) + *p = static_cast(i); + } - uint hist[256 * 4]; + uint hist[256 * 4]; - memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); #define CRNLIB_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) #define CRNLIB_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) - if (key_size == 4) { - T* p = pIndices0; - T* q = pIndices0 + num_indices; - for (; p != q; p++) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - hist[768 + ((key >> 24) & 0xFF)]++; - } - } else if (key_size == 3) { - T* p = pIndices0; - T* q = pIndices0 + num_indices; - for (; p != q; p++) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - } - } else if (key_size == 2) { - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - - for (; p != q; p += 2) { - const uint key0 = CRNLIB_GET_KEY(p); - const uint key1 = CRNLIB_GET_KEY(p + 1); - - hist[key0 & 0xFF]++; - hist[256 + ((key0 >> 8) & 0xFF)]++; - - hist[key1 & 0xFF]++; - hist[256 + ((key1 >> 8) & 0xFF)]++; + if (key_size == 4) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for (; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 
24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for (; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for (; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_indices & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + CRNLIB_ASSERT(key_size == 1); + if (key_size != 1) + { + return NULL; + } + + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for (; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_indices & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + } + } + + T* pCur = pIndices0; + T* pNew = pIndices1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i + 1] = cur_ofs; + cur_ofs += pHist[i + 1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_indices >> 1) * 2; + + for (; p != q; p += 2) + { + uint index0 = p[0]; + uint index1 = p[1]; + + uint c0 = (CRNLIB_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; + uint c1 = (CRNLIB_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset0 + 1] = 
static_cast(index1); + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset1] = static_cast(index1); + } + } + + if (num_indices & 1) + { + uint index = *p; + uint c = (CRNLIB_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = static_cast(index); + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; } - if (num_indices & 1) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - } - } else { - CRNLIB_ASSERT(key_size == 1); - if (key_size != 1) - return NULL; - - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - - for (; p != q; p += 2) { - const uint key0 = CRNLIB_GET_KEY(p); - const uint key1 = CRNLIB_GET_KEY(p + 1); - - hist[key0 & 0xFF]++; - hist[key1 & 0xFF]++; - } - - if (num_indices & 1) { - const uint key = CRNLIB_GET_KEY(p); - - hist[key & 0xFF]++; - } - } - - T* pCur = pIndices0; - T* pNew = pIndices1; - - for (uint pass = 0; pass < key_size; pass++) { - const uint* pHist = &hist[pass << 8]; - - uint offsets[256]; - - uint cur_ofs = 0; - for (uint i = 0; i < 256; i += 2) { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - - offsets[i + 1] = cur_ofs; - cur_ofs += pHist[i + 1]; - } - - const uint pass_shift = pass << 3; - - T* p = pCur; - T* q = pCur + (num_indices >> 1) * 2; - - for (; p != q; p += 2) { - uint index0 = p[0]; - uint index1 = p[1]; - - uint c0 = (CRNLIB_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; - uint c1 = (CRNLIB_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; - - if (c0 == c1) { - uint dst_offset0 = offsets[c0]; - - offsets[c0] = dst_offset0 + 2; - - pNew[dst_offset0] = static_cast(index0); - pNew[dst_offset0 + 1] = static_cast(index1); - } else { - uint dst_offset0 = offsets[c0]++; - uint dst_offset1 = offsets[c1]++; - - pNew[dst_offset0] = static_cast(index0); - 
pNew[dst_offset1] = static_cast(index1); - } - } - - if (num_indices & 1) { - uint index = *p; - uint c = (CRNLIB_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; - - uint dst_offset = offsets[c]; - offsets[c] = dst_offset + 1; - - pNew[dst_offset] = static_cast(index); - } - - T* t = pCur; - pCur = pNew; - pNew = t; - } - - return pCur; -} - #undef CRNLIB_GET_KEY #undef CRNLIB_GET_KEY_FROM_INDEX diff --git a/crnlib/crn_strutils.h b/crnlib/crn_strutils.h index a9e2dae..cfefeb2 100644 --- a/crnlib/crn_strutils.h +++ b/crnlib/crn_strutils.h @@ -1,5 +1,6 @@ // File: crn_strutils.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_export.h" diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index bc94b9a..a6d72d9 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -1,599 +1,735 @@ // File: crn_tree_clusterizer.h // See Copyright Notice and license at the end of inc/crnlib.h -#pragma once -#include "crn_matrix.h" -#include "crn_threading.h" -#include - -namespace crnlib { -template -class tree_clusterizer { - public: - tree_clusterizer() {} - - struct VectorInfo { - uint index; - uint weight; - }; - - struct NodeInfo { - uint m_index; - float m_variance; - NodeInfo (uint index, float variance) : m_index(index), m_variance(variance) {} - bool operator<(const NodeInfo& other) const { - return m_index < other.m_index ? 
m_variance < other.m_variance : !(other.m_variance < m_variance); - } - }; - - struct split_alternative_node_task_params { - uint main_node; - uint alternative_node; - uint max_splits; - }; - - void split_alternative_node_task(uint64, void* pData_ptr) { - split_alternative_node_task_params* pParams = (split_alternative_node_task_params*)pData_ptr; - std::priority_queue node_queue; - uint begin_node = pParams->alternative_node, end_node = begin_node, splits = 0; - - m_nodes[end_node] = m_nodes[pParams->main_node]; - node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); - end_node++; - splits++; - - while (splits < pParams->max_splits && split_node(node_queue, end_node)) - splits++; - - m_nodes[pParams->main_node] = m_nodes[pParams->alternative_node]; - m_nodes[pParams->main_node].m_alternative = true; - } - - - void generate_codebook(VectorType* vectors, uint* weights, uint size, uint max_splits, bool generate_node_index_map = false, task_pool* pTask_pool = 0) { - m_vectors = vectors; - m_vectorsInfo.resize(size); - m_weightedVectors.resize(size); - m_weightedDotProducts.resize(size); - m_vectorsInfoLeft.resize(size); - m_vectorsInfoRight.resize(size); - m_vectorComparison.resize(size); - m_nodes.resize(max_splits << 2); - m_codebook.clear(); - uint num_tasks = pTask_pool ? 
pTask_pool->get_num_threads() + 1 : 1; - - vq_node root; - root.m_begin = 0; - root.m_end = size; - double ttsum = 0.0f; - for (uint i = 0; i < m_vectorsInfo.size(); i++) { - const VectorType& v = vectors[i]; - m_vectorsInfo[i].index = i; - const uint weight = m_vectorsInfo[i].weight = weights[i]; - m_weightedVectors[i] = v * (float)weight; - root.m_centroid += m_weightedVectors[i]; - root.m_total_weight += weight; - m_weightedDotProducts[i] = v.dot(v) * weight; - ttsum += m_weightedDotProducts[i]; - } - root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); - root.m_centroid *= (1.0f / root.m_total_weight); - - std::priority_queue node_queue; - uint begin_node = 0, end_node = begin_node, splits = 0; - m_nodes[end_node] = root; - node_queue.push(NodeInfo(end_node, root.m_variance)); - end_node++; - splits++; - - if (num_tasks > 1) { - while (splits < max_splits && node_queue.size() != num_tasks && split_node(node_queue, end_node, pTask_pool)) - splits++; - if (node_queue.size() == num_tasks) { - std::priority_queue alternative_node_queue = node_queue; - uint alternative_node = max_splits << 1, alternative_max_splits = max_splits / num_tasks; - crnlib::vector params(num_tasks); - for (uint task = 0; !alternative_node_queue.empty(); alternative_node_queue.pop(), alternative_node += alternative_max_splits << 1, task++) { - params[task].main_node = alternative_node_queue.top().m_index; - params[task].alternative_node = alternative_node; - params[task].max_splits = alternative_max_splits; - pTask_pool->queue_object_task(this, &tree_clusterizer::split_alternative_node_task, task, ¶ms[task]); - } - pTask_pool->join(); - } - } - - while (splits < max_splits && split_node(node_queue, end_node, pTask_pool)) - splits++; - - for (uint i = begin_node; i < end_node; i++) { - vq_node& node = m_nodes[i]; - if (!node.m_alternative && node.m_left != -1) - continue; - node.m_codebook_index = m_codebook.size(); - 
m_codebook.push_back(node.m_centroid); - if (generate_node_index_map) { - for (uint j = node.m_begin; j < node.m_end; j++) - m_node_index_map.insert(std::make_pair(m_vectors[m_vectorsInfo[j].index], node.m_codebook_index)); - } - } - } - - inline uint get_node_index(const VectorType& v) { - return m_node_index_map.find(v)->second; - } - - inline uint get_codebook_size() const { - return m_codebook.size(); - } - - inline const VectorType& get_codebook_entry(uint index) const { - return m_codebook[index]; - } - - typedef crnlib::vector vector_vec_type; - inline const vector_vec_type& get_codebook() const { - return m_codebook; - } - - private: - VectorType* m_vectors; - crnlib::vector m_weightedVectors; - crnlib::vector m_weightedDotProducts; - crnlib::vector m_vectorsInfo, m_vectorsInfoLeft, m_vectorsInfoRight; - crnlib::vector m_vectorComparison; - crnlib::hash_map m_node_index_map; - - struct vq_node { - vq_node() - : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false), m_alternative(false), m_processed(false) {} - - VectorType m_centroid; - uint64 m_total_weight; - - float m_variance; - - uint m_begin; - uint m_end; - - int m_left; - int m_right; - - int m_codebook_index; - - bool m_unsplittable; - bool m_alternative; - bool m_processed; - }; - - typedef crnlib::vector node_vec_type; - - node_vec_type m_nodes; - - vector_vec_type m_codebook; - - struct distance_comparison_task_params { - VectorType* left_child; - VectorType* right_child; - uint begin; - uint end; - uint num_tasks; - }; - - void distance_comparison_task(uint64 data, void* pData_ptr) { - distance_comparison_task_params* pParams = (distance_comparison_task_params*)pData_ptr; - const VectorType& left_child = *pParams->left_child; - const VectorType& right_child = *pParams->right_child; - uint begin = pParams->begin + (pParams->end - pParams->begin) * data / pParams->num_tasks; - uint end = pParams->begin + (pParams->end - pParams->begin) * (data 
+ 1) / pParams->num_tasks; - for (uint i = begin; i < end; i++) { - const VectorType& v = m_vectors[m_vectorsInfo[i].index]; - double left_dist2 = left_child.squared_distance(v); - double right_dist2 = right_child.squared_distance(v); - m_vectorComparison[i] = left_dist2 < right_dist2; - } - } - - bool split_node(std::priority_queue& node_queue, uint& end_node, task_pool* pTask_pool = 0) { - if (node_queue.empty()) - return false; - - vq_node& parent_node = m_nodes[node_queue.top().m_index]; - - if (parent_node.m_alternative) - parent_node.m_alternative = false; - - if (parent_node.m_variance <= 0.0f || parent_node.m_begin + 1 == parent_node.m_end) - return false; - - node_queue.pop(); - - if (parent_node.m_processed) { - if (!parent_node.m_unsplittable) { - m_nodes[end_node] = m_nodes[parent_node.m_left]; - m_nodes[end_node].m_alternative = true; - node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); - parent_node.m_left = end_node++; - m_nodes[end_node] = m_nodes[parent_node.m_right]; - m_nodes[end_node].m_alternative = true; - node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); - parent_node.m_right = end_node++; - } - return true; - } - parent_node.m_processed = true; - - uint num_blocks = (parent_node.m_end - parent_node.m_begin) >> 9; - uint num_tasks = num_blocks > 1 && pTask_pool ? 
math::minimum(num_blocks, pTask_pool->get_num_threads() + 1) : 1; - VectorType furthest(0); - double furthest_dist = -1.0f; - - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorType& v = m_vectors[m_vectorsInfo[i].index]; - double dist = v.squared_distance(parent_node.m_centroid); - if (dist > furthest_dist) { - furthest_dist = dist; - furthest = v; - } - } - - VectorType opposite; - double opposite_dist = -1.0f; - - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorType& v = m_vectors[m_vectorsInfo[i].index]; - double dist = v.squared_distance(furthest); - if (dist > opposite_dist) { - opposite_dist = dist; - opposite = v; - } - } +#pragma once - VectorType left_child((furthest + parent_node.m_centroid) * .5f); - VectorType right_child((opposite + parent_node.m_centroid) * .5f); +#include - if (parent_node.m_begin + 2 < parent_node.m_end) { - const uint N = VectorType::num_elements; +#include "crn_matrix.h" +#include "crn_threading.h" - matrix covar; - covar.clear(); - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorType& v = m_vectors[m_vectorsInfo[i].index] - parent_node.m_centroid; - const VectorType w = v * (float)m_vectorsInfo[i].weight; - for (uint x = 0; x < N; x++) { - for (uint y = x; y < N; y++) - covar[x][y] = covar[x][y] + v[x] * w[y]; +namespace crnlib +{ + template + class tree_clusterizer + { + public: + tree_clusterizer() + { } - } - float divider = (float)parent_node.m_total_weight; - for (uint x = 0; x < N; x++) { - for (uint y = x; y < N; y++) { - covar[x][y] /= divider; - covar[y][x] = covar[x][y]; + struct VectorInfo + { + uint index; + uint weight; + }; + + struct NodeInfo + { + uint m_index; + float m_variance; + NodeInfo(uint index, float variance) : + m_index(index), + m_variance(variance) + { + } + bool operator<(const NodeInfo& other) const + { + return m_index < other.m_index ? 
m_variance < other.m_variance : !(other.m_variance < m_variance); + } + }; + + struct split_alternative_node_task_params + { + uint main_node; + uint alternative_node; + uint max_splits; + }; + + void split_alternative_node_task(uint64, void* pData_ptr) + { + split_alternative_node_task_params* pParams = (split_alternative_node_task_params*)pData_ptr; + std::priority_queue node_queue; + uint begin_node = pParams->alternative_node, end_node = begin_node, splits = 0; + + m_nodes[end_node] = m_nodes[pParams->main_node]; + node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); + end_node++; + splits++; + + while (splits < pParams->max_splits && split_node(node_queue, end_node)) + { + splits++; + } + + m_nodes[pParams->main_node] = m_nodes[pParams->alternative_node]; + m_nodes[pParams->main_node].m_alternative = true; } - } - - VectorType axis(1.0f); - // Starting with an estimate of the principle axis should work better, but doesn't in practice? - //left_child - right_child); - //axis.normalize(); - - for (uint iter = 0; iter < 10; iter++) { - VectorType x; - - double max_sum = 0; - - for (uint i = 0; i < N; i++) { - double sum = 0; - - for (uint j = 0; j < N; j++) - sum += axis[j] * covar[i][j]; - x[i] = (float)sum; - max_sum = i ? math::maximum(max_sum, sum) : sum; + void generate_codebook(VectorType* vectors, uint* weights, uint size, uint max_splits, bool generate_node_index_map = false, task_pool* pTask_pool = 0) + { + m_vectors = vectors; + m_vectorsInfo.resize(size); + m_weightedVectors.resize(size); + m_weightedDotProducts.resize(size); + m_vectorsInfoLeft.resize(size); + m_vectorsInfoRight.resize(size); + m_vectorComparison.resize(size); + m_nodes.resize(max_splits << 2); + m_codebook.clear(); + uint num_tasks = pTask_pool ? 
pTask_pool->get_num_threads() + 1 : 1; + + vq_node root; + root.m_begin = 0; + root.m_end = size; + double ttsum = 0.0f; + for (uint i = 0; i < m_vectorsInfo.size(); i++) + { + const VectorType& v = vectors[i]; + m_vectorsInfo[i].index = i; + const uint weight = m_vectorsInfo[i].weight = weights[i]; + m_weightedVectors[i] = v * (float)weight; + root.m_centroid += m_weightedVectors[i]; + root.m_total_weight += weight; + m_weightedDotProducts[i] = v.dot(v) * weight; + ttsum += m_weightedDotProducts[i]; + } + root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); + root.m_centroid *= (1.0f / root.m_total_weight); + + std::priority_queue node_queue; + uint begin_node = 0, end_node = begin_node, splits = 0; + m_nodes[end_node] = root; + node_queue.push(NodeInfo(end_node, root.m_variance)); + end_node++; + splits++; + + if (num_tasks > 1) + { + while (splits < max_splits && node_queue.size() != num_tasks && split_node(node_queue, end_node, pTask_pool)) + { + splits++; + } + if (node_queue.size() == num_tasks) + { + std::priority_queue alternative_node_queue = node_queue; + uint alternative_node = max_splits << 1, alternative_max_splits = max_splits / num_tasks; + crnlib::vector params(num_tasks); + for (uint task = 0; !alternative_node_queue.empty(); alternative_node_queue.pop(), alternative_node += alternative_max_splits << 1, task++) + { + params[task].main_node = alternative_node_queue.top().m_index; + params[task].alternative_node = alternative_node; + params[task].max_splits = alternative_max_splits; + pTask_pool->queue_object_task(this, &tree_clusterizer::split_alternative_node_task, task, ¶ms[task]); + } + pTask_pool->join(); + } + } + + while (splits < max_splits && split_node(node_queue, end_node, pTask_pool)) + { + splits++; + } + + for (uint i = begin_node; i < end_node; i++) + { + vq_node& node = m_nodes[i]; + if (!node.m_alternative && node.m_left != -1) + { + continue; + } + node.m_codebook_index = 
m_codebook.size(); + m_codebook.push_back(node.m_centroid); + if (generate_node_index_map) + { + for (uint j = node.m_begin; j < node.m_end; j++) + { + m_node_index_map.insert(std::make_pair(m_vectors[m_vectorsInfo[j].index], node.m_codebook_index)); + } + } + } } - if (max_sum != 0.0f) - x *= (float)(1.0f / max_sum); - - axis = x; - } - - axis.normalize(); + inline uint get_node_index(const VectorType& v) + { + return m_node_index_map.find(v)->second; + } - VectorType new_left_child(0.0f); - VectorType new_right_child(0.0f); + inline uint get_codebook_size() const + { + return m_codebook.size(); + } - double left_weight = 0.0f; - double right_weight = 0.0f; + inline const VectorType& get_codebook_entry(uint index) const + { + return m_codebook[index]; + } - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorInfo& vectorInfo = m_vectorsInfo[i]; - const float weight = (float)vectorInfo.weight; - double t = (m_vectors[vectorInfo.index] - parent_node.m_centroid) * axis; - if (t < 0.0f) { - new_left_child += m_weightedVectors[vectorInfo.index]; - left_weight += weight; - } else { - new_right_child += m_weightedVectors[vectorInfo.index]; - right_weight += weight; + typedef crnlib::vector vector_vec_type; + inline const vector_vec_type& get_codebook() const + { + return m_codebook; } - } - if ((left_weight > 0.0f) && (right_weight > 0.0f)) { - left_child = new_left_child * (float)(1.0f / left_weight); - right_child = new_right_child * (float)(1.0f / right_weight); - } - } + private: + VectorType* m_vectors; + crnlib::vector m_weightedVectors; + crnlib::vector m_weightedDotProducts; + crnlib::vector m_vectorsInfo, m_vectorsInfoLeft, m_vectorsInfoRight; + crnlib::vector m_vectorComparison; + crnlib::hash_map m_node_index_map; + + struct vq_node + { + vq_node(): + m_centroid(cClear), + m_total_weight(0), + m_left(-1), + m_right(-1), + m_codebook_index(-1), + m_unsplittable(false), + m_alternative(false), + m_processed(false) + { + } + + 
VectorType m_centroid; + uint64 m_total_weight; + + float m_variance; + + uint m_begin; + uint m_end; + + int m_left; + int m_right; + + int m_codebook_index; + + bool m_unsplittable; + bool m_alternative; + bool m_processed; + }; + + typedef crnlib::vector node_vec_type; + + node_vec_type m_nodes; + + vector_vec_type m_codebook; + + struct distance_comparison_task_params + { + VectorType* left_child; + VectorType* right_child; + uint begin; + uint end; + uint num_tasks; + }; + + void distance_comparison_task(uint64 data, void* pData_ptr) + { + distance_comparison_task_params* pParams = (distance_comparison_task_params*)pData_ptr; + const VectorType& left_child = *pParams->left_child; + const VectorType& right_child = *pParams->right_child; + uint begin = pParams->begin + (pParams->end - pParams->begin) * data / pParams->num_tasks; + uint end = pParams->begin + (pParams->end - pParams->begin) * (data + 1) / pParams->num_tasks; + for (uint i = begin; i < end; i++) + { + const VectorType& v = m_vectors[m_vectorsInfo[i].index]; + double left_dist2 = left_child.squared_distance(v); + double right_dist2 = right_child.squared_distance(v); + m_vectorComparison[i] = left_dist2 < right_dist2; + } + } - uint64 left_weight = 0; - uint64 right_weight = 0; - - uint left_info_index = 0; - uint right_info_index = 0; - - float prev_total_variance = 1e+10f; - - float left_variance = 0.0f; - float right_variance = 0.0f; - - // FIXME: Excessive upper limit - const uint cMaxLoops = 1024; - for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { - left_info_index = right_info_index = parent_node.m_begin; - - VectorType new_left_child(cClear); - VectorType new_right_child(cClear); - - double left_ttsum = 0.0f; - double right_ttsum = 0.0f; - - left_weight = 0; - right_weight = 0; - - if (num_tasks > 1) { - distance_comparison_task_params params; - params.left_child = &left_child; - params.right_child = &right_child; - params.begin = parent_node.m_begin; - params.end = 
parent_node.m_end; - params.num_tasks = num_tasks; - - for (uint task = 0; task < params.num_tasks; task++) - pTask_pool->queue_object_task(this, &tree_clusterizer::distance_comparison_task, task, ¶ms); - pTask_pool->join(); - - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorInfo& vectorInfo = m_vectorsInfo[i]; - if (m_vectorComparison[i]) { - new_left_child += m_weightedVectors[vectorInfo.index]; - left_ttsum += m_weightedDotProducts[vectorInfo.index]; - left_weight += vectorInfo.weight; - m_vectorsInfoLeft[left_info_index++] = vectorInfo; - } else { - new_right_child += m_weightedVectors[vectorInfo.index]; - right_ttsum += m_weightedDotProducts[vectorInfo.index]; - right_weight += vectorInfo.weight; - m_vectorsInfoRight[right_info_index++] = vectorInfo; - } + bool split_node(std::priority_queue& node_queue, uint& end_node, task_pool* pTask_pool = 0) + { + if (node_queue.empty()) + { + return false; + } + + vq_node& parent_node = m_nodes[node_queue.top().m_index]; + + if (parent_node.m_alternative) + { + parent_node.m_alternative = false; + } + + if (parent_node.m_variance <= 0.0f || parent_node.m_begin + 1 == parent_node.m_end) + { + return false; + } + + node_queue.pop(); + + if (parent_node.m_processed) + { + if (!parent_node.m_unsplittable) + { + m_nodes[end_node] = m_nodes[parent_node.m_left]; + m_nodes[end_node].m_alternative = true; + node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); + parent_node.m_left = end_node++; + m_nodes[end_node] = m_nodes[parent_node.m_right]; + m_nodes[end_node].m_alternative = true; + node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); + parent_node.m_right = end_node++; + } + return true; + } + parent_node.m_processed = true; + + uint num_blocks = (parent_node.m_end - parent_node.m_begin) >> 9; + uint num_tasks = num_blocks > 1 && pTask_pool ? 
math::minimum(num_blocks, pTask_pool->get_num_threads() + 1) : 1; + + VectorType furthest(0); + double furthest_dist = -1.0f; + + for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) + { + const VectorType& v = m_vectors[m_vectorsInfo[i].index]; + double dist = v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } + + VectorType opposite; + double opposite_dist = -1.0f; + + for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) + { + const VectorType& v = m_vectors[m_vectorsInfo[i].index]; + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } + + VectorType left_child((furthest + parent_node.m_centroid) * .5f); + VectorType right_child((opposite + parent_node.m_centroid) * .5f); + + if (parent_node.m_begin + 2 < parent_node.m_end) + { + const uint N = VectorType::num_elements; + + matrix covar; + covar.clear(); + + for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) + { + const VectorType& v = m_vectors[m_vectorsInfo[i].index] - parent_node.m_centroid; + const VectorType w = v * (float)m_vectorsInfo[i].weight; + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + } + } + + float divider = (float)parent_node.m_total_weight; + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] /= divider; + covar[y][x] = covar[x][y]; + } + } + + VectorType axis(1.0f); + // Starting with an estimate of the principle axis should work better, but doesn't in practice? + //left_child - right_child); + //axis.normalize(); + + for (uint iter = 0; iter < 10; iter++) + { + VectorType x; + + double max_sum = 0; + + for (uint i = 0; i < N; i++) + { + double sum = 0; + + for (uint j = 0; j < N; j++) + { + sum += axis[j] * covar[i][j]; + } + + x[i] = (float)sum; + + max_sum = i ? 
math::maximum(max_sum, sum) : sum; + } + + if (max_sum != 0.0f) + { + x *= (float)(1.0f / max_sum); + } + + axis = x; + } + + axis.normalize(); + + VectorType new_left_child(0.0f); + VectorType new_right_child(0.0f); + + double left_weight = 0.0f; + double right_weight = 0.0f; + + for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) + { + const VectorInfo& vectorInfo = m_vectorsInfo[i]; + const float weight = (float)vectorInfo.weight; + double t = (m_vectors[vectorInfo.index] - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + new_left_child += m_weightedVectors[vectorInfo.index]; + left_weight += weight; + } + else + { + new_right_child += m_weightedVectors[vectorInfo.index]; + right_weight += weight; + } + } + + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child = new_left_child * (float)(1.0f / left_weight); + right_child = new_right_child * (float)(1.0f / right_weight); + } + } + + uint64 left_weight = 0; + uint64 right_weight = 0; + + uint left_info_index = 0; + uint right_info_index = 0; + + float prev_total_variance = 1e+10f; + + float left_variance = 0.0f; + float right_variance = 0.0f; + + // FIXME: Excessive upper limit + const uint cMaxLoops = 1024; + for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) + { + left_info_index = right_info_index = parent_node.m_begin; + + VectorType new_left_child(cClear); + VectorType new_right_child(cClear); + + double left_ttsum = 0.0f; + double right_ttsum = 0.0f; + + left_weight = 0; + right_weight = 0; + + if (num_tasks > 1) + { + distance_comparison_task_params params; + params.left_child = &left_child; + params.right_child = &right_child; + params.begin = parent_node.m_begin; + params.end = parent_node.m_end; + params.num_tasks = num_tasks; + + for (uint task = 0; task < params.num_tasks; task++) + { + pTask_pool->queue_object_task(this, &tree_clusterizer::distance_comparison_task, task, ¶ms); + } + pTask_pool->join(); + + for (uint i = parent_node.m_begin; i < 
parent_node.m_end; i++) + { + const VectorInfo& vectorInfo = m_vectorsInfo[i]; + if (m_vectorComparison[i]) + { + new_left_child += m_weightedVectors[vectorInfo.index]; + left_ttsum += m_weightedDotProducts[vectorInfo.index]; + left_weight += vectorInfo.weight; + m_vectorsInfoLeft[left_info_index++] = vectorInfo; + } + else + { + new_right_child += m_weightedVectors[vectorInfo.index]; + right_ttsum += m_weightedDotProducts[vectorInfo.index]; + right_weight += vectorInfo.weight; + m_vectorsInfoRight[right_info_index++] = vectorInfo; + } + } + } + else + { + for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) + { + const VectorInfo& vectorInfo = m_vectorsInfo[i]; + double left_dist2 = left_child.squared_distance(m_vectors[vectorInfo.index]); + double right_dist2 = right_child.squared_distance(m_vectors[vectorInfo.index]); + if (left_dist2 < right_dist2) + { + new_left_child += m_weightedVectors[vectorInfo.index]; + left_ttsum += m_weightedDotProducts[vectorInfo.index]; + left_weight += vectorInfo.weight; + m_vectorsInfoLeft[left_info_index++] = vectorInfo; + } + else + { + new_right_child += m_weightedVectors[vectorInfo.index]; + right_ttsum += m_weightedDotProducts[vectorInfo.index]; + right_weight += vectorInfo.weight; + m_vectorsInfoRight[right_info_index++] = vectorInfo; + } + } + } + + if ((!left_weight) || (!right_weight)) + { + parent_node.m_unsplittable = true; + return true; + } + + left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); + right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); + + new_left_child *= (1.0f / left_weight); + new_right_child *= (1.0f / right_weight); + + left_child = new_left_child; + right_child = new_right_child; + + float total_variance = left_variance + right_variance; + if (total_variance < .00001f) + { + break; + } + + if (((prev_total_variance - total_variance) / total_variance) < .00001f) + { + break; + } + + prev_total_variance = 
total_variance; + } + + parent_node.m_left = end_node++; + parent_node.m_right = end_node++; + + node_queue.push(NodeInfo(parent_node.m_left, left_variance)); + node_queue.push(NodeInfo(parent_node.m_right, right_variance)); + + vq_node& left_child_node = m_nodes[parent_node.m_left]; + vq_node& right_child_node = m_nodes[parent_node.m_right]; + + left_child_node.m_begin = parent_node.m_begin; + left_child_node.m_end = right_child_node.m_begin = left_info_index; + right_child_node.m_end = parent_node.m_end; + + memcpy(&m_vectorsInfo[left_child_node.m_begin], &m_vectorsInfoLeft[parent_node.m_begin], (left_child_node.m_end - left_child_node.m_begin) * sizeof(VectorInfo)); + memcpy(&m_vectorsInfo[right_child_node.m_begin], &m_vectorsInfoRight[parent_node.m_begin], (right_child_node.m_end - right_child_node.m_begin) * sizeof(VectorInfo)); + + left_child_node.m_centroid = left_child; + left_child_node.m_total_weight = left_weight; + left_child_node.m_variance = left_variance; + + right_child_node.m_centroid = right_child; + right_child_node.m_total_weight = right_weight; + right_child_node.m_variance = right_variance; + + return true; } - } else { - for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { - const VectorInfo& vectorInfo = m_vectorsInfo[i]; - double left_dist2 = left_child.squared_distance(m_vectors[vectorInfo.index]); - double right_dist2 = right_child.squared_distance(m_vectors[vectorInfo.index]); - if (left_dist2 < right_dist2) { - new_left_child += m_weightedVectors[vectorInfo.index]; - left_ttsum += m_weightedDotProducts[vectorInfo.index]; - left_weight += vectorInfo.weight; - m_vectorsInfoLeft[left_info_index++] = vectorInfo; - } else { - new_right_child += m_weightedVectors[vectorInfo.index]; - right_ttsum += m_weightedDotProducts[vectorInfo.index]; - right_weight += vectorInfo.weight; - m_vectorsInfoRight[right_info_index++] = vectorInfo; - } + }; + + template + void split_vectors(VectorType(&vectors)[64], uint(&weights)[64], uint size, 
VectorType(&result)[2]) + { + VectorType weightedVectors[64]; + double weightedDotProducts[64]; + VectorType centroid(cClear); + uint64 total_weight = 0; + double ttsum = 0.0f; + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i]; + const uint weight = weights[i]; + weightedVectors[i] = v * (float)weight; + centroid += weightedVectors[i]; + total_weight += weight; + weightedDotProducts[i] = v.dot(v) * weight; + ttsum += weightedDotProducts[i]; } - } - - if ((!left_weight) || (!right_weight)) { - parent_node.m_unsplittable = true; - return true; - } - - left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); - right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); - - new_left_child *= (1.0f / left_weight); - new_right_child *= (1.0f / right_weight); - - left_child = new_left_child; - right_child = new_right_child; - - float total_variance = left_variance + right_variance; - if (total_variance < .00001f) - break; - - if (((prev_total_variance - total_variance) / total_variance) < .00001f) - break; - - prev_total_variance = total_variance; - } - - parent_node.m_left = end_node++; - parent_node.m_right = end_node++; - - node_queue.push(NodeInfo(parent_node.m_left, left_variance)); - node_queue.push(NodeInfo(parent_node.m_right, right_variance)); - - vq_node& left_child_node = m_nodes[parent_node.m_left]; - vq_node& right_child_node = m_nodes[parent_node.m_right]; - - left_child_node.m_begin = parent_node.m_begin; - left_child_node.m_end = right_child_node.m_begin = left_info_index; - right_child_node.m_end = parent_node.m_end; - - memcpy(&m_vectorsInfo[left_child_node.m_begin], &m_vectorsInfoLeft[parent_node.m_begin], (left_child_node.m_end - left_child_node.m_begin) * sizeof(VectorInfo)); - memcpy(&m_vectorsInfo[right_child_node.m_begin], &m_vectorsInfoRight[parent_node.m_begin], (right_child_node.m_end - right_child_node.m_begin) * sizeof(VectorInfo)); - - 
left_child_node.m_centroid = left_child; - left_child_node.m_total_weight = left_weight; - left_child_node.m_variance = left_variance; - - right_child_node.m_centroid = right_child; - right_child_node.m_total_weight = right_weight; - right_child_node.m_variance = right_variance; - - return true; - } -}; - -template -void split_vectors(VectorType (&vectors)[64], uint (&weights)[64], uint size, VectorType (&result)[2]) { - VectorType weightedVectors[64]; - double weightedDotProducts[64]; - VectorType centroid(cClear); - uint64 total_weight = 0; - double ttsum = 0.0f; - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i]; - const uint weight = weights[i]; - weightedVectors[i] = v * (float)weight; - centroid += weightedVectors[i]; - total_weight += weight; - weightedDotProducts[i] = v.dot(v) * weight; - ttsum += weightedDotProducts[i]; - } - float variance = (float)(ttsum - (centroid.dot(centroid) / total_weight)); - centroid *= (1.0f / total_weight); - result[0] = result[1] = centroid; - if (variance <= 0.0f || size == 1) - return; - VectorType furthest; - double furthest_dist = -1.0f; - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i]; - double dist = v.squared_distance(centroid); - if (dist > furthest_dist) { - furthest_dist = dist; - furthest = v; - } - } - VectorType opposite; - double opposite_dist = -1.0f; - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i]; - double dist = v.squared_distance(furthest); - if (dist > opposite_dist) { - opposite_dist = dist; - opposite = v; - } - } - VectorType left_child((furthest + centroid) * .5f); - VectorType right_child((opposite + centroid) * .5f); - if (size > 2) { - const uint N = VectorType::num_elements; - matrix covar; - covar.clear(); - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i] - centroid; - const VectorType w = v * (float)weights[i]; - for (uint x = 0; x < N; x++) { - for (uint y = x; y < N; y++) - covar[x][y] = covar[x][y] + v[x] * 
w[y]; - } - } - float divider = (float)total_weight; - for (uint x = 0; x < N; x++) { - for (uint y = x; y < N; y++) { - covar[x][y] /= divider; - covar[y][x] = covar[x][y]; - } - } - VectorType axis(1.0f); - for (uint iter = 0; iter < 10; iter++) { - VectorType x; - double max_sum = 0; - for (uint i = 0; i < N; i++) { - double sum = 0; - for (uint j = 0; j < N; j++) - sum += axis[j] * covar[i][j]; - x[i] = (float)sum; - max_sum = i ? math::maximum(max_sum, sum) : sum; - } - if (max_sum != 0.0f) - x *= (float)(1.0f / max_sum); - axis = x; - } - axis.normalize(); - VectorType new_left_child(0.0f); - VectorType new_right_child(0.0f); - double left_weight = 0.0f; - double right_weight = 0.0f; - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i]; - const float weight = (float)weights[i]; - double t = (v - centroid) * axis; - if (t < 0.0f) { - new_left_child += weightedVectors[i]; - left_weight += weight; - } else { - new_right_child += weightedVectors[i]; - right_weight += weight; - } - } - if ((left_weight > 0.0f) && (right_weight > 0.0f)) { - left_child = new_left_child * (float)(1.0f / left_weight); - right_child = new_right_child * (float)(1.0f / right_weight); - } - } - uint64 left_weight = 0; - uint64 right_weight = 0; - float prev_total_variance = 1e+10f; - float left_variance = 0.0f; - float right_variance = 0.0f; - const uint cMaxLoops = 1024; - for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { - VectorType new_left_child(cClear); - VectorType new_right_child(cClear); - double left_ttsum = 0.0f; - double right_ttsum = 0.0f; - left_weight = 0; - right_weight = 0; - for (uint i = 0; i < size; i++) { - const VectorType& v = vectors[i]; - double left_dist2 = left_child.squared_distance(v); - double right_dist2 = right_child.squared_distance(v); - if (left_dist2 < right_dist2) { - new_left_child += weightedVectors[i]; - left_ttsum += weightedDotProducts[i]; - left_weight += weights[i]; - } else { - new_right_child += 
weightedVectors[i]; - right_ttsum += weightedDotProducts[i]; - right_weight += weights[i]; - } + float variance = (float)(ttsum - (centroid.dot(centroid) / total_weight)); + centroid *= (1.0f / total_weight); + result[0] = result[1] = centroid; + if (variance <= 0.0f || size == 1) + { + return; + } + VectorType furthest; + double furthest_dist = -1.0f; + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i]; + double dist = v.squared_distance(centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } + VectorType opposite; + double opposite_dist = -1.0f; + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i]; + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } + VectorType left_child((furthest + centroid) * .5f); + VectorType right_child((opposite + centroid) * .5f); + if (size > 2) + { + const uint N = VectorType::num_elements; + matrix covar; + covar.clear(); + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i] - centroid; + const VectorType w = v * (float)weights[i]; + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + } + float divider = (float)total_weight; + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] /= divider; + covar[y][x] = covar[x][y]; + } + } + VectorType axis(1.0f); + for (uint iter = 0; iter < 10; iter++) + { + VectorType x; + double max_sum = 0; + for (uint i = 0; i < N; i++) + { + double sum = 0; + for (uint j = 0; j < N; j++) + { + sum += axis[j] * covar[i][j]; + } + x[i] = (float)sum; + max_sum = i ? 
math::maximum(max_sum, sum) : sum; + } + if (max_sum != 0.0f) + { + x *= (float)(1.0f / max_sum); + } + axis = x; + } + axis.normalize(); + VectorType new_left_child(0.0f); + VectorType new_right_child(0.0f); + double left_weight = 0.0f; + double right_weight = 0.0f; + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i]; + const float weight = (float)weights[i]; + double t = (v - centroid) * axis; + if (t < 0.0f) + { + new_left_child += weightedVectors[i]; + left_weight += weight; + } + else + { + new_right_child += weightedVectors[i]; + right_weight += weight; + } + } + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child = new_left_child * (float)(1.0f / left_weight); + right_child = new_right_child * (float)(1.0f / right_weight); + } + } + uint64 left_weight = 0; + uint64 right_weight = 0; + float prev_total_variance = 1e+10f; + float left_variance = 0.0f; + float right_variance = 0.0f; + const uint cMaxLoops = 1024; + for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) + { + VectorType new_left_child(cClear); + VectorType new_right_child(cClear); + double left_ttsum = 0.0f; + double right_ttsum = 0.0f; + left_weight = 0; + right_weight = 0; + for (uint i = 0; i < size; i++) + { + const VectorType& v = vectors[i]; + double left_dist2 = left_child.squared_distance(v); + double right_dist2 = right_child.squared_distance(v); + if (left_dist2 < right_dist2) + { + new_left_child += weightedVectors[i]; + left_ttsum += weightedDotProducts[i]; + left_weight += weights[i]; + } + else + { + new_right_child += weightedVectors[i]; + right_ttsum += weightedDotProducts[i]; + right_weight += weights[i]; + } + } + if ((!left_weight) || (!right_weight)) + { + return; + } + left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); + right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); + new_left_child *= (1.0f / left_weight); + new_right_child *= (1.0f / 
right_weight); + left_child = new_left_child; + right_child = new_right_child; + float total_variance = left_variance + right_variance; + if (total_variance < .00001f) + { + break; + } + if (((prev_total_variance - total_variance) / total_variance) < .00001f) + { + break; + } + prev_total_variance = total_variance; + } + result[0] = left_child; + result[1] = right_child; } - if ((!left_weight) || (!right_weight)) - return; - left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); - right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); - new_left_child *= (1.0f / left_weight); - new_right_child *= (1.0f / right_weight); - left_child = new_left_child; - right_child = new_right_child; - float total_variance = left_variance + right_variance; - if (total_variance < .00001f) - break; - if (((prev_total_variance - total_variance) / total_variance) < .00001f) - break; - prev_total_variance = total_variance; - } - result[0] = left_child; - result[1] = right_child; -} } // namespace crnlib diff --git a/crnlib/crn_types.h b/crnlib/crn_types.h index 2e1c25e..9703587 100644 --- a/crnlib/crn_types.h +++ b/crnlib/crn_types.h @@ -2,60 +2,63 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once -namespace crnlib { -typedef unsigned char uint8; -typedef signed char int8; -typedef unsigned short uint16; -typedef signed short int16; -typedef unsigned int uint32; -typedef uint32 uint; -typedef signed int int32; - -#ifdef __GNUC__ -typedef unsigned long long uint64; -typedef long long int64; +#include "crn_core.h" + +namespace crnlib +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint; + typedef signed int int32; + +#if defined(CRN_CC_MSVC) + typedef unsigned __int64 uint64; + typedef signed __int64 int64; #else -typedef unsigned __int64 uint64; -typedef signed 
__int64 int64; + typedef unsigned long long uint64; + typedef long long int64; #endif -const uint8 cUINT8_MIN = 0; -const uint8 cUINT8_MAX = 0xFFU; -const uint16 cUINT16_MIN = 0; -const uint16 cUINT16_MAX = 0xFFFFU; -const uint32 cUINT32_MIN = 0; -const uint32 cUINT32_MAX = 0xFFFFFFFFU; -const uint64 cUINT64_MIN = 0; -const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; - -const int8 cINT8_MIN = -128; -const int8 cINT8_MAX = 127; -const int16 cINT16_MIN = -32768; -const int16 cINT16_MAX = 32767; -const int32 cINT32_MIN = (-2147483647 - 1); -const int32 cINT32_MAX = 2147483647; -const int64 cINT64_MIN = (int64)0x8000000000000000ULL; //(-9223372036854775807i64 - 1); -const int64 cINT64_MAX = (int64)0x7FFFFFFFFFFFFFFFULL; // 9223372036854775807i64; + const uint8 cUINT8_MIN = 0; + const uint8 cUINT8_MAX = 0xFFU; + const uint16 cUINT16_MIN = 0; + const uint16 cUINT16_MAX = 0xFFFFU; + const uint32 cUINT32_MIN = 0; + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + const uint64 cUINT64_MIN = 0; + const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + + const int8 cINT8_MIN = -128; + const int8 cINT8_MAX = 127; + const int16 cINT16_MIN = -32768; + const int16 cINT16_MAX = 32767; + const int32 cINT32_MIN = (-2147483647 - 1); + const int32 cINT32_MAX = 2147483647; + const int64 cINT64_MIN = (int64)0x8000000000000000ULL; //(-9223372036854775807i64 - 1); + const int64 cINT64_MAX = (int64)0x7FFFFFFFFFFFFFFFULL; // 9223372036854775807i64; #if CRNLIB_64BIT_POINTERS -typedef uint64 uint_ptr; -typedef uint64 uint32_ptr; -typedef int64 signed_size_t; -typedef uint64 ptr_bits_t; + typedef uint64 uint_ptr; + typedef uint64 uint32_ptr; + typedef int64 signed_size_t; + typedef uint64 ptr_bits_t; #else -typedef unsigned int uint_ptr; -typedef unsigned int uint32_ptr; -typedef signed int signed_size_t; -typedef uint32 ptr_bits_t; + typedef unsigned int uint_ptr; + typedef unsigned int uint32_ptr; + typedef signed int signed_size_t; + typedef uint32 
ptr_bits_t; #endif -enum eVarArg { cVarArg }; -enum eClear { cClear }; -enum eNoClamp { cNoClamp }; -enum { cInvalidIndex = -1 }; + enum eVarArg { cVarArg }; + enum eClear { cClear }; + enum eNoClamp { cNoClamp }; + enum { cInvalidIndex = -1 }; -const uint cIntBits = 32; + const uint cIntBits = 32; -struct empty_type {}; + struct empty_type {}; } // namespace crnlib diff --git a/crnlib/crn_value.cpp b/crnlib/crn_value.cpp index c13783b..92c21ce 100644 --- a/crnlib/crn_value.cpp +++ b/crnlib/crn_value.cpp @@ -3,8 +3,9 @@ #include "crn_core.h" #include "crn_value.h" -namespace crnlib { -const char* gValueDataTypeStrings[cDTTotal + 1] = +namespace crnlib +{ + const char* gValueDataTypeStrings[cDTTotal + 1] = { "invalid", "string", @@ -16,6 +17,5 @@ const char* gValueDataTypeStrings[cDTTotal + 1] = "vec3i", NULL, -}; - + }; } // namespace crnlib diff --git a/crnlib/crn_value.h b/crnlib/crn_value.h index 8445dd8..189c645 100644 --- a/crnlib/crn_value.h +++ b/crnlib/crn_value.h @@ -1,5 +1,6 @@ // File: crn_value.h // See Copyright Notice and license at the end of inc/crnlib.h + #pragma once #include "crn_strutils.h" @@ -7,915 +8,1138 @@ #include "crn_vec.h" #include "crn_export.h" -namespace crnlib { -enum value_data_type { - cDTInvalid, - cDTString, - cDTBool, - cDTInt, - cDTUInt, - cDTFloat, - cDTVec3F, - cDTVec3I, - - cDTTotal -}; - -CRN_EXPORT extern const char* gValueDataTypeStrings[cDTTotal + 1]; - -class CRN_EXPORT value { - public: - value() - : m_type(cDTInvalid) { - } - - value(const char* pStr) - : m_pStr(crnlib_new(pStr)), m_type(cDTString) { - } - - value(const dynamic_string& str) - : m_pStr(crnlib_new(str)), m_type(cDTString) { - } - - explicit value(bool v) - : m_bool(v), m_type(cDTBool) { - } - - value(int v) - : m_int(v), m_type(cDTInt) { - } - - value(uint v) - : m_uint(v), m_type(cDTUInt) { - } - - value(float v) - : m_float(v), m_type(cDTFloat) { - } - - value(const vec3F& v) - : m_pVec3F(crnlib_new(v)), m_type(cDTVec3F) { - } - - value(const vec3I& 
v) - : m_pVec3I(crnlib_new(v)), m_type(cDTVec3I) { - } - - ~value() { - switch (m_type) { - case cDTString: - crnlib_delete(m_pStr); - break; - case cDTVec3F: - crnlib_delete(m_pVec3F); - break; - case cDTVec3I: - crnlib_delete(m_pVec3I); - break; - default: - break; - } - } - - value(const value& other) - : m_type(cDTInvalid) { - *this = other; - } - - value& operator=(const value& other) { - if (this == &other) - return *this; - - change_type(other.m_type); - - switch (other.m_type) { - case cDTString: - m_pStr->set(*other.m_pStr); - break; - case cDTBool: - m_bool = other.m_bool; - break; - case cDTInt: - m_int = other.m_int; - break; - case cDTUInt: - m_uint = other.m_uint; - break; - case cDTFloat: - m_float = other.m_float; - break; - case cDTVec3F: - m_pVec3F->set(*other.m_pVec3F); - break; - case cDTVec3I: - m_pVec3I->set(*other.m_pVec3I); - break; - default: - break; - } - return *this; - } - - inline value_data_type get_data_type() const { return m_type; } - - void clear() { - clear_dynamic(); - - m_type = cDTInvalid; - } - - void set_string(const char* pStr) { - set_str(pStr); - } - - void set_int(int v) { - clear_dynamic(); - m_type = cDTInt; - m_int = v; - } - - void set_uint(uint v) { - clear_dynamic(); - m_type = cDTUInt; - m_uint = v; - } - - void set_bool(bool v) { - clear_dynamic(); - m_type = cDTBool; - m_bool = v; - } - - void set_float(float v) { - clear_dynamic(); - m_type = cDTFloat; - m_float = v; - } - - void set_vec(const vec3F& v) { - change_type(cDTVec3F); - m_pVec3F->set(v); - } - - void set_vec(const vec3I& v) { - change_type(cDTVec3I); - m_pVec3I->set(v); - } - - bool parse(const char* p) { - if ((!p) || (!p[0])) { - clear(); - return false; - } - - if (_stricmp(p, "false") == 0) { - set_bool(false); - return true; - } else if (_stricmp(p, "true") == 0) { - set_bool(true); - return true; - } - - if (p[0] == '\"') { - dynamic_string str; - str = p + 1; - if (!str.is_empty()) { - if (str[str.get_len() - 1] == '\"') { - 
str.left(str.get_len() - 1); - set_str(str); - - return true; - } - } - } - - if (strchr(p, ',') != NULL) { - float fx = 0, fy = 0, fz = 0; +namespace crnlib +{ + enum value_data_type + { + cDTInvalid, + cDTString, + cDTBool, + cDTInt, + cDTUInt, + cDTFloat, + cDTVec3F, + cDTVec3I, + + cDTTotal + }; + + CRN_EXPORT extern const char* gValueDataTypeStrings[cDTTotal + 1]; + + class CRN_EXPORT value + { + public: + value(): + m_type(cDTInvalid) + { + } + + value(const char* pStr): + m_pStr(crnlib_new(pStr)), + m_type(cDTString) + { + } + + value(const dynamic_string& str): + m_pStr(crnlib_new(str)), + m_type(cDTString) + { + } + + explicit value(bool v): + m_bool(v), + m_type(cDTBool) + { + } + + value(int v): + m_int(v), + m_type(cDTInt) + { + } + + value(uint v): + m_uint(v), + m_type(cDTUInt) + { + } + + value(float v): + m_float(v), + m_type(cDTFloat) + { + } + + value(const vec3F& v): + m_pVec3F(crnlib_new(v)), + m_type(cDTVec3F) + { + } + + value(const vec3I& v): + m_pVec3I(crnlib_new(v)), + m_type(cDTVec3I) + { + } + + ~value() + { + switch (m_type) + { + case cDTString: + crnlib_delete(m_pStr); + break; + case cDTVec3F: + crnlib_delete(m_pVec3F); + break; + case cDTVec3I: + crnlib_delete(m_pVec3I); + break; + default: + break; + } + } + + value(const value& other): + m_type(cDTInvalid) + { + *this = other; + } + + value& operator=(const value& other) + { + if (this == &other) + { + return *this; + } + + change_type(other.m_type); + + switch (other.m_type) + { + case cDTString: + m_pStr->set(*other.m_pStr); + break; + case cDTBool: + m_bool = other.m_bool; + break; + case cDTInt: + m_int = other.m_int; + break; + case cDTUInt: + m_uint = other.m_uint; + break; + case cDTFloat: + m_float = other.m_float; + break; + case cDTVec3F: + m_pVec3F->set(*other.m_pVec3F); + break; + case cDTVec3I: + m_pVec3I->set(*other.m_pVec3I); + break; + default: + break; + } + return *this; + } + + inline value_data_type get_data_type() const + { + return m_type; + } + + void clear() 
+ { + clear_dynamic(); + + m_type = cDTInvalid; + } + + void set_string(const char* pStr) + { + set_str(pStr); + } + + void set_int(int v) + { + clear_dynamic(); + m_type = cDTInt; + m_int = v; + } + + void set_uint(uint v) + { + clear_dynamic(); + m_type = cDTUInt; + m_uint = v; + } + + void set_bool(bool v) + { + clear_dynamic(); + m_type = cDTBool; + m_bool = v; + } + + void set_float(float v) + { + clear_dynamic(); + m_type = cDTFloat; + m_float = v; + } + + void set_vec(const vec3F& v) + { + change_type(cDTVec3F); + m_pVec3F->set(v); + } + + void set_vec(const vec3I& v) + { + change_type(cDTVec3I); + m_pVec3I->set(v); + } + + bool parse(const char* p) + { + if ((!p) || (!p[0])) + { + clear(); + return false; + } + + if (_stricmp(p, "false") == 0) + { + set_bool(false); + return true; + } + else if (_stricmp(p, "true") == 0) + { + set_bool(true); + return true; + } + + if (p[0] == '\"') + { + dynamic_string str; + str = p + 1; + if (!str.is_empty()) + { + if (str[str.get_len() - 1] == '\"') + { + str.left(str.get_len() - 1); + set_str(str); + + return true; + } + } + } + + if (strchr(p, ',') != NULL) + { + float fx = 0, fy = 0, fz = 0; #ifdef _MSC_VER - if (sscanf_s(p, "%f,%f,%f", &fx, &fy, &fz) == 3) + if (sscanf_s(p, "%f,%f,%f", &fx, &fy, &fz) == 3) #else - if (sscanf(p, "%f,%f,%f", &fx, &fy, &fz) == 3) + if (sscanf(p, "%f,%f,%f", &fx, &fy, &fz) == 3) #endif - { - bool as_float = true; - int ix = 0, iy = 0, iz = 0; + { + bool as_float = true; + int ix = 0, iy = 0, iz = 0; #ifdef _MSC_VER - if (sscanf_s(p, "%i,%i,%i", &ix, &iy, &iz) == 3) + if (sscanf_s(p, "%i,%i,%i", &ix, &iy, &iz) == 3) #else - if (sscanf(p, "%i,%i,%i", &ix, &iy, &iz) == 3) + if (sscanf(p, "%i,%i,%i", &ix, &iy, &iz) == 3) #endif + { + if ((ix == fx) && (iy == fy) && (iz == fz)) + { + as_float = false; + } + } + + if (as_float) + { + set_vec(vec3F(fx, fy, fz)); + } + else + { + set_vec(vec3I(ix, iy, iz)); + } + + return true; + } + } + + const char* q = p; + bool success = string_to_uint(q, 
m_uint); + if ((success) && (*q == 0)) + { + set_uint(m_uint); + return true; + } + + q = p; + success = string_to_int(q, m_int); + if ((success) && (*q == 0)) + { + set_int(m_int); + return true; + } + + q = p; + success = string_to_float(q, m_float); + if ((success) && (*q == 0)) + { + set_float(m_float); + return true; + } + + set_string(p); + + return true; + } + + dynamic_string& get_as_string(dynamic_string& dst) const + { + switch (m_type) + { + case cDTInvalid: + dst.clear(); + break; + case cDTString: + dst = *m_pStr; + break; + case cDTBool: + dst = m_bool ? "TRUE" : "FALSE"; + break; + case cDTInt: + dst.format("%i", m_int); + break; + case cDTUInt: + dst.format("%u", m_uint); + break; + case cDTFloat: + dst.format("%f", m_float); + break; + case cDTVec3F: + dst.format("%f,%f,%f", (*m_pVec3F)[0], (*m_pVec3F)[1], (*m_pVec3F)[2]); + break; + case cDTVec3I: + dst.format("%i,%i,%i", (*m_pVec3I)[0], (*m_pVec3I)[1], (*m_pVec3I)[2]); + break; + default: + break; + } + + return dst; + } + + bool get_as_int(int& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + return string_to_int(p, val); + } + case cDTBool: + val = m_bool; + break; + case cDTInt: + val = m_int; + break; + case cDTUInt: { + if (m_uint > INT_MAX) + { + val = 0; + return false; + } + val = m_uint; + break; + } + case cDTFloat: + { + if ((m_float < INT_MIN) || (m_float > INT_MAX)) + { + val = 0; + return false; + } + val = (int)m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = 0; + return false; + } + if (((*m_pVec3F)[component] < INT_MIN) || ((*m_pVec3F)[component] > INT_MAX)) + { + val = 0; + return false; + } + val = (int)(*m_pVec3F)[component]; + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = 0; + return false; + } + val = (int)(*m_pVec3I)[component]; + break; + } + default: + break; + } + return true; + } + + bool 
get_as_uint(uint& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + return string_to_uint(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: { + if (m_int < 0) + { + val = 0; + return false; + } + val = (uint)m_int; + break; + } + case cDTUInt: + { + val = m_uint; + break; + } + case cDTFloat: { + if ((m_float < 0) || (m_float > UINT_MAX)) + { + val = 0; + return false; + } + val = (uint)m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = 0; + return false; + } + if (((*m_pVec3F)[component] < 0) || ((*m_pVec3F)[component] > UINT_MAX)) + { + val = 0; + return false; + } + val = (uint)(*m_pVec3F)[component]; + break; + } + case cDTVec3I: { + if (component > 2) + { + val = 0; + return false; + } + if ((*m_pVec3I)[component] < 0) + { + val = 0; + return false; + } + val = (uint)(*m_pVec3I)[component]; + break; + } + default: + break; + } + return true; + } + + bool get_as_bool(bool& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = false; + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + return string_to_bool(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: + { + val = (m_int != 0); + break; + } + case cDTUInt: + { + val = (m_uint != 0); + break; + } + case cDTFloat: + { + val = (m_float != 0); + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = false; + return false; + } + val = ((*m_pVec3F)[component] != 0); + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = false; + return false; + } + val = ((*m_pVec3I)[component] != 0); + break; + } + default: + break; + } + return true; + } + + bool get_as_float(float& val, uint component = 0) const { - if ((ix == fx) && (iy == fy) && (iz == fz)) - as_float = false; - } - - if (as_float) - set_vec(vec3F(fx, fy, fz)); - else - 
set_vec(vec3I(ix, iy, iz)); - - return true; - } - } - - const char* q = p; - bool success = string_to_uint(q, m_uint); - if ((success) && (*q == 0)) { - set_uint(m_uint); - return true; - } - - q = p; - success = string_to_int(q, m_int); - if ((success) && (*q == 0)) { - set_int(m_int); - return true; - } - - q = p; - success = string_to_float(q, m_float); - if ((success) && (*q == 0)) { - set_float(m_float); - return true; - } - - set_string(p); - - return true; - } - - dynamic_string& get_as_string(dynamic_string& dst) const { - switch (m_type) { - case cDTInvalid: - dst.clear(); - break; - case cDTString: - dst = *m_pStr; - break; - case cDTBool: - dst = m_bool ? "TRUE" : "FALSE"; - break; - case cDTInt: - dst.format("%i", m_int); - break; - case cDTUInt: - dst.format("%u", m_uint); - break; - case cDTFloat: - dst.format("%f", m_float); - break; - case cDTVec3F: - dst.format("%f,%f,%f", (*m_pVec3F)[0], (*m_pVec3F)[1], (*m_pVec3F)[2]); - break; - case cDTVec3I: - dst.format("%i,%i,%i", (*m_pVec3I)[0], (*m_pVec3I)[1], (*m_pVec3I)[2]); - break; - default: - break; - } - - return dst; - } - - bool get_as_int(int& val, uint component = 0) const { - switch (m_type) { - case cDTInvalid: { - val = 0; - return false; - } - case cDTString: { - const char* p = m_pStr->get_ptr(); - return string_to_int(p, val); - } - case cDTBool: - val = m_bool; - break; - case cDTInt: - val = m_int; - break; - case cDTUInt: { - if (m_uint > INT_MAX) { - val = 0; - return false; - } - val = m_uint; - break; - } - case cDTFloat: { - if ((m_float < INT_MIN) || (m_float > INT_MAX)) { - val = 0; - return false; - } - val = (int)m_float; - break; - } - case cDTVec3F: { - if (component > 2) { - val = 0; - return false; - } - if (((*m_pVec3F)[component] < INT_MIN) || ((*m_pVec3F)[component] > INT_MAX)) { - val = 0; - return false; - } - val = (int)(*m_pVec3F)[component]; - break; - } - case cDTVec3I: { - if (component > 2) { - val = 0; - return false; - } - val = (int)(*m_pVec3I)[component]; - 
break; - } - default: - break; - } - return true; - } - - bool get_as_uint(uint& val, uint component = 0) const { - switch (m_type) { - case cDTInvalid: { - val = 0; - return false; - } - case cDTString: { - const char* p = m_pStr->get_ptr(); - return string_to_uint(p, val); - } - case cDTBool: { - val = m_bool; - break; - } - case cDTInt: { - if (m_int < 0) { - val = 0; - return false; - } - val = (uint)m_int; - break; - } - case cDTUInt: { - val = m_uint; - break; - } - case cDTFloat: { - if ((m_float < 0) || (m_float > UINT_MAX)) { - val = 0; - return false; - } - val = (uint)m_float; - break; - } - case cDTVec3F: { - if (component > 2) { - val = 0; - return false; - } - if (((*m_pVec3F)[component] < 0) || ((*m_pVec3F)[component] > UINT_MAX)) { - val = 0; - return false; - } - val = (uint)(*m_pVec3F)[component]; - break; - } - case cDTVec3I: { - if (component > 2) { - val = 0; - return false; - } - if ((*m_pVec3I)[component] < 0) { - val = 0; - return false; - } - val = (uint)(*m_pVec3I)[component]; - break; - } - default: - break; - } - return true; - } - - bool get_as_bool(bool& val, uint component = 0) const { - switch (m_type) { - case cDTInvalid: { - val = false; - return false; - } - case cDTString: { - const char* p = m_pStr->get_ptr(); - return string_to_bool(p, val); - } - case cDTBool: { - val = m_bool; - break; - } - case cDTInt: { - val = (m_int != 0); - break; - } - case cDTUInt: { - val = (m_uint != 0); - break; - } - case cDTFloat: { - val = (m_float != 0); - break; - } - case cDTVec3F: { - if (component > 2) { - val = false; - return false; - } - val = ((*m_pVec3F)[component] != 0); - break; - } - case cDTVec3I: { - if (component > 2) { - val = false; - return false; - } - val = ((*m_pVec3I)[component] != 0); - break; - } - default: - break; - } - return true; - } - - bool get_as_float(float& val, uint component = 0) const { - switch (m_type) { - case cDTInvalid: { - val = 0; - return false; - } - case cDTString: { - const char* p = 
m_pStr->get_ptr(); - return string_to_float(p, val); - } - case cDTBool: { - val = m_bool; - break; - } - case cDTInt: { - val = (float)m_int; - break; - } - case cDTUInt: { - val = (float)m_uint; - break; - } - case cDTFloat: { - val = m_float; - break; - } - case cDTVec3F: { - if (component > 2) { - val = 0; - return false; - } - val = (*m_pVec3F)[component]; - break; - } - case cDTVec3I: { - if (component > 2) { - val = 0; - return false; - } - val = (float)(*m_pVec3I)[component]; - break; - } - default: - break; - } - return true; - } - - bool get_as_vec(vec3F& val) const { - switch (m_type) { - case cDTInvalid: { - val.clear(); - return false; - } - case cDTString: { - const char* p = m_pStr->get_ptr(); - float x = 0, y = 0, z = 0; + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + return string_to_float(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: + { + val = (float)m_int; + break; + } + case cDTUInt: + { + val = (float)m_uint; + break; + } + case cDTFloat: + { + val = m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = 0; + return false; + } + val = (*m_pVec3F)[component]; + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = 0; + return false; + } + val = (float)(*m_pVec3I)[component]; + break; + } + default: + break; + } + return true; + } + + bool get_as_vec(vec3F& val) const + { + switch (m_type) + { + case cDTInvalid: + { + val.clear(); + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + float x = 0, y = 0, z = 0; #ifdef _MSC_VER - if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) + if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) #else - if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) + if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) #endif + { + val.set(x, y, z); + return true; + } + else + { + val.clear(); + return false; + } + } + case cDTBool: + { + val.set(m_bool); + break; + } + 
case cDTInt: + { + val.set(static_cast(m_int)); + break; + } + case cDTUInt: + { + val.set(static_cast(m_uint)); + break; + } + case cDTFloat: + { + val.set(m_float); + break; + } + case cDTVec3F: + { + val = *m_pVec3F; + break; + } + case cDTVec3I: + { + val.set((float)(*m_pVec3I)[0], (float)(*m_pVec3I)[1], (float)(*m_pVec3I)[2]); + break; + } + default: + break; + } + return true; + } + + bool get_as_vec(vec3I& val) const { - val.set(x, y, z); - return true; - } else { - val.clear(); - return false; - } - } - case cDTBool: { - val.set(m_bool); - break; - } - case cDTInt: { - val.set(static_cast(m_int)); - break; - } - case cDTUInt: { - val.set(static_cast(m_uint)); - break; - } - case cDTFloat: { - val.set(m_float); - break; - } - case cDTVec3F: { - val = *m_pVec3F; - break; - } - case cDTVec3I: { - val.set((float)(*m_pVec3I)[0], (float)(*m_pVec3I)[1], (float)(*m_pVec3I)[2]); - break; - } - default: - break; - } - return true; - } - - bool get_as_vec(vec3I& val) const { - switch (m_type) { - case cDTInvalid: { - val.clear(); - return false; - } - case cDTString: { - const char* p = m_pStr->get_ptr(); - float x = 0, y = 0, z = 0; + switch (m_type) + { + case cDTInvalid: + { + val.clear(); + return false; + } + case cDTString: + { + const char* p = m_pStr->get_ptr(); + float x = 0, y = 0, z = 0; #ifdef _MSC_VER - if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) + if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) #else - if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) + if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) #endif + { + if ((x < INT_MIN) || (x > INT_MAX) || (y < INT_MIN) || (y > INT_MAX) || (z < INT_MIN) || (z > INT_MAX)) + { + val.clear(); + return false; + } + val.set((int)x, (int)y, (int)z); + return true; + } + else + { + val.clear(); + return false; + } + + break; + } + case cDTBool: + { + val.set(m_bool); + break; + } + case cDTInt: + { + val.set(m_int); + break; + } + case cDTUInt: + { + val.set(m_uint); + break; + } + case cDTFloat: + { + val.set((int)m_float); + 
break; + } + case cDTVec3F: + { + val.set((int)(*m_pVec3F)[0], (int)(*m_pVec3F)[1], (int)(*m_pVec3F)[2]); + break; + } + case cDTVec3I: + { + val = *m_pVec3I; + break; + } + default: + break; + } + return true; + } + + bool set_zero() + { + switch (m_type) + { + case cDTInvalid: + { + return false; + } + case cDTString: + { + m_pStr->empty(); + break; + } + case cDTBool: + { + m_bool = false; + break; + } + case cDTInt: + { + m_int = 0; + break; + } + case cDTUInt: + { + m_uint = 0; + break; + } + case cDTFloat: + { + m_float = 0; + break; + } + case cDTVec3F: + { + m_pVec3F->clear(); + break; + } + case cDTVec3I: + { + m_pVec3I->clear(); + break; + } + default: + break; + } + return true; + } + + bool is_vector() const + { + switch (m_type) + { + case cDTVec3F: + case cDTVec3I: + return true; + default: + break; + } + return false; + } + + uint get_num_components() const + { + switch (m_type) + { + case cDTVec3F: + case cDTVec3I: + return 3; + default: + break; + } + return 1; + } + + bool is_numeric() const + { + switch (m_type) + { + case cDTInt: + case cDTUInt: + case cDTFloat: + case cDTVec3F: + case cDTVec3I: + return true; + default: + break; + } + return false; + } + + bool is_float() const + { + switch (m_type) + { + case cDTFloat: + case cDTVec3F: + return true; + default: + break; + } + return false; + } + + bool is_integer() const { - if ((x < INT_MIN) || (x > INT_MAX) || (y < INT_MIN) || (y > INT_MAX) || (z < INT_MIN) || (z > INT_MAX)) { - val.clear(); + switch (m_type) + { + case cDTInt: + case cDTUInt: + case cDTVec3I: + return true; + default: + break; + } return false; - } - val.set((int)x, (int)y, (int)z); - return true; - } else { - val.clear(); - return false; - } - - break; - } - case cDTBool: { - val.set(m_bool); - break; - } - case cDTInt: { - val.set(m_int); - break; - } - case cDTUInt: { - val.set(m_uint); - break; - } - case cDTFloat: { - val.set((int)m_float); - break; - } - case cDTVec3F: { - val.set((int)(*m_pVec3F)[0], 
(int)(*m_pVec3F)[1], (int)(*m_pVec3F)[2]); - break; - } - case cDTVec3I: { - val = *m_pVec3I; - break; - } - default: - break; - } - return true; - } - - bool set_zero() { - switch (m_type) { - case cDTInvalid: { - return false; - } - case cDTString: { - m_pStr->empty(); - break; - } - case cDTBool: { - m_bool = false; - break; - } - case cDTInt: { - m_int = 0; - break; - } - case cDTUInt: { - m_uint = 0; - break; - } - case cDTFloat: { - m_float = 0; - break; - } - case cDTVec3F: { - m_pVec3F->clear(); - break; - } - case cDTVec3I: { - m_pVec3I->clear(); - break; - } - default: - break; - } - return true; - } - - bool is_vector() const { - switch (m_type) { - case cDTVec3F: - case cDTVec3I: - return true; - default: - break; - } - return false; - } - - uint get_num_components() const { - switch (m_type) { - case cDTVec3F: - case cDTVec3I: - return 3; - default: - break; - } - return 1; - } - - bool is_numeric() const { - switch (m_type) { - case cDTInt: - case cDTUInt: - case cDTFloat: - case cDTVec3F: - case cDTVec3I: - return true; - default: - break; - } - return false; - } - - bool is_float() const { - switch (m_type) { - case cDTFloat: - case cDTVec3F: - return true; - default: - break; - } - return false; - } - - bool is_integer() const { - switch (m_type) { - case cDTInt: - case cDTUInt: - case cDTVec3I: - return true; - default: - break; - } - return false; - } - - bool is_signed() const { - switch (m_type) { - case cDTInt: - case cDTFloat: - case cDTVec3F: - case cDTVec3I: - return true; - default: - break; - } - return false; - } - - bool is_string() const { - return m_type == cDTString; - } - - int serialize(void* pBuf, uint buf_size, bool little_endian) const { - uint buf_left = buf_size; - - uint8 t = (uint8)m_type; - if (!utils::write_obj(t, pBuf, buf_left, little_endian)) - return -1; - - switch (m_type) { - case cDTString: { - int bytes_written = m_pStr->serialize(pBuf, buf_left, little_endian); - if (bytes_written < 0) - return -1; - - pBuf = 
static_cast(pBuf) + bytes_written; - buf_left -= bytes_written; - - break; - } - case cDTBool: { - if (!utils::write_obj(m_bool, pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTInt: - case cDTUInt: - case cDTFloat: { - if (!utils::write_obj(m_float, pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTVec3F: { - for (uint i = 0; i < 3; i++) - if (!utils::write_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTVec3I: { - for (uint i = 0; i < 3; i++) - if (!utils::write_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) - return -1; - break; - } - default: - break; - } - - return buf_size - buf_left; - } - - int deserialize(const void* pBuf, uint buf_size, bool little_endian) { - uint buf_left = buf_size; - - uint8 t; - if (!utils::read_obj(t, pBuf, buf_left, little_endian)) - return -1; - - if (t >= cDTTotal) - return -1; - - m_type = static_cast(t); - - switch (m_type) { - case cDTString: { - change_type(cDTString); - - int bytes_read = m_pStr->deserialize(pBuf, buf_left, little_endian); - if (bytes_read < 0) - return -1; - - pBuf = static_cast(pBuf) + bytes_read; - buf_left -= bytes_read; - - break; - } - case cDTBool: { - if (!utils::read_obj(m_bool, pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTInt: - case cDTUInt: - case cDTFloat: { - if (!utils::read_obj(m_float, pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTVec3F: { - change_type(cDTVec3F); - - for (uint i = 0; i < 3; i++) - if (!utils::read_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) - return -1; - break; - } - case cDTVec3I: { - change_type(cDTVec3I); - - for (uint i = 0; i < 3; i++) - if (!utils::read_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) - return -1; - break; - } - default: - break; - } - - return buf_size - buf_left; - } - - void swap(value& other) { - for (uint i = 0; i < cUnionSize; i++) - std::swap(m_union[i], other.m_union[i]); - - std::swap(m_type, 
other.m_type); - } - - private: - void clear_dynamic() { - if (m_type == cDTVec3F) { - crnlib_delete(m_pVec3F); - m_pVec3F = NULL; - - m_type = cDTInvalid; - } else if (m_type == cDTVec3I) { - crnlib_delete(m_pVec3I); - m_pVec3I = NULL; - - m_type = cDTInvalid; - } else if (m_type == cDTString) { - crnlib_delete(m_pStr); - m_pStr = NULL; - - m_type = cDTInvalid; - } - } - - void change_type(value_data_type type) { - if (type != m_type) { - clear_dynamic(); - - m_type = type; - - switch (m_type) { - case cDTString: - m_pStr = crnlib_new(); - break; - case cDTVec3F: - m_pVec3F = crnlib_new(); - break; - case cDTVec3I: - m_pVec3I = crnlib_new(); - break; - default: - break; - } - } - } - - void set_str(const dynamic_string& s) { - if (m_type == cDTString) - m_pStr->set(s); - else { - clear_dynamic(); - - m_type = cDTString; - m_pStr = crnlib_new(s); - } - } - - void set_str(const char* p) { - if (m_type == cDTString) - m_pStr->set(p); - else { - clear_dynamic(); - - m_type = cDTString; - m_pStr = crnlib_new(p); - } - } - - enum { cUnionSize = 1 }; - - union { - bool m_bool; - int m_int; - uint m_uint; - float m_float; - - vec3F* m_pVec3F; - vec3I* m_pVec3I; - dynamic_string* m_pStr; - - uint m_union[cUnionSize]; - }; - - value_data_type m_type; -}; + } + + bool is_signed() const + { + switch (m_type) + { + case cDTInt: + case cDTFloat: + case cDTVec3F: + case cDTVec3I: + return true; + default: + break; + } + return false; + } + + bool is_string() const + { + return m_type == cDTString; + } + + int serialize(void* pBuf, uint buf_size, bool little_endian) const + { + uint buf_left = buf_size; + + uint8 t = (uint8)m_type; + if (!utils::write_obj(t, pBuf, buf_left, little_endian)) + { + return -1; + } + + switch (m_type) + { + case cDTString: + { + int bytes_written = m_pStr->serialize(pBuf, buf_left, little_endian); + if (bytes_written < 0) + { + return -1; + } + + pBuf = static_cast(pBuf) + bytes_written; + buf_left -= bytes_written; + + break; + } + case cDTBool: + { 
+ if (!utils::write_obj(m_bool, pBuf, buf_left, little_endian)) + { + return -1; + } + break; + } + case cDTInt: + case cDTUInt: + case cDTFloat: + { + if (!utils::write_obj(m_float, pBuf, buf_left, little_endian)) + { + return -1; + } + break; + } + case cDTVec3F: + { + for (uint i = 0; i < 3; i++) + { + if (!utils::write_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) + { + return -1; + } + } + break; + } + case cDTVec3I: + { + for (uint i = 0; i < 3; i++) + { + if (!utils::write_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) + { + return -1; + } + } + break; + } + default: + break; + } + + return buf_size - buf_left; + } + + int deserialize(const void* pBuf, uint buf_size, bool little_endian) + { + uint buf_left = buf_size; + + uint8 t; + if (!utils::read_obj(t, pBuf, buf_left, little_endian)) + { + return -1; + } + + if (t >= cDTTotal) + { + return -1; + } + + m_type = static_cast(t); + + switch (m_type) + { + case cDTString: + { + change_type(cDTString); + + int bytes_read = m_pStr->deserialize(pBuf, buf_left, little_endian); + if (bytes_read < 0) + { + return -1; + } + + pBuf = static_cast(pBuf) + bytes_read; + buf_left -= bytes_read; + + break; + } + case cDTBool: + { + if (!utils::read_obj(m_bool, pBuf, buf_left, little_endian)) + { + return -1; + } + break; + } + case cDTInt: + case cDTUInt: + case cDTFloat: + { + if (!utils::read_obj(m_float, pBuf, buf_left, little_endian)) + { + return -1; + } + break; + } + case cDTVec3F: + { + change_type(cDTVec3F); + + for (uint i = 0; i < 3; i++) + { + if (!utils::read_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) + { + return -1; + } + } + break; + } + case cDTVec3I: + { + change_type(cDTVec3I); + + for (uint i = 0; i < 3; i++) + { + if (!utils::read_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) + { + return -1; + } + } + break; + } + default: + break; + } + + return buf_size - buf_left; + } + + void swap(value& other) + { + for (uint i = 0; i < cUnionSize; i++) + { + std::swap(m_union[i], 
other.m_union[i]); + } + + std::swap(m_type, other.m_type); + } + + private: + void clear_dynamic() + { + if (m_type == cDTVec3F) + { + crnlib_delete(m_pVec3F); + m_pVec3F = NULL; + + m_type = cDTInvalid; + } + else if (m_type == cDTVec3I) + { + crnlib_delete(m_pVec3I); + m_pVec3I = NULL; + + m_type = cDTInvalid; + } + else if (m_type == cDTString) + { + crnlib_delete(m_pStr); + m_pStr = NULL; + + m_type = cDTInvalid; + } + } + + void change_type(value_data_type type) + { + if (type != m_type) + { + clear_dynamic(); + + m_type = type; + + switch (m_type) + { + case cDTString: + m_pStr = crnlib_new(); + break; + case cDTVec3F: + m_pVec3F = crnlib_new(); + break; + case cDTVec3I: + m_pVec3I = crnlib_new(); + break; + default: + break; + } + } + } + + void set_str(const dynamic_string& s) + { + if (m_type == cDTString) + { + m_pStr->set(s); + } + else + { + clear_dynamic(); + + m_type = cDTString; + m_pStr = crnlib_new(s); + } + } + + void set_str(const char* p) + { + if (m_type == cDTString) + { + m_pStr->set(p); + } + else + { + clear_dynamic(); + + m_type = cDTString; + m_pStr = crnlib_new(p); + } + } + + enum { cUnionSize = 1 }; + + union { + bool m_bool; + int m_int; + uint m_uint; + float m_float; + + vec3F* m_pVec3F; + vec3I* m_pVec3I; + dynamic_string* m_pStr; + + uint m_union[cUnionSize]; + }; + + value_data_type m_type; + }; } // namespace crnlib diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h index 23fe61a..8385182 100644 --- a/crnlib/crn_vector.h +++ b/crnlib/crn_vector.h @@ -4,657 +4,869 @@ #include "crn_export.h" -namespace crnlib { -struct CRN_EXPORT elemental_vector { - void* m_p; - uint m_size; - uint m_capacity; - - typedef void (*object_mover)(void* pDst, void* pSrc, uint num); - - bool increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pRelocate, bool nofail); -}; - -template -class vector : public helpers::rel_ops > { - public: - typedef T* iterator; - typedef const T* const_iterator; - typedef T 
value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - - inline vector() - : m_p(NULL), - m_size(0), - m_capacity(0) { - } - - inline vector(uint n, const T& init) - : m_p(NULL), - m_size(0), - m_capacity(0) { - increase_capacity(n, false); - helpers::construct_array(m_p, n, init); - m_size = n; - } - - inline vector(const vector& other) - : m_p(NULL), - m_size(0), - m_capacity(0) { - increase_capacity(other.m_size, false); - - m_size = other.m_size; - - if (CRNLIB_IS_BITWISE_COPYABLE(T)) - memcpy(m_p, other.m_p, m_size * sizeof(T)); - else { - T* pDst = m_p; - const T* pSrc = other.m_p; - for (uint i = m_size; i > 0; i--) - helpers::construct(pDst++, *pSrc++); - } - } - - inline explicit vector(uint size) - : m_p(NULL), - m_size(0), - m_capacity(0) { - resize(size); - } - - inline ~vector() { - if (m_p) { - scalar_type::destruct_array(m_p, m_size); - crnlib_free(m_p); - } - } +namespace crnlib +{ + struct CRN_EXPORT elemental_vector + { + void* m_p; + uint m_size; + uint m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint num); + + bool increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pRelocate, bool nofail); + }; + + template + class vector : public helpers::rel_ops> + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector(): + m_p(NULL), + m_size(0), + m_capacity(0) + { + } - inline vector& operator=(const vector& other) { - if (this == &other) - return *this; + inline vector(uint n, const T& init): + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + helpers::construct_array(m_p, n, init); + m_size = n; + } - if (m_capacity >= other.m_size) - resize(0); - else { - clear(); - increase_capacity(other.m_size, false); - } + inline 
vector(const vector& other): + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + { + memcpy(m_p, other.m_p, m_size * sizeof(T)); + } + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = m_size; i > 0; i--) + { + helpers::construct(pDst++, *pSrc++); + } + } + } - if (CRNLIB_IS_BITWISE_COPYABLE(T)) - memcpy(m_p, other.m_p, other.m_size * sizeof(T)); - else { - T* pDst = m_p; - const T* pSrc = other.m_p; - for (uint i = other.m_size; i > 0; i--) - helpers::construct(pDst++, *pSrc++); - } + inline explicit vector(uint size): + m_p(NULL), + m_size(0), + m_capacity(0) + { + resize(size); + } - m_size = other.m_size; - - return *this; - } - - inline const T* begin() const { return m_p; } - T* begin() { return m_p; } - - inline const T* end() const { return m_p + m_size; } - T* end() { return m_p + m_size; } - - inline bool empty() const { return !m_size; } - inline uint size() const { return m_size; } - inline uint size_in_bytes() const { return m_size * sizeof(T); } - inline uint capacity() const { return m_capacity; } - - // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. - inline const T& operator[](uint i) const { - CRNLIB_ASSERT(i < m_size); - return m_p[i]; - } - inline T& operator[](uint i) { - CRNLIB_ASSERT(i < m_size); - return m_p[i]; - } - - // at() always includes range checking, even in final builds, unlike operator []. - // The first element is returned if the index is out of range. - inline const T& at(uint i) const { - CRNLIB_ASSERT(i < m_size); - return (i >= m_size) ? m_p[0] : m_p[i]; - } - inline T& at(uint i) { - CRNLIB_ASSERT(i < m_size); - return (i >= m_size) ? 
m_p[0] : m_p[i]; - } - - inline const T& front() const { - CRNLIB_ASSERT(m_size); - return m_p[0]; - } - inline T& front() { - CRNLIB_ASSERT(m_size); - return m_p[0]; - } - - inline const T& back() const { - CRNLIB_ASSERT(m_size); - return m_p[m_size - 1]; - } - inline T& back() { - CRNLIB_ASSERT(m_size); - return m_p[m_size - 1]; - } - - inline const T* get_ptr() const { return m_p; } - inline T* get_ptr() { return m_p; } - - // clear() sets the container to empty, then frees the allocated block. - inline void clear() { - if (m_p) { - scalar_type::destruct_array(m_p, m_size); - crnlib_free(m_p); - m_p = NULL; - m_size = 0; - m_capacity = 0; - } - } - - inline void clear_no_destruction() { - if (m_p) { - crnlib_free(m_p); - m_p = NULL; - m_size = 0; - m_capacity = 0; - } - } - - inline void reserve(uint new_capacity) { - if (new_capacity > m_capacity) - increase_capacity(new_capacity, false); - else if (new_capacity < m_capacity) { - // Must work around the lack of a "decrease_capacity()" method. - // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. 
- vector tmp; - tmp.increase_capacity(math::maximum(m_size, new_capacity), false); - tmp = *this; - swap(tmp); - } - } + inline ~vector() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + crnlib_free(m_p); + } + } - inline bool try_reserve(uint new_capacity) { - return increase_capacity(new_capacity, true, true); - } + inline vector& operator=(const vector& other) + { + if (this == &other) + { + return *this; + } + + if (m_capacity >= other.m_size) + { + resize(0); + } + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + { + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + } + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = other.m_size; i > 0; i--) + { + helpers::construct(pDst++, *pSrc++); + } + } + + m_size = other.m_size; + + return *this; + } - // resize(0) sets the container to empty, but does not free the allocated block. - inline void resize(uint new_size, bool grow_hint = false) { - if (m_size != new_size) { - if (new_size < m_size) - scalar_type::destruct_array(m_p + new_size, m_size - new_size); - else { - if (new_size > m_capacity) - increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint); + inline const T* begin() const + { + return m_p; + } + T* begin() + { + return m_p; + } - scalar_type::construct_array(m_p + m_size, new_size - m_size); - } + inline const T* end() const + { + return m_p + m_size; + } + T* end() + { + return m_p + m_size; + } - m_size = new_size; - } - } + inline bool empty() const + { + return !m_size; + } + inline uint size() const + { + return m_size; + } + inline uint size_in_bytes() const + { + return m_size * sizeof(T); + } + inline uint capacity() const + { + return m_capacity; + } - inline bool try_resize(uint new_size, bool grow_hint = false) { - if (m_size != new_size) { - if (new_size < m_size) - scalar_type::destruct_array(m_p + new_size, m_size - new_size); - else { - if (new_size > m_capacity) { - if 
(!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) - return false; + // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. + inline const T& operator[](uint i) const + { + CRNLIB_ASSERT(i < m_size); + return m_p[i]; + } + inline T& operator[](uint i) + { + CRNLIB_ASSERT(i < m_size); + return m_p[i]; } - scalar_type::construct_array(m_p + m_size, new_size - m_size); - } + // at() always includes range checking, even in final builds, unlike operator []. + // The first element is returned if the index is out of range. + inline const T& at(uint i) const + { + CRNLIB_ASSERT(i < m_size); + return (i >= m_size) ? m_p[0] : m_p[i]; + } + inline T& at(uint i) + { + CRNLIB_ASSERT(i < m_size); + return (i >= m_size) ? m_p[0] : m_p[i]; + } - m_size = new_size; - } + inline const T& front() const + { + CRNLIB_ASSERT(m_size); + return m_p[0]; + } + inline T& front() + { + CRNLIB_ASSERT(m_size); + return m_p[0]; + } - return true; - } - - // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). - // Otherwise it blows away the allocated block. 
See http://www.codercorner.com/blog/?p=494 - inline void reset() { - if (m_size >= (m_capacity >> 1)) - resize(0); - else - clear(); - } - - inline T* enlarge(uint i) { - uint cur_size = m_size; - resize(cur_size + i, true); - return get_ptr() + cur_size; - } - - inline T* try_enlarge(uint i) { - uint cur_size = m_size; - if (!try_resize(cur_size + i, true)) - return NULL; - return get_ptr() + cur_size; - } - - inline void push_back(const T& obj) { - CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); - - if (m_size >= m_capacity) - increase_capacity(m_size + 1, true); - - scalar_type::construct(m_p + m_size, obj); - m_size++; - } - - inline bool try_push_back(const T& obj) { - CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); - - if (m_size >= m_capacity) { - if (!increase_capacity(m_size + 1, true, true)) - return false; - } + inline const T& back() const + { + CRNLIB_ASSERT(m_size); + return m_p[m_size - 1]; + } + inline T& back() + { + CRNLIB_ASSERT(m_size); + return m_p[m_size - 1]; + } - scalar_type::construct(m_p + m_size, obj); - m_size++; + inline const T* get_ptr() const + { + return m_p; + } + inline T* get_ptr() + { + return m_p; + } - return true; - } + // clear() sets the container to empty, then frees the allocated block. + inline void clear() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + crnlib_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } - inline void push_back_value(T obj) { - if (m_size >= m_capacity) - increase_capacity(m_size + 1, true); + inline void clear_no_destruction() + { + if (m_p) + { + crnlib_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } - scalar_type::construct(m_p + m_size, obj); - m_size++; - } + inline void reserve(uint new_capacity) + { + if (new_capacity > m_capacity) + { + increase_capacity(new_capacity, false); + } + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. 
+ // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + tmp.increase_capacity(math::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + } - inline void pop_back() { - CRNLIB_ASSERT(m_size); + inline bool try_reserve(uint new_capacity) + { + return increase_capacity(new_capacity, true, true); + } - if (m_size) { - m_size--; - scalar_type::destruct(&m_p[m_size]); - } - } + // resize(0) sets the container to empty, but does not free the allocated block. + inline void resize(uint new_size, bool grow_hint = false) + { + if (m_size != new_size) { + if (new_size < m_size) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + else + { + if (new_size > m_capacity) + { + increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint); + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + } - inline void insert(uint index, const T* p, uint n) { - CRNLIB_ASSERT(index <= m_size); - if (!n) - return; + inline bool try_resize(uint new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + { + return false; + } + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } - const uint orig_size = m_size; - resize(m_size + n, true); + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. 
See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + { + resize(0); + } + else + { + clear(); + } + } - const uint num_to_move = orig_size - index; + inline T* enlarge(uint i) + { + uint cur_size = m_size; + resize(cur_size + i, true); + return get_ptr() + cur_size; + } - if (CRNLIB_IS_BITWISE_COPYABLE(T)) { - // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. - memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); - } else { - const T* pSrc = m_p + orig_size - 1; - T* pDst = const_cast(pSrc) + n; + inline T* try_enlarge(uint i) + { + uint cur_size = m_size; + if (!try_resize(cur_size + i, true)) + { + return NULL; + } + return get_ptr() + cur_size; + } - for (uint i = 0; i < num_to_move; i++) { - CRNLIB_ASSERT((pDst - m_p) < (int)m_size); - *pDst-- = *pSrc--; - } - } + inline void push_back(const T& obj) + { + CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); - T* pDst = m_p + index; + if (m_size >= m_capacity) + { + increase_capacity(m_size + 1, true); + } - if (CRNLIB_IS_BITWISE_COPYABLE(T)) { - // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. - memcpy(pDst, p, sizeof(T) * n); - } else { - for (uint i = 0; i < n; i++) { - CRNLIB_ASSERT((pDst - m_p) < (int)m_size); - *pDst++ = *p++; - } - } - } - - // push_front() isn't going to be very fast - it's only here for usability. 
- inline void push_front(const T& obj) { - insert(0, &obj, 1); - } - - vector& append(const vector& other) { - if (other.m_size) - insert(m_size, &other[0], other.m_size); - return *this; - } - - vector& append(const T* p, uint n) { - if (n) - insert(m_size, p, n); - return *this; - } - - inline void erase(uint start, uint n) { - CRNLIB_ASSERT((start + n) <= m_size); - if ((start + n) > m_size) - return; - - if (!n) - return; - - const uint num_to_move = m_size - (start + n); - - T* pDst = m_p + start; - - const T* pSrc = m_p + start + n; - - if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) { - // This test is overly cautious. - if ((!CRNLIB_IS_BITWISE_COPYABLE(T)) || (CRNLIB_HAS_DESTRUCTOR(T))) { - // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. - // First destroy the erased objects. - scalar_type::destruct_array(pDst, n); - } - - // Copy "down" the objects to preserve, filling in the empty slots. - memmove(pDst, pSrc, num_to_move * sizeof(T)); - } else { - // Type is not bitwise copyable or movable. - // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. 
- T* pDst_end = pDst + num_to_move; - while (pDst != pDst_end) - *pDst++ = *pSrc++; - - scalar_type::destruct_array(pDst_end, n); - } + scalar_type::construct(m_p + m_size, obj); + m_size++; + } - m_size -= n; - } - - inline void erase(uint index) { - erase(index, 1); - } - - inline void erase(T* p) { - CRNLIB_ASSERT((p >= m_p) && (p < (m_p + m_size))); - erase(static_cast(p - m_p)); - } - - void erase_unordered(uint index) { - CRNLIB_ASSERT(index < m_size); - - if ((index + 1) < m_size) - (*this)[index] = back(); - - pop_back(); - } - - inline bool operator==(const vector& rhs) const { - if (m_size != rhs.m_size) - return false; - else if (m_size) { - if (scalar_type::cFlag) - return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; - else { - const T* pSrc = m_p; - const T* pDst = rhs.m_p; - for (uint i = m_size; i; i--) - if (!(*pSrc++ == *pDst++)) - return false; - } - } + inline bool try_push_back(const T& obj) + { + CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true, true)) + { + return false; + } + } - return true; - } + scalar_type::construct(m_p + m_size, obj); + m_size++; - inline bool operator<(const vector& rhs) const { - const uint min_size = math::minimum(m_size, rhs.m_size); + return true; + } - const T* pSrc = m_p; - const T* pSrc_end = m_p + min_size; - const T* pDst = rhs.m_p; + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + { + increase_capacity(m_size + 1, true); + } - while ((pSrc < pSrc_end) && (*pSrc == *pDst)) { - pSrc++; - pDst++; - } + scalar_type::construct(m_p + m_size, obj); + m_size++; + } - if (pSrc < pSrc_end) - return *pSrc < *pDst; + inline void pop_back() + { + CRNLIB_ASSERT(m_size); - return m_size < rhs.m_size; - } + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } - inline void swap(vector& other) { - utils::swap(m_p, other.m_p); - utils::swap(m_size, other.m_size); - utils::swap(m_capacity, 
other.m_capacity); - } + inline void insert(uint index, const T* p, uint n) + { + CRNLIB_ASSERT(index <= m_size); + if (!n) + { + return; + } + + const uint orig_size = m_size; + resize(m_size + n, true); + + const uint num_to_move = orig_size - index; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + { + // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + } + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (uint i = 0; i < num_to_move; i++) + { + CRNLIB_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + } + + T* pDst = m_p + index; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + { + // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. + memcpy(pDst, p, sizeof(T) * n); + } + else + { + for (uint i = 0; i < n; i++) + { + CRNLIB_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + } - inline void sort() { - std::sort(begin(), end()); - } + // push_front() isn't going to be very fast - it's only here for usability. 
+ inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } - inline void unique() { - if (!empty()) { - sort(); + vector& append(const vector& other) + { + if (other.m_size) + { + insert(m_size, &other[0], other.m_size); + } + return *this; + } - resize(std::unique(begin(), end()) - begin()); - } - } + vector& append(const T* p, uint n) + { + if (n) + { + insert(m_size, p, n); + } + return *this; + } - inline void reverse() { - uint j = m_size >> 1; - for (uint i = 0; i < j; i++) - utils::swap(m_p[i], m_p[m_size - 1 - i]); - } + inline void erase(uint start, uint n) + { + CRNLIB_ASSERT((start + n) <= m_size); + if ((start + n) > m_size) + { + return; + } + + if (!n) + { + return; + } + + const uint num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + // This test is overly cautious. + if ((!CRNLIB_IS_BITWISE_COPYABLE(T)) || (CRNLIB_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type::destruct_array(pDst, n); + } + + // Copy "down" the objects to preserve, filling in the empty slots. + memmove(pDst, pSrc, num_to_move * sizeof(T)); + } + else + { + // Type is not bitwise copyable or movable. + // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. 
+ T* pDst_end = pDst + num_to_move; + while (pDst != pDst_end) + { + *pDst++ = *pSrc++; + } + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + } - inline int find(const T& key) const { - const T* p = m_p; - const T* p_end = m_p + m_size; + inline void erase(uint index) + { + erase(index, 1); + } - uint index = 0; + inline void erase(T* p) + { + CRNLIB_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(static_cast(p - m_p)); + } - while (p != p_end) { - if (key == *p) - return index; + void erase_unordered(uint index) + { + CRNLIB_ASSERT(index < m_size); - p++; - index++; - } + if ((index + 1) < m_size) + { + (*this)[index] = back(); + } - return cInvalidIndex; - } - - inline int find_sorted(const T& key) const { - if (m_size) { - // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. - int i = ((m_size + 1) >> 1) - 1; - int m = m_size; - - for (;;) { - CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); - const T* pKey_i = m_p + i; - int cmp = key < *pKey_i; - if ((!cmp) && (key == *pKey_i)) - return i; - m >>= 1; - if (!m) - break; - cmp = -cmp; - i += (((m + 1) >> 1) ^ cmp) - cmp; - - CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); - pKey_i = m_p + i; - cmp = key < *pKey_i; - if ((!cmp) && (key == *pKey_i)) - return i; - m >>= 1; - if (!m) - break; - cmp = -cmp; - i += (((m + 1) >> 1) ^ cmp) - cmp; - } - } + pop_back(); + } - return cInvalidIndex; - } - - template - inline int find_sorted(const T& key, Q less_than) const { - if (m_size) { - // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. 
- int i = ((m_size + 1) >> 1) - 1; - int m = m_size; - - for (;;) { - CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); - const T* pKey_i = m_p + i; - int cmp = less_than(key, *pKey_i); - if ((!cmp) && (!less_than(*pKey_i, key))) - return i; - m >>= 1; - if (!m) - break; - cmp = -cmp; - i += (((m + 1) >> 1) ^ cmp) - cmp; - - CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); - pKey_i = m_p + i; - cmp = less_than(key, *pKey_i); - if ((!cmp) && (!less_than(*pKey_i, key))) - return i; - m >>= 1; - if (!m) - break; - cmp = -cmp; - i += (((m + 1) >> 1) ^ cmp) - cmp; - } - } + inline bool operator==(const vector& rhs) const + { + if (m_size != rhs.m_size) + { + return false; + } + else if (m_size) + { + if (scalar_type::cFlag) + { + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + } + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint i = m_size; i; i--) + { + if (!(*pSrc++ == *pDst++)) + { + return false; + } + } + } + } + + return true; + } - return cInvalidIndex; - } + inline bool operator<(const vector& rhs) const + { + const uint min_size = math::minimum(m_size, rhs.m_size); - inline uint count_occurences(const T& key) const { - uint c = 0; + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; - const T* p = m_p; - const T* p_end = m_p + m_size; + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } - while (p != p_end) { - if (key == *p) - c++; + if (pSrc < pSrc_end) + { + return *pSrc < *pDst; + } - p++; - } + return m_size < rhs.m_size; + } - return c; - } - - inline void set_all(const T& o) { - if ((sizeof(T) == 1) && (scalar_type::cFlag)) - memset(m_p, *reinterpret_cast(&o), m_size); - else { - T* pDst = m_p; - T* pDst_end = pDst + m_size; - while (pDst != pDst_end) - *pDst++ = o; - } - } - - // Caller assumes ownership of the heap block associated with the container. Container is cleared. 
- inline void* assume_ownership() { - T* p = m_p; - m_p = NULL; - m_size = 0; - m_capacity = 0; - return p; - } - - // Caller is granting ownership of the indicated heap block. - // Block must have size constructed elements, and have enough room for capacity elements. - inline bool grant_ownership(T* p, uint size, uint capacity) { - // To to prevent the caller from obviously shooting themselves in the foot. - if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) { - // Can grant ownership of a block inside the container itself! - CRNLIB_ASSERT(0); - return false; - } + inline void swap(vector& other) + { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + utils::swap(m_capacity, other.m_capacity); + } - if (size > capacity) { - CRNLIB_ASSERT(0); - return false; - } + inline void sort() + { + std::sort(begin(), end()); + } - if (!p) { - if (capacity) { - CRNLIB_ASSERT(0); - return false; - } - } else if (!capacity) { - CRNLIB_ASSERT(0); - return false; - } + inline void unique() + { + if (!empty()) + { + sort(); - clear(); - m_p = p; - m_size = size; - m_capacity = capacity; - return true; - } - - private: - T* m_p; - uint m_size; - uint m_capacity; - - template - struct is_vector { - enum { cFlag = false }; - }; - template - struct is_vector > { - enum { cFlag = true }; - }; - - static void object_mover(void* pDst_void, void* pSrc_void, uint num) { - T* pSrc = static_cast(pSrc_void); - T* const pSrc_end = pSrc + num; - T* pDst = static_cast(pDst_void); - - while (pSrc != pSrc_end) { - // placement new - new (static_cast(pDst)) T(*pSrc); - pSrc->~T(); - ++pSrc; - ++pDst; - } - } + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + uint j = m_size >> 1; + for (uint i = 0; i < j; i++) + { + utils::swap(m_p[i], m_p[m_size - 1 - i]); + } + } - inline bool increase_capacity(uint min_new_capacity, bool grow_hint, bool nofail = false) { - return reinterpret_cast(this)->increase_capacity( - min_new_capacity, grow_hint, 
sizeof(T), - (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); - } -}; + inline int find(const T& key) const + { + const T* p = m_p; + const T* p_end = m_p + m_size; -typedef crnlib::vector uint8_vec; + uint index = 0; -template -struct bitwise_movable > { - enum { cFlag = true }; -}; + while (p != p_end) + { + if (key == *p) + { + return index; + } -extern void vector_test(); + p++; + index++; + } -template -inline void swap(vector& a, vector& b) { - a.swap(b); -} + return cInvalidIndex; + } + + inline int find_sorted(const T& key) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (;;) + { + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) + { + return i; + } + m >>= 1; + if (!m) + { + break; + } + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) + { + return i; + } + m >>= 1; + if (!m) + { + break; + } + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + template + inline int find_sorted(const T& key, Q less_than) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. 
+ int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (;;) + { + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) + { + return i; + } + m >>= 1; + if (!m) + { + break; + } + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) + { + return i; + } + m >>= 1; + if (!m) + { + break; + } + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + inline uint count_occurences(const T& key) const + { + uint c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + { + c++; + } + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + { + memset(m_p, *reinterpret_cast(&o), m_size); + } + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + { + *pDst++ = o; + } + } + } + + // Caller assumes ownership of the heap block associated with the container. Container is cleared. + inline void* assume_ownership() + { + T* p = m_p; + m_p = NULL; + m_size = 0; + m_capacity = 0; + return p; + } + + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + inline bool grant_ownership(T* p, uint size, uint capacity) + { + // To to prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! 
+ CRNLIB_ASSERT(0); + return false; + } + + if (size > capacity) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!p) + { + if (capacity) + { + CRNLIB_ASSERT(0); + return false; + } + } + else if (!capacity) + { + CRNLIB_ASSERT(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + + private: + T* m_p; + uint m_size; + uint m_capacity; + + template + struct is_vector { + enum { cFlag = false }; + }; + template + struct is_vector> { + enum { cFlag = true }; + }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + // placement new + new (static_cast(pDst)) T(*pSrc); + pSrc->~T(); + ++pSrc; + ++pDst; + } + } + + inline bool increase_capacity(uint min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? 
NULL : object_mover, nofail); + } + }; + + typedef crnlib::vector uint8_vec; + + template + struct bitwise_movable> + { + enum { cFlag = true }; + }; + + extern void vector_test(); + + template + inline void swap(vector& a, vector& b) + { + a.swap(b); + } } // namespace crnlib diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp index e471a29..d7d2593 100644 --- a/crnlib/crnlib.cpp +++ b/crnlib/crnlib.cpp @@ -1,5 +1,6 @@ // File: crnlib.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +// See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crnlib.h" #include "crn_comp.h" @@ -8,428 +9,515 @@ #include "crn_buffer_stream.h" #include "crn_ryg_dxt.hpp" #include "crn_etc.h" - -#include "../inc/crn_defs.h" - +#include "crn_defs.h" #include "crn_rg_etc1.h" -namespace crnlib { -static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void*) { - return crnlib_realloc(p, size, pActual_size, movable); -} +namespace crnlib +{ + static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void*) + { + return crnlib_realloc(p, size, pActual_size, movable); + } -static size_t msize_func(void* p, void*) { - return crnlib_msize(p); -} + static size_t msize_func(void* p, void*) + { + return crnlib_msize(p); + } -class crnlib_global_initializer { - public: - crnlib_global_initializer() { - crn_threading_init(); + class crnlib_global_initializer + { + public: + crnlib_global_initializer() + { + crn_threading_init(); - crnlib_enable_fail_exceptions(true); + crnlib_enable_fail_exceptions(true); - // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. - crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); + // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. 
+ crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); - ryg_dxt::sInitDXT(); + ryg_dxt::sInitDXT(); - pack_etc1_block_init(); + pack_etc1_block_init(); - rg_etc1::pack_etc1_block_init(); - } -}; + rg_etc1::pack_etc1_block_init(); + } + }; -crnlib_global_initializer g_crnlib_initializer; + crnlib_global_initializer g_crnlib_initializer; } // namespace crnlib using namespace crnlib; -const char* crn_get_format_string(crn_format fmt) { - return pixel_format_helpers::get_crn_format_string(fmt); +const char* crn_get_format_string(crn_format fmt) +{ + return pixel_format_helpers::get_crn_format_string(fmt); } -crn_uint32 crn_get_format_fourcc(crn_format fmt) { - return crnd::crnd_crn_format_to_fourcc(fmt); +crn_uint32 crn_get_format_fourcc(crn_format fmt) +{ + return crnd::crnd_crn_format_to_fourcc(fmt); } -crn_uint32 crn_get_format_bits_per_texel(crn_format fmt) { - return crnd::crnd_get_crn_format_bits_per_texel(fmt); +crn_uint32 crn_get_format_bits_per_texel(crn_format fmt) +{ + return crnd::crnd_get_crn_format_bits_per_texel(fmt); } -crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt) { - return crnd::crnd_get_bytes_per_dxt_block(fmt); +crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt) +{ + return crnd::crnd_get_bytes_per_dxt_block(fmt); } -crn_format crn_get_fundamental_dxt_format(crn_format fmt) { - return crnd::crnd_get_fundamental_dxt_format(fmt); +crn_format crn_get_fundamental_dxt_format(crn_format fmt) +{ + return crnd::crnd_get_fundamental_dxt_format(fmt); } -const char* crn_get_file_type_ext(crn_file_type file_type) { - switch (file_type) { +const char* crn_get_file_type_ext(crn_file_type file_type) +{ + switch (file_type) + { case cCRNFileTypeDDS: - return "dds"; + return "dds"; case cCRNFileTypeCRN: - return "crn"; + return "crn"; default: - break; - } - return "?"; + break; + } + return "?"; } -const char* crn_get_mip_mode_desc(crn_mip_mode m) { - switch (m) { +const char* crn_get_mip_mode_desc(crn_mip_mode m) +{ + switch (m) + { case 
cCRNMipModeUseSourceOrGenerateMips: - return "Use source/generate if none"; + return "Use source/generate if none"; case cCRNMipModeUseSourceMips: - return "Only use source MIP maps (if any)"; + return "Only use source MIP maps (if any)"; case cCRNMipModeGenerateMips: - return "Always generate new MIP maps"; + return "Always generate new MIP maps"; case cCRNMipModeNoMips: - return "No MIP maps"; + return "No MIP maps"; default: - break; - } - return "?"; + break; + } + return "?"; } -const char* crn_get_mip_mode_name(crn_mip_mode m) { - switch (m) { +const char* crn_get_mip_mode_name(crn_mip_mode m) +{ + switch (m) + { case cCRNMipModeUseSourceOrGenerateMips: - return "UseSourceOrGenerate"; + return "UseSourceOrGenerate"; case cCRNMipModeUseSourceMips: - return "UseSource"; + return "UseSource"; case cCRNMipModeGenerateMips: - return "Generate"; + return "Generate"; case cCRNMipModeNoMips: - return "None"; + return "None"; default: - break; - } - return "?"; + break; + } + return "?"; } -const char* crn_get_mip_filter_name(crn_mip_filter f) { - switch (f) { +const char* crn_get_mip_filter_name(crn_mip_filter f) +{ + switch (f) + { case cCRNMipFilterBox: - return "box"; + return "box"; case cCRNMipFilterTent: - return "tent"; + return "tent"; case cCRNMipFilterLanczos4: - return "lanczos4"; + return "lanczos4"; case cCRNMipFilterMitchell: - return "mitchell"; + return "mitchell"; case cCRNMipFilterKaiser: - return "kaiser"; + return "kaiser"; default: - break; - } - return "?"; + break; + } + return "?"; } -const char* crn_get_scale_mode_desc(crn_scale_mode sm) { - switch (sm) { +const char* crn_get_scale_mode_desc(crn_scale_mode sm) +{ + switch (sm) + { case cCRNSMDisabled: - return "disabled"; + return "disabled"; case cCRNSMAbsolute: - return "absolute"; + return "absolute"; case cCRNSMRelative: - return "relative"; + return "relative"; case cCRNSMLowerPow2: - return "lowerpow2"; + return "lowerpow2"; case cCRNSMNearestPow2: - return "nearestpow2"; + return 
"nearestpow2"; case cCRNSMNextPow2: - return "nextpow2"; + return "nextpow2"; default: - break; - } - return "?"; + break; + } + return "?"; } -const char* crn_get_dxt_quality_string(crn_dxt_quality q) { - switch (q) { +const char* crn_get_dxt_quality_string(crn_dxt_quality q) +{ + switch (q) + { case cCRNDXTQualitySuperFast: - return "SuperFast"; + return "SuperFast"; case cCRNDXTQualityFast: - return "Fast"; + return "Fast"; case cCRNDXTQualityNormal: - return "Normal"; + return "Normal"; case cCRNDXTQualityBetter: - return "Better"; + return "Better"; case cCRNDXTQualityUber: - return "Uber"; + return "Uber"; default: - break; - } - CRNLIB_ASSERT(false); - return "?"; + break; + } + CRNLIB_ASSERT(false); + return "?"; } -void crn_free_block(void* pBlock) { - crnlib_free(pBlock); +void crn_free_block(void* pBlock) +{ + crnlib_free(pBlock); } -void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) { - compressed_size = 0; - if (pActual_quality_level) - *pActual_quality_level = 0; - if (pActual_bitrate) - *pActual_bitrate = 0.0f; +void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) +{ + compressed_size = 0; + if (pActual_quality_level) + { + *pActual_quality_level = 0; + } + if (pActual_bitrate) + { + *pActual_bitrate = 0.0f; + } - if (!comp_params.check()) - return NULL; + if (!comp_params.check()) + { + return NULL; + } - crnlib::vector crn_file_data; - if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) - return NULL; + crnlib::vector crn_file_data; + if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) + { + return NULL; + } - compressed_size = crn_file_data.size(); - return crn_file_data.assume_ownership(); + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); 
} -void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) { - compressed_size = 0; - if (pActual_quality_level) - *pActual_quality_level = 0; - if (pActual_bitrate) - *pActual_bitrate = 0.0f; +void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) +{ + compressed_size = 0; + if (pActual_quality_level) + { + *pActual_quality_level = 0; + } + if (pActual_bitrate) + { + *pActual_bitrate = 0.0f; + } - if ((!comp_params.check()) || (!mip_params.check())) - return NULL; + if ((!comp_params.check()) || (!mip_params.check())) + { + return NULL; + } - crnlib::vector crn_file_data; - if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) - return NULL; + crnlib::vector crn_file_data; + if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) + { + return NULL; + } - compressed_size = crn_file_data.size(); - return crn_file_data.assume_ownership(); + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); } -void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size) { - mipmapped_texture tex; - if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) { - file_size = 0; - return NULL; - } - - file_size = 0; - - dynamic_stream dds_file_data; - dds_file_data.reserve(128 * 1024); - data_stream_serializer serializer(dds_file_data); - if (!tex.write_dds(serializer)) - return NULL; - dds_file_data.reserve(0); +void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size) +{ + mipmapped_texture tex; + if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) + { + file_size = 0; + return NULL; + } - file_size = 
static_cast(dds_file_data.get_size()); - return dds_file_data.get_buf().assume_ownership(); -} + file_size = 0; -bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc) { - memset(&tex_desc, 0, sizeof(tex_desc)); - - mipmapped_texture tex; - buffer_stream in_stream(pDDS_file_data, dds_file_size); - data_stream_serializer in_serializer(in_stream); - if (!tex.read_dds(in_serializer)) - return false; - - if (tex.is_packed()) { - // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? - bool uncook = true; - - if (!tex.unpack_from_dxt(uncook)) - return false; - } - - tex_desc.m_faces = tex.get_num_faces(); - tex_desc.m_width = tex.get_width(); - tex_desc.m_height = tex.get_height(); - tex_desc.m_levels = tex.get_num_levels(); - tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format(); - - for (uint32 f = 0; f < tex.get_num_faces(); f++) { - for (uint32 l = 0; l < tex.get_num_levels(); l++) { - mip_level* pLevel = tex.get_level(f, l); - image_u8* pImg = pLevel->get_image(); - ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership()); + dynamic_stream dds_file_data; + dds_file_data.reserve(128 * 1024); + data_stream_serializer serializer(dds_file_data); + if (!tex.write_dds(serializer)) + { + return NULL; } - } - - return true; -} + dds_file_data.reserve(0); -void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc) { - for (uint32 f = 0; f < desc.m_faces; f++) - for (uint32 l = 0; l < desc.m_levels; l++) - crn_free_block(ppImages[l + desc.m_levels * f]); + file_size = static_cast(dds_file_data.get_size()); + return dds_file_data.get_buf().assume_ownership(); } -// Simple low-level DXTn 4x4 block compressor API. -// Basically just a basic wrapper over the crnlib::dxt_image class. 
- -namespace crnlib { -class crn_block_compressor { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor); +bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc) +{ + memset(&tex_desc, 0, sizeof(tex_desc)); - public: - crn_block_compressor() { - } - - bool init(const crn_comp_params& params) { - m_comp_params = params; - - m_pack_params.init(params); + mipmapped_texture tex; + buffer_stream in_stream(pDDS_file_data, dds_file_size); + data_stream_serializer in_serializer(in_stream); + if (!tex.read_dds(in_serializer)) + { + return false; + } - crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format); - pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt); + if (tex.is_packed()) + { + // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? + bool uncook = true; - if ((params.get_flag(cCRNCompFlagDXT1AForTransparency)) && (basic_pixel_fmt == PIXEL_FMT_DXT1)) - basic_pixel_fmt = PIXEL_FMT_DXT1A; + if (!tex.unpack_from_dxt(uncook)) + { + return false; + } + } - if (!m_image.init(pixel_format_helpers::get_dxt_format(basic_pixel_fmt), cDXTBlockSize, cDXTBlockSize, false)) - return false; + tex_desc.m_faces = tex.get_num_faces(); + tex_desc.m_width = tex.get_width(); + tex_desc.m_height = tex.get_height(); + tex_desc.m_levels = tex.get_num_levels(); + tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format(); + + for (uint32 f = 0; f < tex.get_num_faces(); f++) + { + for (uint32 l = 0; l < tex.get_num_levels(); l++) + { + mip_level* pLevel = tex.get_level(f, l); + image_u8* pImg = pLevel->get_image(); + ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership()); + } + } return true; - } +} - void compress_block(const crn_uint32* pPixels, void* pDst_block) { - if (m_image.is_valid()) { - m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, 
m_set_block_pixels_context); - memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); +void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc) +{ + for (uint32 f = 0; f < desc.m_faces; f++) + { + for (uint32 l = 0; l < desc.m_levels; l++) + { + crn_free_block(ppImages[l + desc.m_levels * f]); + } } - } - - private: - dxt_image m_image; - crn_comp_params m_comp_params; - dxt_image::pack_params m_pack_params; - dxt_image::set_block_pixels_context m_set_block_pixels_context; -}; } -crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params) { - crn_block_compressor* pComp = crnlib_new(); - if (!pComp->init(params)) { - crnlib_delete(pComp); - return NULL; - } - return pComp; +// Simple low-level DXTn 4x4 block compressor API. +// Basically just a basic wrapper over the crnlib::dxt_image class. + +namespace crnlib +{ + class crn_block_compressor + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor); + public: + crn_block_compressor() + { + } + + bool init(const crn_comp_params& params) + { + m_comp_params = params; + + m_pack_params.init(params); + + crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format); + pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt); + + if ((params.get_flag(cCRNCompFlagDXT1AForTransparency)) && (basic_pixel_fmt == PIXEL_FMT_DXT1)) + { + basic_pixel_fmt = PIXEL_FMT_DXT1A; + } + + if (!m_image.init(pixel_format_helpers::get_dxt_format(basic_pixel_fmt), cDXTBlockSize, cDXTBlockSize, false)) + { + return false; + } + + return true; + } + + void compress_block(const crn_uint32* pPixels, void* pDst_block) + { + if (m_image.is_valid()) + { + m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_set_block_pixels_context); + memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); + } + } + + private: + dxt_image m_image; + crn_comp_params 
m_comp_params; + dxt_image::pack_params m_pack_params; + dxt_image::set_block_pixels_context m_set_block_pixels_context; + }; } -void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block) { - crn_block_compressor* pComp = static_cast(pContext); - pComp->compress_block(pPixels, pDst_block); +crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params) +{ + crn_block_compressor* pComp = crnlib_new(); + if (!pComp->init(params)) + { + crnlib_delete(pComp); + return NULL; + } + return pComp; } -void crn_free_block_compressor(crn_block_compressor_context_t pContext) { - crnlib_delete(static_cast(pContext)); +void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block) +{ + crn_block_compressor* pComp = static_cast(pContext); + pComp->compress_block(pPixels, pDst_block); } -bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels_u32, crn_format crn_fmt) { - color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); +void crn_free_block_compressor(crn_block_compressor_context_t pContext) +{ + crnlib_delete(static_cast(pContext)); +} - switch (crn_get_fundamental_dxt_format(crn_fmt)) { - case cCRNFmtETC1: { - const etc1_block& block = *reinterpret_cast(pSrc_block); - unpack_etc1(block, pDst_pixels, false); - break; +bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels_u32, crn_format crn_fmt) +{ + color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); + + switch (crn_get_fundamental_dxt_format(crn_fmt)) + { + case cCRNFmtETC1: + { + const etc1_block& block = *reinterpret_cast(pSrc_block); + unpack_etc1(block, pDst_pixels, false); + break; } - case cCRNFmtDXT1: { - const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); + case cCRNFmtDXT1: + { + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, 
static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - pDst_pixels[i] = colors[s]; - } + pDst_pixels[i] = colors[s]; + } - break; + break; } - case cCRNFmtDXT3: { - const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); + case cCRNFmtDXT3: + { + const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); - const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); - pDst_pixels[i] = colors[s]; - pDst_pixels[i].a = static_cast(a); - } + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(a); + } - break; + break; } - case cCRNFmtDXT5: { - const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + case cCRNFmtDXT5: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); - const dxt1_block* pDXT1_block = 
reinterpret_cast(pSrc_block) + 1; - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - const uint a = pDXT5_block->get_selector(i & 3, i >> 2); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT5_block->get_selector(i & 3, i >> 2); - pDst_pixels[i] = colors[s]; - pDst_pixels[i].a = static_cast(values[a]); - } + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(values[a]); + } } case cCRNFmtDXN_XY: - case cCRNFmtDXN_YX: { - const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); - const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; - - uint values0[cDXT5SelectorValues]; - dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); - - uint values1[cDXT5SelectorValues]; - dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); - - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); - const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); - - if (crn_fmt == cCRNFmtDXN_XY) - pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); - 
else - pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); - } - - break; + case cCRNFmtDXN_YX: + { + const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); + const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; + + uint values0[cDXT5SelectorValues]; + dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); + + uint values1[cDXT5SelectorValues]; + dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); + const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); + + if (crn_fmt == cCRNFmtDXN_XY) + { + pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); + } + else + { + pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); + } + } + + break; } - case cCRNFmtDXT5A: { - const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + case cCRNFmtDXT5A: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const uint s = pDXT5_block->get_selector(i & 3, i >> 2); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT5_block->get_selector(i & 3, i >> 2); - pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); - } + pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); + } - break; + break; + } + default: + { + return false; } - default: { - return false; } - } - return true; + return true; } From bac8314b4a64a0e86139dbca7244b26b40b36995 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Mon, 7 Sep 2020 
15:30:30 -0400 Subject: [PATCH 12/18] Use std::swap --- crnlib/crn_dxt1.h | 2 +- crnlib/crn_dxt5a.cpp | 4 +- crnlib/crn_dxt_fast.cpp | 6 +- crnlib/crn_dxt_hc.cpp | 4 +- crnlib/crn_dynamic_string.cpp | 6 +- crnlib/crn_hash_map.h | 10 +- crnlib/crn_image.h | 18 +- crnlib/crn_matrix.h | 1128 +++++++++++++++++------------- crnlib/crn_mem.h | 372 +++++----- crnlib/crn_mipmapped_texture.cpp | 10 +- crnlib/crn_pixel_format.cpp | 662 +++++++++--------- crnlib/crn_pixel_format.h | 670 +++++++++--------- crnlib/crn_sparse_array.h | 4 +- crnlib/crn_sparse_bit_array.cpp | 4 +- crnlib/crn_utils.h | 9 +- crnlib/crn_vector.cpp | 113 +-- crnlib/crn_vector.h | 8 +- inc/crn_decomp.h | 6 +- 18 files changed, 1660 insertions(+), 1376 deletions(-) diff --git a/crnlib/crn_dxt1.h b/crnlib/crn_dxt1.h index 79b81ed..d279aa3 100644 --- a/crnlib/crn_dxt1.h +++ b/crnlib/crn_dxt1.h @@ -58,7 +58,7 @@ namespace crnlib { if (m_low_color < m_high_color) { - utils::swap(m_low_color, m_high_color); + std::swap(m_low_color, m_high_color); } return *this; } diff --git a/crnlib/crn_dxt5a.cpp b/crnlib/crn_dxt5a.cpp index b2dd881..1883b49 100644 --- a/crnlib/crn_dxt5a.cpp +++ b/crnlib/crn_dxt5a.cpp @@ -143,7 +143,7 @@ namespace crnlib if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) { - utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + std::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); m_pResults->m_reordered = true; for (uint i = 0; i < m_best_selectors.size(); i++) { @@ -153,7 +153,7 @@ namespace crnlib } else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) { - utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + std::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); m_pResults->m_reordered = true; for (uint i = 0; i < m_best_selectors.size(); i++) { diff --git a/crnlib/crn_dxt_fast.cpp b/crnlib/crn_dxt_fast.cpp index da588f6..496c2d9 100644 --- a/crnlib/crn_dxt_fast.cpp +++ 
b/crnlib/crn_dxt_fast.cpp @@ -627,7 +627,7 @@ namespace crnlib uint h = probe_high[j]; if (l < h) { - utils::swap(l, h); + std::swap(l, h); } uint c = l | (h << 16); @@ -729,7 +729,7 @@ namespace crnlib if (low16 < high16) { - utils::swap(low16, high16); + std::swap(low16, high16); for (uint i = 0; i < n; i++) { pSelectors[i] ^= 1; @@ -961,7 +961,7 @@ namespace crnlib if (energy[0] > energy[1]) { - utils::swap(lo_color, hi_color); + std::swap(lo_color, hi_color); } lo = lo_color; diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 9b539d9..63c9b1f 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -255,7 +255,7 @@ vec6F dxt_hc::palettize_color(color_quad_u8* pixels, uint pixels_count) { vec3F result[2]; split_vectors(vectors, weights, size, result); if (result[0].length() > result[1].length()) - utils::swap(result[0], result[1]); + std::swap(result[0], result[1]); return *(vec6F*)result; } @@ -279,7 +279,7 @@ vec2F dxt_hc::palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint com vec1F result[2]; split_vectors(vectors, weights, size, result); if (result[0] > result[1]) - utils::swap(result[0], result[1]); + std::swap(result[0], result[1]); return *(vec2F*)result; } diff --git a/crnlib/crn_dynamic_string.cpp b/crnlib/crn_dynamic_string.cpp index 1ba8621..9375b38 100644 --- a/crnlib/crn_dynamic_string.cpp +++ b/crnlib/crn_dynamic_string.cpp @@ -663,9 +663,9 @@ namespace crnlib void dynamic_string::swap(dynamic_string& other) { - utils::swap(other.m_buf_size, m_buf_size); - utils::swap(other.m_len, m_len); - utils::swap(other.m_pStr, m_pStr); + std::swap(other.m_buf_size, m_buf_size); + std::swap(other.m_len, m_len); + std::swap(other.m_pStr, m_pStr); } int dynamic_string::serialize(void* pBuf, uint buf_size, bool little_endian) const diff --git a/crnlib/crn_hash_map.h b/crnlib/crn_hash_map.h index 4617696..6de0bf5 100644 --- a/crnlib/crn_hash_map.h +++ b/crnlib/crn_hash_map.h @@ -406,11 +406,11 @@ class hash_map { inline void 
swap(hash_map_type& other) { m_values.swap(other.m_values); - utils::swap(m_hash_shift, other.m_hash_shift); - utils::swap(m_num_valid, other.m_num_valid); - utils::swap(m_grow_threshold, other.m_grow_threshold); - utils::swap(m_hasher, other.m_hasher); - utils::swap(m_equals, other.m_equals); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); } private: diff --git a/crnlib/crn_image.h b/crnlib/crn_image.h index 99a6fc2..3235125 100644 --- a/crnlib/crn_image.h +++ b/crnlib/crn_image.h @@ -529,25 +529,25 @@ class image { } inline void swap(image& other) { - utils::swap(m_width, other.m_width); - utils::swap(m_height, other.m_height); - utils::swap(m_pitch, other.m_pitch); - utils::swap(m_total, other.m_total); - utils::swap(m_comp_flags, other.m_comp_flags); - utils::swap(m_pPixels, other.m_pPixels); + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + std::swap(m_total, other.m_total); + std::swap(m_comp_flags, other.m_comp_flags); + std::swap(m_pPixels, other.m_pPixels); m_pixel_buf.swap(other.m_pixel_buf); } void draw_line(int xs, int ys, int xe, int ye, const color_type& color) { if (xs > xe) { - utils::swap(xs, xe); - utils::swap(ys, ye); + std::swap(xs, xe); + std::swap(ys, ye); } int dx = xe - xs, dy = ye - ys; if (!dx) { if (ys > ye) - utils::swap(ys, ye); + std::swap(ys, ye); for (int i = ys; i <= ye; i++) set_pixel_clipped(xs, i, color); } else if (!dy) { diff --git a/crnlib/crn_matrix.h b/crnlib/crn_matrix.h index a7fa5f3..ae2f51b 100644 --- a/crnlib/crn_matrix.h +++ b/crnlib/crn_matrix.h @@ -4,491 +4,665 @@ #include "crn_vec.h" -namespace crnlib { -template -Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) { - CRNLIB_ASSUME(Z::num_rows == X::num_rows); - CRNLIB_ASSUME(Z::num_cols == Y::num_cols); - 
CRNLIB_ASSUME(X::num_cols == Y::num_rows); - CRNLIB_ASSERT((&result != &lhs) && (&result != &rhs)); - for (int r = 0; r < X::num_rows; r++) - for (int c = 0; c < Y::num_cols; c++) { - typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); - for (uint i = 1; i < X::num_cols; i++) - s += lhs(r, i) * rhs(i, c); - result(r, c) = s; - } - return result; -} - -template -Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) { - CRNLIB_ASSUME(Z::num_rows == X::num_cols); - CRNLIB_ASSUME(Z::num_cols == Y::num_cols); - CRNLIB_ASSUME(X::num_rows == Y::num_rows); - for (int r = 0; r < X::num_cols; r++) - for (int c = 0; c < Y::num_cols; c++) { - typename Z::scalar_type s = lhs(0, r) * rhs(0, c); - for (uint i = 1; i < X::num_rows; i++) - s += lhs(i, r) * rhs(i, c); - result(r, c) = s; +namespace crnlib +{ + template + Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) + { + CRNLIB_ASSUME(Z::num_rows == X::num_rows); + CRNLIB_ASSUME(Z::num_cols == Y::num_cols); + CRNLIB_ASSUME(X::num_cols == Y::num_rows); + CRNLIB_ASSERT((&result != &lhs) && (&result != &rhs)); + for (int r = 0; r < X::num_rows; r++) + { + for (int c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); + for (uint i = 1; i < X::num_cols; i++) + { + s += lhs(r, i) * rhs(i, c); + } + result(r, c) = s; + } + } + return result; } - return result; -} - -template -Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) { - CRNLIB_ASSUME(Z::num_rows == X::num_rows); - CRNLIB_ASSUME(Z::num_cols == Y::num_rows); - CRNLIB_ASSUME(X::num_cols == Y::num_cols); - for (int r = 0; r < X::num_rows; r++) - for (int c = 0; c < Y::num_rows; c++) { - typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); - for (uint i = 1; i < X::num_cols; i++) - s += lhs(r, i) * rhs(c, i); - result(r, c) = s; + + template + Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) + { + CRNLIB_ASSUME(Z::num_rows == X::num_cols); + CRNLIB_ASSUME(Z::num_cols == 
Y::num_cols); + CRNLIB_ASSUME(X::num_rows == Y::num_rows); + for (int r = 0; r < X::num_cols; r++) + { + for (int c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(0, r) * rhs(0, c); + for (uint i = 1; i < X::num_rows; i++) + { + s += lhs(i, r) * rhs(i, c); + } + result(r, c) = s; + } + } + return result; } - return result; -} - -template -class matrix { - public: - typedef T scalar_type; - enum { num_rows = R, - num_cols = C }; - - typedef vec col_vec; - typedef vec<(R > 1) ? (R - 1) : 0, T> subcol_vec; - - typedef vec row_vec; - typedef vec<(C > 1) ? (C - 1) : 0, T> subrow_vec; - - inline matrix() {} - - inline matrix(eClear) { clear(); } - - inline matrix(const T* p) { set(p); } - - inline matrix(const matrix& other) { - for (uint i = 0; i < R; i++) - m_rows[i] = other.m_rows[i]; - } - - inline matrix& operator=(const matrix& rhs) { - if (this != &rhs) - for (uint i = 0; i < R; i++) - m_rows[i] = rhs.m_rows[i]; - return *this; - } - - inline matrix(T val00, T val01, - T val10, T val11) { - set(val00, val01, val10, val11); - } - - inline matrix(T val00, T val01, T val02, - T val10, T val11, T val12, - T val20, T val21, T val22) { - set(val00, val01, val02, val10, val11, val12, val20, val21, val22); - } - - inline matrix(T val00, T val01, T val02, T val03, - T val10, T val11, T val12, T val13, - T val20, T val21, T val22, T val23, - T val30, T val31, T val32, T val33) { - set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33); - } - - inline void set(const float* p) { - for (uint i = 0; i < R; i++) { - m_rows[i].set(p); - p += C; + + template + Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) + { + CRNLIB_ASSUME(Z::num_rows == X::num_rows); + CRNLIB_ASSUME(Z::num_cols == Y::num_rows); + CRNLIB_ASSUME(X::num_cols == Y::num_cols); + for (int r = 0; r < X::num_rows; r++) + { + for (int c = 0; c < Y::num_rows; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(c, 
0); + for (uint i = 1; i < X::num_cols; i++) + { + s += lhs(r, i) * rhs(c, i); + } + result(r, c) = s; + } + } + return result; } - } - inline void set(T val00, T val01, - T val10, T val11) { - m_rows[0].set(val00, val01); - if (R >= 2) { - m_rows[1].set(val10, val11); + template + class matrix + { + public: + typedef T scalar_type; + enum { + num_rows = R, + num_cols = C + }; - for (uint i = 2; i < R; i++) - m_rows[i].clear(); - } - } - - inline void set(T val00, T val01, T val02, - T val10, T val11, T val12, - T val20, T val21, T val22) { - m_rows[0].set(val00, val01, val02); - if (R >= 2) { - m_rows[1].set(val10, val11, val12); - if (R >= 3) { - m_rows[2].set(val20, val21, val22); - - for (uint i = 3; i < R; i++) - m_rows[i].clear(); - } - } - } + typedef vec col_vec; + typedef vec<(R > 1) ? (R - 1) : 0, T> subcol_vec; + + typedef vec row_vec; + typedef vec<(C > 1) ? (C - 1) : 0, T> subrow_vec; - inline void set(T val00, T val01, T val02, T val03, - T val10, T val11, T val12, T val13, - T val20, T val21, T val22, T val23, - T val30, T val31, T val32, T val33) { - m_rows[0].set(val00, val01, val02, val03); - if (R >= 2) { - m_rows[1].set(val10, val11, val12, val13); - if (R >= 3) { - m_rows[2].set(val20, val21, val22, val23); + inline matrix() {} - if (R >= 4) { - m_rows[3].set(val30, val31, val32, val33); + inline matrix(eClear) { clear(); } - for (uint i = 4; i < R; i++) - m_rows[i].clear(); + inline matrix(const T* p) { set(p); } + + inline matrix(const matrix& other) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] = other.m_rows[i]; + } } - } - } - } - - inline T operator()(uint r, uint c) const { - CRNLIB_ASSERT((r < R) && (c < C)); - return m_rows[r][c]; - } - - inline T& operator()(uint r, uint c) { - CRNLIB_ASSERT((r < R) && (c < C)); - return m_rows[r][c]; - } - - inline const row_vec& operator[](uint r) const { - CRNLIB_ASSERT(r < R); - return m_rows[r]; - } - - inline row_vec& operator[](uint r) { - CRNLIB_ASSERT(r < R); - return m_rows[r]; - } - - 
inline const row_vec& get_row(uint r) const { return (*this)[r]; } - inline row_vec& get_row(uint r) { return (*this)[r]; } - - inline col_vec get_col(uint c) const { - CRNLIB_ASSERT(c < C); - col_vec result; - for (uint i = 0; i < R; i++) - result[i] = m_rows[i][c]; - return result; - } - - inline void set_col(uint c, const col_vec& col) { - CRNLIB_ASSERT(c < C); - for (uint i = 0; i < R; i++) - m_rows[i][c] = col[i]; - } - - inline void set_col(uint c, const subcol_vec& col) { - CRNLIB_ASSERT(c < C); - for (uint i = 0; i < (R - 1); i++) - m_rows[i][c] = col[i]; - - m_rows[R - 1][c] = 0.0f; - } - - inline const row_vec& get_translate() const { - return m_rows[R - 1]; - } - - inline matrix& set_translate(const row_vec& r) { - m_rows[R - 1] = r; - return *this; - } - - inline matrix& set_translate(const subrow_vec& r) { - m_rows[R - 1] = row_vec(r).as_point(); - return *this; - } - - inline const T* get_ptr() const { return reinterpret_cast(&m_rows[0]); } - inline T* get_ptr() { return reinterpret_cast(&m_rows[0]); } - - inline matrix& operator+=(const matrix& other) { - for (uint i = 0; i < R; i++) - m_rows[i] += other.m_rows[i]; - return *this; - } - - inline matrix& operator-=(const matrix& other) { - for (uint i = 0; i < R; i++) - m_rows[i] -= other.m_rows[i]; - return *this; - } - - inline matrix& operator*=(T val) { - for (uint i = 0; i < R; i++) - m_rows[i] *= val; - return *this; - } - - inline matrix& operator/=(T val) { - for (uint i = 0; i < R; i++) - m_rows[i] /= val; - return *this; - } - - inline matrix& operator*=(const matrix& other) { - matrix result; - matrix_mul_helper(result, *this, other); - *this = result; - return *this; - } - - friend inline matrix operator+(const matrix& lhs, const matrix& rhs) { - matrix result; - for (uint i = 0; i < R; i++) - result[i] = lhs.m_rows[i] + rhs.m_rows[i]; - return result; - } - - friend inline matrix operator-(const matrix& lhs, const matrix& rhs) { - matrix result; - for (uint i = 0; i < R; i++) - result[i] 
= lhs.m_rows[i] - rhs.m_rows[i]; - return result; - } - - friend inline matrix operator*(const matrix& lhs, T val) { - matrix result; - for (uint i = 0; i < R; i++) - result[i] = lhs.m_rows[i] * val; - return result; - } - - friend inline matrix operator/(const matrix& lhs, T val) { - matrix result; - for (uint i = 0; i < R; i++) - result[i] = lhs.m_rows[i] / val; - return result; - } - - friend inline matrix operator*(T val, const matrix& rhs) { - matrix result; - for (uint i = 0; i < R; i++) - result[i] = val * rhs.m_rows[i]; - return result; - } - - friend inline matrix operator*(const matrix& lhs, const matrix& rhs) { - matrix result; - return matrix_mul_helper(result, lhs, rhs); - } - - friend inline row_vec operator*(const col_vec& a, const matrix& b) { - return transform(a, b); - } - - inline matrix operator+() const { - return *this; - } - - inline matrix operator-() const { - matrix result; - for (uint i = 0; i < R; i++) - result[i] = -m_rows[i]; - return result; - } - - inline void clear(void) { - for (uint i = 0; i < R; i++) - m_rows[i].clear(); - } - - inline void set_zero_matrix() { - clear(); - } - - inline void set_identity_matrix() { - for (uint i = 0; i < R; i++) { - m_rows[i].clear(); - m_rows[i][i] = 1.0f; - } - } - - inline matrix& set_scale_matrix(float s) { - clear(); - for (int i = 0; i < (R - 1); i++) - m_rows[i][i] = s; - m_rows[R - 1][C - 1] = 1.0f; - return *this; - } - - inline matrix& set_scale_matrix(const row_vec& s) { - clear(); - for (uint i = 0; i < R; i++) - m_rows[i][i] = s[i]; - return *this; - } - - inline matrix& set_translate_matrix(const row_vec& s) { - set_identity_matrix(); - set_translate(s); - return *this; - } - - inline matrix& set_translate_matrix(float x, float y) { - set_identity_matrix(); - set_translate(row_vec(x, y).as_point()); - return *this; - } - - inline matrix& set_translate_matrix(float x, float y, float z) { - set_identity_matrix(); - set_translate(row_vec(x, y, z).as_point()); - return *this; - } - - 
inline matrix get_transposed(void) const { - matrix result; - for (uint i = 0; i < R; i++) - for (uint j = 0; j < C; j++) - result.m_rows[i][j] = m_rows[j][i]; - return result; - } - - inline matrix& transpose_in_place(void) { - matrix result; - for (uint i = 0; i < R; i++) - for (uint j = 0; j < C; j++) - result.m_rows[i][j] = m_rows[j][i]; - *this = result; - return *this; - } - - // This method transforms a column vec by a matrix (D3D-style). - static inline row_vec transform(const col_vec& a, const matrix& b) { - row_vec result(b[0] * a[0]); - for (uint r = 1; r < R; r++) - result += b[r] * a[r]; - return result; - } - - // This method transforms a column vec by a matrix. Last component of vec is assumed to be 1. - static inline row_vec transform_point(const col_vec& a, const matrix& b) { - row_vec result(0); - for (int r = 0; r < (R - 1); r++) - result += b[r] * a[r]; - result += b[R - 1]; - return result; - } - - // This method transforms a column vec by a matrix. Last component of vec is assumed to be 0. - static inline row_vec transform_vector(const col_vec& a, const matrix& b) { - row_vec result(0); - for (int r = 0; r < (R - 1); r++) - result += b[r] * a[r]; - return result; - } - - static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) { - subcol_vec result(0); - for (int r = 0; r < R; r++) { - const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; - for (int c = 0; c < (C - 1); c++) - result[c] += b[r][c] * s; - } - return result; - } - - static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) { - subcol_vec result(0); - for (int r = 0; r < (R - 1); r++) { - const T s = a[r]; - for (int c = 0; c < (C - 1); c++) - result[c] += b[r][c] * s; - } - return result; - } - - // This method transforms a column vec by the transpose of a matrix. 
- static inline col_vec transform_transposed(const matrix& b, const col_vec& a) { - CRNLIB_ASSUME(R == C); - col_vec result; - for (uint r = 0; r < R; r++) - result[r] = b[r] * a; - return result; - } - - // This method transforms a column vec by the transpose of a matrix. Last component of vec is assumed to be 0. - static inline col_vec transform_vector_transposed(const matrix& b, const col_vec& a) { - CRNLIB_ASSUME(R == C); - col_vec result; - for (uint r = 0; r < R; r++) { - T s = 0; - for (uint c = 0; c < (C - 1); c++) - s += b[r][c] * a[c]; - - result[r] = s; - } - return result; - } - - // This method transforms a matrix by a row vector (OGL style). - static inline col_vec transform(const matrix& b, const row_vec& a) { - col_vec result; - for (int r = 0; r < R; r++) - result[r] = b[r] * a; - return result; - } - - static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) { - return matrix_mul_helper(result, lhs, rhs); - } - - static inline matrix make_scale_matrix(float s) { - return matrix().set_scale_matrix(s); - } - - static inline matrix make_scale_matrix(const row_vec& s) { - return matrix().set_scale_matrix(s); - } - - static inline matrix make_scale_matrix(float x, float y) { - CRNLIB_ASSUME(R >= 3 && C >= 3); - matrix result; - result.clear(); - result.m_rows[0][0] = x; - result.m_rows[1][1] = y; - result.m_rows[2][2] = 1.0f; - return result; - } - - static inline matrix make_scale_matrix(float x, float y, float z) { - CRNLIB_ASSUME(R >= 4 && C >= 4); - matrix result; - result.clear(); - result.m_rows[0][0] = x; - result.m_rows[1][1] = y; - result.m_rows[2][2] = z; - result.m_rows[3][3] = 1.0f; - return result; - } - - private: - row_vec m_rows[R]; -}; - -typedef matrix<2, 2, float> matrix22F; -typedef matrix<2, 2, double> matrix22D; - -typedef matrix<3, 3, float> matrix33F; -typedef matrix<3, 3, double> matrix33D; - -typedef matrix<4, 4, float> matrix44F; -typedef matrix<4, 4, double> matrix44D; - -typedef matrix<8, 8, 
float> matrix88F; + + inline matrix& operator=(const matrix& rhs) + { + if (this != &rhs) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] = rhs.m_rows[i]; + } + } + return *this; + } + + inline matrix( + T val00, T val01, + T val10, T val11) + { + set(val00, val01, val10, val11); + } + + inline matrix( + T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + set(val00, val01, val02, val10, val11, val12, val20, val21, val22); + } + + inline matrix( + T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33); + } + + inline void set(const float* p) + { + for (uint i = 0; i < R; i++) + { + m_rows[i].set(p); + p += C; + } + } + + inline void set( + T val00, T val01, + T val10, T val11) + { + m_rows[0].set(val00, val01); + if (R >= 2) + { + m_rows[1].set(val10, val11); + + for (uint i = 2; i < R; i++) + { + m_rows[i].clear(); + } + } + } + + inline void set( + T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + m_rows[0].set(val00, val01, val02); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22); + + for (uint i = 3; i < R; i++) + { + m_rows[i].clear(); + } + } + } + } + + inline void set( + T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + m_rows[0].set(val00, val01, val02, val03); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12, val13); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22, val23); + + if (R >= 4) + { + m_rows[3].set(val30, val31, val32, val33); + + for (uint i = 4; i < R; i++) + { + m_rows[i].clear(); + } + } + } + } + } + + inline T operator()(uint r, uint c) const + { + CRNLIB_ASSERT((r < R) 
&& (c < C)); + return m_rows[r][c]; + } + + inline T& operator()(uint r, uint c) + { + CRNLIB_ASSERT((r < R) && (c < C)); + return m_rows[r][c]; + } + + inline const row_vec& operator[](uint r) const + { + CRNLIB_ASSERT(r < R); + return m_rows[r]; + } + + inline row_vec& operator[](uint r) + { + CRNLIB_ASSERT(r < R); + return m_rows[r]; + } + + inline const row_vec& get_row(uint r) const + { + return (*this)[r]; + } + inline row_vec& get_row(uint r) + { + return (*this)[r]; + } + + inline col_vec get_col(uint c) const + { + CRNLIB_ASSERT(c < C); + col_vec result; + for (uint i = 0; i < R; i++) + { + result[i] = m_rows[i][c]; + } + return result; + } + + inline void set_col(uint c, const col_vec& col) + { + CRNLIB_ASSERT(c < C); + for (uint i = 0; i < R; i++) + { + m_rows[i][c] = col[i]; + } + } + + inline void set_col(uint c, const subcol_vec& col) + { + CRNLIB_ASSERT(c < C); + for (uint i = 0; i < (R - 1); i++) + { + m_rows[i][c] = col[i]; + } + + m_rows[R - 1][c] = 0.0f; + } + + inline const row_vec& get_translate() const + { + return m_rows[R - 1]; + } + + inline matrix& set_translate(const row_vec& r) + { + m_rows[R - 1] = r; + return *this; + } + + inline matrix& set_translate(const subrow_vec& r) + { + m_rows[R - 1] = row_vec(r).as_point(); + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_rows[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_rows[0]); + } + + inline matrix& operator+=(const matrix& other) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] += other.m_rows[i]; + } + return *this; + } + + inline matrix& operator-=(const matrix& other) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] -= other.m_rows[i]; + } + return *this; + } + + inline matrix& operator*=(T val) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] *= val; + } + return *this; + } + + inline matrix& operator/=(T val) + { + for (uint i = 0; i < R; i++) + { + m_rows[i] /= val; + } + return *this; + } + + inline matrix& 
operator*=(const matrix& other) + { + matrix result; + matrix_mul_helper(result, *this, other); + *this = result; + return *this; + } + + friend inline matrix operator+(const matrix& lhs, const matrix& rhs) + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = lhs.m_rows[i] + rhs.m_rows[i]; + } + return result; + } + + friend inline matrix operator-(const matrix& lhs, const matrix& rhs) + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = lhs.m_rows[i] - rhs.m_rows[i]; + } + return result; + } + + friend inline matrix operator*(const matrix& lhs, T val) + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = lhs.m_rows[i] * val; + } + return result; + } + + friend inline matrix operator/(const matrix& lhs, T val) + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = lhs.m_rows[i] / val; + } + return result; + } + + friend inline matrix operator*(T val, const matrix& rhs) + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = val * rhs.m_rows[i]; + } + return result; + } + + friend inline matrix operator*(const matrix& lhs, const matrix& rhs) + { + matrix result; + return matrix_mul_helper(result, lhs, rhs); + } + + friend inline row_vec operator*(const col_vec& a, const matrix& b) + { + return transform(a, b); + } + + inline matrix operator+() const + { + return *this; + } + + inline matrix operator-() const + { + matrix result; + for (uint i = 0; i < R; i++) + { + result[i] = -m_rows[i]; + } + return result; + } + + inline void clear(void) + { + for (uint i = 0; i < R; i++) + { + m_rows[i].clear(); + } + } + + inline void set_zero_matrix() + { + clear(); + } + + inline void set_identity_matrix() + { + for (uint i = 0; i < R; i++) + { + m_rows[i].clear(); + m_rows[i][i] = 1.0f; + } + } + + inline matrix& set_scale_matrix(float s) + { + clear(); + for (int i = 0; i < (R - 1); i++) + { + m_rows[i][i] = s; + } + m_rows[R - 1][C - 1] = 1.0f; + return *this; + } + + inline matrix& 
set_scale_matrix(const row_vec& s) + { + clear(); + for (uint i = 0; i < R; i++) + { + m_rows[i][i] = s[i]; + } + return *this; + } + + inline matrix& set_translate_matrix(const row_vec& s) + { + set_identity_matrix(); + set_translate(s); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y) + { + set_identity_matrix(); + set_translate(row_vec(x, y).as_point()); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y, float z) + { + set_identity_matrix(); + set_translate(row_vec(x, y, z).as_point()); + return *this; + } + + inline matrix get_transposed(void) const + { + matrix result; + for (uint i = 0; i < R; i++) + { + for (uint j = 0; j < C; j++) + { + result.m_rows[i][j] = m_rows[j][i]; + } + } + return result; + } + + inline matrix& transpose_in_place(void) + { + matrix result; + for (uint i = 0; i < R; i++) + { + for (uint j = 0; j < C; j++) + { + result.m_rows[i][j] = m_rows[j][i]; + } + } + *this = result; + return *this; + } + + // This method transforms a column vec by a matrix (D3D-style). + static inline row_vec transform(const col_vec& a, const matrix& b) + { + row_vec result(b[0] * a[0]); + for (uint r = 1; r < R; r++) + { + result += b[r] * a[r]; + } + return result; + } + + // This method transforms a column vec by a matrix. Last component of vec is assumed to be 1. + static inline row_vec transform_point(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + { + result += b[r] * a[r]; + } + result += b[R - 1]; + return result; + } + + // This method transforms a column vec by a matrix. Last component of vec is assumed to be 0. 
+ static inline row_vec transform_vector(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + { + result += b[r] * a[r]; + } + return result; + } + + static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) { + subcol_vec result(0); + for (int r = 0; r < R; r++) + { + const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; + for (int c = 0; c < (C - 1); c++) + { + result[c] += b[r][c] * s; + } + } + return result; + } + + static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) + { + subcol_vec result(0); + for (int r = 0; r < (R - 1); r++) + { + const T s = a[r]; + for (int c = 0; c < (C - 1); c++) + { + result[c] += b[r][c] * s; + } + } + return result; + } + + // This method transforms a column vec by the transpose of a matrix. + static inline col_vec transform_transposed(const matrix& b, const col_vec& a) + { + CRNLIB_ASSUME(R == C); + col_vec result; + for (uint r = 0; r < R; r++) + { + result[r] = b[r] * a; + } + return result; + } + + // This method transforms a column vec by the transpose of a matrix. Last component of vec is assumed to be 0. + static inline col_vec transform_vector_transposed(const matrix& b, const col_vec& a) + { + CRNLIB_ASSUME(R == C); + col_vec result; + for (uint r = 0; r < R; r++) + { + T s = 0; + for (uint c = 0; c < (C - 1); c++) + { + s += b[r][c] * a[c]; + } + + result[r] = s; + } + return result; + } + + // This method transforms a matrix by a row vector (OGL style). 
+ static inline col_vec transform(const matrix& b, const row_vec& a) + { + col_vec result; + for (int r = 0; r < R; r++) + { + result[r] = b[r] * a; + } + return result; + } + + static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) + { + return matrix_mul_helper(result, lhs, rhs); + } + + static inline matrix make_scale_matrix(float s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(const row_vec& s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(float x, float y) + { + CRNLIB_ASSUME(R >= 3 && C >= 3); + matrix result; + result.clear(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = 1.0f; + return result; + } + + static inline matrix make_scale_matrix(float x, float y, float z) + { + CRNLIB_ASSUME(R >= 4 && C >= 4); + matrix result; + result.clear(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = z; + result.m_rows[3][3] = 1.0f; + return result; + } + + private: + row_vec m_rows[R]; + }; + + typedef matrix<2, 2, float> matrix22F; + typedef matrix<2, 2, double> matrix22D; + + typedef matrix<3, 3, float> matrix33F; + typedef matrix<3, 3, double> matrix33D; + + typedef matrix<4, 4, float> matrix44F; + typedef matrix<4, 4, double> matrix44D; + + typedef matrix<8, 8, float> matrix88F; } // namespace crnlib diff --git a/crnlib/crn_mem.h b/crnlib/crn_mem.h index a63dff3..507cc4e 100644 --- a/crnlib/crn_mem.h +++ b/crnlib/crn_mem.h @@ -8,176 +8,214 @@ #define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 #endif -namespace crnlib { +namespace crnlib +{ #if CRNLIB_64BIT_POINTERS -const uint64 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x400000000ULL; + const uint64 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x400000000ULL; #else -const uint32 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + const uint32 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; #endif -CRN_EXPORT void* crnlib_malloc(size_t size); -CRN_EXPORT void* 
crnlib_malloc(size_t size, size_t* pActual_size); -CRN_EXPORT void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); -CRN_EXPORT void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = NULL); -CRN_EXPORT void crnlib_free(void* p); -CRN_EXPORT size_t crnlib_msize(void* p); -CRN_EXPORT void crnlib_print_mem_stats(); -CRN_EXPORT void crnlib_mem_error(const char* p_msg); - -// omfg - there must be a better way - -template -inline T* crnlib_new() { - T* p = static_cast(crnlib_malloc(sizeof(T))); - if (CRNLIB_IS_SCALAR_TYPE(T)) - return p; - return helpers::construct(p); -} - -template -inline T* crnlib_new(const A& init0) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0); -} - -template -inline T* crnlib_new(A& init0) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0); -} - -template -inline T* crnlib_new(const A& init0, const B& init1) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5); -} - -template -inline T* 
crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10); -} - -template -inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10, const L& init11) { - T* p = static_cast(crnlib_malloc(sizeof(T))); - return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, 
init10, init11); -} - -template -inline T* crnlib_new_array(uint32 num) { - if (!num) - num = 1; - - uint64 total = CRNLIB_MIN_ALLOC_ALIGNMENT + sizeof(T) * num; - if (total > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { - crnlib_mem_error("crnlib_new_array: Array too large!"); - return NULL; - } - uint8* q = static_cast(crnlib_malloc(static_cast(total))); - - T* p = reinterpret_cast(q + CRNLIB_MIN_ALLOC_ALIGNMENT); - - reinterpret_cast(p)[-1] = num; - reinterpret_cast(p)[-2] = ~num; - - if (!CRNLIB_IS_SCALAR_TYPE(T)) { - helpers::construct_array(p, num); - } - return p; -} - -template -inline void crnlib_delete(T* p) { - if (p) { - if (!CRNLIB_IS_SCALAR_TYPE(T)) { - helpers::destruct(p); - } - crnlib_free(p); - } -} - -template -inline void crnlib_delete_array(T* p) { - if (p) { - const uint32 num = reinterpret_cast(p)[-1]; - const uint32 num_check = reinterpret_cast(p)[-2]; - CRNLIB_ASSERT(num && (num == ~num_check)); - if (num == ~num_check) { - if (!CRNLIB_IS_SCALAR_TYPE(T)) { - helpers::destruct_array(p, num); - } - - crnlib_free(reinterpret_cast(p) - CRNLIB_MIN_ALLOC_ALIGNMENT); - } - } -} + CRN_EXPORT void* crnlib_malloc(size_t size); + CRN_EXPORT void* crnlib_malloc(size_t size, size_t* pActual_size); + CRN_EXPORT void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + CRN_EXPORT void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = NULL); + CRN_EXPORT void crnlib_free(void* p); + CRN_EXPORT size_t crnlib_msize(void* p); + CRN_EXPORT void crnlib_print_mem_stats(); + CRN_EXPORT void crnlib_mem_error(const char* p_msg); + + // omfg - there must be a better way + + template + inline T* crnlib_new() + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + if (CRNLIB_IS_SCALAR_TYPE(T)) + { + return p; + } + return helpers::construct(p); + } + + template + inline T* crnlib_new(const A& init0) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0); + } + + template + inline T* 
crnlib_new(A& init0) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0); + } + + template + inline T* crnlib_new(const A& init0, const B& init1) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8) + { + T* p = 
static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10, const L& init11) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10, init11); + } + + template + inline T* crnlib_new_array(uint32 num) + { + if (!num) + { + num = 1; + } + + uint64 total = CRNLIB_MIN_ALLOC_ALIGNMENT + sizeof(T) * num; + if (total > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_new_array: Array too large!"); + return NULL; + } + uint8* q = static_cast(crnlib_malloc(static_cast(total))); + + T* p = reinterpret_cast(q + CRNLIB_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::construct_array(p, num); + } + return p; + } + + template + inline void crnlib_delete(T* p) + { + if (p) + { + if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::destruct(p); + } + crnlib_free(p); + } + 
} + + template + inline void crnlib_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + const uint32 num_check = reinterpret_cast(p)[-2]; + CRNLIB_ASSERT(num && (num == ~num_check)); + if (num == ~num_check) + { + if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::destruct_array(p, num); + } + + crnlib_free(reinterpret_cast(p) - CRNLIB_MIN_ALLOC_ALIGNMENT); + } + } + } } // namespace crnlib -#define CRNLIB_DEFINE_NEW_DELETE \ - void* operator new(size_t size) { \ - void* p = crnlib::crnlib_malloc(size); \ - if (!p) \ - crnlib_fail("new: Out of memory!", __FILE__, __LINE__); \ - return p; \ - } \ - void* operator new[](size_t size) { \ - void* p = crnlib::crnlib_malloc(size); \ - if (!p) \ - crnlib_fail("new[]: Out of memory!", __FILE__, __LINE__); \ - return p; \ - } \ - void operator delete(void* p_block) { \ - crnlib::crnlib_free(p_block); \ - } \ - void operator delete[](void* p_block) { \ - crnlib::crnlib_free(p_block); \ - } + +#define CRNLIB_DEFINE_NEW_DELETE \ + void* operator new(size_t size) \ + { \ + void* p = crnlib::crnlib_malloc(size); \ + if (!p) \ + { \ + crnlib_fail("new: Out of memory!", __FILE__, __LINE__); \ + } \ + return p; \ + } \ + void* operator new[](size_t size) \ + { \ + void* p = crnlib::crnlib_malloc(size); \ + if (!p) \ + { \ + crnlib_fail("new[]: Out of memory!", __FILE__, __LINE__); \ + } \ + return p; \ + } \ + void operator delete(void* p_block) \ + { \ + crnlib::crnlib_free(p_block); \ + } \ + void operator delete[](void* p_block) \ + { \ + crnlib::crnlib_free(p_block); \ + } diff --git a/crnlib/crn_mipmapped_texture.cpp b/crnlib/crn_mipmapped_texture.cpp index 95f1b45..689b618 100644 --- a/crnlib/crn_mipmapped_texture.cpp +++ b/crnlib/crn_mipmapped_texture.cpp @@ -1756,13 +1756,13 @@ image_u8* mipmapped_texture::get_level_image(uint face, uint level, image_u8& im } void mipmapped_texture::swap(mipmapped_texture& img) { - utils::swap(m_width, img.m_width); - utils::swap(m_height, img.m_height); - 
utils::swap(m_comp_flags, img.m_comp_flags); - utils::swap(m_format, img.m_format); + std::swap(m_width, img.m_width); + std::swap(m_height, img.m_height); + std::swap(m_comp_flags, img.m_comp_flags); + std::swap(m_format, img.m_format); m_faces.swap(img.m_faces); m_last_error.swap(img.m_last_error); - utils::swap(m_source_file_type, img.m_source_file_type); + std::swap(m_source_file_type, img.m_source_file_type); CRNLIB_ASSERT(check()); } diff --git a/crnlib/crn_pixel_format.cpp b/crnlib/crn_pixel_format.cpp index 68bad47..e719b67 100644 --- a/crnlib/crn_pixel_format.cpp +++ b/crnlib/crn_pixel_format.cpp @@ -4,330 +4,360 @@ #include "crn_pixel_format.h" #include "crn_image.h" -namespace crnlib { -namespace pixel_format_helpers { -const pixel_format g_all_pixel_formats[] = +namespace crnlib +{ + namespace pixel_format_helpers { - PIXEL_FMT_DXT1, - PIXEL_FMT_DXT2, - PIXEL_FMT_DXT3, - PIXEL_FMT_DXT4, - PIXEL_FMT_DXT5, - PIXEL_FMT_3DC, - PIXEL_FMT_DXN, - PIXEL_FMT_DXT5A, - PIXEL_FMT_DXT5_CCxY, - PIXEL_FMT_DXT5_xGxR, - PIXEL_FMT_DXT5_xGBR, - PIXEL_FMT_DXT5_AGBR, - PIXEL_FMT_DXT1A, - PIXEL_FMT_ETC1, - PIXEL_FMT_ETC2, - PIXEL_FMT_ETC2A, - PIXEL_FMT_ETC1S, - PIXEL_FMT_ETC2AS, - PIXEL_FMT_R8G8B8, - PIXEL_FMT_L8, - PIXEL_FMT_A8, - PIXEL_FMT_A8L8, - PIXEL_FMT_A8R8G8B8}; + const pixel_format g_all_pixel_formats[] = + { + PIXEL_FMT_DXT1, + PIXEL_FMT_DXT2, + PIXEL_FMT_DXT3, + PIXEL_FMT_DXT4, + PIXEL_FMT_DXT5, + PIXEL_FMT_3DC, + PIXEL_FMT_DXN, + PIXEL_FMT_DXT5A, + PIXEL_FMT_DXT5_CCxY, + PIXEL_FMT_DXT5_xGxR, + PIXEL_FMT_DXT5_xGBR, + PIXEL_FMT_DXT5_AGBR, + PIXEL_FMT_DXT1A, + PIXEL_FMT_ETC1, + PIXEL_FMT_ETC2, + PIXEL_FMT_ETC2A, + PIXEL_FMT_ETC1S, + PIXEL_FMT_ETC2AS, + PIXEL_FMT_R8G8B8, + PIXEL_FMT_L8, + PIXEL_FMT_A8, + PIXEL_FMT_A8L8, + PIXEL_FMT_A8R8G8B8 + }; -uint get_num_formats() { - return sizeof(g_all_pixel_formats) / sizeof(g_all_pixel_formats[0]); -} + uint get_num_formats() + { + return sizeof(g_all_pixel_formats) / sizeof(g_all_pixel_formats[0]); + } -pixel_format 
get_pixel_format_by_index(uint index) { - CRNLIB_ASSERT(index < get_num_formats()); - return g_all_pixel_formats[index]; -} + pixel_format get_pixel_format_by_index(uint index) + { + CRNLIB_ASSERT(index < get_num_formats()); + return g_all_pixel_formats[index]; + } -const char* get_pixel_format_string(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_INVALID: - return "INVALID"; - case PIXEL_FMT_DXT1: - return "DXT1"; - case PIXEL_FMT_DXT1A: - return "DXT1A"; - case PIXEL_FMT_DXT2: - return "DXT2"; - case PIXEL_FMT_DXT3: - return "DXT3"; - case PIXEL_FMT_DXT4: - return "DXT4"; - case PIXEL_FMT_DXT5: - return "DXT5"; - case PIXEL_FMT_3DC: - return "3DC"; - case PIXEL_FMT_DXN: - return "DXN"; - case PIXEL_FMT_DXT5A: - return "DXT5A"; - case PIXEL_FMT_DXT5_CCxY: - return "DXT5_CCxY"; - case PIXEL_FMT_DXT5_xGxR: - return "DXT5_xGxR"; - case PIXEL_FMT_DXT5_xGBR: - return "DXT5_xGBR"; - case PIXEL_FMT_DXT5_AGBR: - return "DXT5_AGBR"; - case PIXEL_FMT_ETC1: - return "ETC1"; - case PIXEL_FMT_ETC2: - return "ETC2"; - case PIXEL_FMT_ETC2A: - return "ETC2A"; - case PIXEL_FMT_ETC1S: - return "ETC1S"; - case PIXEL_FMT_ETC2AS: - return "ETC2AS"; - case PIXEL_FMT_R8G8B8: - return "R8G8B8"; - case PIXEL_FMT_A8R8G8B8: - return "A8R8G8B8"; - case PIXEL_FMT_A8: - return "A8"; - case PIXEL_FMT_L8: - return "L8"; - case PIXEL_FMT_A8L8: - return "A8L8"; - default: - break; - } - CRNLIB_ASSERT(false); - return "?"; -} + const char* get_pixel_format_string(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_INVALID: + return "INVALID"; + case PIXEL_FMT_DXT1: + return "DXT1"; + case PIXEL_FMT_DXT1A: + return "DXT1A"; + case PIXEL_FMT_DXT2: + return "DXT2"; + case PIXEL_FMT_DXT3: + return "DXT3"; + case PIXEL_FMT_DXT4: + return "DXT4"; + case PIXEL_FMT_DXT5: + return "DXT5"; + case PIXEL_FMT_3DC: + return "3DC"; + case PIXEL_FMT_DXN: + return "DXN"; + case PIXEL_FMT_DXT5A: + return "DXT5A"; + case PIXEL_FMT_DXT5_CCxY: + return "DXT5_CCxY"; + case PIXEL_FMT_DXT5_xGxR: + return 
"DXT5_xGxR"; + case PIXEL_FMT_DXT5_xGBR: + return "DXT5_xGBR"; + case PIXEL_FMT_DXT5_AGBR: + return "DXT5_AGBR"; + case PIXEL_FMT_ETC1: + return "ETC1"; + case PIXEL_FMT_ETC2: + return "ETC2"; + case PIXEL_FMT_ETC2A: + return "ETC2A"; + case PIXEL_FMT_ETC1S: + return "ETC1S"; + case PIXEL_FMT_ETC2AS: + return "ETC2AS"; + case PIXEL_FMT_R8G8B8: + return "R8G8B8"; + case PIXEL_FMT_A8R8G8B8: + return "A8R8G8B8"; + case PIXEL_FMT_A8: + return "A8"; + case PIXEL_FMT_L8: + return "L8"; + case PIXEL_FMT_A8L8: + return "A8L8"; + default: + break; + } + CRNLIB_ASSERT(false); + return "?"; + } -const char* get_crn_format_string(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT1: - return "DXT1"; - case cCRNFmtDXT3: - return "DXT3"; - case cCRNFmtDXT5: - return "DXT5"; - case cCRNFmtDXT5_CCxY: - return "DXT5_CCxY"; - case cCRNFmtDXT5_xGBR: - return "DXT5_xGBR"; - case cCRNFmtDXT5_AGBR: - return "DXT5_AGBR"; - case cCRNFmtDXT5_xGxR: - return "DXT5_xGxR"; - case cCRNFmtDXN_XY: - return "DXN_XY"; - case cCRNFmtDXN_YX: - return "DXN_YX"; - case cCRNFmtDXT5A: - return "DXT5A"; - case cCRNFmtETC1: - return "ETC1"; - case cCRNFmtETC2: - return "ETC2"; - case cCRNFmtETC2A: - return "ETC2A"; - case cCRNFmtETC1S: - return "ETC1S"; - case cCRNFmtETC2AS: - return "ETC2AS"; - default: - break; - } - CRNLIB_ASSERT(false); - return "?"; -} + const char* get_crn_format_string(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + return "DXT1"; + case cCRNFmtDXT3: + return "DXT3"; + case cCRNFmtDXT5: + return "DXT5"; + case cCRNFmtDXT5_CCxY: + return "DXT5_CCxY"; + case cCRNFmtDXT5_xGBR: + return "DXT5_xGBR"; + case cCRNFmtDXT5_AGBR: + return "DXT5_AGBR"; + case cCRNFmtDXT5_xGxR: + return "DXT5_xGxR"; + case cCRNFmtDXN_XY: + return "DXN_XY"; + case cCRNFmtDXN_YX: + return "DXN_YX"; + case cCRNFmtDXT5A: + return "DXT5A"; + case cCRNFmtETC1: + return "ETC1"; + case cCRNFmtETC2: + return "ETC2"; + case cCRNFmtETC2A: + return "ETC2A"; + case cCRNFmtETC1S: + return "ETC1S"; + case 
cCRNFmtETC2AS: + return "ETC2AS"; + default: + break; + } + CRNLIB_ASSERT(false); + return "?"; + } -component_flags get_component_flags(pixel_format fmt) { - // These flags are for *uncooked* pixels, i.e. after after adding Z to DXN maps, or converting YCC maps to RGB, etc. + component_flags get_component_flags(pixel_format fmt) + { + // These flags are for *uncooked* pixels, i.e. after after adding Z to DXN maps, or converting YCC maps to RGB, etc. - uint flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; - switch (fmt) { - case PIXEL_FMT_DXT1: - case PIXEL_FMT_ETC1: - case PIXEL_FMT_ETC2: - case PIXEL_FMT_ETC1S: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; - break; - } - case PIXEL_FMT_DXT1A: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; - break; - } - case PIXEL_FMT_DXT2: - case PIXEL_FMT_DXT3: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; - break; - } - case PIXEL_FMT_DXT4: - case PIXEL_FMT_DXT5: - case PIXEL_FMT_ETC2A: - case PIXEL_FMT_ETC2AS: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; - break; - } - case PIXEL_FMT_DXT5A: { - flags = cCompFlagAValid; - break; - } - case PIXEL_FMT_DXT5_CCxY: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagLumaChroma; - break; - } - case PIXEL_FMT_DXT5_xGBR: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; - break; - } - case PIXEL_FMT_DXT5_AGBR: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagNormalMap; - break; - } - case PIXEL_FMT_DXT5_xGxR: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; - break; - } - case PIXEL_FMT_3DC: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; - break; - } - case PIXEL_FMT_DXN: { - flags = cCompFlagRValid | cCompFlagGValid | 
cCompFlagBValid | cCompFlagNormalMap; - break; - } - case PIXEL_FMT_R8G8B8: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; - break; - } - case PIXEL_FMT_A8R8G8B8: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; - break; - } - case PIXEL_FMT_A8: { - flags = cCompFlagAValid; - break; - } - case PIXEL_FMT_L8: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagGrayscale; - break; - } - case PIXEL_FMT_A8L8: { - flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; - break; - } - default: { - CRNLIB_ASSERT(0); - break; - } - } - return static_cast(flags); -} + uint flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; + switch (fmt) + { + case PIXEL_FMT_DXT1: + case PIXEL_FMT_ETC1: + case PIXEL_FMT_ETC2: + case PIXEL_FMT_ETC1S: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; + break; + } + case PIXEL_FMT_DXT1A: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + case PIXEL_FMT_ETC2A: + case PIXEL_FMT_ETC2AS: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT5A: + { + flags = cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT5_CCxY: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagLumaChroma; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + flags = cCompFlagRValid | cCompFlagGValid | 
cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_3DC: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXN: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_R8G8B8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; + break; + } + case PIXEL_FMT_A8R8G8B8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_A8: + { + flags = cCompFlagAValid; + break; + } + case PIXEL_FMT_L8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagGrayscale; + break; + } + case PIXEL_FMT_A8L8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; + break; + } + default: + { + CRNLIB_ASSERT(0); + break; + } + } + return static_cast(flags); + } -crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt) { - crn_format fmt = cCRNFmtDXT1; - switch (crn_fmt) { - case PIXEL_FMT_DXT1: - case PIXEL_FMT_DXT1A: - fmt = cCRNFmtDXT1; - break; - case PIXEL_FMT_DXT2: - case PIXEL_FMT_DXT3: - case PIXEL_FMT_DXT4: - case PIXEL_FMT_DXT5: - fmt = cCRNFmtDXT5; - break; - case PIXEL_FMT_3DC: - fmt = cCRNFmtDXN_YX; - break; - case PIXEL_FMT_DXN: - fmt = cCRNFmtDXN_XY; - break; - case PIXEL_FMT_DXT5A: - fmt = cCRNFmtDXT5A; - break; - case PIXEL_FMT_R8G8B8: - case PIXEL_FMT_L8: - fmt = cCRNFmtDXT1; - break; - case PIXEL_FMT_A8R8G8B8: - case PIXEL_FMT_A8: - case PIXEL_FMT_A8L8: - fmt = cCRNFmtDXT5; - break; - case PIXEL_FMT_DXT5_CCxY: - fmt = cCRNFmtDXT5_CCxY; - break; - case PIXEL_FMT_DXT5_xGBR: - fmt = cCRNFmtDXT5_xGBR; - break; - case PIXEL_FMT_DXT5_AGBR: - fmt = cCRNFmtDXT5_AGBR; - break; - case PIXEL_FMT_DXT5_xGxR: - fmt = cCRNFmtDXT5_xGxR; - break; - case PIXEL_FMT_ETC1: - fmt = cCRNFmtETC1; - break; - case PIXEL_FMT_ETC2: - fmt = cCRNFmtETC2; - break; - case PIXEL_FMT_ETC2A: - fmt = 
cCRNFmtETC2A; - break; - case PIXEL_FMT_ETC1S: - fmt = cCRNFmtETC1S; - break; - case PIXEL_FMT_ETC2AS: - fmt = cCRNFmtETC2AS; - break; - default: { - CRNLIB_ASSERT(false); - break; - } - } - return fmt; -} + crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt) + { + crn_format fmt = cCRNFmtDXT1; + switch (crn_fmt) + { + case PIXEL_FMT_DXT1: + case PIXEL_FMT_DXT1A: + fmt = cCRNFmtDXT1; + break; + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + fmt = cCRNFmtDXT5; + break; + case PIXEL_FMT_3DC: + fmt = cCRNFmtDXN_YX; + break; + case PIXEL_FMT_DXN: + fmt = cCRNFmtDXN_XY; + break; + case PIXEL_FMT_DXT5A: + fmt = cCRNFmtDXT5A; + break; + case PIXEL_FMT_R8G8B8: + case PIXEL_FMT_L8: + fmt = cCRNFmtDXT1; + break; + case PIXEL_FMT_A8R8G8B8: + case PIXEL_FMT_A8: + case PIXEL_FMT_A8L8: + fmt = cCRNFmtDXT5; + break; + case PIXEL_FMT_DXT5_CCxY: + fmt = cCRNFmtDXT5_CCxY; + break; + case PIXEL_FMT_DXT5_xGBR: + fmt = cCRNFmtDXT5_xGBR; + break; + case PIXEL_FMT_DXT5_AGBR: + fmt = cCRNFmtDXT5_AGBR; + break; + case PIXEL_FMT_DXT5_xGxR: + fmt = cCRNFmtDXT5_xGxR; + break; + case PIXEL_FMT_ETC1: + fmt = cCRNFmtETC1; + break; + case PIXEL_FMT_ETC2: + fmt = cCRNFmtETC2; + break; + case PIXEL_FMT_ETC2A: + fmt = cCRNFmtETC2A; + break; + case PIXEL_FMT_ETC1S: + fmt = cCRNFmtETC1S; + break; + case PIXEL_FMT_ETC2AS: + fmt = cCRNFmtETC2AS; + break; + default: { + CRNLIB_ASSERT(false); + break; + } + } + return fmt; + } -pixel_format convert_crn_format_to_pixel_format(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT1: - return PIXEL_FMT_DXT1; - case cCRNFmtDXT3: - return PIXEL_FMT_DXT3; - case cCRNFmtDXT5: - return PIXEL_FMT_DXT5; - case cCRNFmtDXT5_CCxY: - return PIXEL_FMT_DXT5_CCxY; - case cCRNFmtDXT5_xGxR: - return PIXEL_FMT_DXT5_xGxR; - case cCRNFmtDXT5_xGBR: - return PIXEL_FMT_DXT5_xGBR; - case cCRNFmtDXT5_AGBR: - return PIXEL_FMT_DXT5_AGBR; - case cCRNFmtDXN_XY: - return PIXEL_FMT_DXN; - case cCRNFmtDXN_YX: - return 
PIXEL_FMT_3DC; - case cCRNFmtDXT5A: - return PIXEL_FMT_DXT5A; - case cCRNFmtETC1: - return PIXEL_FMT_ETC1; - case cCRNFmtETC2: - return PIXEL_FMT_ETC2; - case cCRNFmtETC2A: - return PIXEL_FMT_ETC2A; - case cCRNFmtETC1S: - return PIXEL_FMT_ETC1S; - case cCRNFmtETC2AS: - return PIXEL_FMT_ETC2AS; - default: { - CRNLIB_ASSERT(false); - break; - } - } - - return PIXEL_FMT_INVALID; -} - -} // namespace pixel_format + pixel_format convert_crn_format_to_pixel_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + return PIXEL_FMT_DXT1; + case cCRNFmtDXT3: + return PIXEL_FMT_DXT3; + case cCRNFmtDXT5: + return PIXEL_FMT_DXT5; + case cCRNFmtDXT5_CCxY: + return PIXEL_FMT_DXT5_CCxY; + case cCRNFmtDXT5_xGxR: + return PIXEL_FMT_DXT5_xGxR; + case cCRNFmtDXT5_xGBR: + return PIXEL_FMT_DXT5_xGBR; + case cCRNFmtDXT5_AGBR: + return PIXEL_FMT_DXT5_AGBR; + case cCRNFmtDXN_XY: + return PIXEL_FMT_DXN; + case cCRNFmtDXN_YX: + return PIXEL_FMT_3DC; + case cCRNFmtDXT5A: + return PIXEL_FMT_DXT5A; + case cCRNFmtETC1: + return PIXEL_FMT_ETC1; + case cCRNFmtETC2: + return PIXEL_FMT_ETC2; + case cCRNFmtETC2A: + return PIXEL_FMT_ETC2A; + case cCRNFmtETC1S: + return PIXEL_FMT_ETC1S; + case cCRNFmtETC2AS: + return PIXEL_FMT_ETC2AS; + default: { + CRNLIB_ASSERT(false); + break; + } + } + return PIXEL_FMT_INVALID; + } + } // namespace pixel_format } // namespace crnlib diff --git a/crnlib/crn_pixel_format.h b/crnlib/crn_pixel_format.h index c8654c8..35ff12e 100644 --- a/crnlib/crn_pixel_format.h +++ b/crnlib/crn_pixel_format.h @@ -5,348 +5,376 @@ #include "crnlib.h" #include "dds_defs.h" -namespace crnlib { -namespace pixel_format_helpers { - CRN_EXPORT uint get_num_formats(); - CRN_EXPORT pixel_format get_pixel_format_by_index(uint index); +namespace crnlib +{ + namespace pixel_format_helpers + { + CRN_EXPORT uint get_num_formats(); + CRN_EXPORT pixel_format get_pixel_format_by_index(uint index); - CRN_EXPORT const char* get_pixel_format_string(pixel_format fmt); + CRN_EXPORT const char* 
get_pixel_format_string(pixel_format fmt); - CRN_EXPORT const char* get_crn_format_string(crn_format fmt); + CRN_EXPORT const char* get_crn_format_string(crn_format fmt); -inline bool is_grayscale(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_L8: - case PIXEL_FMT_A8L8: - return true; - default: - break; - } - return false; -} + inline bool is_grayscale(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_L8: + case PIXEL_FMT_A8L8: + return true; + default: + break; + } + return false; + } -inline bool is_dxt1(pixel_format fmt) { - return (fmt == PIXEL_FMT_DXT1) || (fmt == PIXEL_FMT_DXT1A); -} + inline bool is_dxt1(pixel_format fmt) + { + return (fmt == PIXEL_FMT_DXT1) || (fmt == PIXEL_FMT_DXT1A); + } -// has_alpha() should probably be called "has_opacity()" - it indicates if the format encodes opacity -// because some swizzled DXT5 formats do not encode opacity. -inline bool has_alpha(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1A: - case PIXEL_FMT_DXT2: - case PIXEL_FMT_DXT3: - case PIXEL_FMT_DXT4: - case PIXEL_FMT_DXT5: - case PIXEL_FMT_DXT5A: - case PIXEL_FMT_A8R8G8B8: - case PIXEL_FMT_A8: - case PIXEL_FMT_A8L8: - case PIXEL_FMT_DXT5_AGBR: - case PIXEL_FMT_ETC2A: - case PIXEL_FMT_ETC2AS: - return true; - default: - break; - } - return false; -} + // has_alpha() should probably be called "has_opacity()" - it indicates if the format encodes opacity + // because some swizzled DXT5 formats do not encode opacity. 
+ inline bool has_alpha(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1A: + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_A8R8G8B8: + case PIXEL_FMT_A8: + case PIXEL_FMT_A8L8: + case PIXEL_FMT_DXT5_AGBR: + case PIXEL_FMT_ETC2A: + case PIXEL_FMT_ETC2AS: + return true; + default: + break; + } + return false; + } -inline bool is_alpha_only(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_A8: - case PIXEL_FMT_DXT5A: - return true; - default: - break; - } - return false; -} + inline bool is_alpha_only(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_A8: + case PIXEL_FMT_DXT5A: + return true; + default: + break; + } + return false; + } -inline bool is_normal_map(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXN: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_xGxR: - case PIXEL_FMT_DXT5_AGBR: - return true; - default: - break; - } - return false; -} + inline bool is_normal_map(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_AGBR: + return true; + default: + break; + } + return false; + } -inline int is_dxt(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1: - case PIXEL_FMT_DXT1A: - case PIXEL_FMT_DXT2: - case PIXEL_FMT_DXT3: - case PIXEL_FMT_DXT4: - case PIXEL_FMT_DXT5: - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXT5A: - case PIXEL_FMT_DXN: - case PIXEL_FMT_DXT5_CCxY: - case PIXEL_FMT_DXT5_xGxR: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_AGBR: - case PIXEL_FMT_ETC1: - case PIXEL_FMT_ETC2: - case PIXEL_FMT_ETC2A: - case PIXEL_FMT_ETC1S: - case PIXEL_FMT_ETC2AS: - return true; - default: - break; - } - return false; -} + inline int is_dxt(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: + case PIXEL_FMT_DXT1A: + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case 
PIXEL_FMT_DXT5: + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + case PIXEL_FMT_ETC1: + case PIXEL_FMT_ETC2: + case PIXEL_FMT_ETC2A: + case PIXEL_FMT_ETC1S: + case PIXEL_FMT_ETC2AS: + return true; + default: + break; + } + return false; + } -inline int get_fundamental_format(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1A: - return PIXEL_FMT_DXT1; - case PIXEL_FMT_DXT5_CCxY: - case PIXEL_FMT_DXT5_xGxR: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_AGBR: - return PIXEL_FMT_DXT5; - default: - break; - } - return fmt; -} + inline int get_fundamental_format(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1A: + return PIXEL_FMT_DXT1; + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + return PIXEL_FMT_DXT5; + default: + break; + } + return fmt; + } -inline dxt_format get_dxt_format(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1: - return cDXT1; - case PIXEL_FMT_DXT1A: - return cDXT1A; - case PIXEL_FMT_DXT2: - return cDXT3; - case PIXEL_FMT_DXT3: - return cDXT3; - case PIXEL_FMT_DXT4: - return cDXT5; - case PIXEL_FMT_DXT5: - return cDXT5; - case PIXEL_FMT_3DC: - return cDXN_YX; - case PIXEL_FMT_DXT5A: - return cDXT5A; - case PIXEL_FMT_DXN: - return cDXN_XY; - case PIXEL_FMT_DXT5_CCxY: - return cDXT5; - case PIXEL_FMT_DXT5_xGxR: - return cDXT5; - case PIXEL_FMT_DXT5_xGBR: - return cDXT5; - case PIXEL_FMT_DXT5_AGBR: - return cDXT5; - case PIXEL_FMT_ETC1: - return cETC1; - case PIXEL_FMT_ETC2: - return cETC2; - case PIXEL_FMT_ETC2A: - return cETC2A; - case PIXEL_FMT_ETC1S: - return cETC1S; - case PIXEL_FMT_ETC2AS: - return cETC2AS; - default: - break; - } - return cDXTInvalid; -} + inline dxt_format get_dxt_format(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: + return cDXT1; + case PIXEL_FMT_DXT1A: + return cDXT1A; + case 
PIXEL_FMT_DXT2: + return cDXT3; + case PIXEL_FMT_DXT3: + return cDXT3; + case PIXEL_FMT_DXT4: + return cDXT5; + case PIXEL_FMT_DXT5: + return cDXT5; + case PIXEL_FMT_3DC: + return cDXN_YX; + case PIXEL_FMT_DXT5A: + return cDXT5A; + case PIXEL_FMT_DXN: + return cDXN_XY; + case PIXEL_FMT_DXT5_CCxY: + return cDXT5; + case PIXEL_FMT_DXT5_xGxR: + return cDXT5; + case PIXEL_FMT_DXT5_xGBR: + return cDXT5; + case PIXEL_FMT_DXT5_AGBR: + return cDXT5; + case PIXEL_FMT_ETC1: + return cETC1; + case PIXEL_FMT_ETC2: + return cETC2; + case PIXEL_FMT_ETC2A: + return cETC2A; + case PIXEL_FMT_ETC1S: + return cETC1S; + case PIXEL_FMT_ETC2AS: + return cETC2AS; + default: + break; + } + return cDXTInvalid; + } -inline pixel_format from_dxt_format(dxt_format dxt_fmt) { - switch (dxt_fmt) { - case cDXT1: - return PIXEL_FMT_DXT1; - case cDXT1A: - return PIXEL_FMT_DXT1A; - case cDXT3: - return PIXEL_FMT_DXT3; - case cDXT5: - return PIXEL_FMT_DXT5; - case cDXN_XY: - return PIXEL_FMT_DXN; - case cDXN_YX: - return PIXEL_FMT_3DC; - case cDXT5A: - return PIXEL_FMT_DXT5A; - case cETC1: - return PIXEL_FMT_ETC1; - case cETC2: - return PIXEL_FMT_ETC2; - case cETC2A: - return PIXEL_FMT_ETC2A; - case cETC1S: - return PIXEL_FMT_ETC1S; - case cETC2AS: - return PIXEL_FMT_ETC2AS; - default: - break; - } - CRNLIB_ASSERT(false); - return PIXEL_FMT_INVALID; -} + inline pixel_format from_dxt_format(dxt_format dxt_fmt) + { + switch (dxt_fmt) + { + case cDXT1: + return PIXEL_FMT_DXT1; + case cDXT1A: + return PIXEL_FMT_DXT1A; + case cDXT3: + return PIXEL_FMT_DXT3; + case cDXT5: + return PIXEL_FMT_DXT5; + case cDXN_XY: + return PIXEL_FMT_DXN; + case cDXN_YX: + return PIXEL_FMT_3DC; + case cDXT5A: + return PIXEL_FMT_DXT5A; + case cETC1: + return PIXEL_FMT_ETC1; + case cETC2: + return PIXEL_FMT_ETC2; + case cETC2A: + return PIXEL_FMT_ETC2A; + case cETC1S: + return PIXEL_FMT_ETC1S; + case cETC2AS: + return PIXEL_FMT_ETC2AS; + default: + break; + } + CRNLIB_ASSERT(false); + return PIXEL_FMT_INVALID; + } -inline bool 
is_pixel_format_non_srgb(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXN: - case PIXEL_FMT_DXT5A: - case PIXEL_FMT_DXT5_CCxY: - case PIXEL_FMT_DXT5_xGxR: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_AGBR: - return true; - default: - break; - } - return false; -} + inline bool is_pixel_format_non_srgb(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + return true; + default: + break; + } + return false; + } -inline bool is_crn_format_non_srgb(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXN_XY: - case cCRNFmtDXN_YX: - case cCRNFmtDXT5A: - case cCRNFmtDXT5_CCxY: - case cCRNFmtDXT5_xGxR: - case cCRNFmtDXT5_xGBR: - case cCRNFmtDXT5_AGBR: - return true; - default: - break; - } - return false; -} + inline bool is_crn_format_non_srgb(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5A: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return true; + default: + break; + } + return false; + } -inline uint get_bpp(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1: - return 4; - case PIXEL_FMT_DXT1A: - return 4; - case PIXEL_FMT_ETC1: - return 4; - case PIXEL_FMT_ETC2: - return 4; - case PIXEL_FMT_ETC2A: - return 8; - case PIXEL_FMT_ETC1S: - return 4; - case PIXEL_FMT_ETC2AS: - return 8; - case PIXEL_FMT_DXT2: - return 8; - case PIXEL_FMT_DXT3: - return 8; - case PIXEL_FMT_DXT4: - return 8; - case PIXEL_FMT_DXT5: - return 8; - case PIXEL_FMT_3DC: - return 8; - case PIXEL_FMT_DXT5A: - return 4; - case PIXEL_FMT_R8G8B8: - return 24; - case PIXEL_FMT_A8R8G8B8: - return 32; - case PIXEL_FMT_A8: - return 8; - case PIXEL_FMT_L8: - return 8; - case PIXEL_FMT_A8L8: - return 16; - case PIXEL_FMT_DXN: - return 8; - case PIXEL_FMT_DXT5_CCxY: - return 8; - case 
PIXEL_FMT_DXT5_xGxR: - return 8; - case PIXEL_FMT_DXT5_xGBR: - return 8; - case PIXEL_FMT_DXT5_AGBR: - return 8; - default: - break; - } - CRNLIB_ASSERT(false); - return 0; -}; + inline uint get_bpp(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: + return 4; + case PIXEL_FMT_DXT1A: + return 4; + case PIXEL_FMT_ETC1: + return 4; + case PIXEL_FMT_ETC2: + return 4; + case PIXEL_FMT_ETC2A: + return 8; + case PIXEL_FMT_ETC1S: + return 4; + case PIXEL_FMT_ETC2AS: + return 8; + case PIXEL_FMT_DXT2: + return 8; + case PIXEL_FMT_DXT3: + return 8; + case PIXEL_FMT_DXT4: + return 8; + case PIXEL_FMT_DXT5: + return 8; + case PIXEL_FMT_3DC: + return 8; + case PIXEL_FMT_DXT5A: + return 4; + case PIXEL_FMT_R8G8B8: + return 24; + case PIXEL_FMT_A8R8G8B8: + return 32; + case PIXEL_FMT_A8: + return 8; + case PIXEL_FMT_L8: + return 8; + case PIXEL_FMT_A8L8: + return 16; + case PIXEL_FMT_DXN: + return 8; + case PIXEL_FMT_DXT5_CCxY: + return 8; + case PIXEL_FMT_DXT5_xGxR: + return 8; + case PIXEL_FMT_DXT5_xGBR: + return 8; + case PIXEL_FMT_DXT5_AGBR: + return 8; + default: + break; + } + CRNLIB_ASSERT(false); + return 0; + }; -inline uint get_dxt_bytes_per_block(pixel_format fmt) { - switch (fmt) { - case PIXEL_FMT_DXT1: - return 8; - case PIXEL_FMT_DXT1A: - return 8; - case PIXEL_FMT_DXT5A: - return 8; - case PIXEL_FMT_ETC1: - return 8; - case PIXEL_FMT_ETC2: - return 8; - case PIXEL_FMT_ETC2A: - return 16; - case PIXEL_FMT_ETC1S: - return 8; - case PIXEL_FMT_ETC2AS: - return 16; - case PIXEL_FMT_DXT2: - return 16; - case PIXEL_FMT_DXT3: - return 16; - case PIXEL_FMT_DXT4: - return 16; - case PIXEL_FMT_DXT5: - return 16; - case PIXEL_FMT_3DC: - return 16; - case PIXEL_FMT_DXN: - return 16; - case PIXEL_FMT_DXT5_CCxY: - return 16; - case PIXEL_FMT_DXT5_xGxR: - return 16; - case PIXEL_FMT_DXT5_xGBR: - return 16; - case PIXEL_FMT_DXT5_AGBR: - return 16; - default: - break; - } - CRNLIB_ASSERT(false); - return 0; -} + inline uint get_dxt_bytes_per_block(pixel_format fmt) + 
{ + switch (fmt) + { + case PIXEL_FMT_DXT1: + return 8; + case PIXEL_FMT_DXT1A: + return 8; + case PIXEL_FMT_DXT5A: + return 8; + case PIXEL_FMT_ETC1: + return 8; + case PIXEL_FMT_ETC2: + return 8; + case PIXEL_FMT_ETC2A: + return 16; + case PIXEL_FMT_ETC1S: + return 8; + case PIXEL_FMT_ETC2AS: + return 16; + case PIXEL_FMT_DXT2: + return 16; + case PIXEL_FMT_DXT3: + return 16; + case PIXEL_FMT_DXT4: + return 16; + case PIXEL_FMT_DXT5: + return 16; + case PIXEL_FMT_3DC: + return 16; + case PIXEL_FMT_DXN: + return 16; + case PIXEL_FMT_DXT5_CCxY: + return 16; + case PIXEL_FMT_DXT5_xGxR: + return 16; + case PIXEL_FMT_DXT5_xGBR: + return 16; + case PIXEL_FMT_DXT5_AGBR: + return 16; + default: + break; + } + CRNLIB_ASSERT(false); + return 0; + } -enum component_flags { - cCompFlagRValid = 1, - cCompFlagGValid = 2, - cCompFlagBValid = 4, - cCompFlagAValid = 8, + enum component_flags + { + cCompFlagRValid = 1, + cCompFlagGValid = 2, + cCompFlagBValid = 4, + cCompFlagAValid = 8, - cCompFlagGrayscale = 16, - cCompFlagNormalMap = 32, - cCompFlagLumaChroma = 64, + cCompFlagGrayscale = 16, + cCompFlagNormalMap = 32, + cCompFlagLumaChroma = 64, - cDefaultCompFlags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid -}; + cDefaultCompFlags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid + }; -CRN_EXPORT component_flags get_component_flags(pixel_format fmt); + CRN_EXPORT component_flags get_component_flags(pixel_format fmt); -CRN_EXPORT crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt); + CRN_EXPORT crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt); -CRN_EXPORT pixel_format convert_crn_format_to_pixel_format(crn_format fmt); + CRN_EXPORT pixel_format convert_crn_format_to_pixel_format(crn_format fmt); -} // namespace pixel_format_helpers + } // namespace pixel_format_helpers } // namespace crnlib diff --git a/crnlib/crn_sparse_array.h b/crnlib/crn_sparse_array.h index 6236c6a..4e9cc01 100644 
--- a/crnlib/crn_sparse_array.h +++ b/crnlib/crn_sparse_array.h @@ -293,9 +293,9 @@ class sparse_array : public Traits { } inline void swap(sparse_array& other) { - utils::swap(m_size, other.m_size); + std::swap(m_size, other.m_size); m_groups.swap(other.m_groups); - utils::swap(m_num_active_groups, other.m_num_active_groups); + std::swap(m_num_active_groups, other.m_num_active_groups); } private: diff --git a/crnlib/crn_sparse_bit_array.cpp b/crnlib/crn_sparse_bit_array.cpp index becd002..e9670d9 100644 --- a/crnlib/crn_sparse_bit_array.cpp +++ b/crnlib/crn_sparse_bit_array.cpp @@ -71,8 +71,8 @@ void sparse_bit_array::clear() { } void sparse_bit_array::swap(sparse_bit_array& other) { - utils::swap(m_ppGroups, other.m_ppGroups); - utils::swap(m_num_groups, other.m_num_groups); + std::swap(m_ppGroups, other.m_ppGroups); + std::swap(m_num_groups, other.m_num_groups); } void sparse_bit_array::optimize() { diff --git a/crnlib/crn_utils.h b/crnlib/crn_utils.h index 302481a..7124ffd 100644 --- a/crnlib/crn_utils.h +++ b/crnlib/crn_utils.h @@ -36,13 +36,6 @@ namespace crnlib { namespace utils { - template - inline void swap(T& l, T& r) - { - T temp(l); - l = r; - r = temp; - } template inline void zero_object(T& obj) @@ -92,7 +85,7 @@ namespace crnlib const uint half_size = size >> 1; for (uint i = 0; i < half_size; i++) { - utils::swap(p[i], p[size - 1U - i]); + std::swap(p[i], p[size - 1U - i]); } } diff --git a/crnlib/crn_vector.cpp b/crnlib/crn_vector.cpp index c018e54..1881eb9 100644 --- a/crnlib/crn_vector.cpp +++ b/crnlib/crn_vector.cpp @@ -1,76 +1,97 @@ // File: crn_vector.cpp // See Copyright Notice and license at the end of inc/crnlib.h + #include "crn_core.h" #include "crn_vector.h" #include "crn_rand.h" - #include "crn_color.h" #include "crn_vec.h" -namespace crnlib { -bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pMover, bool nofail) { - CRNLIB_ASSERT(m_size <= m_capacity); +namespace crnlib +{ 
+ bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pMover, bool nofail) + { + CRNLIB_ASSERT(m_size <= m_capacity); #ifdef CRNLIB_64BIT_POINTERS - CRNLIB_ASSERT(min_new_capacity < (0x400000000ULL / element_size)); + CRNLIB_ASSERT(min_new_capacity < (0x400000000ULL / element_size)); #else - CRNLIB_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); + CRNLIB_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); #endif - if (m_capacity >= min_new_capacity) - return true; + if (m_capacity >= min_new_capacity) + { + return true; + } - ptr_bits_t new_capacity = min_new_capacity; - if ((grow_hint) && (!math::is_power_of_2((uint64)new_capacity))) - new_capacity = math::next_pow2((uint64)new_capacity); + ptr_bits_t new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2((uint64)new_capacity))) + { + new_capacity = math::next_pow2((uint64)new_capacity); + } - CRNLIB_ASSERT(new_capacity && (new_capacity > m_capacity)); + CRNLIB_ASSERT(new_capacity && (new_capacity > m_capacity)); - const size_t desired_size = element_size * new_capacity; - size_t actual_size; - if (!pMover) { - void* new_p = crnlib_realloc(m_p, desired_size, &actual_size, true); - if (!new_p) { - if (nofail) - return false; + const size_t desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = crnlib_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + { + if (nofail) + { + return false; + } - char buf[256]; + char buf[256]; #ifdef _MSC_VER - sprintf_s(buf, sizeof(buf), "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); + sprintf_s(buf, sizeof(buf), "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); #else - sprintf(buf, "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); + sprintf(buf, "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); #endif - CRNLIB_FAIL(buf); - } - 
m_p = new_p; - } else { - void* new_p = crnlib_malloc(desired_size, &actual_size); - if (!new_p) { - if (nofail) - return false; + CRNLIB_FAIL(buf); + } + m_p = new_p; + } + else + { + void* new_p = crnlib_malloc(desired_size, &actual_size); + if (!new_p) + { + if (nofail) + { + return false; + } - char buf[256]; + char buf[256]; #ifdef _MSC_VER - sprintf_s(buf, sizeof(buf), "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); + sprintf_s(buf, sizeof(buf), "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); #else - sprintf(buf, "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); + sprintf(buf, "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); #endif - CRNLIB_FAIL(buf); - } + CRNLIB_FAIL(buf); + } - (*pMover)(new_p, m_p, m_size); + (*pMover)(new_p, m_p, m_size); - if (m_p) - crnlib_free(m_p); + if (m_p) + { + crnlib_free(m_p); + } - m_p = new_p; - } + m_p = new_p; + } - if (actual_size > desired_size) - m_capacity = static_cast(actual_size / element_size); - else - m_capacity = static_cast(new_capacity); + if (actual_size > desired_size) + { + m_capacity = static_cast(actual_size / element_size); + } + else + { + m_capacity = static_cast(new_capacity); + } - return true; -} + return true; + } } // namespace crnlib diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h index 8385182..4ff0e78 100644 --- a/crnlib/crn_vector.h +++ b/crnlib/crn_vector.h @@ -588,9 +588,9 @@ namespace crnlib inline void swap(vector& other) { - utils::swap(m_p, other.m_p); - utils::swap(m_size, other.m_size); - utils::swap(m_capacity, other.m_capacity); + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); } inline void sort() @@ -613,7 +613,7 @@ namespace crnlib uint j = m_size >> 1; for (uint i = 0; i < j; i++) { - utils::swap(m_p[i], m_p[m_size - 1 - i]); + std::swap(m_p[i], m_p[m_size - 1 - i]); } } diff --git a/inc/crn_decomp.h 
b/inc/crn_decomp.h index fb31455..1293029 100644 --- a/inc/crn_decomp.h +++ b/inc/crn_decomp.h @@ -772,9 +772,9 @@ class vector : public helpers::rel_ops > { } void swap(vector& other) { - utils::swap(m_p, other.m_p); - utils::swap(m_size, other.m_size); - utils::swap(m_capacity, other.m_capacity); + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); } private: From 79e7dddb3600a65609294c4c16edfd5cd38c5f53 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Mon, 7 Sep 2020 15:42:24 -0400 Subject: [PATCH 13/18] Replace NULL with nullptr --- crnlib/crn_arealist.cpp | 32 ++++++------- crnlib/crn_assert.cpp | 2 +- crnlib/crn_buffer_stream.h | 8 ++-- crnlib/crn_cfile_stream.h | 10 ++-- crnlib/crn_clusterizer.h | 2 +- crnlib/crn_colorized_console.cpp | 2 +- crnlib/crn_comp.cpp | 10 ++-- crnlib/crn_comp.h | 2 +- crnlib/crn_console.cpp | 4 +- crnlib/crn_console.h | 4 +- crnlib/crn_darwin_pthreads.cpp | 12 ++--- crnlib/crn_data_stream.h | 4 +- crnlib/crn_data_stream_serializer.h | 4 +- crnlib/crn_dds_comp.cpp | 8 ++-- crnlib/crn_dxt1.cpp | 4 +- crnlib/crn_dxt1.h | 4 +- crnlib/crn_dxt5a.cpp | 4 +- crnlib/crn_dxt5a.h | 2 +- crnlib/crn_dxt_endpoint_refiner.cpp | 4 +- crnlib/crn_dxt_endpoint_refiner.h | 4 +- crnlib/crn_dxt_hc.cpp | 8 ++-- crnlib/crn_dxt_image.cpp | 10 ++-- crnlib/crn_dxt_image.h | 6 +-- crnlib/crn_dynamic_stream.h | 2 +- crnlib/crn_dynamic_string.cpp | 12 ++--- crnlib/crn_dynamic_string.h | 6 +-- crnlib/crn_etc.cpp | 2 +- crnlib/crn_etc.h | 8 ++-- crnlib/crn_file_utils.cpp | 24 +++++----- crnlib/crn_find_files.cpp | 2 +- crnlib/crn_hash.cpp | 2 +- crnlib/crn_hash_map.h | 4 +- crnlib/crn_image.h | 8 ++-- crnlib/crn_image_utils.cpp | 14 +++--- crnlib/crn_image_utils.h | 6 +-- crnlib/crn_ktx_texture.cpp | 2 +- crnlib/crn_mem.cpp | 20 ++++---- crnlib/crn_mem.h | 6 +-- crnlib/crn_mipmapped_texture.cpp | 18 +++---- crnlib/crn_mipmapped_texture.h | 10 ++-- crnlib/crn_prefix_coding.cpp | 4 +- 
crnlib/crn_prefix_coding.h | 10 ++-- crnlib/crn_qdxt1.cpp | 6 +-- crnlib/crn_qdxt1.h | 4 +- crnlib/crn_qdxt5.cpp | 6 +-- crnlib/crn_qdxt5.h | 4 +- crnlib/crn_radix_sort.h | 4 +- crnlib/crn_resampler.cpp | 74 ++++++++++++++--------------- crnlib/crn_resampler.h | 6 +-- crnlib/crn_rg_etc1.cpp | 12 ++--- crnlib/crn_sparse_array.h | 18 +++---- crnlib/crn_sparse_bit_array.cpp | 24 +++++----- crnlib/crn_symbol_codec.cpp | 26 +++++----- crnlib/crn_symbol_codec.h | 8 ++-- crnlib/crn_texture_comp.cpp | 4 +- crnlib/crn_texture_conversion.cpp | 10 ++-- crnlib/crn_texture_conversion.h | 10 ++-- crnlib/crn_texture_file_types.cpp | 4 +- crnlib/crn_threaded_clusterizer.h | 6 +-- crnlib/crn_threaded_resampler.cpp | 18 +++---- crnlib/crn_threading_null.h | 12 ++--- crnlib/crn_threading_pthreads.cpp | 8 ++-- crnlib/crn_threading_pthreads.h | 16 +++---- crnlib/crn_threading_win32.cpp | 10 ++-- crnlib/crn_threading_win32.h | 22 ++++----- crnlib/crn_timer.cpp | 2 +- crnlib/crn_value.cpp | 2 +- crnlib/crn_value.h | 8 ++-- crnlib/crn_vec.h | 12 ++--- crnlib/crn_vector.h | 18 +++---- crnlib/crnlib.cpp | 16 +++---- crunch/corpus_test.cpp | 2 +- crunch/crunch.cpp | 8 ++-- examples/example1/example1.cpp | 20 ++++---- examples/example2/example2.cpp | 6 +-- examples/example2/timer.cpp | 2 +- examples/example3/example3.cpp | 2 +- 77 files changed, 360 insertions(+), 360 deletions(-) diff --git a/crnlib/crn_arealist.cpp b/crnlib/crn_arealist.cpp index 6e124f5..fc05372 100644 --- a/crnlib/crn_arealist.cpp +++ b/crnlib/crn_arealist.cpp @@ -43,7 +43,7 @@ namespace crnlib q->Pprev = p; Parea->Pnext = Plist->Pfree; - Parea->Pprev = NULL; + Parea->Pprev = nullptr; Plist->Pfree = Parea; return q; @@ -53,7 +53,7 @@ namespace crnlib { Area* p = Plist->Pfree; - if (p == NULL) + if (p == nullptr) { if (Plist->next_free == Plist->total_areas) { @@ -123,7 +123,7 @@ namespace crnlib if (Plist->Phead) { crnlib_free(Plist->Phead); - Plist->Phead = NULL; + Plist->Phead = nullptr; } crnlib_free(Plist); @@ -138,13 
+138,13 @@ namespace crnlib Plist->Phead = (Area*)crnlib_calloc(max_areas + 2, sizeof(Area)); Plist->Ptail = Plist->Phead + 1; - Plist->Phead->Pprev = NULL; + Plist->Phead->Pprev = nullptr; Plist->Phead->Pnext = Plist->Ptail; Plist->Ptail->Pprev = Plist->Phead; - Plist->Ptail->Pnext = NULL; + Plist->Ptail->Pnext = nullptr; - Plist->Pfree = NULL; + Plist->Pfree = nullptr; Plist->next_free = 2; return Plist; @@ -172,7 +172,7 @@ namespace crnlib Pnew_list->Phead = (Area*)crnlib_malloc(sizeof(Area) * Plist->total_areas); Pnew_list->Ptail = Pnew_list->Phead + 1; - Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : NULL; + Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : nullptr; Pnew_list->next_free = Plist->next_free; @@ -180,8 +180,8 @@ namespace crnlib for (i = 0; i < Plist->total_areas; i++) { - Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == NULL) ? NULL : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; - Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == NULL) ? NULL : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == nullptr) ? nullptr : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == nullptr) ? nullptr : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; Pnew_list->Phead[i].x1 += x_ofs; Pnew_list->Phead[i].y1 += y_ofs; @@ -216,7 +216,7 @@ namespace crnlib { area_fatal_error("Area_List_dup", "Src and Dst total_areas must be equal!"); } - Pdst_list->Pfree = (Psrc_list->Pfree) ? ((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : NULL; + Pdst_list->Pfree = (Psrc_list->Pfree) ? 
((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : nullptr; Pdst_list->next_free = Psrc_list->next_free; @@ -226,8 +226,8 @@ namespace crnlib { for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) ? nullptr : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) ? nullptr : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; Pdst_list->Phead[i].x1 += x_ofs; Pdst_list->Phead[i].y1 += y_ofs; @@ -239,8 +239,8 @@ namespace crnlib { for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) ? nullptr : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) ? 
nullptr : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; } } } @@ -329,13 +329,13 @@ namespace crnlib { Plist->Phead->Pnext = Plist->Ptail; Plist->Ptail->Pprev = Plist->Phead; - Plist->Pfree = NULL; + Plist->Pfree = nullptr; Plist->next_free = 2; } void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2) { - Plist->Pfree = NULL; + Plist->Pfree = nullptr; Plist->Phead[2].x1 = x1; Plist->Phead[2].y1 = y1; diff --git a/crnlib/crn_assert.cpp b/crnlib/crn_assert.cpp index 91d0500..686dcb7 100644 --- a/crnlib/crn_assert.cpp +++ b/crnlib/crn_assert.cpp @@ -49,7 +49,7 @@ void crnlib_fail(const char* pExp, const char* pFile, unsigned line) #if CRNLIB_USE_WIN32_API if (g_fail_exceptions) { - RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, NULL); + RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, nullptr); } else #endif diff --git a/crnlib/crn_buffer_stream.h b/crnlib/crn_buffer_stream.h index 86332ca..0ad5724 100644 --- a/crnlib/crn_buffer_stream.h +++ b/crnlib/crn_buffer_stream.h @@ -9,14 +9,14 @@ namespace crnlib { public: buffer_stream(): data_stream(), - m_pBuf(NULL), + m_pBuf(nullptr), m_size(0), m_ofs(0) { } buffer_stream(void* p, uint size): data_stream(), - m_pBuf(NULL), + m_pBuf(nullptr), m_size(0), m_ofs(0) { @@ -24,7 +24,7 @@ namespace crnlib } buffer_stream(const void* p, uint size): data_stream(), - m_pBuf(NULL), + m_pBuf(nullptr), m_size(0), m_ofs(0) { @@ -78,7 +78,7 @@ namespace crnlib if (m_opened) { m_opened = false; - m_pBuf = NULL; + m_pBuf = nullptr; m_size = 0; m_ofs = 0; return true; diff --git a/crnlib/crn_cfile_stream.h b/crnlib/crn_cfile_stream.h index 0e2ae22..4c9cc61 100644 --- a/crnlib/crn_cfile_stream.h +++ b/crnlib/crn_cfile_stream.h @@ -10,7 +10,7 @@ namespace crnlib { public: cfile_stream(): data_stream(), - m_pFile(NULL), + m_pFile(nullptr), m_size(0), m_ofs(0), m_has_ownership(false) @@ -18,7 +18,7 @@ namespace crnlib } cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership): 
data_stream(), - m_pFile(NULL), + m_pFile(nullptr), m_size(0), m_ofs(0), m_has_ownership(false) @@ -27,7 +27,7 @@ namespace crnlib } cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false): data_stream(), - m_pFile(NULL), + m_pFile(nullptr), m_size(0), m_ofs(0), m_has_ownership(false) @@ -55,7 +55,7 @@ namespace crnlib } } - m_pFile = NULL; + m_pFile = nullptr; m_opened = false; m_size = 0; m_ofs = 0; @@ -116,7 +116,7 @@ namespace crnlib return false; } - FILE* pFile = NULL; + FILE* pFile = nullptr; crn_fopen(&pFile, pFilename, pMode); m_has_ownership = true; diff --git a/crnlib/crn_clusterizer.h b/crnlib/crn_clusterizer.h index 83407cc..c1f92db 100644 --- a/crnlib/crn_clusterizer.h +++ b/crnlib/crn_clusterizer.h @@ -40,7 +40,7 @@ namespace crnlib typedef bool (*progress_callback_func_ptr)(uint percentage_completed, void* pData); - bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = NULL, void* pProgress_data = NULL, bool quick = false) + bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = nullptr, void* pProgress_data = nullptr, bool quick = false) { if (m_training_vecs.empty()) { diff --git a/crnlib/crn_colorized_console.cpp b/crnlib/crn_colorized_console.cpp index c25525d..3388e8a 100644 --- a/crnlib/crn_colorized_console.cpp +++ b/crnlib/crn_colorized_console.cpp @@ -12,7 +12,7 @@ namespace crnlib void colorized_console::init() { console::init(); - console::add_console_output_func(console_output_func, NULL); + console::add_console_output_func(console_output_func, nullptr); } void colorized_console::deinit() diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp index 4fc2141..fd06725 100644 --- a/crnlib/crn_comp.cpp +++ b/crnlib/crn_comp.cpp @@ -11,7 +11,7 @@ namespace crnlib { crn_comp::crn_comp() - : m_pParams(NULL) { + : m_pParams(nullptr) { } crn_comp::~crn_comp() { @@ -346,7 +346,7 @@ bool crn_comp::alias_images() { } void 
crn_comp::clear() { - m_pParams = NULL; + m_pParams = nullptr; for (uint f = 0; f < cCRNMaxFaces; f++) for (uint l = 0; l < cCRNMaxLevels; l++) @@ -1244,9 +1244,9 @@ bool crn_comp::compress_internal() { if (!pack_blocks( level, - !pass && !level, pass ? &codec : NULL, - m_has_comp[cColor] ? &m_endpoint_remaping[cColor] : NULL, m_has_comp[cColor] ? &m_selector_remaping[cColor] : NULL, - m_has_comp[cAlpha0] ? &m_endpoint_remaping[cAlpha0] : NULL, m_has_comp[cAlpha0] ? &m_selector_remaping[cAlpha0] : NULL)) { + !pass && !level, pass ? &codec : nullptr, + m_has_comp[cColor] ? &m_endpoint_remaping[cColor] : nullptr, m_has_comp[cColor] ? &m_selector_remaping[cColor] : nullptr, + m_has_comp[cAlpha0] ? &m_endpoint_remaping[cAlpha0] : nullptr, m_has_comp[cAlpha0] ? &m_selector_remaping[cAlpha0] : nullptr)) { return false; } diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h index 35f4c0a..53b67b3 100644 --- a/crnlib/crn_comp.h +++ b/crnlib/crn_comp.h @@ -49,7 +49,7 @@ namespace crnlib } const uint8* get_comp_data_ptr() const { - return m_comp_data.size() ? &m_comp_data[0] : NULL; + return m_comp_data.size() ? 
&m_comp_data[0] : nullptr; } private: diff --git a/crnlib/crn_console.cpp b/crnlib/crn_console.cpp index f13d445..121fa81 100644 --- a/crnlib/crn_console.cpp +++ b/crnlib/crn_console.cpp @@ -32,7 +32,7 @@ namespace crnlib if (m_pMutex) { crnlib_delete(m_pMutex); - m_pMutex = NULL; + m_pMutex = nullptr; } } @@ -74,7 +74,7 @@ namespace crnlib } } - const char* pPrefix = NULL; + const char* pPrefix = nullptr; if ((m_prefixes) && (m_at_beginning_of_line)) { switch (type) diff --git a/crnlib/crn_console.h b/crnlib/crn_console.h index 1c811fb..f9b357c 100644 --- a/crnlib/crn_console.h +++ b/crnlib/crn_console.h @@ -44,7 +44,7 @@ namespace crnlib CRN_EXPORT static void init(); CRN_EXPORT static void deinit(); - static bool is_initialized() { return m_pMutex != NULL; } + static bool is_initialized() { return m_pMutex != nullptr; } CRN_EXPORT static void set_default_category(eConsoleMessageType category); CRN_EXPORT static eConsoleMessageType get_default_category(); @@ -89,7 +89,7 @@ namespace crnlib struct console_func { - console_func(console_output_func func = NULL, void* pData = NULL): + console_func(console_output_func func = nullptr, void* pData = nullptr): m_func(func), m_pData(pData) { diff --git a/crnlib/crn_darwin_pthreads.cpp b/crnlib/crn_darwin_pthreads.cpp index 6821313..6d46493 100644 --- a/crnlib/crn_darwin_pthreads.cpp +++ b/crnlib/crn_darwin_pthreads.cpp @@ -73,7 +73,7 @@ int sem_timedwait(sem_t* sem, const struct timespec* abs_timeout) struct timeval currentTime; /* Time now */ long secsToWait, nsecsToWait; /* Seconds and nsec to delay */ - gettimeofday(&currentTime, NULL); + gettimeofday(&currentTime, nullptr); secsToWait = abs_timeout->tv_sec - currentTime.tv_sec; nsecsToWait = (abs_timeout->tv_nsec - (currentTime.tv_usec * 1000)); while (nsecsToWait < 0) @@ -138,13 +138,13 @@ int sem_timedwait(sem_t* sem, const struct timespec* abs_timeout) details.callingThread = pthread_self(); details.timedOutShort = &timedOut; timedOut = CRNLIB_FALSE; - sigaction(SIGUSR2, NULL, 
&oldSignalAction); + sigaction(SIGUSR2, nullptr, &oldSignalAction); /* Start up the timeout thread. Once we've done that, we can * restore the previous cancellation state. */ - createStatus = pthread_create(&timeoutThread, NULL, timeoutThreadMain, (void*)&details); + createStatus = pthread_create(&timeoutThread, nullptr, timeoutThreadMain, (void*)&details); pthread_setcancelstate(oldCancelState, &ignoreCancelState); if (createStatus < 0) @@ -221,7 +221,7 @@ void timeoutThreadCleanup(void* passedPtr) { pthread_cancel(timeoutThread); } - pthread_join(timeoutThread, NULL); + pthread_join(timeoutThread, nullptr); /* The code originally restored the old action handler, which generally * was the default handler that caused the task to exit. Just occasionally, @@ -233,7 +233,7 @@ void timeoutThreadCleanup(void* passedPtr) * to crash is not a good idea, and so the line below has been commented * out. * - * sigaction (SIGUSR2,detailsPtr->sigHandlerAddr,NULL); + * sigaction (SIGUSR2,detailsPtr->sigHandlerAddr,nullptr); */ } @@ -305,7 +305,7 @@ static int triggerSignal(int Signal, pthread_t Thread) SignalDetails.sa_handler = ignoreSignal; SignalDetails.sa_flags = 0; (void)sigemptyset(&SignalDetails.sa_mask); - if ((Result = sigaction(Signal, &SignalDetails, NULL)) == 0) + if ((Result = sigaction(Signal, &SignalDetails, nullptr)) == 0) { Result = pthread_kill(Thread, Signal); } diff --git a/crnlib/crn_data_stream.h b/crnlib/crn_data_stream.h index ceb118e..cc207bd 100644 --- a/crnlib/crn_data_stream.h +++ b/crnlib/crn_data_stream.h @@ -31,7 +31,7 @@ namespace crnlib virtual data_stream* get_parent() { - return NULL; + return nullptr; } virtual bool close() @@ -100,7 +100,7 @@ namespace crnlib virtual const void* get_ptr() const { - return NULL; + return nullptr; } inline int read_byte() diff --git a/crnlib/crn_data_stream_serializer.h b/crnlib/crn_data_stream_serializer.h index eec77a7..6a2c7fb 100644 --- a/crnlib/crn_data_stream_serializer.h +++ 
b/crnlib/crn_data_stream_serializer.h @@ -13,7 +13,7 @@ namespace crnlib { public: data_stream_serializer(): - m_pStream(NULL), + m_pStream(nullptr), m_little_endian(true) { } @@ -307,7 +307,7 @@ namespace crnlib return false; } - if (memchr(str.get_ptr(), 0, len) != NULL) + if (memchr(str.get_ptr(), 0, len) != nullptr) { str.truncate(0); return false; diff --git a/crnlib/crn_dds_comp.cpp b/crnlib/crn_dds_comp.cpp index 7139a87..430fd89 100644 --- a/crnlib/crn_dds_comp.cpp +++ b/crnlib/crn_dds_comp.cpp @@ -9,9 +9,9 @@ namespace crnlib { dds_comp::dds_comp(): - m_pParams(NULL), + m_pParams(nullptr), m_pixel_fmt(PIXEL_FMT_INVALID), - m_pQDXT_state(NULL) + m_pQDXT_state(nullptr) { } @@ -25,13 +25,13 @@ namespace crnlib m_src_tex.clear(); m_packed_tex.clear(); m_comp_data.clear(); - m_pParams = NULL; + m_pParams = nullptr; m_pixel_fmt = PIXEL_FMT_INVALID; m_task_pool.deinit(); if (m_pQDXT_state) { crnlib_delete(m_pQDXT_state); - m_pQDXT_state = NULL; + m_pQDXT_state = nullptr; } } diff --git a/crnlib/crn_dxt1.cpp b/crnlib/crn_dxt1.cpp index 576cac7..af331ae 100644 --- a/crnlib/crn_dxt1.cpp +++ b/crnlib/crn_dxt1.cpp @@ -41,8 +41,8 @@ static struct { //----------------------------------------------------------------------------------------------------------------------------------------- dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() - : m_pParams(NULL), - m_pResults(NULL), + : m_pParams(nullptr), + m_pResults(nullptr), m_perceptual(false), m_num_prev_results(0) { m_low_coords.reserve(512); diff --git a/crnlib/crn_dxt1.h b/crnlib/crn_dxt1.h index d279aa3..8a9733b 100644 --- a/crnlib/crn_dxt1.h +++ b/crnlib/crn_dxt1.h @@ -148,7 +148,7 @@ namespace crnlib { params(): m_block_index(0), - m_pPixels(NULL), + m_pPixels(nullptr), m_num_pixels(0), m_dxt1a_alpha_threshold(128U), m_quality(cCRNDXTQualityUber), @@ -181,7 +181,7 @@ namespace crnlib struct results { - inline results(): m_pSelectors(NULL) + inline results(): m_pSelectors(nullptr) { } diff --git 
a/crnlib/crn_dxt5a.cpp b/crnlib/crn_dxt5a.cpp index 1883b49..5267570 100644 --- a/crnlib/crn_dxt5a.cpp +++ b/crnlib/crn_dxt5a.cpp @@ -9,8 +9,8 @@ namespace crnlib { dxt5_endpoint_optimizer::dxt5_endpoint_optimizer(): - m_pParams(NULL), - m_pResults(NULL) { + m_pParams(nullptr), + m_pResults(nullptr) { m_unique_values.reserve(16); m_unique_value_weights.reserve(16); } diff --git a/crnlib/crn_dxt5a.h b/crnlib/crn_dxt5a.h index dd67388..49abc3b 100644 --- a/crnlib/crn_dxt5a.h +++ b/crnlib/crn_dxt5a.h @@ -16,7 +16,7 @@ namespace crnlib { params(): m_block_index(0), - m_pPixels(NULL), + m_pPixels(nullptr), m_num_pixels(0), m_comp_index(3), m_quality(cCRNDXTQualityUber), diff --git a/crnlib/crn_dxt_endpoint_refiner.cpp b/crnlib/crn_dxt_endpoint_refiner.cpp index d830a24..e441942 100644 --- a/crnlib/crn_dxt_endpoint_refiner.cpp +++ b/crnlib/crn_dxt_endpoint_refiner.cpp @@ -8,8 +8,8 @@ namespace crnlib { dxt_endpoint_refiner::dxt_endpoint_refiner() : - m_pParams(NULL), - m_pResults(NULL) + m_pParams(nullptr), + m_pResults(nullptr) { } diff --git a/crnlib/crn_dxt_endpoint_refiner.h b/crnlib/crn_dxt_endpoint_refiner.h index abb9da3..fc939bd 100644 --- a/crnlib/crn_dxt_endpoint_refiner.h +++ b/crnlib/crn_dxt_endpoint_refiner.h @@ -16,9 +16,9 @@ namespace crnlib struct params { params(): m_block_index(0), - m_pPixels(NULL), + m_pPixels(nullptr), m_num_pixels(0), - m_pSelectors(NULL), + m_pSelectors(nullptr), m_alpha_comp_index(0), m_error_to_beat(cUINT64_MAX), m_dxt1_selectors(true), diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 63c9b1f..60fdd8a 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -31,7 +31,7 @@ dxt_hc::dxt_hc() m_num_alpha_blocks(0), m_main_thread_id(crn_get_current_thread_id()), m_canceled(false), - m_pTask_pool(NULL), + m_pTask_pool(nullptr), m_prev_phase_index(-1), m_prev_percentage_complete(-1) { } @@ -229,7 +229,7 @@ bool dxt_hc::compress( } } - m_pTask_pool = NULL; + m_pTask_pool = nullptr; return true; } @@ -769,7 +769,7 
@@ void dxt_hc::determine_color_endpoints() { } for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, m_has_etc_color_blocks ? &dxt_hc::determine_color_endpoint_codebook_task_etc : &dxt_hc::determine_color_endpoint_codebook_task, i, NULL); + m_pTask_pool->queue_object_task(this, m_has_etc_color_blocks ? &dxt_hc::determine_color_endpoint_codebook_task_etc : &dxt_hc::determine_color_endpoint_codebook_task, i, nullptr); m_pTask_pool->join(); } @@ -978,7 +978,7 @@ void dxt_hc::determine_alpha_endpoints() { } for (uint i = 0; i < num_tasks; i++) - m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL); + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, nullptr); m_pTask_pool->join(); } diff --git a/crnlib/crn_dxt_image.cpp b/crnlib/crn_dxt_image.cpp index 8d962f0..afc6534 100644 --- a/crnlib/crn_dxt_image.cpp +++ b/crnlib/crn_dxt_image.cpp @@ -25,7 +25,7 @@ namespace crnlib { dxt_image::dxt_image() - : m_pElements(NULL), + : m_pElements(nullptr), m_width(0), m_height(0), m_blocks_x(0), @@ -40,7 +40,7 @@ dxt_image::dxt_image() } dxt_image::dxt_image(const dxt_image& other) - : m_pElements(NULL) { + : m_pElements(nullptr) { *this = other; } @@ -59,7 +59,7 @@ dxt_image& dxt_image::operator=(const dxt_image& rhs) { m_format = rhs.m_format; m_total_blocks = rhs.m_total_blocks; m_total_elements = rhs.m_total_elements; - m_pElements = NULL; + m_pElements = nullptr; memcpy(m_element_type, rhs.m_element_type, sizeof(m_element_type)); memcpy(m_element_component_index, rhs.m_element_component_index, sizeof(m_element_component_index)); @@ -85,7 +85,7 @@ void dxt_image::clear() { utils::zero_object(m_element_component_index); m_total_blocks = 0; m_total_elements = 0; - m_pElements = NULL; + m_pElements = nullptr; } bool dxt_image::init_internal(dxt_format fmt, uint width, uint height) { @@ -361,7 +361,7 @@ bool dxt_image::init_ati_compress(dxt_format fmt, 
const image_u8& img, const pac options.fWeightingBlue = .072169f; } - ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, NULL, NULL, NULL); + ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, nullptr, nullptr, nullptr); return err == ATI_TC_OK; } #endif diff --git a/crnlib/crn_dxt_image.h b/crnlib/crn_dxt_image.h index a11b146..79c6cf5 100644 --- a/crnlib/crn_dxt_image.h +++ b/crnlib/crn_dxt_image.h @@ -113,14 +113,14 @@ class CRN_EXPORT dxt_image { m_use_both_block_types = true; m_endpoint_caching = true; m_compressor = cCRNDXTCompressorCRN; - m_pProgress_callback = NULL; - m_pProgress_callback_user_data_ptr = NULL; + m_pProgress_callback = nullptr; + m_pProgress_callback_user_data_ptr = nullptr; m_dxt1a_alpha_threshold = 128; m_num_helper_threads = 0; m_progress_start = 0; m_progress_range = 100; m_use_transparent_indices_for_black = false; - m_pTask_pool = NULL; + m_pTask_pool = nullptr; } void init(const crn_comp_params& params) { diff --git a/crnlib/crn_dynamic_stream.h b/crnlib/crn_dynamic_stream.h index 577dbbe..c1e19f5 100644 --- a/crnlib/crn_dynamic_stream.h +++ b/crnlib/crn_dynamic_stream.h @@ -110,7 +110,7 @@ namespace crnlib virtual const void* get_ptr() const { - return m_buf.empty() ? NULL : &m_buf[0]; + return m_buf.empty() ? 
nullptr : &m_buf[0]; } virtual uint read(void* pBuf, uint len) diff --git a/crnlib/crn_dynamic_string.cpp b/crnlib/crn_dynamic_string.cpp index 9375b38..ec0e1dd 100644 --- a/crnlib/crn_dynamic_string.cpp +++ b/crnlib/crn_dynamic_string.cpp @@ -11,7 +11,7 @@ namespace crnlib dynamic_string::dynamic_string(eVarArg, const char* p, ...): m_buf_size(0), m_len(0), - m_pStr(NULL) + m_pStr(nullptr) { CRNLIB_ASSERT(p); @@ -24,7 +24,7 @@ namespace crnlib dynamic_string::dynamic_string(const char* p): m_buf_size(0), m_len(0), - m_pStr(NULL) + m_pStr(nullptr) { CRNLIB_ASSERT(p); set(p); @@ -33,7 +33,7 @@ namespace crnlib dynamic_string::dynamic_string(const char* p, uint len): m_buf_size(0), m_len(0), - m_pStr(NULL) + m_pStr(nullptr) { CRNLIB_ASSERT(p); set_from_buf(p, len); @@ -42,7 +42,7 @@ namespace crnlib dynamic_string::dynamic_string(const dynamic_string& other): m_buf_size(0), m_len(0), - m_pStr(NULL) + m_pStr(nullptr) { set(other); } @@ -54,7 +54,7 @@ namespace crnlib if (m_pStr) { crnlib_delete_array(m_pStr); - m_pStr = NULL; + m_pStr = nullptr; m_len = 0; m_buf_size = 0; @@ -230,7 +230,7 @@ namespace crnlib } #ifdef CRNLIB_BUILD_DEBUG - if ((buf_size) && (memchr(pBuf, 0, buf_size) != NULL)) + if ((buf_size) && (memchr(pBuf, 0, buf_size) != nullptr)) { CRNLIB_ASSERT(0); clear(); diff --git a/crnlib/crn_dynamic_string.h b/crnlib/crn_dynamic_string.h index 7598b40..42fe1d1 100644 --- a/crnlib/crn_dynamic_string.h +++ b/crnlib/crn_dynamic_string.h @@ -15,7 +15,7 @@ namespace crnlib inline dynamic_string(): m_buf_size(0), m_len(0), - m_pStr(NULL) + m_pStr(nullptr) { } dynamic_string(eVarArg dummy, const char* p, ...); @@ -40,7 +40,7 @@ namespace crnlib inline const char* assume_ownership() { const char* p = m_pStr; - m_pStr = NULL; + m_pStr = nullptr; m_len = 0; m_buf_size = 0; return p; @@ -279,7 +279,7 @@ namespace crnlib if (buf_size_in_chars > cUINT16_MAX) { CRNLIB_ASSERT(0); - return NULL; + return nullptr; } buf_size_in_chars = math::minimum(cUINT16_MAX, 
math::next_pow2(buf_size_in_chars)); return crnlib_new_array(buf_size_in_chars); diff --git a/crnlib/crn_etc.cpp b/crnlib/crn_etc.cpp index da15b37..9db9635 100644 --- a/crnlib/crn_etc.cpp +++ b/crnlib/crn_etc.cpp @@ -1408,7 +1408,7 @@ uint64 pack_etc1_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b)) break; if (!r) { - pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL); + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : nullptr); } } diff --git a/crnlib/crn_etc.h b/crnlib/crn_etc.h index 322c990..88d23c2 100644 --- a/crnlib/crn_etc.h +++ b/crnlib/crn_etc.h @@ -413,10 +413,10 @@ class CRN_EXPORT etc1_optimizer { } void clear() { - m_pParams = NULL; - m_pResult = NULL; - m_pSorted_luma = NULL; - m_pSorted_luma_indices = NULL; + m_pParams = nullptr; + m_pResult = nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; } struct params : crn_etc1_pack_params { diff --git a/crnlib/crn_file_utils.cpp b/crnlib/crn_file_utils.cpp index f96929f..708760e 100644 --- a/crnlib/crn_file_utils.cpp +++ b/crnlib/crn_file_utils.cpp @@ -299,10 +299,10 @@ namespace crnlib #ifdef _MSC_VER // Compiling with MSVC errno_t error = _splitpath_s(p, - pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, - pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, - pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, - pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + pDrive ? drive_buf : nullptr, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : nullptr, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : nullptr, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : nullptr, pExt ? 
_MAX_EXT : 0); if (error != 0) { return false; @@ -310,10 +310,10 @@ namespace crnlib #else // Compiling with MinGW _splitpath(p, - pDrive ? drive_buf : NULL, - pDir ? dir_buf : NULL, - pFilename ? fname_buf : NULL, - pExt ? ext_buf : NULL); + pDrive ? drive_buf : nullptr, + pDir ? dir_buf : nullptr, + pFilename ? fname_buf : nullptr, + pExt ? ext_buf : nullptr); #endif if (pDrive) @@ -398,7 +398,7 @@ namespace crnlib bool file_utils::get_pathname(const char* p, dynamic_string& path) { dynamic_string temp_drive, temp_path; - if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) + if (!split_path(p, &temp_drive, &temp_path, nullptr, nullptr)) { return false; } @@ -410,7 +410,7 @@ namespace crnlib bool file_utils::get_filename(const char* p, dynamic_string& filename) { dynamic_string temp_ext; - if (!split_path(p, NULL, NULL, &filename, &temp_ext)) + if (!split_path(p, nullptr, nullptr, &filename, &temp_ext)) { return false; } @@ -609,7 +609,7 @@ namespace crnlib // See http://www.codeproject.com/KB/string/wildcmp.aspx int file_utils::wildcmp(const char* pWild, const char* pString) { - const char* cp = NULL, * mp = NULL; + const char* cp = nullptr, * mp = nullptr; while ((*pString) && (*pWild != '*')) { @@ -656,7 +656,7 @@ namespace crnlib bool file_utils::write_buf_to_file(const char* pPath, const void* pData, size_t data_size) { - FILE* pFile = NULL; + FILE* pFile = nullptr; #if defined(CRN_CC_MSVC) // Compiling with MSVC diff --git a/crnlib/crn_find_files.cpp b/crnlib/crn_find_files.cpp index 09bb972..7331297 100644 --- a/crnlib/crn_find_files.cpp +++ b/crnlib/crn_find_files.cpp @@ -315,7 +315,7 @@ namespace crnlib } closedir(dp); - dp = NULL; + dp = nullptr; if (flags & cFlagRecursive) { diff --git a/crnlib/crn_hash.cpp b/crnlib/crn_hash.cpp index aa6e6e3..64db4b3 100644 --- a/crnlib/crn_hash.cpp +++ b/crnlib/crn_hash.cpp @@ -23,7 +23,7 @@ namespace crnlib uint32 hash = len, tmp; int rem; - if (len <= 0 || data == NULL) + if (len <= 0 || data == nullptr) { 
return 0; } diff --git a/crnlib/crn_hash_map.h b/crnlib/crn_hash_map.h index 6de0bf5..5bb1b16 100644 --- a/crnlib/crn_hash_map.h +++ b/crnlib/crn_hash_map.h @@ -199,7 +199,7 @@ class hash_map { public: inline iterator() - : m_pTable(NULL), m_index(0) {} + : m_pTable(nullptr), m_index(0) {} inline iterator(hash_map_type& table, uint index) : m_pTable(&table), m_index(index) {} inline iterator(const iterator& other) @@ -255,7 +255,7 @@ class hash_map { public: inline const_iterator() - : m_pTable(NULL), m_index(0) {} + : m_pTable(nullptr), m_index(0) {} inline const_iterator(const hash_map_type& table, uint index) : m_pTable(&table), m_index(index) {} inline const_iterator(const iterator& other) diff --git a/crnlib/crn_image.h b/crnlib/crn_image.h index 3235125..e5ea98b 100644 --- a/crnlib/crn_image.h +++ b/crnlib/crn_image.h @@ -20,7 +20,7 @@ class image { m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), - m_pPixels(NULL) { + m_pPixels(nullptr) { } // pitch is in PIXELS, not bytes. 
@@ -63,7 +63,7 @@ class image { m_pPixels = &m_pixel_buf.front(); } else { m_pixel_buf.clear(); - m_pPixels = NULL; + m_pPixels = nullptr; } } else { m_pixel_buf = other.m_pixel_buf; @@ -80,7 +80,7 @@ class image { } image(const image& other) - : m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(NULL) { + : m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(nullptr) { *this = other; } @@ -129,7 +129,7 @@ class image { } void clear() { - m_pPixels = NULL; + m_pPixels = nullptr; m_pixel_buf.clear(); m_width = 0; m_height = 0; diff --git a/crnlib/crn_image_utils.cpp b/crnlib/crn_image_utils.cpp index f3196cb..b0b0edb 100644 --- a/crnlib/crn_image_utils.cpp +++ b/crnlib/crn_image_utils.cpp @@ -171,7 +171,7 @@ bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags, crnlib::vector temp; uint num_src_chans = 0; - const void* pSrc_img = NULL; + const void* pSrc_img = nullptr; if (is_jpeg) { write_flags |= cWriteFlagIgnoreAlpha; @@ -406,7 +406,7 @@ bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_p resamplers[0] = crnlib_new(src_width, src_height, dst_width, dst_height, params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, - params.m_pFilter, (Resampler::Contrib_List*)NULL, (Resampler::Contrib_List*)NULL, params.m_filter_scale, params.m_filter_scale); + params.m_pFilter, (Resampler::Contrib_List*)nullptr, (Resampler::Contrib_List*)nullptr, params.m_filter_scale, params.m_filter_scale); samples[0].resize(src_width); for (uint i = 1; i < params.m_num_comps; i++) { @@ -1168,7 +1168,7 @@ uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* *pActualComps = 0; if ((req_comps < 1) || (req_comps > 4)) - return NULL; + return nullptr; mipmapped_texture tex; @@ -1177,17 +1177,17 @@ uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* data_stream_serializer serializer(buf_stream); if (!tex.read_from_stream(serializer)) - return NULL; + return nullptr; if (tex.is_packed()) { if (!tex.unpack_from_dxt(true)) - return NULL; + return nullptr; } image_u8 img; image_u8* pImg = tex.get_level_image(0, 0, img); if (!pImg) - return NULL; + return nullptr; *pWidth = tex.get_width(); *pHeight = tex.get_height(); @@ -1199,7 +1199,7 @@ uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* else *pActualComps = 3; - uint8* pDst = NULL; + uint8* pDst = nullptr; if (req_comps == 4) { pDst = (uint8*)malloc(tex.get_total_pixels() * sizeof(uint32)); uint8* pSrc = (uint8*)pImg->get_ptr(); diff --git a/crnlib/crn_image_utils.h b/crnlib/crn_image_utils.h index 3c57c8d..5463e58 100644 --- a/crnlib/crn_image_utils.h +++ b/crnlib/crn_image_utils.h @@ -53,7 +53,7 @@ namespace crnlib CRN_EXPORT bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags = 0, int grayscale_comp_index = cLumaComponentIndex); CRN_EXPORT bool has_alpha(const image_u8& img); - CRN_EXPORT bool is_normal_map(const image_u8& img, const char* pFilename = NULL); + CRN_EXPORT bool is_normal_map(const image_u8& img, const char* pFilename = nullptr); CRN_EXPORT void 
renorm_normal_map(image_u8& img); struct resample_params @@ -101,7 +101,7 @@ namespace crnlib void print(const char* pName) const; // If num_channels==0, luma error is computed. - // If pHist != NULL, it must point to a 256 entry array. + // If pHist != nullptr, it must point to a 256 entry array. bool compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error = true); uint mMax; @@ -169,7 +169,7 @@ namespace crnlib if (!packer.is_valid()) { - return NULL; + return nullptr; } const uint width = img.get_width(), height = img.get_height(); diff --git a/crnlib/crn_ktx_texture.cpp b/crnlib/crn_ktx_texture.cpp index e802af4..7b07552 100644 --- a/crnlib/crn_ktx_texture.cpp +++ b/crnlib/crn_ktx_texture.cpp @@ -799,7 +799,7 @@ const uint8_vec* ktx_texture::find_key(const char* pKey) const { return &v; } - return NULL; + return nullptr; } bool ktx_texture::get_key_value_as_string(const char* pKey, dynamic_string& str) const { diff --git a/crnlib/crn_mem.cpp b/crnlib/crn_mem.cpp index 18c8f49..0400960 100644 --- a/crnlib/crn_mem.cpp +++ b/crnlib/crn_mem.cpp @@ -79,7 +79,7 @@ static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, *pActual_size = p_new ? 
::_msize(p_new) : 0; } else if (!size) { ::free(p); - p_new = NULL; + p_new = nullptr; if (pActual_size) *pActual_size = 0; @@ -88,7 +88,7 @@ static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, #ifdef WIN32 p_new = ::_expand(p, size); #else - p_new = NULL; + p_new = nullptr; #endif if (p_new) { @@ -124,7 +124,7 @@ void crnlib_mem_error(const char* p_msg) { crnlib_assert(p_msg, __FILE__, __LINE__); } void* crnlib_malloc(size_t size) { - return crnlib_malloc(size, NULL); + return crnlib_malloc(size, nullptr); } void* crnlib_malloc(size_t size, size_t* pActual_size) { @@ -134,18 +134,18 @@ void* crnlib_malloc(size_t size, size_t* pActual_size) { if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_malloc: size too big"); - return NULL; + return nullptr; } size_t actual_size = size; - uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + uint8* p_new = static_cast((*g_pRealloc)(nullptr, size, &actual_size, true, g_pUser_data)); if (pActual_size) *pActual_size = actual_size; if ((!p_new) || (actual_size < size)) { crnlib_mem_error("crnlib_malloc: out of memory"); - return NULL; + return nullptr; } CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); @@ -161,12 +161,12 @@ void* crnlib_malloc(size_t size, size_t* pActual_size) { void* crnlib_realloc(void* p, size_t size, size_t* pActual_size, bool movable) { if ((ptr_bits_t)p & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { crnlib_mem_error("crnlib_realloc: bad ptr"); - return NULL; + return nullptr; } if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_malloc: size too big"); - return NULL; + return nullptr; } #if CRNLIB_MEM_STATS @@ -223,7 +223,7 @@ void crnlib_free(void* p) { update_total_allocated(-1, -static_cast(cur_size)); #endif - (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + (*g_pRealloc)(p, 0, nullptr, true, g_pUser_data); } size_t crnlib_msize(void* p) { @@ -256,7 +256,7 @@ void 
crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, if ((!pRealloc) || (!pMSize)) { crnlib::g_pRealloc = crnlib::crnlib_default_realloc; crnlib::g_pMSize = crnlib::crnlib_default_msize; - crnlib::g_pUser_data = NULL; + crnlib::g_pUser_data = nullptr; } else { crnlib::g_pRealloc = pRealloc; crnlib::g_pMSize = pMSize; diff --git a/crnlib/crn_mem.h b/crnlib/crn_mem.h index 507cc4e..396e5cb 100644 --- a/crnlib/crn_mem.h +++ b/crnlib/crn_mem.h @@ -18,8 +18,8 @@ namespace crnlib CRN_EXPORT void* crnlib_malloc(size_t size); CRN_EXPORT void* crnlib_malloc(size_t size, size_t* pActual_size); - CRN_EXPORT void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); - CRN_EXPORT void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = NULL); + CRN_EXPORT void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = nullptr, bool movable = true); + CRN_EXPORT void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = nullptr); CRN_EXPORT void crnlib_free(void* p); CRN_EXPORT size_t crnlib_msize(void* p); CRN_EXPORT void crnlib_print_mem_stats(); @@ -141,7 +141,7 @@ namespace crnlib if (total > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_new_array: Array too large!"); - return NULL; + return nullptr; } uint8* q = static_cast(crnlib_malloc(static_cast(total))); diff --git a/crnlib/crn_mipmapped_texture.cpp b/crnlib/crn_mipmapped_texture.cpp index 689b618..b7f8d72 100644 --- a/crnlib/crn_mipmapped_texture.cpp +++ b/crnlib/crn_mipmapped_texture.cpp @@ -18,8 +18,8 @@ mip_level::mip_level() m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), - m_pImage(NULL), - m_pDXTImage(NULL), + m_pImage(nullptr), + m_pDXTImage(nullptr), m_orient_flags(cDefaultOrientationFlags) { } @@ -28,8 +28,8 @@ mip_level::mip_level(const mip_level& other) m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), - 
m_pImage(NULL), - m_pDXTImage(NULL), + m_pImage(nullptr), + m_pDXTImage(nullptr), m_orient_flags(cDefaultOrientationFlags) { *this = other; } @@ -66,12 +66,12 @@ void mip_level::clear() { if (m_pImage) { crnlib_delete(m_pImage); - m_pImage = NULL; + m_pImage = nullptr; } if (m_pDXTImage) { crnlib_delete(m_pDXTImage); - m_pDXTImage = NULL; + m_pDXTImage = nullptr; } } @@ -336,7 +336,7 @@ void mip_level::uncook_image(image_u8& img) const { image_u8* mip_level::get_unpacked_image(image_u8& tmp, uint unpack_flags) const { if (!is_valid()) - return NULL; + return nullptr; if (m_pDXTImage) { m_pDXTImage->unpack(tmp); @@ -1748,7 +1748,7 @@ void mipmapped_texture::set(texture_file_types::format source_file_type, const m image_u8* mipmapped_texture::get_level_image(uint face, uint level, image_u8& img, uint unpack_flags) const { if (!is_valid()) - return NULL; + return nullptr; const mip_level* pLevel = get_level(face, level); @@ -2726,7 +2726,7 @@ bool mipmapped_texture::read_crn_from_memory(const void* pData, uint data_size, void* pFaces[cCRNMaxFaces]; for (uint f = tex_info.m_faces; f < cCRNMaxFaces; f++) - pFaces[f] = NULL; + pFaces[f] = nullptr; for (uint l = 0; l < tex_info.m_levels; l++) { const uint level_width = math::maximum(1U, tex_info.m_width >> l); diff --git a/crnlib/crn_mipmapped_texture.h b/crnlib/crn_mipmapped_texture.h index 9f41a95..aeb08c5 100644 --- a/crnlib/crn_mipmapped_texture.h +++ b/crnlib/crn_mipmapped_texture.h @@ -56,9 +56,9 @@ class CRN_EXPORT mip_level { image_u8* get_unpacked_image(image_u8& tmp, uint unpack_flags) const; - inline bool is_packed() const { return m_pDXTImage != NULL; } + inline bool is_packed() const { return m_pDXTImage != nullptr; } - inline bool is_valid() const { return (m_pImage != NULL) || (m_pDXTImage != NULL); } + inline bool is_valid() const { return (m_pImage != nullptr) || (m_pDXTImage != nullptr); } inline pixel_format_helpers::component_flags get_comp_flags() const { return m_comp_flags; } inline void 
set_comp_flags(pixel_format_helpers::component_flags comp_flags) { m_comp_flags = comp_flags; } @@ -163,7 +163,7 @@ class CRN_EXPORT mipmapped_texture { inline bool is_unpacked() const { if (get_num_faces()) { - return get_level(0, 0)->get_image() != NULL; + return get_level(0, 0)->get_image() != nullptr; } return false; } @@ -200,8 +200,8 @@ class CRN_EXPORT mipmapped_texture { bool write_to_file( const char* pFilename, texture_file_types::format file_format = texture_file_types::cFormatInvalid, - crn_comp_params* pComp_params = NULL, - uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL, + crn_comp_params* pComp_params = nullptr, + uint32* pActual_quality_level = nullptr, float* pActual_bitrate = nullptr, uint32 image_write_flags = 0); // Conversion diff --git a/crnlib/crn_prefix_coding.cpp b/crnlib/crn_prefix_coding.cpp index b07f0dc..c2fc184 100644 --- a/crnlib/crn_prefix_coding.cpp +++ b/crnlib/crn_prefix_coding.cpp @@ -262,7 +262,7 @@ namespace crnlib if (pTables->m_sorted_symbol_order) { crnlib_delete_array(pTables->m_sorted_symbol_order); - pTables->m_sorted_symbol_order = NULL; + pTables->m_sorted_symbol_order = nullptr; } pTables->m_sorted_symbol_order = crnlib_new_array(pTables->m_cur_sorted_symbol_order_size); @@ -302,7 +302,7 @@ namespace crnlib if (pTables->m_lookup) { crnlib_delete_array(pTables->m_lookup); - pTables->m_lookup = NULL; + pTables->m_lookup = nullptr; } pTables->m_lookup = crnlib_new_array(table_size); diff --git a/crnlib/crn_prefix_coding.h b/crnlib/crn_prefix_coding.h index 51adc16..7f97cd9 100644 --- a/crnlib/crn_prefix_coding.h +++ b/crnlib/crn_prefix_coding.h @@ -25,9 +25,9 @@ namespace crnlib m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), - m_lookup(NULL), + m_lookup(nullptr), m_cur_sorted_symbol_order_size(0), - m_sorted_symbol_order(NULL) + m_sorted_symbol_order(nullptr) { } @@ -36,9 +36,9 @@ namespace crnlib m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), - 
m_lookup(NULL), + m_lookup(nullptr), m_cur_sorted_symbol_order_size(0), - m_sorted_symbol_order(NULL) + m_sorted_symbol_order(nullptr) { *this = other; } @@ -81,7 +81,7 @@ namespace crnlib if (m_sorted_symbol_order) { crnlib_delete_array(m_sorted_symbol_order); - m_sorted_symbol_order = NULL; + m_sorted_symbol_order = nullptr; m_cur_sorted_symbol_order_size = 0; } } diff --git a/crnlib/crn_qdxt1.cpp b/crnlib/crn_qdxt1.cpp index 2ac1559..115cfb1 100644 --- a/crnlib/crn_qdxt1.cpp +++ b/crnlib/crn_qdxt1.cpp @@ -17,8 +17,8 @@ qdxt1::qdxt1(task_pool& task_pool) m_progress_start(0), m_progress_range(100), m_num_blocks(0), - m_pBlocks(NULL), - m_pDst_elements(NULL), + m_pBlocks(nullptr), + m_pDst_elements(nullptr), m_elements_per_block(0), m_max_selector_clusters(0), m_prev_percentage_complete(-1), @@ -32,7 +32,7 @@ void qdxt1::clear() { m_main_thread_id = 0; m_num_blocks = 0; m_pBlocks = 0; - m_pDst_elements = NULL; + m_pDst_elements = nullptr; m_elements_per_block = 0; m_params.clear(); m_endpoint_clusterizer.clear(); diff --git a/crnlib/crn_qdxt1.h b/crnlib/crn_qdxt1.h index a3545f5..d5f333b 100644 --- a/crnlib/crn_qdxt1.h +++ b/crnlib/crn_qdxt1.h @@ -21,8 +21,8 @@ struct qdxt1_params { m_perceptual = true; m_dxt1a_alpha_threshold = 0; m_use_alpha_blocks = true; - m_pProgress_func = NULL; - m_pProgress_data = NULL; + m_pProgress_func = nullptr; + m_pProgress_data = nullptr; m_num_mips = 0; m_hierarchical = true; utils::zero_object(m_mip_desc); diff --git a/crnlib/crn_qdxt5.cpp b/crnlib/crn_qdxt5.cpp index c32953e..3e10ff8 100644 --- a/crnlib/crn_qdxt5.cpp +++ b/crnlib/crn_qdxt5.cpp @@ -18,8 +18,8 @@ qdxt5::qdxt5(task_pool& task_pool) m_progress_start(0), m_progress_range(100), m_num_blocks(0), - m_pBlocks(NULL), - m_pDst_elements(NULL), + m_pBlocks(nullptr), + m_pDst_elements(nullptr), m_elements_per_block(0), m_max_selector_clusters(0), m_prev_percentage_complete(-1), @@ -33,7 +33,7 @@ void qdxt5::clear() { m_main_thread_id = 0; m_num_blocks = 0; m_pBlocks = 0; - 
m_pDst_elements = NULL; + m_pDst_elements = nullptr; m_elements_per_block = 0; m_params.clear(); m_endpoint_clusterizer.clear(); diff --git a/crnlib/crn_qdxt5.h b/crnlib/crn_qdxt5.h index c0b84d1..f3c7b88 100644 --- a/crnlib/crn_qdxt5.h +++ b/crnlib/crn_qdxt5.h @@ -19,8 +19,8 @@ struct qdxt5_params { m_quality_level = cMaxQuality; m_dxt_quality = cCRNDXTQualityUber; - m_pProgress_func = NULL; - m_pProgress_data = NULL; + m_pProgress_func = nullptr; + m_pProgress_data = nullptr; m_num_mips = 0; m_hierarchical = true; utils::zero_object(m_mip_desc); diff --git a/crnlib/crn_radix_sort.h b/crnlib/crn_radix_sort.h index 15d4dbe..5ab60ed 100644 --- a/crnlib/crn_radix_sort.h +++ b/crnlib/crn_radix_sort.h @@ -75,7 +75,7 @@ namespace crnlib CRNLIB_ASSERT(key_size == 1); if (key_size != 1) { - return NULL; + return nullptr; } T* p = pBuf0; @@ -251,7 +251,7 @@ namespace crnlib CRNLIB_ASSERT(key_size == 1); if (key_size != 1) { - return NULL; + return nullptr; } T* p = pIndices0; diff --git a/crnlib/crn_resampler.cpp b/crnlib/crn_resampler.cpp index 99c3bb2..0071b62 100644 --- a/crnlib/crn_resampler.cpp +++ b/crnlib/crn_resampler.cpp @@ -105,13 +105,13 @@ Resampler::Contrib_List* Resampler::make_clist( Contrib* Pcpool_next; Contrib_Bounds* Pcontrib_bounds; - if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == NULL) - return NULL; + if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == nullptr) + return nullptr; Pcontrib_bounds = (Contrib_Bounds*)crnlib_calloc(dst_x, sizeof(Contrib_Bounds)); if (!Pcontrib_bounds) { crnlib_free(Pcontrib); - return (NULL); + return (nullptr); } const Resample_Real oo_filter_scale = 1.0f / filter_scale; @@ -150,10 +150,10 @@ Resampler::Contrib_List* Resampler::make_clist( /* Allocate memory for contributors. 
*/ - if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == NULL)) { + if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == nullptr)) { crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); - return NULL; + return nullptr; } total = n; @@ -227,7 +227,7 @@ Resampler::Contrib_List* Resampler::make_clist( crnlib_free(Pcpool); crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); - return NULL; + return nullptr; } if (total_weight != 1.0f) @@ -262,10 +262,10 @@ Resampler::Contrib_List* Resampler::make_clist( /* Allocate memory for contributors. */ int total = n; - if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == NULL)) { + if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == nullptr)) { crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); - return NULL; + return nullptr; } Pcpool_next = Pcpool; @@ -339,7 +339,7 @@ Resampler::Contrib_List* Resampler::make_clist( crnlib_free(Pcpool); crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); - return NULL; + return nullptr; } if (total_weight != 1.0f) @@ -496,7 +496,7 @@ bool Resampler::put_line(const Sample* Psrc) { /* Does this slot have any memory allocated to it? */ if (!m_Pscan_buf->scan_buf_l[i]) { - if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) { + if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return false; } @@ -524,20 +524,20 @@ const Resampler::Sample* Resampler::get_line() { int i; /* If all the destination lines have been - * generated, then always return NULL. + * generated, then always return nullptr. */ if (m_cur_dst_y == m_resample_dst_y) - return NULL; + return nullptr; /* Check to see if all the required * contributors are present, if not, - * return NULL. + * return nullptr. 
*/ for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) - return NULL; + return nullptr; resample_y(m_Pdst_buf); @@ -554,11 +554,11 @@ Resampler::~Resampler() { #endif crnlib_free(m_Pdst_buf); - m_Pdst_buf = NULL; + m_Pdst_buf = nullptr; if (m_Ptmp_buf) { crnlib_free(m_Ptmp_buf); - m_Ptmp_buf = NULL; + m_Ptmp_buf = nullptr; } /* Don't deallocate a contibutor list @@ -568,27 +568,27 @@ Resampler::~Resampler() { if ((m_Pclist_x) && (!m_clist_x_forced)) { crnlib_free(m_Pclist_x->p); crnlib_free(m_Pclist_x); - m_Pclist_x = NULL; + m_Pclist_x = nullptr; } if ((m_Pclist_y) && (!m_clist_y_forced)) { crnlib_free(m_Pclist_y->p); crnlib_free(m_Pclist_y); - m_Pclist_y = NULL; + m_Pclist_y = nullptr; } crnlib_free(m_Psrc_y_count); - m_Psrc_y_count = NULL; + m_Psrc_y_count = nullptr; crnlib_free(m_Psrc_y_flag); - m_Psrc_y_flag = NULL; + m_Psrc_y_flag = nullptr; if (m_Pscan_buf) { for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) crnlib_free(m_Pscan_buf->scan_buf_l[i]); crnlib_free(m_Pscan_buf); - m_Pscan_buf = NULL; + m_Pscan_buf = nullptr; } } @@ -613,7 +613,7 @@ void Resampler::restart() { m_Pscan_buf->scan_buf_y[i] = -1; crnlib_free(m_Pscan_buf->scan_buf_l[i]); - m_Pscan_buf->scan_buf_l[i] = NULL; + m_Pscan_buf->scan_buf_l[i] = nullptr; } } @@ -645,15 +645,15 @@ Resampler::Resampler(int src_x, int src_y, m_delay_x_resample = false; m_intermediate_x = 0; - m_Pdst_buf = NULL; - m_Ptmp_buf = NULL; + m_Pdst_buf = nullptr; + m_Ptmp_buf = nullptr; m_clist_x_forced = false; - m_Pclist_x = NULL; + m_Pclist_x = nullptr; m_clist_y_forced = false; - m_Pclist_y = NULL; - m_Psrc_y_count = NULL; - m_Psrc_y_flag = NULL; - m_Pscan_buf = NULL; + m_Pclist_y = nullptr; + m_Psrc_y_count = nullptr; + m_Psrc_y_flag = nullptr; + m_Pscan_buf = nullptr; m_status = STATUS_OKAY; m_resample_src_x = src_x; @@ -663,14 +663,14 @@ Resampler::Resampler(int src_x, int src_y, m_boundary_op = boundary_op; - if ((m_Pdst_buf = 
(Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == NULL) { + if ((m_Pdst_buf = (Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return; } // Find the specified filter. - if (Pfilter_name == NULL) + if (Pfilter_name == nullptr) Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; for (i = 0; i < g_num_resample_filters; i++) @@ -709,12 +709,12 @@ Resampler::Resampler(int src_x, int src_y, m_clist_y_forced = true; } - if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == NULL) { + if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return; } - if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == NULL) { + if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return; } @@ -727,14 +727,14 @@ Resampler::Resampler(int src_x, int src_y, for (j = 0; j < m_Pclist_y[i].n; j++) m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; - if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == NULL) { + if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return; } for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) { m_Pscan_buf->scan_buf_y[i] = -1; - m_Pscan_buf->scan_buf_l[i] = NULL; + m_Pscan_buf->scan_buf_l[i] = nullptr; } m_cur_src_y = m_cur_dst_y = 0; @@ -777,7 +777,7 @@ Resampler::Resampler(int src_x, int src_y, } if (m_delay_x_resample) { - if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) { + if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) { m_status = STATUS_OUT_OF_MEMORY; return; } @@ -798,7 +798,7 @@ int Resampler::get_filter_num() { const char* Resampler::get_filter_name(int filter_num) { if ((filter_num < 0) || (filter_num >= 
g_num_resample_filters)) - return NULL; + return nullptr; else return g_resample_filters[filter_num].name; } diff --git a/crnlib/crn_resampler.h b/crnlib/crn_resampler.h index 63e57ee..aa0bd4b 100644 --- a/crnlib/crn_resampler.h +++ b/crnlib/crn_resampler.h @@ -52,8 +52,8 @@ class CRN_EXPORT Resampler { Boundary_Op boundary_op = BOUNDARY_CLAMP, Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, const char* Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER, - Contrib_List* Pclist_x = NULL, - Contrib_List* Pclist_y = NULL, + Contrib_List* Pclist_x = nullptr, + Contrib_List* Pclist_y = nullptr, Resample_Real filter_x_scale = 1.0f, Resample_Real filter_y_scale = 1.0f, Resample_Real src_x_ofs = 0.0f, @@ -67,7 +67,7 @@ class CRN_EXPORT Resampler { // false on out of memory. bool put_line(const Sample* Psrc); - // NULL if no scanlines are currently available (give the resampler more scanlines!) + // nullptr if no scanlines are currently available (give the resampler more scanlines!) const Sample* get_line(); Status status() const { return m_status; } diff --git a/crnlib/crn_rg_etc1.cpp b/crnlib/crn_rg_etc1.cpp index dc97e5f..9941cbe 100644 --- a/crnlib/crn_rg_etc1.cpp +++ b/crnlib/crn_rg_etc1.cpp @@ -1073,7 +1073,7 @@ namespace crnlib { else { RG_ETC1_ASSERT(key_size == 1); if (key_size != 1) - return NULL; + return nullptr; T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; @@ -1645,10 +1645,10 @@ namespace crnlib { } void clear() { - m_pParams = NULL; - m_pResult = NULL; - m_pSorted_luma = NULL; - m_pSorted_luma_indices = NULL; + m_pParams = nullptr; + m_pResult = nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; } struct params : etc1_pack_params { @@ -2479,7 +2479,7 @@ namespace crnlib { if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) break; if (!r) { - pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, !use_color4, (subblock && !use_color4) ? 
&results[0].m_block_color_unscaled : NULL); + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : nullptr); } } diff --git a/crnlib/crn_sparse_array.h b/crnlib/crn_sparse_array.h index 4e9cc01..bf7535d 100644 --- a/crnlib/crn_sparse_array.h +++ b/crnlib/crn_sparse_array.h @@ -92,7 +92,7 @@ class sparse_array : public Traits { copy_group(q, p); } else if (q) { free_group(q); - m_groups[i] = NULL; + m_groups[i] = nullptr; } } @@ -183,18 +183,18 @@ class sparse_array : public Traits { inline const T* get(uint i) const { CRNLIB_ASSERT(i < m_size); const T* p = m_groups[i >> Log2N]; - return p ? &p[i & (N - 1)] : NULL; + return p ? &p[i & (N - 1)] : nullptr; } inline T* get(uint i) { CRNLIB_ASSERT(i < m_size); T* p = m_groups[i >> Log2N]; - return p ? &p[i & (N - 1)] : NULL; + return p ? &p[i & (N - 1)] : nullptr; } inline bool is_present(uint i) const { CRNLIB_ASSERT(i < m_size); - return m_groups[i >> Log2N] != NULL; + return m_groups[i >> Log2N] != nullptr; } inline uint get_num_groups() const { return m_groups.size(); } @@ -219,11 +219,11 @@ class sparse_array : public Traits { if (group_index >= m_groups.size()) { T* p = alloc_group(true); if (!p) - return NULL; + return nullptr; if (!m_groups.try_push_back(p)) { free_group(p); - return NULL; + return nullptr; } } @@ -231,7 +231,7 @@ class sparse_array : public Traits { if (!p) { p = alloc_group(true); if (!p) - return NULL; + return nullptr; m_groups[group_index] = p; } @@ -283,7 +283,7 @@ class sparse_array : public Traits { T* p = m_groups[first_group + i]; if (p) { free_group(p); - m_groups[i] = NULL; + m_groups[i] = nullptr; } } } @@ -311,7 +311,7 @@ class sparse_array : public Traits { if (!p) { if (nofail) - return NULL; + return nullptr; CRNLIB_FAIL("Out of memory"); } diff --git a/crnlib/crn_sparse_bit_array.cpp b/crnlib/crn_sparse_bit_array.cpp index e9670d9..406a26b 100644 --- 
a/crnlib/crn_sparse_bit_array.cpp +++ b/crnlib/crn_sparse_bit_array.cpp @@ -5,11 +5,11 @@ namespace crnlib { sparse_bit_array::sparse_bit_array() - : m_num_groups(0), m_ppGroups(NULL) { + : m_num_groups(0), m_ppGroups(nullptr) { } sparse_bit_array::sparse_bit_array(uint size) - : m_num_groups(0), m_ppGroups(NULL) { + : m_num_groups(0), m_ppGroups(nullptr) { resize(size); } @@ -23,7 +23,7 @@ sparse_bit_array::sparse_bit_array(sparse_bit_array& other) { m_ppGroups[i] = alloc_group(false); memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); } else - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } @@ -50,7 +50,7 @@ sparse_bit_array& sparse_bit_array::operator=(sparse_bit_array& other) { memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); } else if (m_ppGroups[i]) { free_group(m_ppGroups[i]); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } @@ -65,7 +65,7 @@ void sparse_bit_array::clear() { free_group(m_ppGroups[i]); crnlib_free(m_ppGroups); - m_ppGroups = NULL; + m_ppGroups = nullptr; m_num_groups = 0; } @@ -85,7 +85,7 @@ void sparse_bit_array::optimize() { break; if (j == cDWORDsPerGroup) { free_group(s); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } } @@ -216,7 +216,7 @@ void sparse_bit_array::clear_bit_range(uint index, uint num) { uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { free_group(pGroup); - m_ppGroups[group_index] = NULL; + m_ppGroups[group_index] = nullptr; } num -= cBitsPerGroup; @@ -265,7 +265,7 @@ void sparse_bit_array::resize(uint size) { uint32* p = temp.m_ppGroups[i]; if (p) { m_ppGroups[i] = temp.m_ppGroups[i]; - temp.m_ppGroups[i] = NULL; + temp.m_ppGroups[i] = nullptr; } } } @@ -284,7 +284,7 @@ sparse_bit_array& sparse_bit_array::operator&=(const sparse_bit_array& other) { if (!s) { free_group(d); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } else { uint32 oc = 0; for (uint j = 0; j < cDWORDsPerGroup; j++) { @@ -294,7 +294,7 @@ sparse_bit_array& sparse_bit_array::operator&=(const 
sparse_bit_array& other) { } if (!oc) { free_group(d); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } } @@ -327,7 +327,7 @@ sparse_bit_array& sparse_bit_array::operator|=(const sparse_bit_array& other) { } if (!oc) { free_group(d); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } } @@ -357,7 +357,7 @@ sparse_bit_array& sparse_bit_array::and_not(const sparse_bit_array& other) { } if (!oc) { free_group(d); - m_ppGroups[i] = NULL; + m_ppGroups[i] = nullptr; } } diff --git a/crnlib/crn_symbol_codec.cpp b/crnlib/crn_symbol_codec.cpp index 4e43740..ea74ba0 100644 --- a/crnlib/crn_symbol_codec.cpp +++ b/crnlib/crn_symbol_codec.cpp @@ -54,7 +54,7 @@ adaptive_huffman_data_model::adaptive_huffman_data_model(bool encoding, uint tot m_update_cycle(0), m_symbols_until_update(0), m_total_count(0), - m_pDecode_tables(NULL), + m_pDecode_tables(nullptr), m_decoder_table_bits(0), m_encoding(encoding) { if (total_syms) @@ -66,7 +66,7 @@ adaptive_huffman_data_model::adaptive_huffman_data_model(const adaptive_huffman_ m_update_cycle(0), m_symbols_until_update(0), m_total_count(0), - m_pDecode_tables(NULL), + m_pDecode_tables(nullptr), m_decoder_table_bits(0), m_encoding(false) { *this = other; @@ -100,7 +100,7 @@ adaptive_huffman_data_model& adaptive_huffman_data_model::operator=(const adapti m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); } else { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } m_decoder_table_bits = rhs.m_decoder_table_bits; @@ -122,7 +122,7 @@ void adaptive_huffman_data_model::clear() { if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } } @@ -213,13 +213,13 @@ void adaptive_huffman_data_model::update() { static_huffman_data_model::static_huffman_data_model() : m_total_syms(0), - m_pDecode_tables(NULL), + m_pDecode_tables(nullptr), m_encoding(false) { } static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& 
other) : m_total_syms(0), - m_pDecode_tables(NULL), + m_pDecode_tables(nullptr), m_encoding(false) { *this = other; } @@ -244,7 +244,7 @@ static_huffman_data_model& static_huffman_data_model::operator=(const static_huf m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); } else { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } m_encoding = rhs.m_encoding; @@ -258,7 +258,7 @@ void static_huffman_data_model::clear() { m_code_sizes.clear(); if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } m_encoding = false; } @@ -294,7 +294,7 @@ bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint1 if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) @@ -381,7 +381,7 @@ bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint8 if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); - m_pDecode_tables = NULL; + m_pDecode_tables = nullptr; } if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) @@ -530,9 +530,9 @@ symbol_codec::symbol_codec() { } void symbol_codec::clear() { - m_pDecode_buf = NULL; - m_pDecode_buf_next = NULL; - m_pDecode_buf_end = NULL; + m_pDecode_buf = nullptr; + m_pDecode_buf_next = nullptr; + m_pDecode_buf_end = nullptr; m_decode_buf_size = 0; m_bit_buf = 0; diff --git a/crnlib/crn_symbol_codec.h b/crnlib/crn_symbol_codec.h index f5484d6..6f10dc7 100644 --- a/crnlib/crn_symbol_codec.h +++ b/crnlib/crn_symbol_codec.h @@ -40,7 +40,7 @@ class CRN_EXPORT symbol_histogram { inline void resize(uint new_size) { m_hist.resize(new_size); } - inline const uint* get_ptr() const { return m_hist.empty() ? NULL : &m_hist.front(); } + inline const uint* get_ptr() const { return m_hist.empty() ? 
nullptr : &m_hist.front(); } double calc_entropy() const; @@ -112,7 +112,7 @@ class CRN_EXPORT static_huffman_data_model { uint get_total_syms() const { return m_total_syms; } uint get_cost(uint sym) const { return m_code_sizes[sym]; } - const uint8* get_code_sizes() const { return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } + const uint8* get_code_sizes() const { return m_code_sizes.empty() ? nullptr : &m_code_sizes[0]; } private: uint m_total_syms; @@ -189,7 +189,7 @@ class CRN_EXPORT symbol_codec { // Encoding void start_encoding(uint expected_file_size); - uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = NULL); + uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = nullptr); void encode_bits(uint bits, uint num_bits); void encode_align_to_byte(); void encode(uint sym, adaptive_huffman_data_model& model); @@ -215,7 +215,7 @@ class CRN_EXPORT symbol_codec { typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void* pPrivate_data, const uint8*& pBuf, size_t& buf_size, bool& eof_flag); - bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = NULL, void* pPrivate_data = NULL); + bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = nullptr, void* pPrivate_data = nullptr); void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag = true); inline uint64 decode_get_bytes_consumed() const { return m_pDecode_buf_next - m_pDecode_buf; } inline uint64 decode_get_bits_remaining() const { return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; } diff --git a/crnlib/crn_texture_comp.cpp b/crnlib/crn_texture_comp.cpp index e2d9365..a81b3bc 100644 --- a/crnlib/crn_texture_comp.cpp +++ 
b/crnlib/crn_texture_comp.cpp @@ -21,7 +21,7 @@ namespace crnlib } else { - return NULL; + return nullptr; } } @@ -246,7 +246,7 @@ namespace crnlib } crnlib_delete(pTexture_comp); - pTexture_comp = NULL; + pTexture_comp = nullptr; if (best_quality_level < 0) { diff --git a/crnlib/crn_texture_conversion.cpp b/crnlib/crn_texture_conversion.cpp index 3011f2c..11d6431 100644 --- a/crnlib/crn_texture_conversion.cpp +++ b/crnlib/crn_texture_conversion.cpp @@ -174,7 +174,7 @@ bool convert_stats::print(bool psnr_metrics, bool mip_stats, bool grayscale_samp if (!bCSVStatsFileExists) fprintf(pFile, "name,width,height,miplevels,rgb_rms,luma_rms,effective_output_size,effective_bitrate\n"); dynamic_string filename; - file_utils::split_path(m_src_filename.get_ptr(), NULL, NULL, &filename, NULL); + file_utils::split_path(m_src_filename.get_ptr(), nullptr, nullptr, &filename, nullptr); uint64 effective_output_size = m_output_comp_file_size ? m_output_comp_file_size : m_output_file_size; float bitrate = (effective_output_size * 8.0f) / m_total_output_pixels; @@ -199,7 +199,7 @@ void convert_stats::clear() { m_dst_filename.clear(); m_dst_file_type = texture_file_types::cFormatInvalid; - m_pInput_tex = NULL; + m_pInput_tex = nullptr; m_output_tex.clear(); m_input_file_size = 0; @@ -469,14 +469,14 @@ static bool convert_and_write_normal_texture(mipmapped_texture& work_tex, conver console::info("Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); - if (!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) + if (!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, nullptr, nullptr, nullptr)) return convert_error(params, "Failed writing output file!"); } } } else { console::message("Writing texture to file: \"%s\"", params.m_dst_filename.get_ptr()); - if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) + if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), 
params.m_dst_file_type, nullptr, nullptr, nullptr)) return convert_error(params, "Failed writing output file!"); if (!params.m_no_stats) { @@ -505,7 +505,7 @@ bool process(convert_params& params, convert_stats& stats) { if (params.m_pIntermediate_texture) { crnlib_delete(params.m_pIntermediate_texture); - params.m_pIntermediate_texture = NULL; + params.m_pIntermediate_texture = nullptr; } params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); diff --git a/crnlib/crn_texture_conversion.h b/crnlib/crn_texture_conversion.h index 025ed7e..16d82de 100644 --- a/crnlib/crn_texture_conversion.h +++ b/crnlib/crn_texture_conversion.h @@ -20,7 +20,7 @@ class CRN_EXPORT convert_stats { texture_file_types::format dst_file_type, bool lzma_stats); - bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile = NULL) const; + bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile = nullptr) const; void clear(); @@ -43,13 +43,13 @@ class CRN_EXPORT convert_stats { class CRN_EXPORT convert_params { public: convert_params() - : m_pInput_texture(NULL), + : m_pInput_texture(nullptr), m_texture_type(cTextureTypeUnknown), m_dst_file_type(texture_file_types::cFormatInvalid), m_dst_format(PIXEL_FMT_INVALID), - m_pProgress_func(NULL), - m_pProgress_user_data(NULL), - m_pIntermediate_texture(NULL), + m_pProgress_func(nullptr), + m_pProgress_user_data(nullptr), + m_pIntermediate_texture(nullptr), m_y_flip(false), m_unflip(false), m_always_use_source_pixel_format(false), diff --git a/crnlib/crn_texture_file_types.cpp b/crnlib/crn_texture_file_types.cpp index d1d214a..60b7404 100644 --- a/crnlib/crn_texture_file_types.cpp +++ b/crnlib/crn_texture_file_types.cpp @@ -12,7 +12,7 @@ namespace crnlib CRNLIB_ASSERT(fmt < cNumFileFormats); if (fmt >= cNumFileFormats) { - return NULL; + return nullptr; } static const char* extensions[cNumFileFormats] = @@ -43,7 +43,7 @@ namespace crnlib 
texture_file_types::format texture_file_types::determine_file_format(const char* pFilename) { dynamic_string ext; - if (!file_utils::split_path(pFilename, NULL, NULL, NULL, &ext)) + if (!file_utils::split_path(pFilename, nullptr, nullptr, nullptr, &ext)) { return cFormatInvalid; } diff --git a/crnlib/crn_threaded_clusterizer.h b/crnlib/crn_threaded_clusterizer.h index 46d4980..a9be100 100644 --- a/crnlib/crn_threaded_clusterizer.h +++ b/crnlib/crn_threaded_clusterizer.h @@ -12,8 +12,8 @@ class threaded_clusterizer { public: threaded_clusterizer(task_pool& tp) : m_pTask_pool(&tp), - m_pProgress_callback(NULL), - m_pProgress_callback_data(NULL), + m_pProgress_callback(nullptr), + m_pProgress_callback_data(nullptr), m_canceled(false) { } @@ -128,7 +128,7 @@ class threaded_clusterizer { struct create_clusters_task_state { create_clusters_task_state() - : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0) { + : m_pWeighted_vecs(nullptr), m_pIndices(nullptr), m_max_clusters(0) { } const weighted_vec_array* m_pWeighted_vecs; diff --git a/crnlib/crn_threaded_resampler.cpp b/crnlib/crn_threaded_resampler.cpp index 5257468..bc5fa81 100644 --- a/crnlib/crn_threaded_resampler.cpp +++ b/crnlib/crn_threaded_resampler.cpp @@ -8,9 +8,9 @@ namespace crnlib { threaded_resampler::threaded_resampler(task_pool& tp) : m_pTask_pool(&tp), - m_pParams(NULL), - m_pX_contribs(NULL), - m_pY_contribs(NULL), + m_pParams(nullptr), + m_pX_contribs(nullptr), + m_pY_contribs(nullptr), m_bytes_per_pixel(0) { } @@ -21,18 +21,18 @@ threaded_resampler::~threaded_resampler() { void threaded_resampler::free_contrib_lists() { if (m_pX_contribs) { crnlib_free(m_pX_contribs->p); - m_pX_contribs->p = NULL; + m_pX_contribs->p = nullptr; crnlib_free(m_pX_contribs); - m_pX_contribs = NULL; + m_pX_contribs = nullptr; } if (m_pY_contribs) { crnlib_free(m_pY_contribs->p); - m_pY_contribs->p = NULL; + m_pY_contribs->p = nullptr; crnlib_free(m_pY_contribs); - m_pY_contribs = NULL; + m_pY_contribs = nullptr; 
} } @@ -266,11 +266,11 @@ bool threaded_resampler::resample(const params& p) { return false; for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, NULL); + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, nullptr); m_pTask_pool->join(); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, NULL); + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, nullptr); m_pTask_pool->join(); m_tmp_img.clear(); diff --git a/crnlib/crn_threading_null.h b/crnlib/crn_threading_null.h index f5f2fe8..730b896 100644 --- a/crnlib/crn_threading_null.h +++ b/crnlib/crn_threading_null.h @@ -127,7 +127,7 @@ namespace crnlib { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: - inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) + inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = nullptr) { initialCount, maximumCount, pName; } @@ -136,7 +136,7 @@ namespace crnlib { } - inline void release(long releaseCount = 1, long* pPreviousCount = NULL) + inline void release(long releaseCount = 1, long* pPreviousCount = nullptr) { releaseCount, pPreviousCount; } @@ -183,7 +183,7 @@ namespace crnlib // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL) + inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr) { pFunc(data, pData_ptr); return true; @@ -195,21 +195,21 @@ namespace crnlib }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
- inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL) + inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr) { pObj->execute_task(data, pData_ptr); return true; } template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL) + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr) { (pObject->*pObject_method)(data, pData_ptr); return true; } template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL) + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr) { for (uint i = 0; i < num_tasks; i++) { diff --git a/crnlib/crn_threading_pthreads.cpp b/crnlib/crn_threading_pthreads.cpp index 5dc4325..7be12e3 100644 --- a/crnlib/crn_threading_pthreads.cpp +++ b/crnlib/crn_threading_pthreads.cpp @@ -61,7 +61,7 @@ void crn_sleep(unsigned int milliseconds) { mutex::mutex(unsigned int spin_count) { spin_count; - if (pthread_mutex_init(&m_mutex, NULL)) + if (pthread_mutex_init(&m_mutex, nullptr)) crnlib_fail("mutex::mutex: pthread_mutex_init() failed", __FILE__, __LINE__); #ifdef CRNLIB_BUILD_DEBUG @@ -289,7 +289,7 @@ bool task_pool::init(uint num_threads) { m_num_threads = 0; while (m_num_threads < num_threads) { - int status = pthread_create(&m_threads[m_num_threads], NULL, thread_func, this); + int status = pthread_create(&m_threads[m_num_threads], nullptr, thread_func, this); if (status) { succeeded = false; break; @@ -315,7 +315,7 @@ void task_pool::deinit() { m_tasks_available.release(m_num_threads); for (uint i = 0; i < m_num_threads; i++) - pthread_join(m_threads[i], NULL); + pthread_join(m_threads[i], nullptr); m_num_threads = 0; @@ -413,7 +413,7 @@ void* task_pool::thread_func(void* pContext) { } } - return NULL; + return nullptr; } } // namespace 
crnlib diff --git a/crnlib/crn_threading_pthreads.h b/crnlib/crn_threading_pthreads.h index 64ff2d4..b0cfdeb 100644 --- a/crnlib/crn_threading_pthreads.h +++ b/crnlib/crn_threading_pthreads.h @@ -71,7 +71,7 @@ class CRN_EXPORT semaphore { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: - semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL); + semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = nullptr); ~semaphore(); void release(long releaseCount = 1); @@ -173,7 +173,7 @@ class CRN_EXPORT task_pool { // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); class executable_task { public: @@ -181,20 +181,20 @@ class CRN_EXPORT task_pool { }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
- bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); + bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); void join(); private: struct task { inline task() - : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) {} + : m_data(0), m_pData_ptr(nullptr), m_pObj(nullptr), m_flags(0) {} uint64 m_data; void* m_pData_ptr; @@ -240,8 +240,8 @@ template class object_task : public task_pool::executable_task { public: object_task(uint flags = cObjectTaskFlagDefault) - : m_pObject(NULL), - m_pMethod(NULL), + : m_pObject(nullptr), + m_pMethod(nullptr), m_flags(flags) { } diff --git a/crnlib/crn_threading_win32.cpp b/crnlib/crn_threading_win32.cpp index 5dbb665..3957ef0 100644 --- a/crnlib/crn_threading_win32.cpp +++ b/crnlib/crn_threading_win32.cpp @@ -154,8 +154,8 @@ void spinlock::unlock() { } semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName) { - m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName); - if (NULL == m_handle) { + m_handle = CreateSemaphoreA(nullptr, initialCount, maximumCount, pName); + if (nullptr == m_handle) { CRNLIB_FAIL("semaphore: CreateSemaphore() failed"); } } @@ -163,7 +163,7 @@ semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName) semaphore::~semaphore() { if (m_handle) { CloseHandle(m_handle); - m_handle = NULL; + m_handle = nullptr; } } @@ -229,7 +229,7 @@ bool task_pool::init(uint num_threads) { m_num_threads = 0; while 
(m_num_threads < num_threads) { - m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL); + m_threads[m_num_threads] = (HANDLE)_beginthreadex(nullptr, 32768, thread_func, this, 0, nullptr); CRNLIB_ASSERT(m_threads[m_num_threads] != 0); if (!m_threads[m_num_threads]) { @@ -268,7 +268,7 @@ void task_pool::deinit() { } CloseHandle(m_threads[i]); - m_threads[i] = NULL; + m_threads[i] = nullptr; } } diff --git a/crnlib/crn_threading_win32.h b/crnlib/crn_threading_win32.h index ceca614..2b4b4f2 100644 --- a/crnlib/crn_threading_win32.h +++ b/crnlib/crn_threading_win32.h @@ -90,14 +90,14 @@ class CRN_EXPORT semaphore { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: - semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = NULL); + semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = nullptr); ~semaphore(); inline HANDLE get_handle(void) const { return m_handle; } - void release(int32 releaseCount = 1, int32* pPreviousCount = NULL); - bool try_release(int32 releaseCount = 1, int32* pPreviousCount = NULL); + void release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); + bool try_release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); bool wait(uint32 milliseconds = cUINT32_MAX); @@ -194,7 +194,7 @@ class tsstack { bool m_use_freelist; inline node* alloc_node() { - node* pNode = m_use_freelist ? (node*)InterlockedPopEntrySList(&m_freelist_head) : NULL; + node* pNode = m_use_freelist ? 
(node*)InterlockedPopEntrySList(&m_freelist_head) : nullptr; if (!pNode) pNode = (node*)crnlib_malloc(sizeof(node)); @@ -228,7 +228,7 @@ class CRN_EXPORT task_pool { // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); class executable_task { public: @@ -236,13 +236,13 @@ class CRN_EXPORT task_pool { }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! - bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); + bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); // Waits for all outstanding tasks (if any) to complete. // The calling thread will steal any outstanding tasks from worker threads, if possible. 
@@ -250,7 +250,7 @@ class CRN_EXPORT task_pool { private: struct task { - //inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { } + //inline task() : m_data(0), m_pData_ptr(nullptr), m_pObj(nullptr), m_flags(0) { } uint64 m_data; void* m_pData_ptr; @@ -297,8 +297,8 @@ template class object_task : public task_pool::executable_task { public: object_task(uint flags = cObjectTaskFlagDefault) - : m_pObject(NULL), - m_pMethod(NULL), + : m_pObject(nullptr), + m_pMethod(nullptr), m_flags(flags) { } diff --git a/crnlib/crn_timer.cpp b/crnlib/crn_timer.cpp index 5b64a4d..0941828 100644 --- a/crnlib/crn_timer.cpp +++ b/crnlib/crn_timer.cpp @@ -32,7 +32,7 @@ namespace crnlib inline void query_counter(timer_ticks* pTicks) { struct timeval cur_time; - gettimeofday(&cur_time, NULL); + gettimeofday(&cur_time, nullptr); *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); } inline void query_counter_frequency(timer_ticks* pTicks) diff --git a/crnlib/crn_value.cpp b/crnlib/crn_value.cpp index 92c21ce..fb1aaea 100644 --- a/crnlib/crn_value.cpp +++ b/crnlib/crn_value.cpp @@ -16,6 +16,6 @@ namespace crnlib "vec3f", "vec3i", - NULL, + nullptr, }; } // namespace crnlib diff --git a/crnlib/crn_value.h b/crnlib/crn_value.h index 189c645..dc8a3c2 100644 --- a/crnlib/crn_value.h +++ b/crnlib/crn_value.h @@ -236,7 +236,7 @@ namespace crnlib } } - if (strchr(p, ',') != NULL) + if (strchr(p, ',') != nullptr) { float fx = 0, fy = 0, fz = 0; #ifdef _MSC_VER @@ -1049,21 +1049,21 @@ namespace crnlib if (m_type == cDTVec3F) { crnlib_delete(m_pVec3F); - m_pVec3F = NULL; + m_pVec3F = nullptr; m_type = cDTInvalid; } else if (m_type == cDTVec3I) { crnlib_delete(m_pVec3I); - m_pVec3I = NULL; + m_pVec3I = nullptr; m_type = cDTInvalid; } else if (m_type == cDTString) { crnlib_delete(m_pStr); - m_pStr = NULL; + m_pStr = nullptr; m_type = cDTInvalid; } diff --git a/crnlib/crn_vec.h b/crnlib/crn_vec.h index 05d59b9..088c667 100644 --- a/crnlib/crn_vec.h +++ 
b/crnlib/crn_vec.h @@ -650,7 +650,7 @@ namespace crnlib return result; } - inline double normalize(const vec* pDefaultVec = NULL) { + inline double normalize(const vec* pDefaultVec = nullptr) { double n = m_s[0] * m_s[0]; for (uint i = 1; i < N; i++) { @@ -668,7 +668,7 @@ namespace crnlib return n; } - inline double normalize3(const vec* pDefaultVec = NULL) + inline double normalize3(const vec* pDefaultVec = nullptr) { CRNLIB_ASSUME(N >= 3); @@ -685,26 +685,26 @@ namespace crnlib return n; } - inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + inline vec& normalize_in_place(const vec* pDefaultVec = nullptr) { normalize(pDefaultVec); return *this; } - inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + inline vec& normalize3_in_place(const vec* pDefaultVec = nullptr) { normalize3(pDefaultVec); return *this; } - inline vec get_normalized(const vec* pDefaultVec = NULL) const + inline vec get_normalized(const vec* pDefaultVec = nullptr) const { vec result(*this); result.normalize(pDefaultVec); return result; } - inline vec get_normalized3(const vec* pDefaultVec = NULL) const + inline vec get_normalized3(const vec* pDefaultVec = nullptr) const { vec result(*this); result.normalize3(pDefaultVec); diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h index 4ff0e78..9de2336 100644 --- a/crnlib/crn_vector.h +++ b/crnlib/crn_vector.h @@ -30,14 +30,14 @@ namespace crnlib typedef const T* const_pointer; inline vector(): - m_p(NULL), + m_p(nullptr), m_size(0), m_capacity(0) { } inline vector(uint n, const T& init): - m_p(NULL), + m_p(nullptr), m_size(0), m_capacity(0) { @@ -47,7 +47,7 @@ namespace crnlib } inline vector(const vector& other): - m_p(NULL), + m_p(nullptr), m_size(0), m_capacity(0) { @@ -71,7 +71,7 @@ namespace crnlib } inline explicit vector(uint size): - m_p(NULL), + m_p(nullptr), m_size(0), m_capacity(0) { @@ -221,7 +221,7 @@ namespace crnlib { scalar_type::destruct_array(m_p, m_size); crnlib_free(m_p); - m_p = NULL; + m_p = nullptr; 
m_size = 0; m_capacity = 0; } @@ -232,7 +232,7 @@ namespace crnlib if (m_p) { crnlib_free(m_p); - m_p = NULL; + m_p = nullptr; m_size = 0; m_capacity = 0; } @@ -335,7 +335,7 @@ namespace crnlib uint cur_size = m_size; if (!try_resize(cur_size + i, true)) { - return NULL; + return nullptr; } return get_ptr() + cur_size; } @@ -770,7 +770,7 @@ namespace crnlib inline void* assume_ownership() { T* p = m_p; - m_p = NULL; + m_p = nullptr; m_size = 0; m_capacity = 0; return p; @@ -849,7 +849,7 @@ namespace crnlib { return reinterpret_cast(this)->increase_capacity( min_new_capacity, grow_hint, sizeof(T), - (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); + (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? nullptr : object_mover, nofail); } }; diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp index d7d2593..0a6f3bc 100644 --- a/crnlib/crnlib.cpp +++ b/crnlib/crnlib.cpp @@ -34,7 +34,7 @@ namespace crnlib crnlib_enable_fail_exceptions(true); // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. 
- crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); + crnd::crnd_set_memory_callbacks(realloc_func, msize_func, nullptr); ryg_dxt::sInitDXT(); @@ -206,13 +206,13 @@ void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_si if (!comp_params.check()) { - return NULL; + return nullptr; } crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) { - return NULL; + return nullptr; } compressed_size = crn_file_data.size(); @@ -233,13 +233,13 @@ void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& if ((!comp_params.check()) || (!mip_params.check())) { - return NULL; + return nullptr; } crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) { - return NULL; + return nullptr; } compressed_size = crn_file_data.size(); @@ -252,7 +252,7 @@ void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_siz if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) { file_size = 0; - return NULL; + return nullptr; } file_size = 0; @@ -262,7 +262,7 @@ void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_siz data_stream_serializer serializer(dds_file_data); if (!tex.write_dds(serializer)) { - return NULL; + return nullptr; } dds_file_data.reserve(0); @@ -381,7 +381,7 @@ crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params if (!pComp->init(params)) { crnlib_delete(pComp); - return NULL; + return nullptr; } return pComp; } diff --git a/crunch/corpus_test.cpp b/crunch/corpus_test.cpp index 49f5660..d3304db 100644 --- a/crunch/corpus_test.cpp +++ b/crunch/corpus_test.cpp @@ -64,7 +64,7 @@ namespace crn indices[0].resize(worse_blocks.size()); indices[1].resize(worse_blocks.size()); - uint* pSorted_indices = NULL; + uint* pSorted_indices = nullptr; if (worse_blocks.size()) { 
pSorted_indices = indirect_radix_sort(worse_blocks.size(), &indices[0][0], &indices[1][0], &delta_psnr[0], 0, sizeof(float), true); diff --git a/crunch/crunch.cpp b/crunch/crunch.cpp index 32ac164..73cdfe2 100644 --- a/crunch/crunch.cpp +++ b/crunch/crunch.cpp @@ -347,7 +347,7 @@ class crunch if (m_log_stream.is_opened()) { - console::set_log_stream(NULL); + console::set_log_stream(nullptr); m_log_stream.close(); } @@ -624,7 +624,7 @@ class crunch if ((!compare_mode) && (!info_mode)) { dynamic_string out_drive, out_path; - file_utils::split_path(out_filename.get_ptr(), &out_drive, &out_path, NULL, NULL); + file_utils::split_path(out_filename.get_ptr(), &out_drive, &out_path, nullptr, nullptr); out_drive += out_path; file_utils::create_path(out_drive.get_ptr()); } @@ -1181,9 +1181,9 @@ class crunch void print_stats(texture_conversion::convert_stats& stats, bool force_image_stats = false) { dynamic_string csv_filename; - const char* pCSVStatsFilename = m_params.get_value_as_string("csvfile", 0, csv_filename) ? csv_filename.get_ptr() : NULL; + const char* pCSVStatsFilename = m_params.get_value_as_string("csvfile", 0, csv_filename) ? csv_filename.get_ptr() : nullptr; - bool image_stats = force_image_stats || m_params.get_value_as_bool("imagestats") || m_params.get_value_as_bool("mipstats") || (pCSVStatsFilename != NULL); + bool image_stats = force_image_stats || m_params.get_value_as_bool("imagestats") || m_params.get_value_as_bool("mipstats") || (pCSVStatsFilename != nullptr); bool mip_stats = m_params.get_value_as_bool("mipstats"); bool grayscale_sampling = m_params.get_value_as_bool("grayscalesampling"); if (!stats.print(image_stats, mip_stats, grayscale_sampling, pCSVStatsFilename)) diff --git a/examples/example1/example1.cpp b/examples/example1/example1.cpp index 8cc3bc6..9b166f4 100644 --- a/examples/example1/example1.cpp +++ b/examples/example1/example1.cpp @@ -80,10 +80,10 @@ static int error(const char* pMsg, ...) 
{ static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { size = 0; - FILE* pFile = NULL; + FILE* pFile = nullptr; fopen_s(&pFile, pFilename, "rb"); if (!pFile) - return NULL; + return nullptr; fseek(pFile, 0, SEEK_END); size = ftell(pFile); @@ -94,7 +94,7 @@ static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) fclose(pFile); free(pSrc_file_data); size = 0; - return NULL; + return nullptr; } fclose(pFile); @@ -148,8 +148,8 @@ static bool print_dds_info(const void* pData, crn_uint32 data_size) { const char* pDDSDFlagNames[] = { "DDSD_CAPS", "DDSD_HEIGHT", "DDSD_WIDTH", "DDSD_PITCH", - NULL, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", - NULL, NULL, NULL, "DDSD_LPSURFACE", + nullptr, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", + nullptr, nullptr, nullptr, "DDSD_LPSURFACE", "DDSD_PIXELFORMAT", "DDSD_CKDESTOVERLAY", "DDSD_CKDESTBLT", "DDSD_CKSRCOVERLAY", "DDSD_CKSRCBLT", "DDSD_MIPMAPCOUNT", "DDSD_REFRESHRATE", "DDSD_LINEARSIZE", "DDSD_TEXTURESTAGE", "DDSD_FVF", "DDSD_SRCVBHANDLE", "DDSD_DEPTH"}; @@ -207,11 +207,11 @@ static bool print_dds_info(const void* pData, crn_uint32 data_size) { printf("ddsCaps.dwCaps2: 0x%08X ", desc.ddsCaps.dwCaps2); const char* pDDCAPS2FlagNames[] = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", "DDSCAPS2_CUBEMAP_POSITIVEY", "DDSCAPS2_CUBEMAP_NEGATIVEY", "DDSCAPS2_CUBEMAP_POSITIVEZ", "DDSCAPS2_CUBEMAP_NEGATIVEZ", - NULL, NULL, NULL, NULL, - NULL, "DDSCAPS2_VOLUME"}; + nullptr, nullptr, nullptr, nullptr, + nullptr, "DDSCAPS2_VOLUME"}; for (int i = 0; i < sizeof(pDDCAPS2FlagNames) / sizeof(pDDCAPS2FlagNames[0]); i++) if ((pDDCAPS2FlagNames[i]) && (desc.ddsCaps.dwCaps2 & (1 << i))) 
printf("%s ", pDDCAPS2FlagNames[i]); @@ -353,7 +353,7 @@ int main(int argc, char* argv[]) { // If the user has explicitly specified an output file, check the output file's extension to ensure we write the expected format. if (out_filename[0]) { char out_fname_buf[_MAX_FNAME], out_ext_buf[_MAX_EXT]; - _splitpath_s(out_filename, NULL, 0, NULL, 0, out_fname_buf, _MAX_FNAME, out_ext_buf, _MAX_EXT); + _splitpath_s(out_filename, nullptr, 0, nullptr, 0, out_fname_buf, _MAX_FNAME, out_ext_buf, _MAX_EXT); if (!_stricmp(out_ext_buf, ".crn")) output_crn = true; else if (!_stricmp(out_ext_buf, ".dds")) diff --git a/examples/example2/example2.cpp b/examples/example2/example2.cpp index 83d46ae..50d887b 100644 --- a/examples/example2/example2.cpp +++ b/examples/example2/example2.cpp @@ -45,10 +45,10 @@ static int error(const char* pMsg, ...) { static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { size = 0; - FILE* pFile = NULL; + FILE* pFile = nullptr; fopen_s(&pFile, pFilename, "rb"); if (!pFile) - return NULL; + return nullptr; fseek(pFile, 0, SEEK_END); size = ftell(pFile); @@ -59,7 +59,7 @@ static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) fclose(pFile); free(pSrc_file_data); size = 0; - return NULL; + return nullptr; } fclose(pFile); diff --git a/examples/example2/timer.cpp b/examples/example2/timer.cpp index 0a1a592..81d187c 100644 --- a/examples/example2/timer.cpp +++ b/examples/example2/timer.cpp @@ -28,7 +28,7 @@ inline void query_counter_frequency(timer_ticks* pTicks) { #include inline void query_counter(timer_ticks* pTicks) { struct timeval cur_time; - gettimeofday(&cur_time, NULL); + gettimeofday(&cur_time, nullptr); *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); } inline void query_counter_frequency(timer_ticks* pTicks) { diff --git a/examples/example3/example3.cpp b/examples/example3/example3.cpp index 864a08c..5e0fe60 100644 --- a/examples/example3/example3.cpp +++ 
b/examples/example3/example3.cpp @@ -217,7 +217,7 @@ int main(int argc, char* argv[]) { // Free the block compressor. crn_free_block_compressor(pContext); - pContext = NULL; + pContext = nullptr; // Now create the DDS file. char dst_filename[FILENAME_MAX]; From 4e2598cf88ac7eb74268021008c32eeaed458d3f Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 21 Feb 2021 15:03:44 -0500 Subject: [PATCH 14/18] Format some files --- .clang-format | 88 + .clang-tidy | 1 + 3rdparty/lzma/Linux/7zBuf.cpp | 66 +- 3rdparty/lzma/Linux/7zBuf.h | 68 +- 3rdparty/lzma/Linux/7zBuf2.cpp | 86 +- 3rdparty/lzma/Linux/7zCrc.cpp | 66 +- 3rdparty/lzma/Linux/7zCrc.h | 54 +- 3rdparty/lzma/Linux/7zFile.cpp | 514 +- 3rdparty/lzma/Linux/7zFile.h | 146 +- 3rdparty/lzma/Linux/7zStream.cpp | 296 +- 3rdparty/lzma/Linux/7zVersion.h | 14 +- 3rdparty/lzma/Linux/Alloc.cpp | 242 +- 3rdparty/lzma/Linux/Alloc.h | 70 +- 3rdparty/lzma/Linux/Bcj2.cpp | 310 +- 3rdparty/lzma/Linux/Bcj2.h | 66 +- 3rdparty/lzma/Linux/Bra.cpp | 246 +- 3rdparty/lzma/Linux/Bra.h | 128 +- 3rdparty/lzma/Linux/Bra86.cpp | 156 +- 3rdparty/lzma/Linux/BraIA64.cpp | 128 +- 3rdparty/lzma/Linux/CpuArch.h | 144 +- 3rdparty/lzma/Linux/LzFind.cpp | 1364 +++--- 3rdparty/lzma/Linux/LzFind.h | 216 +- 3rdparty/lzma/Linux/LzFindMt.cpp | 1510 +++--- 3rdparty/lzma/Linux/LzFindMt.h | 196 +- 3rdparty/lzma/Linux/LzHash.h | 126 +- 3rdparty/lzma/Linux/LzmaDec.cpp | 1892 ++++---- 3rdparty/lzma/Linux/LzmaDec.h | 442 +- 3rdparty/lzma/Linux/LzmaEnc.cpp | 4168 ++++++++-------- 3rdparty/lzma/Linux/LzmaEnc.h | 146 +- 3rdparty/lzma/Linux/LzmaLib.cpp | 102 +- 3rdparty/lzma/Linux/LzmaLib.h | 290 +- 3rdparty/lzma/Linux/LzmaTypes.h | 428 +- 3rdparty/lzma/Linux/Threads.cpp | 244 +- 3rdparty/lzma/Linux/Threads.h | 130 +- 3rdparty/miniz/LICENSE | 44 +- crnlib/crn_arealist.cpp | 66 +- crnlib/crn_arealist.h | 28 +- crnlib/crn_assert.cpp | 24 +- crnlib/crn_assert.h | 84 +- crnlib/crn_atomics.h | 338 +- crnlib/crn_buffer_stream.h | 52 +- crnlib/crn_cfile_stream.h | 36 +- 
crnlib/crn_checksum.cpp | 26 +- crnlib/crn_checksum.h | 26 +- crnlib/crn_clusterizer.h | 46 +- crnlib/crn_color.h | 107 +- crnlib/crn_colorized_console.cpp | 26 +- crnlib/crn_colorized_console.h | 27 +- crnlib/crn_command_line_params.cpp | 26 +- crnlib/crn_command_line_params.h | 33 +- crnlib/crn_comp.cpp | 2750 ++++++----- crnlib/crn_comp.h | 30 +- crnlib/crn_console.cpp | 26 +- crnlib/crn_console.h | 92 +- crnlib/crn_core.cpp | 25 +- crnlib/crn_core.h | 46 +- crnlib/crn_darwin_pthreads.cpp | 23 + crnlib/crn_darwin_pthreads.h | 23 + crnlib/crn_data_stream.cpp | 30 +- crnlib/crn_data_stream.h | 27 +- crnlib/crn_data_stream_serializer.h | 64 +- crnlib/crn_dds_comp.cpp | 31 +- crnlib/crn_dds_comp.h | 27 +- crnlib/crn_decomp.cpp | 25 +- crnlib/crn_dxt.cpp | 34 +- crnlib/crn_dxt.h | 61 +- crnlib/crn_dxt1.cpp | 3740 +++++++------- crnlib/crn_dxt1.h | 54 +- crnlib/crn_dxt5a.cpp | 34 +- crnlib/crn_dxt5a.h | 31 +- crnlib/crn_dxt_endpoint_refiner.cpp | 44 +- crnlib/crn_dxt_endpoint_refiner.h | 31 +- crnlib/crn_dxt_fast.cpp | 54 +- crnlib/crn_dxt_fast.h | 28 +- crnlib/crn_dxt_hc.cpp | 2987 +++++++----- crnlib/crn_dxt_hc.h | 57 +- crnlib/crn_dxt_hc_common.cpp | 66 +- crnlib/crn_dxt_hc_common.h | 26 +- crnlib/crn_dxt_image.cpp | 3221 ++++++------ crnlib/crn_dxt_image.h | 534 +- crnlib/crn_dynamic_stream.h | 35 +- crnlib/crn_dynamic_string.cpp | 45 +- crnlib/crn_dynamic_string.h | 45 +- crnlib/crn_etc.cpp | 3331 +++++++------ crnlib/crn_etc.h | 1177 ++--- crnlib/crn_file_utils.cpp | 30 +- crnlib/crn_file_utils.h | 29 +- crnlib/crn_find_files.cpp | 33 +- crnlib/crn_find_files.h | 33 +- crnlib/crn_freeimage_image_utils.h | 25 +- crnlib/crn_hash.cpp | 26 +- crnlib/crn_hash.h | 26 +- crnlib/crn_hash_map.cpp | 30 +- crnlib/crn_hash_map.h | 1640 ++++--- crnlib/crn_helpers.h | 77 +- crnlib/crn_huffman_codes.cpp | 37 +- crnlib/crn_huffman_codes.h | 26 +- crnlib/crn_image.h | 1394 +++--- crnlib/crn_image_utils.cpp | 2692 ++++++----- crnlib/crn_image_utils.h | 39 +- 
crnlib/crn_intersect.h | 34 +- crnlib/crn_ktx_texture.cpp | 1713 ++++--- crnlib/crn_ktx_texture.h | 689 +-- crnlib/crn_lzma_codec.cpp | 32 +- crnlib/crn_lzma_codec.h | 51 +- crnlib/crn_math.cpp | 34 +- crnlib/crn_math.h | 64 +- crnlib/crn_matrix.h | 56 +- crnlib/crn_mem.cpp | 471 +- crnlib/crn_mem.h | 113 +- crnlib/crn_mipmapped_texture.cpp | 25 +- crnlib/crn_mipmapped_texture.h | 26 +- crnlib/crn_packed_uint.h | 30 +- crnlib/crn_pixel_format.cpp | 25 +- crnlib/crn_pixel_format.h | 26 +- crnlib/crn_platform.cpp | 24 +- crnlib/crn_platform.h | 25 +- crnlib/crn_prefix_coding.cpp | 24 +- crnlib/crn_prefix_coding.h | 24 +- crnlib/crn_qdxt1.cpp | 1455 +++--- crnlib/crn_qdxt1.h | 362 +- crnlib/crn_qdxt5.cpp | 1314 ++--- crnlib/crn_qdxt5.h | 356 +- crnlib/crn_radix_sort.h | 34 +- crnlib/crn_rand.cpp | 10 +- crnlib/crn_rand.h | 31 +- crnlib/crn_ray.h | 30 +- crnlib/crn_rect.h | 415 +- crnlib/crn_resample_filters.cpp | 50 +- crnlib/crn_resample_filters.h | 4 +- crnlib/crn_resampler.cpp | 1333 ++--- crnlib/crn_resampler.h | 293 +- crnlib/crn_sparse_array.h | 703 +-- crnlib/crn_sparse_bit_array.cpp | 905 ++-- crnlib/crn_sparse_bit_array.h | 325 +- crnlib/crn_strutils.cpp | 38 +- crnlib/crn_strutils.h | 54 +- crnlib/crn_symbol_codec.cpp | 3356 +++++++------ crnlib/crn_symbol_codec.h | 908 ++-- crnlib/crn_texture_comp.cpp | 35 +- crnlib/crn_texture_comp.h | 36 +- crnlib/crn_texture_conversion.cpp | 1461 +++--- crnlib/crn_texture_conversion.h | 228 +- crnlib/crn_texture_file_types.cpp | 38 +- crnlib/crn_texture_file_types.h | 27 +- crnlib/crn_threaded_clusterizer.h | 712 +-- crnlib/crn_threaded_resampler.cpp | 584 ++- crnlib/crn_threaded_resampler.h | 142 +- crnlib/crn_threading.h | 24 +- crnlib/crn_threading_null.h | 24 +- crnlib/crn_threading_pthreads.cpp | 718 +-- crnlib/crn_threading_pthreads.h | 592 ++- crnlib/crn_threading_win32.cpp | 763 +-- crnlib/crn_threading_win32.h | 755 +-- crnlib/crn_timer.cpp | 28 +- crnlib/crn_timer.h | 24 +- crnlib/crn_traits.h | 395 +- 
crnlib/crn_tree_clusterizer.h | 24 +- crnlib/crn_types.h | 25 +- crnlib/crn_utils.cpp | 30 +- crnlib/crn_utils.h | 25 +- crnlib/crn_value.cpp | 28 +- crnlib/crn_value.h | 24 +- crnlib/crn_vec.h | 29 +- crnlib/crn_vec_interval.h | 24 +- crnlib/crn_vector.cpp | 30 +- crnlib/crn_vector.h | 27 +- crnlib/crn_vector2d.h | 24 +- crnlib/crn_version.cpp | 24 +- crnlib/crn_winhdr.h | 23 + crnlib/crnlib.cpp | 920 ++-- crunch/corpus_gen.cpp | 36 +- crunch/corpus_gen.h | 25 +- crunch/corpus_test.cpp | 27 +- crunch/corpus_test.h | 25 +- crunch/crunch.cpp | 60 +- emscripten/crunch_lib.cpp | 44 +- examples/example1/example1.cpp | 996 ++-- examples/example2/example2.cpp | 468 +- examples/example2/timer.cpp | 217 +- examples/example2/timer.h | 100 +- examples/example3/example3.cpp | 469 +- inc/crn_decomp.h | 6981 +++++++++++++++------------ inc/crn_defs.h | 554 ++- inc/crnlib.h | 240 +- inc/dds_defs.h | 43 +- 186 files changed, 41655 insertions(+), 32058 deletions(-) create mode 100644 .clang-format create mode 100644 .clang-tidy diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..bb2440b --- /dev/null +++ b/.clang-format @@ -0,0 +1,88 @@ +--- +IndentWidth: 4 +ColumnLimit: 0 + +--- +Language: Cpp + +AccessModifierOffset: -4 + +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: DontAlign +AlignOperands: false +AlignTrailingComments: false + +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine : false +AllowShortLoopsOnASingleLine: false + +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true + +BinPackArguments: true +BinPackParameters: true + +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true 
+ AfterStruct: true + AfterUnion: true + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true + +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true + +CompactNamespaces: false +FixNamespaceComments: false +NamespaceIndentation: All + +BreakConstructorInitializers: AfterColon +ConstructorInitializerIndentWidth: 4 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +Cpp11BracedListStyle: false + +IncludeBlocks: Preserve + +IndentCaseLabels: true +IndentWrappedFunctionNames: true + +KeepEmptyLinesAtTheStartOfBlocks: false +MaxEmptyLinesToKeep: 1 + +PointerAlignment: Left + +ReflowComments: true + +SortIncludes: false +SortUsingDeclarations: true + +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +# SpaceBeforeInheritanceColon: false +# SpaceBeforeCtorInitializerColon: false +# SpaceBeforeRangeBasedForLoopColon: true + +IndentCaseBlocks: false +IndentCaseLabels: false diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..0b6893b --- /dev/null +++ b/.clang-tidy @@ -0,0 +1 @@ +Checks: '-*,readability-braces-around-statements' diff --git a/3rdparty/lzma/Linux/7zBuf.cpp b/3rdparty/lzma/Linux/7zBuf.cpp index d136e38..3b1c667 100644 --- a/3rdparty/lzma/Linux/7zBuf.cpp +++ b/3rdparty/lzma/Linux/7zBuf.cpp @@ -1,34 +1,34 @@ -/* 7zBuf.c -- Byte Buffer -2008-03-28 -Igor Pavlov -Public domain */ - -#include "7zBuf.h" - -namespace crnlib { - -void Buf_Init(CBuf* p) { - p->data = 0; - p->size = 0; -} - -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc) { - p->size = 0; - if (size == 0) { - p->data = 0; - return 1; - } - p->data = (Byte*)alloc->Alloc(alloc, size); - if (p->data != 0) { - p->size = size; - return 1; - } - 
return 0; -} - -void Buf_Free(CBuf* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->data); - p->data = 0; - p->size = 0; -} +/* 7zBuf.c -- Byte Buffer +2008-03-28 +Igor Pavlov +Public domain */ + +#include "7zBuf.h" + +namespace crnlib { + +void Buf_Init(CBuf* p) { + p->data = 0; + p->size = 0; +} + +int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc) { + p->size = 0; + if (size == 0) { + p->data = 0; + return 1; + } + p->data = (Byte*)alloc->Alloc(alloc, size); + if (p->data != 0) { + p->size = size; + return 1; + } + return 0; +} + +void Buf_Free(CBuf* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->data); + p->data = 0; + p->size = 0; +} } diff --git a/3rdparty/lzma/Linux/7zBuf.h b/3rdparty/lzma/Linux/7zBuf.h index 8f51df7..eb44051 100644 --- a/3rdparty/lzma/Linux/7zBuf.h +++ b/3rdparty/lzma/Linux/7zBuf.h @@ -1,34 +1,34 @@ -/* 7zBuf.h -- Byte Buffer -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __7Z_BUF_H -#define __7Z_BUF_H - -#include "LzmaTypes.h" - -namespace crnlib { - -typedef struct -{ - Byte* data; - size_t size; -} CBuf; - -void Buf_Init(CBuf* p); -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc); -void Buf_Free(CBuf* p, ISzAlloc* alloc); - -typedef struct -{ - Byte* data; - size_t size; - size_t pos; -} CDynBuf; - -void DynBuf_Construct(CDynBuf* p); -void DynBuf_SeekToBeg(CDynBuf* p); -int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc); -void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc); -} - -#endif +/* 7zBuf.h -- Byte Buffer +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __7Z_BUF_H +#define __7Z_BUF_H + +#include "LzmaTypes.h" + +namespace crnlib { + +typedef struct +{ + Byte* data; + size_t size; +} CBuf; + +void Buf_Init(CBuf* p); +int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc); +void Buf_Free(CBuf* p, ISzAlloc* alloc); + +typedef struct +{ + Byte* data; + size_t size; + size_t pos; +} CDynBuf; + +void DynBuf_Construct(CDynBuf* p); +void DynBuf_SeekToBeg(CDynBuf* p); +int DynBuf_Write(CDynBuf* p, 
const Byte* buf, size_t size, ISzAlloc* alloc); +void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc); +} + +#endif diff --git a/3rdparty/lzma/Linux/7zBuf2.cpp b/3rdparty/lzma/Linux/7zBuf2.cpp index e6b71a8..146f13c 100644 --- a/3rdparty/lzma/Linux/7zBuf2.cpp +++ b/3rdparty/lzma/Linux/7zBuf2.cpp @@ -1,44 +1,44 @@ -/* 7zBuf2.c -- Byte Buffer -2008-10-04 : Igor Pavlov : Public domain */ - +/* 7zBuf2.c -- Byte Buffer +2008-10-04 : Igor Pavlov : Public domain */ + #include - -#include "7zBuf.h" - -namespace crnlib { - -void DynBuf_Construct(CDynBuf* p) { - p->data = 0; - p->size = 0; - p->pos = 0; -} - -void DynBuf_SeekToBeg(CDynBuf* p) { - p->pos = 0; -} - -int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc) { - if (size > p->size - p->pos) { - size_t newSize = p->pos + size; - Byte* data; - newSize += newSize / 4; - data = (Byte*)alloc->Alloc(alloc, newSize); - if (data == 0) - return 0; - p->size = newSize; - memcpy(data, p->data, p->pos); - alloc->Free(alloc, p->data); - p->data = data; - } - memcpy(p->data + p->pos, buf, size); - p->pos += size; - return 1; -} - -void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->data); - p->data = 0; - p->size = 0; - p->pos = 0; -} -} + +#include "7zBuf.h" + +namespace crnlib { + +void DynBuf_Construct(CDynBuf* p) { + p->data = 0; + p->size = 0; + p->pos = 0; +} + +void DynBuf_SeekToBeg(CDynBuf* p) { + p->pos = 0; +} + +int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc) { + if (size > p->size - p->pos) { + size_t newSize = p->pos + size; + Byte* data; + newSize += newSize / 4; + data = (Byte*)alloc->Alloc(alloc, newSize); + if (data == 0) + return 0; + p->size = newSize; + memcpy(data, p->data, p->pos); + alloc->Free(alloc, p->data); + p->data = data; + } + memcpy(p->data + p->pos, buf, size); + p->pos += size; + return 1; +} + +void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->data); + p->data = 0; + p->size = 0; + p->pos = 0; +} +} diff --git 
a/3rdparty/lzma/Linux/7zCrc.cpp b/3rdparty/lzma/Linux/7zCrc.cpp index d5604e0..c5c1301 100644 --- a/3rdparty/lzma/Linux/7zCrc.cpp +++ b/3rdparty/lzma/Linux/7zCrc.cpp @@ -1,34 +1,34 @@ -/* 7zCrc.c -- CRC32 calculation -2008-08-05 -Igor Pavlov +/* 7zCrc.c -- CRC32 calculation +2008-08-05 +Igor Pavlov Public domain */ - -#include "7zCrc.h" - -namespace crnlib { - -#define kCrcPoly 0xEDB88320 -UInt32 g_CrcTable[256]; - -void MY_FAST_CALL CrcGenerateTable(void) { - UInt32 i; - for (i = 0; i < 256; i++) { - UInt32 r = i; - int j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - g_CrcTable[i] = r; - } -} - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void* data, size_t size) { - const Byte* p = (const Byte*)data; - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE(v, *p); - return v; -} - -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size) { - return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; -} -} + +#include "7zCrc.h" + +namespace crnlib { + +#define kCrcPoly 0xEDB88320 +UInt32 g_CrcTable[256]; + +void MY_FAST_CALL CrcGenerateTable(void) { + UInt32 i; + for (i = 0; i < 256; i++) { + UInt32 r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + g_CrcTable[i] = r; + } +} + +UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void* data, size_t size) { + const Byte* p = (const Byte*)data; + for (; size > 0; size--, p++) + v = CRC_UPDATE_BYTE(v, *p); + return v; +} + +UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size) { + return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; +} +} diff --git a/3rdparty/lzma/Linux/7zCrc.h b/3rdparty/lzma/Linux/7zCrc.h index 4c8fbd7..dd63b61 100644 --- a/3rdparty/lzma/Linux/7zCrc.h +++ b/3rdparty/lzma/Linux/7zCrc.h @@ -1,27 +1,27 @@ -/* 7zCrc.h -- CRC32 calculation -2008-03-13 -Igor Pavlov -Public domain */ - -#ifndef __7Z_CRC_H -#define __7Z_CRC_H - -#include - -#include "LzmaTypes.h" - -namespace crnlib { - -extern UInt32 g_CrcTable[]; - -void MY_FAST_CALL 
CrcGenerateTable(void); - -#define CRC_INIT_VAL 0xFFFFFFFF -#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) -#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void* data, size_t size); -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size); -} - -#endif +/* 7zCrc.h -- CRC32 calculation +2008-03-13 +Igor Pavlov +Public domain */ + +#ifndef __7Z_CRC_H +#define __7Z_CRC_H + +#include + +#include "LzmaTypes.h" + +namespace crnlib { + +extern UInt32 g_CrcTable[]; + +void MY_FAST_CALL CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void* data, size_t size); +UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size); +} + +#endif diff --git a/3rdparty/lzma/Linux/7zFile.cpp b/3rdparty/lzma/Linux/7zFile.cpp index 530f2b2..2749e8c 100644 --- a/3rdparty/lzma/Linux/7zFile.cpp +++ b/3rdparty/lzma/Linux/7zFile.cpp @@ -1,257 +1,257 @@ -/* 7zFile.c -- File IO -2008-11-22 : Igor Pavlov : Public domain */ - -#include "7zFile.h" - -#ifndef USE_WINDOWS_FILE - -#include - -#endif - -#ifdef USE_WINDOWS_FILE - -/* - ReadFile and WriteFile functions in Windows have BUG: - If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) - from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES - (Insufficient system resources exist to complete the requested service). - Probably in some version of Windows there are problems with other sizes: - for 32 MB (maybe also for 16 MB). 
- And message can be "Network connection was lost" -*/ - -#define kChunkSizeMax (1 << 22) - -#endif - -namespace crnlib { - -void File_Construct(CSzFile* p) { -#ifdef USE_WINDOWS_FILE - p->handle = INVALID_HANDLE_VALUE; -#else - p->file = NULL; -#endif -} - -static WRes File_Open(CSzFile* p, const char* name, int writeMode) { -#ifdef USE_WINDOWS_FILE - p->handle = CreateFileA(name, - writeMode ? GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, NULL, - writeMode ? CREATE_ALWAYS : OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); -#else - p->file = fopen(name, writeMode ? "wb+" : "rb"); - return (p->file != 0) ? 0 : errno; -#endif -} - -WRes InFile_Open(CSzFile* p, const char* name) { - return File_Open(p, name, 0); -} -WRes OutFile_Open(CSzFile* p, const char* name) { - return File_Open(p, name, 1); -} - -WRes File_Close(CSzFile* p) { -#ifdef USE_WINDOWS_FILE - if (p->handle != INVALID_HANDLE_VALUE) { - if (!CloseHandle(p->handle)) - return GetLastError(); - p->handle = INVALID_HANDLE_VALUE; - } -#else - if (p->file != NULL) { - int res = fclose(p->file); - if (res != 0) - return res; - p->file = NULL; - } -#endif - return 0; -} - -WRes File_Read(CSzFile* p, void* data, size_t* size) { - size_t originalSize = *size; - if (originalSize == 0) - return 0; - -#ifdef USE_WINDOWS_FILE - - *size = 0; - do { - DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); - data = (void*)((Byte*)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } while (originalSize > 0); - return 0; - -#else - - *size = fread(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - -#endif -} - -WRes File_Write(CSzFile* p, const void* data, size_t* size) { - size_t originalSize = *size; - if (originalSize == 0) - return 0; - -#ifdef USE_WINDOWS_FILE - - *size = 0; - do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); - data = (void*)((Byte*)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } while (originalSize > 0); - return 0; - -#else - - *size = fwrite(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - -#endif -} - -WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin) { -#ifdef USE_WINDOWS_FILE - - LARGE_INTEGER value; - DWORD moveMethod; - value.LowPart = (DWORD)*pos; - value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ - switch (origin) { - case SZ_SEEK_SET: - moveMethod = FILE_BEGIN; - break; - case SZ_SEEK_CUR: - moveMethod = FILE_CURRENT; - break; - case SZ_SEEK_END: - moveMethod = FILE_END; - break; - default: - return ERROR_INVALID_PARAMETER; - } - value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); - if (value.LowPart == 0xFFFFFFFF) { - WRes res = GetLastError(); - if (res != NO_ERROR) - return res; - } - *pos = ((Int64)value.HighPart << 32) | value.LowPart; - return 0; - -#else - - int moveMethod; - int res; - switch (origin) { - 
case SZ_SEEK_SET: - moveMethod = SEEK_SET; - break; - case SZ_SEEK_CUR: - moveMethod = SEEK_CUR; - break; - case SZ_SEEK_END: - moveMethod = SEEK_END; - break; - default: - return 1; - } - res = fseek(p->file, (long)*pos, moveMethod); - *pos = ftell(p->file); - return res; - -#endif -} - -WRes File_GetLength(CSzFile* p, UInt64* length) { -#ifdef USE_WINDOWS_FILE - - DWORD sizeHigh; - DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); - if (sizeLow == 0xFFFFFFFF) { - DWORD res = GetLastError(); - if (res != NO_ERROR) - return res; - } - *length = (((UInt64)sizeHigh) << 32) + sizeLow; - return 0; - -#else - - long pos = ftell(p->file); - int res = fseek(p->file, 0, SEEK_END); - *length = ftell(p->file); - fseek(p->file, pos, SEEK_SET); - return res; - -#endif -} - -/* ---------- FileSeqInStream ---------- */ - -static SRes FileSeqInStream_Read(void* pp, void* buf, size_t* size) { - CFileSeqInStream* p = (CFileSeqInStream*)pp; - return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; -} - -void FileSeqInStream_CreateVTable(CFileSeqInStream* p) { - p->s.Read = FileSeqInStream_Read; -} - -/* ---------- FileInStream ---------- */ - -static SRes FileInStream_Read(void* pp, void* buf, size_t* size) { - CFileInStream* p = (CFileInStream*)pp; - return (File_Read(&p->file, buf, size) == 0) ? 
SZ_OK : SZ_ERROR_READ; -} - -static SRes FileInStream_Seek(void* pp, Int64* pos, ESzSeek origin) { - CFileInStream* p = (CFileInStream*)pp; - return File_Seek(&p->file, pos, origin); -} - -void FileInStream_CreateVTable(CFileInStream* p) { - p->s.Read = FileInStream_Read; - p->s.Seek = FileInStream_Seek; -} - -/* ---------- FileOutStream ---------- */ - -static size_t FileOutStream_Write(void* pp, const void* data, size_t size) { - CFileOutStream* p = (CFileOutStream*)pp; - File_Write(&p->file, data, &size); - return size; -} - -void FileOutStream_CreateVTable(CFileOutStream* p) { - p->s.Write = FileOutStream_Write; -} -} +/* 7zFile.c -- File IO +2008-11-22 : Igor Pavlov : Public domain */ + +#include "7zFile.h" + +#ifndef USE_WINDOWS_FILE + +#include + +#endif + +#ifdef USE_WINDOWS_FILE + +/* + ReadFile and WriteFile functions in Windows have BUG: + If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) + from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES + (Insufficient system resources exist to complete the requested service). + Probably in some version of Windows there are problems with other sizes: + for 32 MB (maybe also for 16 MB). + And message can be "Network connection was lost" +*/ + +#define kChunkSizeMax (1 << 22) + +#endif + +namespace crnlib { + +void File_Construct(CSzFile* p) { +#ifdef USE_WINDOWS_FILE + p->handle = INVALID_HANDLE_VALUE; +#else + p->file = NULL; +#endif +} + +static WRes File_Open(CSzFile* p, const char* name, int writeMode) { +#ifdef USE_WINDOWS_FILE + p->handle = CreateFileA(name, + writeMode ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, NULL, + writeMode ? CREATE_ALWAYS : OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); +#else + p->file = fopen(name, writeMode ? "wb+" : "rb"); + return (p->file != 0) ? 
0 : errno; +#endif +} + +WRes InFile_Open(CSzFile* p, const char* name) { + return File_Open(p, name, 0); +} +WRes OutFile_Open(CSzFile* p, const char* name) { + return File_Open(p, name, 1); +} + +WRes File_Close(CSzFile* p) { +#ifdef USE_WINDOWS_FILE + if (p->handle != INVALID_HANDLE_VALUE) { + if (!CloseHandle(p->handle)) + return GetLastError(); + p->handle = INVALID_HANDLE_VALUE; + } +#else + if (p->file != NULL) { + int res = fclose(p->file); + if (res != 0) + return res; + p->file = NULL; + } +#endif + return 0; +} + +WRes File_Read(CSzFile* p, void* data, size_t* size) { + size_t originalSize = *size; + if (originalSize == 0) + return 0; + +#ifdef USE_WINDOWS_FILE + + *size = 0; + do { + DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); + data = (void*)((Byte*)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + if (processed == 0) + break; + } while (originalSize > 0); + return 0; + +#else + + *size = fread(data, 1, originalSize, p->file); + if (*size == originalSize) + return 0; + return ferror(p->file); + +#endif +} + +WRes File_Write(CSzFile* p, const void* data, size_t* size) { + size_t originalSize = *size; + if (originalSize == 0) + return 0; + +#ifdef USE_WINDOWS_FILE + + *size = 0; + do { + DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); + data = (void*)((Byte*)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + if (processed == 0) + break; + } while (originalSize > 0); + return 0; + +#else + + *size = fwrite(data, 1, originalSize, p->file); + if (*size == originalSize) + return 0; + return ferror(p->file); + +#endif +} + +WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin) { +#ifdef USE_WINDOWS_FILE + + LARGE_INTEGER value; + DWORD moveMethod; + value.LowPart = (DWORD)*pos; + value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ + switch (origin) { + case SZ_SEEK_SET: + moveMethod = FILE_BEGIN; + break; + case SZ_SEEK_CUR: + moveMethod = FILE_CURRENT; + break; + case SZ_SEEK_END: + moveMethod = FILE_END; + break; + default: + return ERROR_INVALID_PARAMETER; + } + value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); + if (value.LowPart == 0xFFFFFFFF) { + WRes res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *pos = ((Int64)value.HighPart << 32) | value.LowPart; + return 0; + +#else + + int moveMethod; + int res; + switch (origin) { + case SZ_SEEK_SET: + moveMethod = SEEK_SET; + break; + case SZ_SEEK_CUR: + moveMethod = SEEK_CUR; + break; + case SZ_SEEK_END: + moveMethod = SEEK_END; + break; + default: + return 1; + } + res = fseek(p->file, (long)*pos, moveMethod); + *pos = ftell(p->file); + return res; + +#endif +} + +WRes File_GetLength(CSzFile* p, UInt64* length) { +#ifdef USE_WINDOWS_FILE + + DWORD sizeHigh; + DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); + if (sizeLow == 0xFFFFFFFF) { + DWORD res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *length = (((UInt64)sizeHigh) << 32) + sizeLow; + return 0; + +#else + + long pos = ftell(p->file); + int res = fseek(p->file, 0, SEEK_END); + *length = 
ftell(p->file); + fseek(p->file, pos, SEEK_SET); + return res; + +#endif +} + +/* ---------- FileSeqInStream ---------- */ + +static SRes FileSeqInStream_Read(void* pp, void* buf, size_t* size) { + CFileSeqInStream* p = (CFileSeqInStream*)pp; + return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; +} + +void FileSeqInStream_CreateVTable(CFileSeqInStream* p) { + p->s.Read = FileSeqInStream_Read; +} + +/* ---------- FileInStream ---------- */ + +static SRes FileInStream_Read(void* pp, void* buf, size_t* size) { + CFileInStream* p = (CFileInStream*)pp; + return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; +} + +static SRes FileInStream_Seek(void* pp, Int64* pos, ESzSeek origin) { + CFileInStream* p = (CFileInStream*)pp; + return File_Seek(&p->file, pos, origin); +} + +void FileInStream_CreateVTable(CFileInStream* p) { + p->s.Read = FileInStream_Read; + p->s.Seek = FileInStream_Seek; +} + +/* ---------- FileOutStream ---------- */ + +static size_t FileOutStream_Write(void* pp, const void* data, size_t size) { + CFileOutStream* p = (CFileOutStream*)pp; + File_Write(&p->file, data, &size); + return size; +} + +void FileOutStream_CreateVTable(CFileOutStream* p) { + p->s.Write = FileOutStream_Write; +} +} diff --git a/3rdparty/lzma/Linux/7zFile.h b/3rdparty/lzma/Linux/7zFile.h index e505049..06cdb56 100644 --- a/3rdparty/lzma/Linux/7zFile.h +++ b/3rdparty/lzma/Linux/7zFile.h @@ -1,73 +1,73 @@ -/* 7zFile.h -- File IO -2008-11-22 : Igor Pavlov : Public domain */ - -#ifndef __7Z_FILE_H -#define __7Z_FILE_H - -#ifdef _WIN32 -#define USE_WINDOWS_FILE -#endif - -#ifdef USE_WINDOWS_FILE -#include -#else -#include -#endif - -#include "LzmaTypes.h" - -namespace crnlib { - -/* ---------- File ---------- */ - -typedef struct -{ -#ifdef USE_WINDOWS_FILE - HANDLE handle; -#else - FILE* file; -#endif -} CSzFile; - -void File_Construct(CSzFile* p); -WRes InFile_Open(CSzFile* p, const char* name); -WRes OutFile_Open(CSzFile* p, const char* name); -WRes 
File_Close(CSzFile* p); - -/* reads max(*size, remain file's size) bytes */ -WRes File_Read(CSzFile* p, void* data, size_t* size); - -/* writes *size bytes */ -WRes File_Write(CSzFile* p, const void* data, size_t* size); - -WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin); -WRes File_GetLength(CSzFile* p, UInt64* length); - -/* ---------- FileInStream ---------- */ - -typedef struct -{ - ISeqInStream s; - CSzFile file; -} CFileSeqInStream; - -void FileSeqInStream_CreateVTable(CFileSeqInStream* p); - -typedef struct -{ - ISeekInStream s; - CSzFile file; -} CFileInStream; - -void FileInStream_CreateVTable(CFileInStream* p); - -typedef struct -{ - ISeqOutStream s; - CSzFile file; -} CFileOutStream; - -void FileOutStream_CreateVTable(CFileOutStream* p); -} - -#endif +/* 7zFile.h -- File IO +2008-11-22 : Igor Pavlov : Public domain */ + +#ifndef __7Z_FILE_H +#define __7Z_FILE_H + +#ifdef _WIN32 +#define USE_WINDOWS_FILE +#endif + +#ifdef USE_WINDOWS_FILE +#include +#else +#include +#endif + +#include "LzmaTypes.h" + +namespace crnlib { + +/* ---------- File ---------- */ + +typedef struct +{ +#ifdef USE_WINDOWS_FILE + HANDLE handle; +#else + FILE* file; +#endif +} CSzFile; + +void File_Construct(CSzFile* p); +WRes InFile_Open(CSzFile* p, const char* name); +WRes OutFile_Open(CSzFile* p, const char* name); +WRes File_Close(CSzFile* p); + +/* reads max(*size, remain file's size) bytes */ +WRes File_Read(CSzFile* p, void* data, size_t* size); + +/* writes *size bytes */ +WRes File_Write(CSzFile* p, const void* data, size_t* size); + +WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin); +WRes File_GetLength(CSzFile* p, UInt64* length); + +/* ---------- FileInStream ---------- */ + +typedef struct +{ + ISeqInStream s; + CSzFile file; +} CFileSeqInStream; + +void FileSeqInStream_CreateVTable(CFileSeqInStream* p); + +typedef struct +{ + ISeekInStream s; + CSzFile file; +} CFileInStream; + +void FileInStream_CreateVTable(CFileInStream* p); + +typedef struct +{ + 
ISeqOutStream s; + CSzFile file; +} CFileOutStream; + +void FileOutStream_CreateVTable(CFileOutStream* p); +} + +#endif diff --git a/3rdparty/lzma/Linux/7zStream.cpp b/3rdparty/lzma/Linux/7zStream.cpp index 1402ba5..eb048a6 100644 --- a/3rdparty/lzma/Linux/7zStream.cpp +++ b/3rdparty/lzma/Linux/7zStream.cpp @@ -1,148 +1,148 @@ -/* 7zStream.c -- 7z Stream functions -2008-11-23 : Igor Pavlov : Public domain */ - -#include - -#include "LzmaTypes.h" - -namespace crnlib { - -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType) { - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size) { - return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf) { - size_t processed = 1; - RINOK(stream->Read(stream, buf, &processed)); - return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; -} - -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset) { - Int64 t = offset; - return stream->Seek(stream, &t, SZ_SEEK_SET); -} - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size) { - void* lookBuf; - if (*size == 0) - return SZ_OK; - RINOK(stream->Look(stream, &lookBuf, size)); - memcpy(buf, lookBuf, *size); - return stream->Skip(stream, *size); -} - -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType) { - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size) { - return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -static SRes LookToRead_Look_Lookahead(void* pp, void** buf, size_t* size) { - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - size2 = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, &size2); - p->size = size2; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Look_Exact(void* pp, void** buf, size_t* size) { - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - if (*size > LookToRead_BUF_SIZE) - *size = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, size); - size2 = p->size = *size; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Skip(void* pp, size_t offset) { - CLookToRead* p = (CLookToRead*)pp; - p->pos += offset; - return SZ_OK; -} - -static SRes LookToRead_Read(void* pp, void* buf, size_t* size) { - CLookToRead* p = (CLookToRead*)pp; - size_t rem = 
p->size - p->pos; - if (rem == 0) - return p->realStream->Read(p->realStream, buf, size); - if (rem > *size) - rem = *size; - memcpy(buf, p->buf + p->pos, rem); - p->pos += rem; - *size = rem; - return SZ_OK; -} - -static SRes LookToRead_Seek(void* pp, Int64* pos, ESzSeek origin) { - CLookToRead* p = (CLookToRead*)pp; - p->pos = p->size = 0; - return p->realStream->Seek(p->realStream, pos, origin); -} - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead) { - p->s.Look = lookahead ? LookToRead_Look_Lookahead : LookToRead_Look_Exact; - p->s.Skip = LookToRead_Skip; - p->s.Read = LookToRead_Read; - p->s.Seek = LookToRead_Seek; -} - -void LookToRead_Init(CLookToRead* p) { - p->pos = p->size = 0; -} - -static SRes SecToLook_Read(void* pp, void* buf, size_t* size) { - CSecToLook* p = (CSecToLook*)pp; - return LookInStream_LookRead(p->realStream, buf, size); -} - -void SecToLook_CreateVTable(CSecToLook* p) { - p->s.Read = SecToLook_Read; -} - -static SRes SecToRead_Read(void* pp, void* buf, size_t* size) { - CSecToRead* p = (CSecToRead*)pp; - return p->realStream->Read(p->realStream, buf, size); -} - -void SecToRead_CreateVTable(CSecToRead* p) { - p->s.Read = SecToRead_Read; -} -} +/* 7zStream.c -- 7z Stream functions +2008-11-23 : Igor Pavlov : Public domain */ + +#include + +#include "LzmaTypes.h" + +namespace crnlib { + +SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType) { + while (size != 0) { + size_t processed = size; + RINOK(stream->Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void*)((Byte*)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size) { + return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + +SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf) { + size_t processed = 1; + RINOK(stream->Read(stream, buf, &processed)); + return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; +} + +SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset) { + Int64 t = offset; + return stream->Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size) { + void* lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(stream->Look(stream, &lookBuf, size)); + memcpy(buf, lookBuf, *size); + return stream->Skip(stream, *size); +} + +SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType) { + while (size != 0) { + size_t processed = size; + RINOK(stream->Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void*)((Byte*)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size) { + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + +static SRes LookToRead_Look_Lookahead(void* pp, void** buf, size_t* size) { + SRes res = SZ_OK; + CLookToRead* p = (CLookToRead*)pp; + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size > 0) { + p->pos = 0; + size2 = LookToRead_BUF_SIZE; + res = p->realStream->Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (size2 < *size) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead_Look_Exact(void* pp, void** buf, size_t* size) { + SRes res = SZ_OK; + CLookToRead* p = (CLookToRead*)pp; + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size > 0) { + p->pos = 0; + if (*size > LookToRead_BUF_SIZE) + *size = LookToRead_BUF_SIZE; + res = p->realStream->Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (size2 < *size) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead_Skip(void* pp, size_t offset) { + CLookToRead* p = (CLookToRead*)pp; + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead_Read(void* pp, void* buf, size_t* size) { + CLookToRead* p = (CLookToRead*)pp; + size_t rem = 
p->size - p->pos; + if (rem == 0) + return p->realStream->Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead_Seek(void* pp, Int64* pos, ESzSeek origin) { + CLookToRead* p = (CLookToRead*)pp; + p->pos = p->size = 0; + return p->realStream->Seek(p->realStream, pos, origin); +} + +void LookToRead_CreateVTable(CLookToRead* p, int lookahead) { + p->s.Look = lookahead ? LookToRead_Look_Lookahead : LookToRead_Look_Exact; + p->s.Skip = LookToRead_Skip; + p->s.Read = LookToRead_Read; + p->s.Seek = LookToRead_Seek; +} + +void LookToRead_Init(CLookToRead* p) { + p->pos = p->size = 0; +} + +static SRes SecToLook_Read(void* pp, void* buf, size_t* size) { + CSecToLook* p = (CSecToLook*)pp; + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook* p) { + p->s.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(void* pp, void* buf, size_t* size) { + CSecToRead* p = (CSecToRead*)pp; + return p->realStream->Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead* p) { + p->s.Read = SecToRead_Read; +} +} diff --git a/3rdparty/lzma/Linux/7zVersion.h b/3rdparty/lzma/Linux/7zVersion.h index 9009300..595dec5 100644 --- a/3rdparty/lzma/Linux/7zVersion.h +++ b/3rdparty/lzma/Linux/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 4 -#define MY_VER_MINOR 63 -#define MY_VER_BUILD 0 -#define MY_VERSION "4.63" -#define MY_DATE "2008-12-31" -#define MY_COPYRIGHT ": Igor Pavlov : Public domain" -#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE +#define MY_VER_MAJOR 4 +#define MY_VER_MINOR 63 +#define MY_VER_BUILD 0 +#define MY_VERSION "4.63" +#define MY_DATE "2008-12-31" +#define MY_COPYRIGHT ": Igor Pavlov : Public domain" +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE diff --git a/3rdparty/lzma/Linux/Alloc.cpp b/3rdparty/lzma/Linux/Alloc.cpp 
index 10c368f..37af0ef 100644 --- a/3rdparty/lzma/Linux/Alloc.cpp +++ b/3rdparty/lzma/Linux/Alloc.cpp @@ -1,121 +1,121 @@ -/* Alloc.c -- Memory allocation functions -2008-09-24 -Igor Pavlov -Public domain */ - -#ifdef _WIN32 -#include -#endif -#include - -#include "Alloc.h" - -namespace crnlib { - -/* #define _SZ_ALLOC_DEBUG */ - -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG -#include -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; -#endif - -void* MyAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - { - void* p = crnlib::crnlib_malloc(size); - fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); - return p; - } -#else - return malloc(size); -#endif -} - -void MyFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); -#endif - free(address); -} - -#ifdef _WIN32 - -void* MidAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); -#endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void MidFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); -#endif - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES -#endif - -#ifdef _7ZIP_LARGE_PAGES -SIZE_T g_LargePageSize = 0; -typedef SIZE_T(WINAPI* GetLargePageMinimumP)(); -#endif - -void SetLargePageSize() { -#ifdef _7ZIP_LARGE_PAGES - SIZE_T size = 0; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (largePageMinimum == 0) - return; - size = largePageMinimum(); - if (size == 0 || (size & (size - 1)) != 0) - return; - 
g_LargePageSize = size; -#endif -} - -void* BigAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); -#endif - -#ifdef _7ZIP_LARGE_PAGES - if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) { - void* res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), - MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res != 0) - return res; - } -#endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void BigFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); -#endif - - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#endif -} +/* Alloc.c -- Memory allocation functions +2008-09-24 +Igor Pavlov +Public domain */ + +#ifdef _WIN32 +#include +#endif +#include + +#include "Alloc.h" + +namespace crnlib { + +/* #define _SZ_ALLOC_DEBUG */ + +/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef _SZ_ALLOC_DEBUG +#include +int g_allocCount = 0; +int g_allocCountMid = 0; +int g_allocCountBig = 0; +#endif + +void* MyAlloc(size_t size) { + if (size == 0) + return 0; +#ifdef _SZ_ALLOC_DEBUG + { + void* p = crnlib::crnlib_malloc(size); + fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); + return p; + } +#else + return malloc(size); +#endif +} + +void MyFree(void* address) { +#ifdef _SZ_ALLOC_DEBUG + if (address != 0) + fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); +#endif + free(address); +} + +#ifdef _WIN32 + +void* MidAlloc(size_t size) { + if (size == 0) + return 0; +#ifdef _SZ_ALLOC_DEBUG + fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); +#endif + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +} + +void MidFree(void* address) { +#ifdef _SZ_ALLOC_DEBUG + if 
(address != 0) + fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); +#endif + if (address == 0) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifndef MEM_LARGE_PAGES +#undef _7ZIP_LARGE_PAGES +#endif + +#ifdef _7ZIP_LARGE_PAGES +SIZE_T g_LargePageSize = 0; +typedef SIZE_T(WINAPI* GetLargePageMinimumP)(); +#endif + +void SetLargePageSize() { +#ifdef _7ZIP_LARGE_PAGES + SIZE_T size = 0; + GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) + GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); + if (largePageMinimum == 0) + return; + size = largePageMinimum(); + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; +#endif +} + +void* BigAlloc(size_t size) { + if (size == 0) + return 0; +#ifdef _SZ_ALLOC_DEBUG + fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); +#endif + +#ifdef _7ZIP_LARGE_PAGES + if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) { + void* res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), + MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (res != 0) + return res; + } +#endif + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +} + +void BigFree(void* address) { +#ifdef _SZ_ALLOC_DEBUG + if (address != 0) + fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); +#endif + + if (address == 0) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#endif +} diff --git a/3rdparty/lzma/Linux/Alloc.h b/3rdparty/lzma/Linux/Alloc.h index 28e5471..296813d 100644 --- a/3rdparty/lzma/Linux/Alloc.h +++ b/3rdparty/lzma/Linux/Alloc.h @@ -1,35 +1,35 @@ -/* Alloc.h -- Memory allocation functions -2008-03-13 -Igor Pavlov -Public domain */ - -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H - -#include - -namespace crnlib { - -void* MyAlloc(size_t size); -void MyFree(void* address); - -#ifdef _WIN32 - -void SetLargePageSize(); - -void* MidAlloc(size_t size); -void 
MidFree(void* address); -void* BigAlloc(size_t size); -void BigFree(void* address); - -#else - -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) - -#endif -} - -#endif +/* Alloc.h -- Memory allocation functions +2008-03-13 +Igor Pavlov +Public domain */ + +#ifndef __COMMON_ALLOC_H +#define __COMMON_ALLOC_H + +#include + +namespace crnlib { + +void* MyAlloc(size_t size); +void MyFree(void* address); + +#ifdef _WIN32 + +void SetLargePageSize(); + +void* MidAlloc(size_t size); +void MidFree(void* address); +void* BigAlloc(size_t size); +void BigFree(void* address); + +#else + +#define MidAlloc(size) MyAlloc(size) +#define MidFree(address) MyFree(address) +#define BigAlloc(size) MyAlloc(size) +#define BigFree(address) MyFree(address) + +#endif +} + +#endif diff --git a/3rdparty/lzma/Linux/Bcj2.cpp b/3rdparty/lzma/Linux/Bcj2.cpp index 879b5d9..67bf2ff 100644 --- a/3rdparty/lzma/Linux/Bcj2.cpp +++ b/3rdparty/lzma/Linux/Bcj2.cpp @@ -1,155 +1,155 @@ -/* Bcj2.c -- Converter for x86 code (BCJ2) -2008-10-04 : Igor Pavlov : Public domain */ - -#include "Bcj2.h" - -namespace crnlib { - -#ifdef _LZMA_PROB32 -#define CProb UInt32 -#else -#define CProb UInt16 -#endif - -#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1)&0xF0) == 0x80) -#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_READ_BYTE (*buffer++) -#define RC_TEST \ - { \ - if (buffer == bufferLim) \ - return SZ_ERROR_DATA; \ - } -#define RC_INIT2 \ - code = 0; \ - range = 0xFFFFFFFF; \ - { \ - int i; \ - for (i = 0; i < 5; i++) { \ - RC_TEST; \ - code = (code << 8) | RC_READ_BYTE; \ - } \ - } - -#define NORMALIZE \ - if (range < kTopValue) { \ - RC_TEST; \ - range <<= 8; \ - code = (code << 8) | RC_READ_BYTE; \ - } 
- -#define IF_BIT_0(p) \ - ttt = *(p); \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); \ - NORMALIZE; -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); \ - NORMALIZE; - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize) { - CProb p[256 + 2]; - SizeT inPos = 0, outPos = 0; - - const Byte *buffer, *bufferLim; - UInt32 range, code; - Byte prevByte = 0; - - unsigned int i; - for (i = 0; i < sizeof(p) / sizeof(p[0]); i++) - p[i] = kBitModelTotal >> 1; - - buffer = buf3; - bufferLim = buffer + size3; - RC_INIT2 - - if (outSize == 0) - return SZ_OK; - - for (;;) { - Byte b; - CProb* prob; - UInt32 bound; - UInt32 ttt; - - SizeT limit = size0 - inPos; - if (outSize - outPos < limit) - limit = outSize - outPos; - while (limit != 0) { - Byte b = buf0[inPos]; - outBuf[outPos++] = b; - if (IsJ(prevByte, b)) - break; - inPos++; - prevByte = b; - limit--; - } - - if (limit == 0 || outPos == outSize) - break; - - b = buf0[inPos++]; - - if (b == 0xE8) - prob = p + prevByte; - else if (b == 0xE9) - prob = p + 256; - else - prob = p + 257; - - IF_BIT_0(prob) { - UPDATE_0(prob) - prevByte = b; - } - else { - UInt32 dest; - const Byte* v; - UPDATE_1(prob) - if (b == 0xE8) { - v = buf1; - if (size1 < 4) - return SZ_ERROR_DATA; - buf1 += 4; - size1 -= 4; - } else { - v = buf2; - if (size2 < 4) - return SZ_ERROR_DATA; - buf2 += 4; - size2 -= 4; - } - dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | - ((UInt32)v[2] << 8) | ((UInt32)v[3])) - - ((UInt32)outPos + 4); - outBuf[outPos++] = (Byte)dest; - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 8); - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 16); - if (outPos == outSize) 
- break; - outBuf[outPos++] = prevByte = (Byte)(dest >> 24); - } - } - return (outPos == outSize) ? SZ_OK : SZ_ERROR_DATA; -} -} +/* Bcj2.c -- Converter for x86 code (BCJ2) +2008-10-04 : Igor Pavlov : Public domain */ + +#include "Bcj2.h" + +namespace crnlib { + +#ifdef _LZMA_PROB32 +#define CProb UInt32 +#else +#define CProb UInt16 +#endif + +#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1)&0xF0) == 0x80) +#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_READ_BYTE (*buffer++) +#define RC_TEST \ + { \ + if (buffer == bufferLim) \ + return SZ_ERROR_DATA; \ + } +#define RC_INIT2 \ + code = 0; \ + range = 0xFFFFFFFF; \ + { \ + int i; \ + for (i = 0; i < 5; i++) { \ + RC_TEST; \ + code = (code << 8) | RC_READ_BYTE; \ + } \ + } + +#define NORMALIZE \ + if (range < kTopValue) { \ + RC_TEST; \ + range <<= 8; \ + code = (code << 8) | RC_READ_BYTE; \ + } + +#define IF_BIT_0(p) \ + ttt = *(p); \ + bound = (range >> kNumBitModelTotalBits) * ttt; \ + if (code < bound) +#define UPDATE_0(p) \ + range = bound; \ + *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); \ + NORMALIZE; +#define UPDATE_1(p) \ + range -= bound; \ + code -= bound; \ + *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); \ + NORMALIZE; + +int Bcj2_Decode( + const Byte* buf0, SizeT size0, + const Byte* buf1, SizeT size1, + const Byte* buf2, SizeT size2, + const Byte* buf3, SizeT size3, + Byte* outBuf, SizeT outSize) { + CProb p[256 + 2]; + SizeT inPos = 0, outPos = 0; + + const Byte *buffer, *bufferLim; + UInt32 range, code; + Byte prevByte = 0; + + unsigned int i; + for (i = 0; i < sizeof(p) / sizeof(p[0]); i++) + p[i] = kBitModelTotal >> 1; + + buffer = buf3; + bufferLim = buffer + size3; + RC_INIT2 + + if (outSize == 0) + return SZ_OK; + + for (;;) { + Byte b; + CProb* prob; + UInt32 bound; + UInt32 
ttt; + + SizeT limit = size0 - inPos; + if (outSize - outPos < limit) + limit = outSize - outPos; + while (limit != 0) { + Byte b = buf0[inPos]; + outBuf[outPos++] = b; + if (IsJ(prevByte, b)) + break; + inPos++; + prevByte = b; + limit--; + } + + if (limit == 0 || outPos == outSize) + break; + + b = buf0[inPos++]; + + if (b == 0xE8) + prob = p + prevByte; + else if (b == 0xE9) + prob = p + 256; + else + prob = p + 257; + + IF_BIT_0(prob) { + UPDATE_0(prob) + prevByte = b; + } + else { + UInt32 dest; + const Byte* v; + UPDATE_1(prob) + if (b == 0xE8) { + v = buf1; + if (size1 < 4) + return SZ_ERROR_DATA; + buf1 += 4; + size1 -= 4; + } else { + v = buf2; + if (size2 < 4) + return SZ_ERROR_DATA; + buf2 += 4; + size2 -= 4; + } + dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | + ((UInt32)v[2] << 8) | ((UInt32)v[3])) - + ((UInt32)outPos + 4); + outBuf[outPos++] = (Byte)dest; + if (outPos == outSize) + break; + outBuf[outPos++] = (Byte)(dest >> 8); + if (outPos == outSize) + break; + outBuf[outPos++] = (Byte)(dest >> 16); + if (outPos == outSize) + break; + outBuf[outPos++] = prevByte = (Byte)(dest >> 24); + } + } + return (outPos == outSize) ? SZ_OK : SZ_ERROR_DATA; +} +} diff --git a/3rdparty/lzma/Linux/Bcj2.h b/3rdparty/lzma/Linux/Bcj2.h index fef09cb..f267090 100644 --- a/3rdparty/lzma/Linux/Bcj2.h +++ b/3rdparty/lzma/Linux/Bcj2.h @@ -1,33 +1,33 @@ -/* Bcj2.h -- Converter for x86 code (BCJ2) -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __BCJ2_H -#define __BCJ2_H - -#include "LzmaTypes.h" - -namespace crnlib { - -/* -Conditions: - outSize <= FullOutputSize, - where FullOutputSize is full size of output stream of x86_2 filter. - -If buf0 overlaps outBuf, there are two required conditions: - 1) (buf0 >= outBuf) - 2) (buf0 + size0 >= outBuf + FullOutputSize). 
- -Returns: - SZ_OK - SZ_ERROR_DATA - Data error -*/ - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize); -} - -#endif +/* Bcj2.h -- Converter for x86 code (BCJ2) +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __BCJ2_H +#define __BCJ2_H + +#include "LzmaTypes.h" + +namespace crnlib { + +/* +Conditions: + outSize <= FullOutputSize, + where FullOutputSize is full size of output stream of x86_2 filter. + +If buf0 overlaps outBuf, there are two required conditions: + 1) (buf0 >= outBuf) + 2) (buf0 + size0 >= outBuf + FullOutputSize). + +Returns: + SZ_OK + SZ_ERROR_DATA - Data error +*/ + +int Bcj2_Decode( + const Byte* buf0, SizeT size0, + const Byte* buf1, SizeT size1, + const Byte* buf2, SizeT size2, + const Byte* buf3, SizeT size3, + Byte* outBuf, SizeT outSize); +} + +#endif diff --git a/3rdparty/lzma/Linux/Bra.cpp b/3rdparty/lzma/Linux/Bra.cpp index 825d9ec..38b7f34 100644 --- a/3rdparty/lzma/Linux/Bra.cpp +++ b/3rdparty/lzma/Linux/Bra.cpp @@ -1,124 +1,124 @@ -/* Bra.c -- Converters for RISC code -2008-10-04 : Igor Pavlov : Public domain */ +/* Bra.c -- Converters for RISC code +2008-10-04 : Igor Pavlov : Public domain */ -#include "Bra.h" - -namespace crnlib { - -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 8; - for (i = 0; i <= size; i += 4) { - if (data[i + 3] == 0xEB) { - UInt32 dest; - UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); - src <<= 2; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 2; - data[i + 2] = (Byte)(dest >> 16); - data[i + 1] = (Byte)(dest >> 8); - data[i + 0] = (Byte)dest; - } - } - return i; -} - -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 4; - 
for (i = 0; i <= size; i += 2) { - if ((data[i + 1] & 0xF8) == 0xF0 && - (data[i + 3] & 0xF8) == 0xF8) { - UInt32 dest; - UInt32 src = - (((UInt32)data[i + 1] & 0x7) << 19) | - ((UInt32)data[i + 0] << 11) | - (((UInt32)data[i + 3] & 0x7) << 8) | - (data[i + 2]); - - src <<= 1; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 1; - - data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); - data[i + 0] = (Byte)(dest >> 11); - data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); - data[i + 2] = (Byte)dest; - i += 2; - } - } - return i; -} - -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) { - UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | - ((UInt32)data[i + 1] << 16) | - ((UInt32)data[i + 2] << 8) | - ((UInt32)data[i + 3] & (~3)); - - UInt32 dest; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] &= 0x3; - data[i + 3] |= dest; - } - } - return i; -} - -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - UInt32 i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if (((data[i] == 0x40) && ((data[i + 1] & 0xC0) == 0x00)) || - ((data[i] == 0x7F) && ((data[i + 1] & 0xC0) == 0xC0))) { - UInt32 src = - ((UInt32)data[i + 0] << 24) | - ((UInt32)data[i + 1] << 16) | - ((UInt32)data[i + 2] << 8) | - ((UInt32)data[i + 3]); - UInt32 dest; - - src <<= 2; - if (encoding) - dest = ip + i + src; - else - dest = src - (ip + i); - dest >>= 2; - - dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; - - data[i + 0] = (Byte)(dest >> 24); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] = 
(Byte)dest; - } - } - return i; -} -} +#include "Bra.h" + +namespace crnlib { + +SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { + SizeT i; + if (size < 4) + return 0; + size -= 4; + ip += 8; + for (i = 0; i <= size; i += 4) { + if (data[i + 3] == 0xEB) { + UInt32 dest; + UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); + src <<= 2; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + dest >>= 2; + data[i + 2] = (Byte)(dest >> 16); + data[i + 1] = (Byte)(dest >> 8); + data[i + 0] = (Byte)dest; + } + } + return i; +} + +SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { + SizeT i; + if (size < 4) + return 0; + size -= 4; + ip += 4; + for (i = 0; i <= size; i += 2) { + if ((data[i + 1] & 0xF8) == 0xF0 && + (data[i + 3] & 0xF8) == 0xF8) { + UInt32 dest; + UInt32 src = + (((UInt32)data[i + 1] & 0x7) << 19) | + ((UInt32)data[i + 0] << 11) | + (((UInt32)data[i + 3] & 0x7) << 8) | + (data[i + 2]); + + src <<= 1; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + dest >>= 1; + + data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); + data[i + 0] = (Byte)(dest >> 11); + data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); + data[i + 2] = (Byte)dest; + i += 2; + } + } + return i; +} + +SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { + SizeT i; + if (size < 4) + return 0; + size -= 4; + for (i = 0; i <= size; i += 4) { + if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) { + UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | + ((UInt32)data[i + 1] << 16) | + ((UInt32)data[i + 2] << 8) | + ((UInt32)data[i + 3] & (~3)); + + UInt32 dest; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); + data[i + 1] = (Byte)(dest >> 16); + data[i + 2] = (Byte)(dest >> 8); + data[i + 3] &= 0x3; + data[i + 3] |= dest; + } + } + return i; +} + 
+SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { + UInt32 i; + if (size < 4) + return 0; + size -= 4; + for (i = 0; i <= size; i += 4) { + if (((data[i] == 0x40) && ((data[i + 1] & 0xC0) == 0x00)) || + ((data[i] == 0x7F) && ((data[i + 1] & 0xC0) == 0xC0))) { + UInt32 src = + ((UInt32)data[i + 0] << 24) | + ((UInt32)data[i + 1] << 16) | + ((UInt32)data[i + 2] << 8) | + ((UInt32)data[i + 3]); + UInt32 dest; + + src <<= 2; + if (encoding) + dest = ip + i + src; + else + dest = src - (ip + i); + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; + + data[i + 0] = (Byte)(dest >> 24); + data[i + 1] = (Byte)(dest >> 16); + data[i + 2] = (Byte)(dest >> 8); + data[i + 3] = (Byte)dest; + } + } + return i; +} +} diff --git a/3rdparty/lzma/Linux/Bra.h b/3rdparty/lzma/Linux/Bra.h index 3dccfe7..57e4c40 100644 --- a/3rdparty/lzma/Linux/Bra.h +++ b/3rdparty/lzma/Linux/Bra.h @@ -1,64 +1,64 @@ -/* Bra.h -- Branch converters for executables -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __BRA_H -#define __BRA_H - -#include "LzmaTypes.h" - -namespace crnlib { - -/* -These functions convert relative addresses to absolute addresses -in CALL instructions to increase the compression ratio. - - In: - data - data buffer - size - size of data - ip - current virtual Instruction Pinter (IP) value - state - state variable for x86 converter - encoding - 0 (for decoding), 1 (for encoding) - - Out: - state - state variable for x86 converter - - Returns: - The number of processed bytes. If you call these functions with multiple calls, - you must start next call with first byte after block of processed bytes. - - Type Endian Alignment LookAhead - - x86 little 1 4 - ARMT little 2 2 - ARM little 4 0 - PPC big 4 0 - SPARC big 4 0 - IA64 little 16 0 - - size must be >= Alignment + LookAhead, if it's not last block. - If (size < Alignment + LookAhead), converter returns 0. 
- - Example: - - UInt32 ip = 0; - for () - { - ; size must be >= Alignment + LookAhead, if it's not last block - SizeT processed = Convert(data, size, ip, 1); - data += processed; - size -= processed; - ip += processed; - } -*/ - -#define x86_Convert_Init(state) \ - { state = 0; } -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding); -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -} - -#endif +/* Bra.h -- Branch converters for executables +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __BRA_H +#define __BRA_H + +#include "LzmaTypes.h" + +namespace crnlib { + +/* +These functions convert relative addresses to absolute addresses +in CALL instructions to increase the compression ratio. + + In: + data - data buffer + size - size of data + ip - current virtual Instruction Pinter (IP) value + state - state variable for x86 converter + encoding - 0 (for decoding), 1 (for encoding) + + Out: + state - state variable for x86 converter + + Returns: + The number of processed bytes. If you call these functions with multiple calls, + you must start next call with first byte after block of processed bytes. + + Type Endian Alignment LookAhead + + x86 little 1 4 + ARMT little 2 2 + ARM little 4 0 + PPC big 4 0 + SPARC big 4 0 + IA64 little 16 0 + + size must be >= Alignment + LookAhead, if it's not last block. + If (size < Alignment + LookAhead), converter returns 0. 
+ + Example: + + UInt32 ip = 0; + for () + { + ; size must be >= Alignment + LookAhead, if it's not last block + SizeT processed = Convert(data, size, ip, 1); + data += processed; + size -= processed; + ip += processed; + } +*/ + +#define x86_Convert_Init(state) \ + { state = 0; } +SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding); +SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); +SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); +SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); +SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); +SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); +} + +#endif diff --git a/3rdparty/lzma/Linux/Bra86.cpp b/3rdparty/lzma/Linux/Bra86.cpp index 78987ce..5a8a09e 100644 --- a/3rdparty/lzma/Linux/Bra86.cpp +++ b/3rdparty/lzma/Linux/Bra86.cpp @@ -1,79 +1,79 @@ -/* Bra86.c -- Converter for x86 code (BCJ) -2008-10-04 : Igor Pavlov : Public domain */ +/* Bra86.c -- Converter for x86 code (BCJ) +2008-10-04 : Igor Pavlov : Public domain */ -#include "Bra.h" - -namespace crnlib { - -#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) - -const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; -const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; - -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding) { - SizeT bufferPos = 0, prevPosT; - UInt32 prevMask = *state & 0x7; - if (size < 5) - return 0; - ip += 5; - prevPosT = (SizeT)0 - 1; - - for (;;) { - Byte* p = data + bufferPos; - Byte* limit = data + size - 4; - for (; p < limit; p++) - if ((*p & 0xFE) == 0xE8) - break; - bufferPos = (SizeT)(p - data); - if (p >= limit) - break; - prevPosT = bufferPos - prevPosT; - if (prevPosT > 3) - prevMask = 0; - else { - prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; - if (prevMask != 0) { - Byte b = p[4 - kMaskToBitNumber[prevMask]]; - if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) 
{ - prevPosT = bufferPos; - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - continue; - } - } - } - prevPosT = bufferPos; - - if (Test86MSByte(p[4])) { - UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); - UInt32 dest; - for (;;) { - Byte b; - int index; - if (encoding) - dest = (ip + (UInt32)bufferPos) + src; - else - dest = src - (ip + (UInt32)bufferPos); - if (prevMask == 0) - break; - index = kMaskToBitNumber[prevMask] * 8; - b = (Byte)(dest >> (24 - index)); - if (!Test86MSByte(b)) - break; - src = dest ^ ((1 << (32 - index)) - 1); - } - p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); - p[3] = (Byte)(dest >> 16); - p[2] = (Byte)(dest >> 8); - p[1] = (Byte)dest; - bufferPos += 5; - } else { - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - } - } - prevPosT = bufferPos - prevPosT; - *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); - return bufferPos; -} -} +#include "Bra.h" + +namespace crnlib { + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + +const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; +const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; + +SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding) { + SizeT bufferPos = 0, prevPosT; + UInt32 prevMask = *state & 0x7; + if (size < 5) + return 0; + ip += 5; + prevPosT = (SizeT)0 - 1; + + for (;;) { + Byte* p = data + bufferPos; + Byte* limit = data + size - 4; + for (; p < limit; p++) + if ((*p & 0xFE) == 0xE8) + break; + bufferPos = (SizeT)(p - data); + if (p >= limit) + break; + prevPosT = bufferPos - prevPosT; + if (prevPosT > 3) + prevMask = 0; + else { + prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; + if (prevMask != 0) { + Byte b = p[4 - kMaskToBitNumber[prevMask]]; + if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) { + prevPosT = bufferPos; + prevMask = ((prevMask << 1) & 0x7) | 1; + bufferPos++; + continue; + } + } + } + prevPosT = bufferPos; + + if 
(Test86MSByte(p[4])) { + UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); + UInt32 dest; + for (;;) { + Byte b; + int index; + if (encoding) + dest = (ip + (UInt32)bufferPos) + src; + else + dest = src - (ip + (UInt32)bufferPos); + if (prevMask == 0) + break; + index = kMaskToBitNumber[prevMask] * 8; + b = (Byte)(dest >> (24 - index)); + if (!Test86MSByte(b)) + break; + src = dest ^ ((1 << (32 - index)) - 1); + } + p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); + p[3] = (Byte)(dest >> 16); + p[2] = (Byte)(dest >> 8); + p[1] = (Byte)dest; + bufferPos += 5; + } else { + prevMask = ((prevMask << 1) & 0x7) | 1; + bufferPos++; + } + } + prevPosT = bufferPos - prevPosT; + *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); + return bufferPos; +} +} diff --git a/3rdparty/lzma/Linux/BraIA64.cpp b/3rdparty/lzma/Linux/BraIA64.cpp index d7ebeb9..9ec4adb 100644 --- a/3rdparty/lzma/Linux/BraIA64.cpp +++ b/3rdparty/lzma/Linux/BraIA64.cpp @@ -1,65 +1,65 @@ -/* BraIA64.c -- Converter for IA-64 code -2008-10-04 : Igor Pavlov : Public domain */ +/* BraIA64.c -- Converter for IA-64 code +2008-10-04 : Igor Pavlov : Public domain */ -#include "Bra.h" - -namespace crnlib { - -static const Byte kBranchTable[32] = - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 6, 6, 0, 0, 7, 7, - 4, 4, 0, 0, 4, 4, 0, 0}; - -SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 16) - return 0; - size -= 16; - for (i = 0; i <= size; i += 16) { - UInt32 instrTemplate = data[i] & 0x1F; - UInt32 mask = kBranchTable[instrTemplate]; - UInt32 bitPos = 5; - int slot; - for (slot = 0; slot < 3; slot++, bitPos += 41) { - UInt32 bytePos, bitRes; - UInt64 instruction, instNorm; - int j; - if (((mask >> slot) & 1) == 0) - continue; - bytePos = (bitPos >> 3); - bitRes = bitPos & 0x7; - instruction = 0; - for (j = 0; j < 6; j++) - instruction += (UInt64)data[i + j + bytePos] << (8 * j); - - instNorm 
= instruction >> bitRes; - if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0) { - UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF); - UInt32 dest; - src |= ((UInt32)(instNorm >> 36) & 1) << 20; - - src <<= 4; - - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - - dest >>= 4; - - instNorm &= ~((UInt64)(0x8FFFFF) << 13); - instNorm |= ((UInt64)(dest & 0xFFFFF) << 13); - instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20)); - - instruction &= (1 << bitRes) - 1; - instruction |= (instNorm << bitRes); - for (j = 0; j < 6; j++) - data[i + j + bytePos] = (Byte)(instruction >> (8 * j)); - } - } - } - return i; -} -} +#include "Bra.h" + +namespace crnlib { + +static const Byte kBranchTable[32] = + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0}; + +SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { + SizeT i; + if (size < 16) + return 0; + size -= 16; + for (i = 0; i <= size; i += 16) { + UInt32 instrTemplate = data[i] & 0x1F; + UInt32 mask = kBranchTable[instrTemplate]; + UInt32 bitPos = 5; + int slot; + for (slot = 0; slot < 3; slot++, bitPos += 41) { + UInt32 bytePos, bitRes; + UInt64 instruction, instNorm; + int j; + if (((mask >> slot) & 1) == 0) + continue; + bytePos = (bitPos >> 3); + bitRes = bitPos & 0x7; + instruction = 0; + for (j = 0; j < 6; j++) + instruction += (UInt64)data[i + j + bytePos] << (8 * j); + + instNorm = instruction >> bitRes; + if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0) { + UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF); + UInt32 dest; + src |= ((UInt32)(instNorm >> 36) & 1) << 20; + + src <<= 4; + + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + + dest >>= 4; + + instNorm &= ~((UInt64)(0x8FFFFF) << 13); + instNorm |= ((UInt64)(dest & 0xFFFFF) << 13); + instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20)); + + instruction &= (1 << bitRes) - 1; + 
instruction |= (instNorm << bitRes); + for (j = 0; j < 6; j++) + data[i + j + bytePos] = (Byte)(instruction >> (8 * j)); + } + } + } + return i; +} +} diff --git a/3rdparty/lzma/Linux/CpuArch.h b/3rdparty/lzma/Linux/CpuArch.h index 3b21f95..d993f40 100644 --- a/3rdparty/lzma/Linux/CpuArch.h +++ b/3rdparty/lzma/Linux/CpuArch.h @@ -1,72 +1,72 @@ -/* CpuArch.h -2008-08-05 -Igor Pavlov -Public domain */ - -#ifndef __CPUARCH_H -#define __CPUARCH_H - -/* -LITTLE_ENDIAN_UNALIGN means: - 1) CPU is LITTLE_ENDIAN - 2) it's allowed to make unaligned memory accesses -if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know -about these properties of platform. -*/ - -#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__) -#define LITTLE_ENDIAN_UNALIGN -#endif - -#ifdef LITTLE_ENDIAN_UNALIGN - -#define GetUi16(p) (*(const UInt16*)(p)) -#define GetUi32(p) (*(const UInt32*)(p)) -#define GetUi64(p) (*(const UInt64*)(p)) -#define SetUi32(p, d) *(UInt32*)(p) = (d); - -#else - -#define GetUi16(p) (((const Byte*)(p))[0] | ((UInt16)((const Byte*)(p))[1] << 8)) - -#define GetUi32(p) ( \ - ((const Byte*)(p))[0] | \ - ((UInt32)((const Byte*)(p))[1] << 8) | \ - ((UInt32)((const Byte*)(p))[2] << 16) | \ - ((UInt32)((const Byte*)(p))[3] << 24)) - -#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte*)(p)) + 4) << 32)) - -#define SetUi32(p, d) \ - { \ - UInt32 _x_ = (d); \ - ((Byte*)(p))[0] = (Byte)_x_; \ - ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ - ((Byte*)(p))[2] = (Byte)(_x_ >> 16); \ - ((Byte*)(p))[3] = (Byte)(_x_ >> 24); \ - } - -#endif - -#if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) - -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) -#define GetBe32(p) _byteswap_ulong(*(const UInt32*)(const Byte*)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64*)(const Byte*)(p)) - -#else - -#define GetBe32(p) ( \ - ((UInt32)((const Byte*)(p))[0] << 24) | \ - 
((UInt32)((const Byte*)(p))[1] << 16) | \ - ((UInt32)((const Byte*)(p))[2] << 8) | \ - ((const Byte*)(p))[3]) - -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte*)(p)) + 4)) - -#endif - -#define GetBe16(p) (((UInt16)((const Byte*)(p))[0] << 8) | ((const Byte*)(p))[1]) - -#endif +/* CpuArch.h +2008-08-05 +Igor Pavlov +Public domain */ + +#ifndef __CPUARCH_H +#define __CPUARCH_H + +/* +LITTLE_ENDIAN_UNALIGN means: + 1) CPU is LITTLE_ENDIAN + 2) it's allowed to make unaligned memory accesses +if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know +about these properties of platform. +*/ + +#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__) +#define LITTLE_ENDIAN_UNALIGN +#endif + +#ifdef LITTLE_ENDIAN_UNALIGN + +#define GetUi16(p) (*(const UInt16*)(p)) +#define GetUi32(p) (*(const UInt32*)(p)) +#define GetUi64(p) (*(const UInt64*)(p)) +#define SetUi32(p, d) *(UInt32*)(p) = (d); + +#else + +#define GetUi16(p) (((const Byte*)(p))[0] | ((UInt16)((const Byte*)(p))[1] << 8)) + +#define GetUi32(p) ( \ + ((const Byte*)(p))[0] | \ + ((UInt32)((const Byte*)(p))[1] << 8) | \ + ((UInt32)((const Byte*)(p))[2] << 16) | \ + ((UInt32)((const Byte*)(p))[3] << 24)) + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte*)(p)) + 4) << 32)) + +#define SetUi32(p, d) \ + { \ + UInt32 _x_ = (d); \ + ((Byte*)(p))[0] = (Byte)_x_; \ + ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ + ((Byte*)(p))[2] = (Byte)(_x_ >> 16); \ + ((Byte*)(p))[3] = (Byte)(_x_ >> 24); \ + } + +#endif + +#if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) + +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) +#define GetBe32(p) _byteswap_ulong(*(const UInt32*)(const Byte*)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64*)(const Byte*)(p)) + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte*)(p))[0] << 24) | \ + ((UInt32)((const Byte*)(p))[1] << 16) | \ + ((UInt32)((const 
Byte*)(p))[2] << 8) | \ + ((const Byte*)(p))[3]) + +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte*)(p)) + 4)) + +#endif + +#define GetBe16(p) (((UInt16)((const Byte*)(p))[0] << 8) | ((const Byte*)(p))[1]) + +#endif diff --git a/3rdparty/lzma/Linux/LzFind.cpp b/3rdparty/lzma/Linux/LzFind.cpp index 5a19c35..9954a33 100644 --- a/3rdparty/lzma/Linux/LzFind.cpp +++ b/3rdparty/lzma/Linux/LzFind.cpp @@ -1,682 +1,682 @@ -/* LzFind.c -- Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#include - -#include "LzFind.h" -#include "LzHash.h" - -namespace crnlib { - -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)3 << 30) - -#define kStartMaxLen 3 - -static void LzInWindow_Free(CMatchFinder* p, ISzAlloc* alloc) { - if (!p->directInput) { - alloc->Free(alloc, p->bufferBase); - p->bufferBase = 0; - } -} - -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ - -static int LzInWindow_Create(CMatchFinder* p, UInt32 keepSizeReserv, ISzAlloc* alloc) { - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) { - p->blockSize = blockSize; - return 1; - } - if (p->bufferBase == 0 || p->blockSize != blockSize) { - LzInWindow_Free(p, alloc); - p->blockSize = blockSize; - p->bufferBase = (Byte*)alloc->Alloc(alloc, (size_t)blockSize); - } - return (p->bufferBase != 0); -} - -Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p) { - return p->buffer; -} -Byte MatchFinder_GetIndexByte(CMatchFinder* p, Int32 index) { - return p->buffer[index]; -} - -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder* p) { - return p->streamPos - p->pos; -} - -void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue) { - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} - -static void 
MatchFinder_ReadBlock(CMatchFinder* p) { - if (p->streamEndWasReached || p->result != SZ_OK) - return; - for (;;) { - Byte* dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); - if (size == 0) - return; - p->result = p->stream->Read(p->stream, dest, &size); - if (p->result != SZ_OK) - return; - if (size == 0) { - p->streamEndWasReached = 1; - return; - } - p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) - return; - } -} - -void MatchFinder_MoveBlock(CMatchFinder* p) { - memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); - p->buffer = p->bufferBase + p->keepSizeBefore; -} - -int MatchFinder_NeedMove(CMatchFinder* p) { - /* if (p->streamEndWasReached) return 0; */ - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); -} - -void MatchFinder_ReadIfRequired(CMatchFinder* p) { - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder* p) { - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_SetDefaultSettings(CMatchFinder* p) { - p->cutValue = 32; - p->btMode = 1; - p->numHashBytes = 4; - /* p->skipModeBits = 0; */ - p->directInput = 0; - p->bigHash = 0; -} - -#define kCrcPoly 0xEDB88320 - -void MatchFinder_Construct(CMatchFinder* p) { - UInt32 i; - p->bufferBase = 0; - p->directInput = 0; - p->hash = 0; - MatchFinder_SetDefaultSettings(p); - - for (i = 0; i < 256; i++) { - UInt32 r = i; - int j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - p->crc[i] = r; - } -} - -static void MatchFinder_FreeThisClassMemory(CMatchFinder* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->hash); - p->hash = 0; -} - -void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc) { - MatchFinder_FreeThisClassMemory(p, alloc); 
- LzInWindow_Free(p, alloc); -} - -static CLzRef* AllocRefs(UInt32 num, ISzAlloc* alloc) { - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); - if (sizeInBytes / sizeof(CLzRef) != num) - return 0; - return (CLzRef*)alloc->Alloc(alloc, sizeInBytes); -} - -int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc* alloc) { - UInt32 sizeReserv; - if (historySize > kMaxHistorySize) { - MatchFinder_Free(p, alloc); - return 0; - } - sizeReserv = historySize >> 1; - if (historySize > ((UInt32)2 << 30)) - sizeReserv = historySize >> 2; - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - - p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - if (LzInWindow_Create(p, sizeReserv, alloc)) { - UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1; - UInt32 hs; - p->matchMaxLen = matchMaxLen; - { - p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else { - hs = historySize - 1; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - /* hs >>= p->skipModeBits; */ - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - } - } - p->hashMask = hs; - hs++; - if (p->numHashBytes > 2) - p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) - p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) - p->fixedHashSize += kHash4Size; - hs += p->fixedHashSize; - } - - { - UInt32 prevSize = p->hashSizeSum + p->numSons; - UInt32 newSize; - p->historySize = historySize; - p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - p->numSons = (p->btMode ? 
newCyclicBufferSize * 2 : newCyclicBufferSize); - newSize = p->hashSizeSum + p->numSons; - if (p->hash != 0 && prevSize == newSize) - return 1; - MatchFinder_FreeThisClassMemory(p, alloc); - p->hash = AllocRefs(newSize, alloc); - if (p->hash != 0) { - p->son = p->hash + p->hashSizeSum; - return 1; - } - } - } - MatchFinder_Free(p, alloc); - return 0; -} - -static void MatchFinder_SetLimits(CMatchFinder* p) { - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - if (limit2 <= p->keepSizeAfter) { - if (limit2 > 0) - limit2 = 1; - } else - limit2 -= p->keepSizeAfter; - if (limit2 < limit) - limit = limit2; - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; -} - -void MatchFinder_Init(CMatchFinder* p) { - UInt32 i; - for (i = 0; i < p->hashSizeSum; i++) - p->hash[i] = kEmptyHashValue; - p->cyclicBufferPos = 0; - p->buffer = p->bufferBase; - p->pos = p->streamPos = p->cyclicBufferSize; - p->result = SZ_OK; - p->streamEndWasReached = 0; - MatchFinder_ReadBlock(p); - MatchFinder_SetLimits(p); -} - -static UInt32 MatchFinder_GetSubValue(CMatchFinder* p) { - return (p->pos - p->historySize - 1) & kNormalizeMask; -} - -void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems) { - UInt32 i; - for (i = 0; i < numItems; i++) { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; - } -} - -static void MatchFinder_Normalize(CMatchFinder* p) { - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); - MatchFinder_ReduceOffsets(p, subValue); -} - -static void MatchFinder_CheckLimits(CMatchFinder* p) { - if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if 
(!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); - if (p->cyclicBufferPos == p->cyclicBufferSize) - p->cyclicBufferPos = 0; - MatchFinder_SetLimits(p); -} - -static UInt32* Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32* distances, UInt32 maxLen) { - son[_cyclicBufferPos] = curMatch; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; - { - const Byte* pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - if (pb[maxLen] == cur[maxLen] && *pb == *cur) { - UInt32 len = 0; - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) - return distances; - } - } - } - } -} - -UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32* distances, UInt32 maxLen) { - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? 
len0 : len1); - if (pb[len] == cur[len]) { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return distances; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? 
len0 : len1); - if (pb[len] == cur[len]) { - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - { - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -#define MOVE_POS \ - ++p->cyclicBufferPos; \ - p->buffer++; \ - if (++p->pos == p->posLimit) \ - MatchFinder_CheckLimits(p); - -#define MOVE_POS_RET MOVE_POS return offset; - -static void MatchFinder_MovePos(CMatchFinder* p) { - MOVE_POS; -} - -#define GET_MATCHES_HEADER2(minLen, ret_op) \ - UInt32 lenLimit; \ - UInt32 hashValue; \ - const Byte* cur; \ - UInt32 curMatch; \ - lenLimit = p->lenLimit; \ - { \ - if (lenLimit < minLen) { \ - MatchFinder_MovePos(p); \ - ret_op; \ - } \ - } \ - cur = p->buffer; - -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) - -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue - -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, maxLen) - \ - distances); \ - MOVE_POS_RET; - -#define SKIP_FOOTER \ - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); \ - MOVE_POS; - -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) -} - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) -} - -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder* p, UInt32* 
distances) { - UInt32 hash2Value, delta2, maxLen, offset; - GET_MATCHES_HEADER(3) - - HASH3_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - curMatch = p->hash[kFix3HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - - maxLen = 2; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[0] = maxLen; - distances[1] = delta2 - 1; - offset = 2; - if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 
= p->pos - p->hash[hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - - (distances)); - MOVE_POS_RET -} - -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - - (distances)); - MOVE_POS_RET -} - -static void Bt2_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Bt3_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value; - SKIP_HEADER(3) - HASH3_CALC; - curMatch = p->hash[kFix3HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = 
p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Bt4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = p->pos; - p->hash[kFix4HashSize + hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Hc4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } while (--num != 0); -} - -void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } while (--num != 0); -} - -void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; - if (!p->btMode) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; - } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; - } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; - } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = 
(Mf_Skip_Func)Bt4_MatchFinder_Skip; - } -} -} +/* LzFind.c -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#include + +#include "LzFind.h" +#include "LzHash.h" + +namespace crnlib { + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(kNormalizeStepMin - 1)) +#define kMaxHistorySize ((UInt32)3 << 30) + +#define kStartMaxLen 3 + +static void LzInWindow_Free(CMatchFinder* p, ISzAlloc* alloc) { + if (!p->directInput) { + alloc->Free(alloc, p->bufferBase); + p->bufferBase = 0; + } +} + +/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ + +static int LzInWindow_Create(CMatchFinder* p, UInt32 keepSizeReserv, ISzAlloc* alloc) { + UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; + if (p->directInput) { + p->blockSize = blockSize; + return 1; + } + if (p->bufferBase == 0 || p->blockSize != blockSize) { + LzInWindow_Free(p, alloc); + p->blockSize = blockSize; + p->bufferBase = (Byte*)alloc->Alloc(alloc, (size_t)blockSize); + } + return (p->bufferBase != 0); +} + +Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p) { + return p->buffer; +} +Byte MatchFinder_GetIndexByte(CMatchFinder* p, Int32 index) { + return p->buffer[index]; +} + +UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder* p) { + return p->streamPos - p->pos; +} + +void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue) { + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void MatchFinder_ReadBlock(CMatchFinder* p) { + if (p->streamEndWasReached || p->result != SZ_OK) + return; + for (;;) { + Byte* dest = p->buffer + (p->streamPos - p->pos); + size_t size = (p->bufferBase + p->blockSize - dest); + if (size == 0) + return; + p->result = p->stream->Read(p->stream, dest, &size); + if (p->result != SZ_OK) + return; + if (size == 0) { + p->streamEndWasReached = 1; + 
return; + } + p->streamPos += (UInt32)size; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + +void MatchFinder_MoveBlock(CMatchFinder* p) { + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, + (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); + p->buffer = p->bufferBase + p->keepSizeBefore; +} + +int MatchFinder_NeedMove(CMatchFinder* p) { + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder* p) { + if (p->streamEndWasReached) + return; + if (p->keepSizeAfter >= p->streamPos - p->pos) + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_CheckAndMoveAndRead(CMatchFinder* p) { + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_SetDefaultSettings(CMatchFinder* p) { + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + /* p->skipModeBits = 0; */ + p->directInput = 0; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder* p) { + UInt32 i; + p->bufferBase = 0; + p->directInput = 0; + p->hash = 0; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) { + UInt32 r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + p->crc[i] = r; + } +} + +static void MatchFinder_FreeThisClassMemory(CMatchFinder* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->hash); + p->hash = 0; +} + +void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc) { + MatchFinder_FreeThisClassMemory(p, alloc); + LzInWindow_Free(p, alloc); +} + +static CLzRef* AllocRefs(UInt32 num, ISzAlloc* alloc) { + size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return 0; + return (CLzRef*)alloc->Alloc(alloc, sizeInBytes); +} + +int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 
keepAddBufferAfter, + ISzAlloc* alloc) { + UInt32 sizeReserv; + if (historySize > kMaxHistorySize) { + MatchFinder_Free(p, alloc); + return 0; + } + sizeReserv = historySize >> 1; + if (historySize > ((UInt32)2 << 30)) + sizeReserv = historySize >> 2; + sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; + /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ + if (LzInWindow_Create(p, sizeReserv, alloc)) { + UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1; + UInt32 hs; + p->matchMaxLen = matchMaxLen; + { + p->fixedHashSize = 0; + if (p->numHashBytes == 2) + hs = (1 << 16) - 1; + else { + hs = historySize - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + /* hs >>= p->skipModeBits; */ + hs |= 0xFFFF; /* don't change it! It's required for Deflate */ + if (hs > (1 << 24)) { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + } + } + p->hashMask = hs; + hs++; + if (p->numHashBytes > 2) + p->fixedHashSize += kHash2Size; + if (p->numHashBytes > 3) + p->fixedHashSize += kHash3Size; + if (p->numHashBytes > 4) + p->fixedHashSize += kHash4Size; + hs += p->fixedHashSize; + } + + { + UInt32 prevSize = p->hashSizeSum + p->numSons; + UInt32 newSize; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; + p->numSons = (p->btMode ? 
newCyclicBufferSize * 2 : newCyclicBufferSize); + newSize = p->hashSizeSum + p->numSons; + if (p->hash != 0 && prevSize == newSize) + return 1; + MatchFinder_FreeThisClassMemory(p, alloc); + p->hash = AllocRefs(newSize, alloc); + if (p->hash != 0) { + p->son = p->hash + p->hashSizeSum; + return 1; + } + } + } + MatchFinder_Free(p, alloc); + return 0; +} + +static void MatchFinder_SetLimits(CMatchFinder* p) { + UInt32 limit = kMaxValForNormalize - p->pos; + UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + if (limit2 < limit) + limit = limit2; + limit2 = p->streamPos - p->pos; + if (limit2 <= p->keepSizeAfter) { + if (limit2 > 0) + limit2 = 1; + } else + limit2 -= p->keepSizeAfter; + if (limit2 < limit) + limit = limit2; + { + UInt32 lenLimit = p->streamPos - p->pos; + if (lenLimit > p->matchMaxLen) + lenLimit = p->matchMaxLen; + p->lenLimit = lenLimit; + } + p->posLimit = p->pos + limit; +} + +void MatchFinder_Init(CMatchFinder* p) { + UInt32 i; + for (i = 0; i < p->hashSizeSum; i++) + p->hash[i] = kEmptyHashValue; + p->cyclicBufferPos = 0; + p->buffer = p->bufferBase; + p->pos = p->streamPos = p->cyclicBufferSize; + p->result = SZ_OK; + p->streamEndWasReached = 0; + MatchFinder_ReadBlock(p); + MatchFinder_SetLimits(p); +} + +static UInt32 MatchFinder_GetSubValue(CMatchFinder* p) { + return (p->pos - p->historySize - 1) & kNormalizeMask; +} + +void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems) { + UInt32 i; + for (i = 0; i < numItems; i++) { + UInt32 value = items[i]; + if (value <= subValue) + value = kEmptyHashValue; + else + value -= subValue; + items[i] = value; + } +} + +static void MatchFinder_Normalize(CMatchFinder* p) { + UInt32 subValue = MatchFinder_GetSubValue(p); + MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); + MatchFinder_ReduceOffsets(p, subValue); +} + +static void MatchFinder_CheckLimits(CMatchFinder* p) { + if (p->pos == kMaxValForNormalize) + MatchFinder_Normalize(p); + if 
(!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + MatchFinder_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); +} + +static UInt32* Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32* distances, UInt32 maxLen) { + son[_cyclicBufferPos] = curMatch; + for (;;) { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const Byte* pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } +} + +UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32* distances, UInt32 maxLen) { + CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef* ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + for (;;) { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { + CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte* pb = cur - delta; + UInt32 len = (len0 < len1 ? 
len0 : len1); + if (pb[len] == cur[len]) { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return distances; + } + } + } + if (pb[len] < cur[len]) { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } else { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { + CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef* ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + for (;;) { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) { + *ptr0 = *ptr1 = kEmptyHashValue; + return; + } + { + CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte* pb = cur - delta; + UInt32 len = (len0 < len1 ? 
len0 : len1); + if (pb[len] == cur[len]) { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } else { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + if (++p->pos == p->posLimit) \ + MatchFinder_CheckLimits(p); + +#define MOVE_POS_RET MOVE_POS return offset; + +static void MatchFinder_MovePos(CMatchFinder* p) { + MOVE_POS; +} + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + UInt32 lenLimit; \ + UInt32 hashValue; \ + const Byte* cur; \ + UInt32 curMatch; \ + lenLimit = p->lenLimit; \ + { \ + if (lenLimit < minLen) { \ + MatchFinder_MovePos(p); \ + ret_op; \ + } \ + } \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) +#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define GET_MATCHES_FOOTER(offset, maxLen) \ + offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, maxLen) - \ + distances); \ + MOVE_POS_RET; + +#define SKIP_FOOTER \ + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); \ + MOVE_POS; + +static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { + UInt32 offset; + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 1) +} + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { + UInt32 offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 2) +} + +static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder* p, UInt32* 
distances) { + UInt32 hash2Value, delta2, maxLen, offset; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + delta2 = p->pos - p->hash[hash2Value]; + curMatch = p->hash[kFix3HashSize + hashValue]; + + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = p->pos; + + maxLen = 2; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[0] = maxLen; + distances[1] = delta2 - 1; + offset = 2; + if (maxLen == lenLimit) { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { + UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 = p->pos - p->hash[hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { + UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 
= p->pos - p->hash[hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - + (distances)); + MOVE_POS_RET +} + +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { + UInt32 offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances, 2) - + (distances)); + MOVE_POS_RET +} + +static void Bt2_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + SKIP_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + SKIP_FOOTER + } while (--num != 0); +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + SKIP_FOOTER + } while (--num != 0); +} + +static void Bt3_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + UInt32 hash2Value; + SKIP_HEADER(3) + HASH3_CALC; + curMatch = p->hash[kFix3HashSize + hashValue]; + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = 
p->pos; + SKIP_FOOTER + } while (--num != 0); +} + +static void Bt4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + UInt32 hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[hash2Value] = + p->hash[kFix3HashSize + hash3Value] = p->pos; + p->hash[kFix4HashSize + hashValue] = p->pos; + SKIP_FOOTER + } while (--num != 0); +} + +static void Hc4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + UInt32 hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } while (--num != 0); +} + +void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { + do { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } while (--num != 0); +} + +void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable) { + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } else if (p->numHashBytes == 2) { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } else if (p->numHashBytes == 3) { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } else { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = 
(Mf_Skip_Func)Bt4_MatchFinder_Skip; + } +} +} diff --git a/3rdparty/lzma/Linux/LzFind.h b/3rdparty/lzma/Linux/LzFind.h index 6a71de5..70727b2 100644 --- a/3rdparty/lzma/Linux/LzFind.h +++ b/3rdparty/lzma/Linux/LzFind.h @@ -1,108 +1,108 @@ -/* LzFind.h -- Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZFIND_H -#define __LZFIND_H - -#include "LzmaTypes.h" - -namespace crnlib { - -typedef UInt32 CLzRef; - -typedef struct _CMatchFinder { - Byte* buffer; - UInt32 pos; - UInt32 posLimit; - UInt32 streamPos; - UInt32 lenLimit; - - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - - UInt32 matchMaxLen; - CLzRef* hash; - CLzRef* son; - UInt32 hashMask; - UInt32 cutValue; - - Byte* bufferBase; - ISeqInStream* stream; - int streamEndWasReached; - - UInt32 blockSize; - UInt32 keepSizeBefore; - UInt32 keepSizeAfter; - - UInt32 numHashBytes; - int directInput; - int btMode; - /* int skipModeBits; */ - int bigHash; - UInt32 historySize; - UInt32 fixedHashSize; - UInt32 hashSizeSum; - UInt32 numSons; - SRes result; - UInt32 crc[256]; -} CMatchFinder; - -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) -#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) - -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) - -int MatchFinder_NeedMove(CMatchFinder* p); -Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p); -void MatchFinder_MoveBlock(CMatchFinder* p); -void MatchFinder_ReadIfRequired(CMatchFinder* p); - -void MatchFinder_Construct(CMatchFinder* p); - -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB -*/ -int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc* alloc); -void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc); -void MatchFinder_Normalize3(UInt32 subValue, CLzRef* 
items, UInt32 numItems); -void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue); - -UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* buffer, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32* distances, UInt32 maxLen); - -/* -Conditions: - Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. - Mf_GetPointerToCurrentPos_Func's result must be used only before any other function -*/ - -typedef void (*Mf_Init_Func)(void* object); -typedef Byte (*Mf_GetIndexByte_Func)(void* object, Int32 index); -typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void* object); -typedef const Byte* (*Mf_GetPointerToCurrentPos_Func)(void* object); -typedef UInt32 (*Mf_GetMatches_Func)(void* object, UInt32* distances); -typedef void (*Mf_Skip_Func)(void* object, UInt32); - -typedef struct _IMatchFinder { - Mf_Init_Func Init; - Mf_GetIndexByte_Func GetIndexByte; - Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; - Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; - Mf_GetMatches_Func GetMatches; - Mf_Skip_Func Skip; -} IMatchFinder; - -void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable); - -void MatchFinder_Init(CMatchFinder* p); -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); -void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); -} - -#endif +/* LzFind.h -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZFIND_H +#define __LZFIND_H + +#include "LzmaTypes.h" + +namespace crnlib { + +typedef UInt32 CLzRef; + +typedef struct _CMatchFinder { + Byte* buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + UInt32 matchMaxLen; + CLzRef* hash; 
+ CLzRef* son; + UInt32 hashMask; + UInt32 cutValue; + + Byte* bufferBase; + ISeqInStream* stream; + int streamEndWasReached; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + int directInput; + int btMode; + /* int skipModeBits; */ + int bigHash; + UInt32 historySize; + UInt32 fixedHashSize; + UInt32 hashSizeSum; + UInt32 numSons; + SRes result; + UInt32 crc[256]; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) +#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +int MatchFinder_NeedMove(CMatchFinder* p); +Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p); +void MatchFinder_MoveBlock(CMatchFinder* p); +void MatchFinder_ReadIfRequired(CMatchFinder* p); + +void MatchFinder_Construct(CMatchFinder* p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAlloc* alloc); +void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems); +void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue); + +UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* buffer, CLzRef* son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32* distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void* object); +typedef Byte (*Mf_GetIndexByte_Func)(void* object, Int32 index); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void* object); +typedef const Byte* (*Mf_GetPointerToCurrentPos_Func)(void* object); +typedef UInt32 (*Mf_GetMatches_Func)(void* object, UInt32* distances); +typedef void (*Mf_Skip_Func)(void* object, UInt32); + +typedef struct _IMatchFinder { + Mf_Init_Func Init; + Mf_GetIndexByte_Func GetIndexByte; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder; + +void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable); + +void MatchFinder_Init(CMatchFinder* p); +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); +void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); +} + +#endif diff --git a/3rdparty/lzma/Linux/LzFindMt.cpp b/3rdparty/lzma/Linux/LzFindMt.cpp index cd3ab44..1bf03b1 100644 --- a/3rdparty/lzma/Linux/LzFindMt.cpp +++ b/3rdparty/lzma/Linux/LzFindMt.cpp @@ -1,755 +1,755 @@ -/* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#include "LzHash.h" -#include "LzFindMt.h" - -namespace crnlib { - -void MtSync_Construct(CMtSync* p) { - p->wasCreated = False; - p->csWasInitialized = False; - p->csWasEntered = False; - Thread_Construct(&p->thread); - Event_Construct(&p->canStart); - Event_Construct(&p->wasStarted); - Event_Construct(&p->wasStopped); - Semaphore_Construct(&p->freeSemaphore); - Semaphore_Construct(&p->filledSemaphore); -} - -void MtSync_GetNextBlock(CMtSync* p) { - if (p->needStart) { - p->numProcessedBlocks = 1; - p->needStart = False; - p->stopWriting = 
False; - p->exit = False; - Event_Reset(&p->wasStarted); - Event_Reset(&p->wasStopped); - - Event_Set(&p->canStart); - Event_Wait(&p->wasStarted); - } else { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - p->numProcessedBlocks++; - Semaphore_Release1(&p->freeSemaphore); - } - Semaphore_Wait(&p->filledSemaphore); - CriticalSection_Enter(&p->cs); - p->csWasEntered = True; -} - -/* MtSync_StopWriting must be called if Writing was started */ - -void MtSync_StopWriting(CMtSync* p) { - UInt32 myNumBlocks = p->numProcessedBlocks; - if (!Thread_WasCreated(&p->thread) || p->needStart) - return; - p->stopWriting = True; - if (p->csWasEntered) { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - } - Semaphore_Release1(&p->freeSemaphore); - - Event_Wait(&p->wasStopped); - - while (myNumBlocks++ != p->numProcessedBlocks) { - Semaphore_Wait(&p->filledSemaphore); - Semaphore_Release1(&p->freeSemaphore); - } - p->needStart = True; -} - -void MtSync_Destruct(CMtSync* p) { - if (Thread_WasCreated(&p->thread)) { - MtSync_StopWriting(p); - p->exit = True; - if (p->needStart) - Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); - } - if (p->csWasInitialized) { - CriticalSection_Delete(&p->cs); - p->csWasInitialized = False; - } - - Event_Close(&p->canStart); - Event_Close(&p->wasStarted); - Event_Close(&p->wasStopped); - Semaphore_Close(&p->freeSemaphore); - Semaphore_Close(&p->filledSemaphore); - - p->wasCreated = False; -} - -#define RINOK_THREAD(x) \ - { \ - if ((x) != 0) \ - return SZ_ERROR_THREAD; \ - } - -static SRes MtSync_Create2(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { - if (p->wasCreated) - return SZ_OK; - - RINOK_THREAD(CriticalSection_Init(&p->cs)); - p->csWasInitialized = True; - - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - 
- RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); - RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); - - p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); - p->wasCreated = True; - return SZ_OK; -} - -static SRes MtSync_Create(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { - SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); - if (res != SZ_OK) - MtSync_Destruct(p); - return res; -} - -void MtSync_Init(CMtSync* p) { - p->needStart = True; -} - -#define kMtMaxValForNormalize 0xFFFFFFFF - -#define DEF_GetHeads2(name, v, action) \ - \ -static void GetHeads##name(const Byte* p, UInt32 pos, \ -UInt32* hash, \ - UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc) \ -{ \ - action; \ - for (; numHeads != 0; numHeads--) { \ - \ -const UInt32 value = (v); \ - p++; \ - *heads++ = pos - hash[value]; \ - hash[value] = pos++; \ - } \ - } - -#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) - -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc;) - DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) - DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) - DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) - //DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) - - void HashThreadFunc(CMatchFinderMt* mt) { - CMtSync* p = &mt->hashSync; - for (;;) { - UInt32 numProcessedBlocks = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - for (;;) { - if (p->exit) - return; - if (p->stopWriting) { - p->numProcessedBlocks = numProcessedBlocks; - Event_Set(&p->wasStopped); - break; - } - - { - CMatchFinder* mf = mt->MatchFinder; - if (MatchFinder_NeedMove(mf)) { - CriticalSection_Enter(&mt->btSync.cs); - CriticalSection_Enter(&mt->hashSync.cs); - { - const Byte* 
beforePtr = MatchFinder_GetPointerToCurrentPos(mf); - const Byte* afterPtr; - MatchFinder_MoveBlock(mf); - afterPtr = MatchFinder_GetPointerToCurrentPos(mf); - mt->pointerToCurPos -= beforePtr - afterPtr; - mt->buffer -= beforePtr - afterPtr; - } - CriticalSection_Leave(&mt->btSync.cs); - CriticalSection_Leave(&mt->hashSync.cs); - continue; - } - - Semaphore_Wait(&p->freeSemaphore); - - MatchFinder_ReadIfRequired(mf); - if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { - UInt32 subValue = (mf->pos - mf->historySize - 1); - MatchFinder_ReduceOffsets(mf, subValue); - MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); - } - { - UInt32* heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; - UInt32 num = mf->streamPos - mf->pos; - heads[0] = 2; - heads[1] = num; - if (num >= mf->numHashBytes) { - num = num - mf->numHashBytes + 1; - if (num > kMtHashBlockSize - 2) - num = kMtHashBlockSize - 2; - mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); - heads[0] += num; - } - mf->pos += num; - mf->buffer += num; - } - } - - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt* p) { - MtSync_GetNextBlock(&p->hashSync); - p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; - p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; - p->hashNumAvail = p->hashBuf[p->hashBufPos++]; -} - -#define kEmptyHashValue 0 - -/* #define MFMT_GM_INLINE */ - -#ifdef MFMT_GM_INLINE - -#define NO_INLINE MY_FAST_CALL - -Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32* _distances, UInt32 _maxLen, const UInt32* hash, Int32 limit, UInt32 size, UInt32* posRes) { - do { - UInt32* distances = _distances + 1; - UInt32 curMatch = pos - 
*hash++; - - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - UInt32 cutValue = _cutValue; - UInt32 maxLen = _maxLen; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - break; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - break; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } - pos++; - _cyclicBufferPos++; - cur++; - { - UInt32 num = (UInt32)(distances - _distances); - *_distances = num - 1; - _distances += num; - limit -= num; - } - } while (limit > 0 && --size != 0); - *posRes = pos; - return limit; -} - -#endif - -void BtGetMatches(CMatchFinderMt* p, UInt32* distances) { - UInt32 numProcessed = 0; - UInt32 curPos = 2; - UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); - distances[1] = p->hashNumAvail; - while (curPos < limit) { - if (p->hashBufPos == p->hashBufPosLimit) { - MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; - if (p->hashNumAvail >= p->numHashBytes) - continue; - for (; p->hashNumAvail != 0; p->hashNumAvail--) - distances[curPos++] = 0; - break; - } - { - UInt32 size = p->hashBufPosLimit - p->hashBufPos; - UInt32 lenLimit = p->matchMaxLen; - UInt32 pos = p->pos; - UInt32 cyclicBufferPos = p->cyclicBufferPos; - if (lenLimit >= p->hashNumAvail) - lenLimit = 
p->hashNumAvail; - { - UInt32 size2 = p->hashNumAvail - lenLimit + 1; - if (size2 < size) - size = size2; - size2 = p->cyclicBufferSize - cyclicBufferPos; - if (size2 < size) - size = size2; - } -#ifndef MFMT_GM_INLINE - while (curPos < limit && size-- != 0) { - UInt32* startDistances = distances + curPos; - UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], - pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - startDistances + 1, p->numHashBytes - 1) - - startDistances); - *startDistances = num - 1; - curPos += num; - cyclicBufferPos++; - pos++; - p->buffer++; - } -#else - { - UInt32 posRes; - curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos), size, &posRes); - p->hashBufPos += posRes - pos; - cyclicBufferPos += posRes - pos; - p->buffer += posRes - pos; - pos = posRes; - } -#endif - - numProcessed += pos - p->pos; - p->hashNumAvail -= pos - p->pos; - p->pos = pos; - if (cyclicBufferPos == p->cyclicBufferSize) - cyclicBufferPos = 0; - p->cyclicBufferPos = cyclicBufferPos; - } - } - distances[0] = curPos; -} - -void BtFillBlock(CMatchFinderMt* p, UInt32 globalBlockIndex) { - CMtSync* sync = &p->hashSync; - if (!sync->needStart) { - CriticalSection_Enter(&sync->cs); - sync->csWasEntered = True; - } - - BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); - - if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) { - UInt32 subValue = p->pos - p->cyclicBufferSize; - MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); - p->pos -= subValue; - } - - if (!sync->needStart) { - CriticalSection_Leave(&sync->cs); - sync->csWasEntered = False; - } -} - -void BtThreadFunc(CMatchFinderMt* mt) { - CMtSync* p = &mt->btSync; - for (;;) { - UInt32 blockIndex = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - 
for (;;) { - if (p->exit) - return; - if (p->stopWriting) { - p->numProcessedBlocks = blockIndex; - MtSync_StopWriting(&mt->hashSync); - Event_Set(&p->wasStopped); - break; - } - Semaphore_Wait(&p->freeSemaphore); - BtFillBlock(mt, blockIndex++); - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -void MatchFinderMt_Construct(CMatchFinderMt* p) { - p->hashBuf = 0; - MtSync_Construct(&p->hashSync); - MtSync_Construct(&p->btSync); -} - -void MatchFinderMt_FreeMem(CMatchFinderMt* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->hashBuf); - p->hashBuf = 0; -} - -void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc) { - MtSync_Destruct(&p->hashSync); - MtSync_Destruct(&p->btSync); - MatchFinderMt_FreeMem(p, alloc); -} - -#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) -#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) - -static unsigned MY_STD_CALL HashThreadFunc2(void* p) { - HashThreadFunc((CMatchFinderMt*)p); - return 0; -} -static unsigned MY_STD_CALL BtThreadFunc2(void* p) { - Byte allocaDummy[0x180]; - (void)allocaDummy; - int i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)i; - BtThreadFunc((CMatchFinderMt*)p); - return 0; -} - -SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc) { - CMatchFinder* mf = p->MatchFinder; - p->historySize = historySize; - if (kMtBtBlockSize <= matchMaxLen * 4) - return SZ_ERROR_PARAM; - if (p->hashBuf == 0) { - p->hashBuf = (UInt32*)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); - if (p->hashBuf == 0) - return SZ_ERROR_MEM; - p->btBuf = p->hashBuf + kHashBufferSize; - } - keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); - keepAddBufferAfter += kMtHashBlockSize; - if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) - return SZ_ERROR_MEM; - - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, 
kMtHashNumBlocks)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); - return SZ_OK; -} - -/* Call it after ReleaseStream / SetStream */ -void MatchFinderMt_Init(CMatchFinderMt* p) { - CMatchFinder* mf = p->MatchFinder; - p->btBufPos = p->btBufPosLimit = 0; - p->hashBufPos = p->hashBufPosLimit = 0; - MatchFinder_Init(mf); - p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); - p->btNumAvailBytes = 0; - p->lzPos = p->historySize + 1; - - p->hash = mf->hash; - p->fixedHashSize = mf->fixedHashSize; - p->crc = mf->crc; - - p->son = mf->son; - p->matchMaxLen = mf->matchMaxLen; - p->numHashBytes = mf->numHashBytes; - p->pos = mf->pos; - p->buffer = mf->buffer; - p->cyclicBufferPos = mf->cyclicBufferPos; - p->cyclicBufferSize = mf->cyclicBufferSize; - p->cutValue = mf->cutValue; -} - -/* ReleaseStream is required to finish multithreading */ -void MatchFinderMt_ReleaseStream(CMatchFinderMt* p) { - MtSync_StopWriting(&p->btSync); - /* p->MatchFinder->ReleaseStream(); */ -} - -void MatchFinderMt_Normalize(CMatchFinderMt* p) { - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; -} - -void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt* p) { - UInt32 blockIndex; - MtSync_GetNextBlock(&p->btSync); - blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; - if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); -} - -const Byte* MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt* p) { - return p->pointerToCurPos; -} - -#define GET_NEXT_BLOCK_IF_REQUIRED \ - if (p->btBufPos == p->btBufPosLimit) \ - MatchFinderMt_GetNextBlock_Bt(p); - -UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt* p) { - GET_NEXT_BLOCK_IF_REQUIRED; - return p->btNumAvailBytes; -} - -Byte 
MatchFinderMt_GetIndexByte(CMatchFinderMt* p, Int32 index) { - return p->pointerToCurPos[index]; -} - -UInt32* MixMatches2(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { - UInt32 hash2Value, curMatch2; - UInt32* hash = p->hash; - const Byte* cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH2_CALC - - curMatch2 = hash[hash2Value]; - hash[hash2Value] = lzPos; - - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; - } - return distances; -} - -UInt32* MixMatches3(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { - UInt32 hash2Value, hash3Value, curMatch2, curMatch3; - UInt32* hash = p->hash; - const Byte* cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH3_CALC - - curMatch2 = hash[hash2Value]; - curMatch3 = hash[kFix3HashSize + hash3Value]; - - hash[hash2Value] = - hash[kFix3HashSize + hash3Value] = - lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { - distances[0] = 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; - } - return distances; -} - -/* -UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ hash2Value]; - curMatch3 = hash[kFix3HashSize + hash3Value]; - curMatch4 = hash[kFix4HashSize + hash4Value]; - - hash[ hash2Value] = - hash[kFix3HashSize + hash3Value] = - hash[kFix4HashSize + hash4Value] = - lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - 
distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) - { - distances[0] = 4; - return distances + 2; - } - distances[0] = 3; - distances += 2; - } - - if (curMatch4 >= matchMinPos) - if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] - ) - { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; - } - return distances; -} -*/ - -#define INCREASE_LZ_POS \ - p->lzPos++; \ - p->pointerToCurPos++; - -UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt* p, UInt32* distances) { - const UInt32* btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - p->btNumAvailBytes--; - { - UInt32 i; - for (i = 0; i < len; i += 2) { - *distances++ = *btBuf++; - *distances++ = *btBuf++; - } - } - INCREASE_LZ_POS - return len; -} - -UInt32 MatchFinderMt_GetMatches(CMatchFinderMt* p, UInt32* distances) { - const UInt32* btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - - if (len == 0) { - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); - } else { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32* distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); - do { - *distances2++ = *btBuf++; - *distances2++ = *btBuf++; - } while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); - } - INCREASE_LZ_POS - return len; -} - -#define SKIP_HEADER2 \ - do { \ - GET_NEXT_BLOCK_IF_REQUIRED -#define SKIP_HEADER(n) \ - SKIP_HEADER2 if (p->btNumAvailBytes-- >= 
(n)) { \ - const Byte* cur = p->pointerToCurPos; \ - UInt32* hash = p->hash; -#define SKIP_FOOTER \ - } \ - INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; \ - } \ - while (--num != 0) \ - ; - -void MatchFinderMt0_Skip(CMatchFinderMt* p, UInt32 num) { - SKIP_HEADER2 { - p->btNumAvailBytes--; - SKIP_FOOTER - } - - void MatchFinderMt2_Skip(CMatchFinderMt * p, UInt32 num) { - SKIP_HEADER(2) - UInt32 hash2Value; - MT_HASH2_CALC - hash[hash2Value] = p->lzPos; - SKIP_FOOTER - } - - void MatchFinderMt3_Skip(CMatchFinderMt * p, UInt32 num) { - SKIP_HEADER(3) - UInt32 hash2Value, hash3Value; - MT_HASH3_CALC - hash[kFix3HashSize + hash3Value] = - hash[hash2Value] = - p->lzPos; - SKIP_FOOTER - } - - /* -void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER(4) - UInt32 hash2Value, hash3Value, hash4Value; - MT_HASH4_CALC - hash[kFix4HashSize + hash4Value] = - hash[kFix3HashSize + hash3Value] = - hash[ hash2Value] = - p->lzPos; - SKIP_FOOTER -} -*/ - - void MatchFinderMt_CreateVTable(CMatchFinderMt * p, IMatchFinder * vTable) { - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; - switch (p->MatchFinder->numHashBytes) { - case 2: - p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)0; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; - break; - case 3: - p->GetHeadsFunc = GetHeads3; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; - break; - default: - /* case 4: */ - p->GetHeadsFunc = p->MatchFinder->bigHash ? 
GetHeads4b : GetHeads4; - /* p->GetHeadsFunc = GetHeads4; */ - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; - break; - /* - default: - p->GetHeadsFunc = GetHeads5; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; - break; - */ - } - } -} +/* LzFindMt.c -- multithreaded Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#include "LzHash.h" +#include "LzFindMt.h" + +namespace crnlib { + +void MtSync_Construct(CMtSync* p) { + p->wasCreated = False; + p->csWasInitialized = False; + p->csWasEntered = False; + Thread_Construct(&p->thread); + Event_Construct(&p->canStart); + Event_Construct(&p->wasStarted); + Event_Construct(&p->wasStopped); + Semaphore_Construct(&p->freeSemaphore); + Semaphore_Construct(&p->filledSemaphore); +} + +void MtSync_GetNextBlock(CMtSync* p) { + if (p->needStart) { + p->numProcessedBlocks = 1; + p->needStart = False; + p->stopWriting = False; + p->exit = False; + Event_Reset(&p->wasStarted); + Event_Reset(&p->wasStopped); + + Event_Set(&p->canStart); + Event_Wait(&p->wasStarted); + } else { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + p->numProcessedBlocks++; + Semaphore_Release1(&p->freeSemaphore); + } + Semaphore_Wait(&p->filledSemaphore); + CriticalSection_Enter(&p->cs); + p->csWasEntered = True; +} + +/* MtSync_StopWriting must be called if Writing was started */ + +void MtSync_StopWriting(CMtSync* p) { + UInt32 myNumBlocks = p->numProcessedBlocks; + if (!Thread_WasCreated(&p->thread) || p->needStart) + return; + p->stopWriting = True; + if (p->csWasEntered) { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + } + Semaphore_Release1(&p->freeSemaphore); + + Event_Wait(&p->wasStopped); + + while (myNumBlocks++ != p->numProcessedBlocks) { + Semaphore_Wait(&p->filledSemaphore); + Semaphore_Release1(&p->freeSemaphore); + } + p->needStart = True; +} + +void MtSync_Destruct(CMtSync* p) 
{ + if (Thread_WasCreated(&p->thread)) { + MtSync_StopWriting(p); + p->exit = True; + if (p->needStart) + Event_Set(&p->canStart); + Thread_Wait(&p->thread); + Thread_Close(&p->thread); + } + if (p->csWasInitialized) { + CriticalSection_Delete(&p->cs); + p->csWasInitialized = False; + } + + Event_Close(&p->canStart); + Event_Close(&p->wasStarted); + Event_Close(&p->wasStopped); + Semaphore_Close(&p->freeSemaphore); + Semaphore_Close(&p->filledSemaphore); + + p->wasCreated = False; +} + +#define RINOK_THREAD(x) \ + { \ + if ((x) != 0) \ + return SZ_ERROR_THREAD; \ + } + +static SRes MtSync_Create2(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { + if (p->wasCreated) + return SZ_OK; + + RINOK_THREAD(CriticalSection_Init(&p->cs)); + p->csWasInitialized = True; + + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); + + RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); + RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); + + p->needStart = True; + + RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + p->wasCreated = True; + return SZ_OK; +} + +static SRes MtSync_Create(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { + SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); + if (res != SZ_OK) + MtSync_Destruct(p); + return res; +} + +void MtSync_Init(CMtSync* p) { + p->needStart = True; +} + +#define kMtMaxValForNormalize 0xFFFFFFFF + +#define DEF_GetHeads2(name, v, action) \ + \ +static void GetHeads##name(const Byte* p, UInt32 pos, \ +UInt32* hash, \ + UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc) \ +{ \ + action; \ + for (; numHeads != 0; numHeads--) { \ + \ +const UInt32 value = (v); \ + p++; \ + *heads++ = pos - hash[value]; \ + hash[value] = pos++; \ + } \ + } + +#define 
DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) + +DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc;) + DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) + DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) + DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) + //DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) + + void HashThreadFunc(CMatchFinderMt* mt) { + CMtSync* p = &mt->hashSync; + for (;;) { + UInt32 numProcessedBlocks = 0; + Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) { + if (p->exit) + return; + if (p->stopWriting) { + p->numProcessedBlocks = numProcessedBlocks; + Event_Set(&p->wasStopped); + break; + } + + { + CMatchFinder* mf = mt->MatchFinder; + if (MatchFinder_NeedMove(mf)) { + CriticalSection_Enter(&mt->btSync.cs); + CriticalSection_Enter(&mt->hashSync.cs); + { + const Byte* beforePtr = MatchFinder_GetPointerToCurrentPos(mf); + const Byte* afterPtr; + MatchFinder_MoveBlock(mf); + afterPtr = MatchFinder_GetPointerToCurrentPos(mf); + mt->pointerToCurPos -= beforePtr - afterPtr; + mt->buffer -= beforePtr - afterPtr; + } + CriticalSection_Leave(&mt->btSync.cs); + CriticalSection_Leave(&mt->hashSync.cs); + continue; + } + + Semaphore_Wait(&p->freeSemaphore); + + MatchFinder_ReadIfRequired(mf); + if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { + UInt32 subValue = (mf->pos - mf->historySize - 1); + MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); + } + { + UInt32* heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; + UInt32 num = mf->streamPos - mf->pos; + heads[0] = 2; + heads[1] = num; + if (num >= mf->numHashBytes) { + num = num - mf->numHashBytes + 1; + if (num > kMtHashBlockSize - 2) + num = kMtHashBlockSize - 2; + 
mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + heads[0] += num; + } + mf->pos += num; + mf->buffer += num; + } + } + + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt* p) { + MtSync_GetNextBlock(&p->hashSync); + p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; + p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; + p->hashNumAvail = p->hashBuf[p->hashBufPos++]; +} + +#define kEmptyHashValue 0 + +/* #define MFMT_GM_INLINE */ + +#ifdef MFMT_GM_INLINE + +#define NO_INLINE MY_FAST_CALL + +Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte* cur, CLzRef* son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32* _distances, UInt32 _maxLen, const UInt32* hash, Int32 limit, UInt32 size, UInt32* posRes) { + do { + UInt32* distances = _distances + 1; + UInt32 curMatch = pos - *hash++; + + CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef* ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + UInt32 maxLen = _maxLen; + for (;;) { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) { + *ptr0 = *ptr1 = kEmptyHashValue; + break; + } + { + CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte* pb = cur - delta; + UInt32 len = (len0 < len1 ? 
len0 : len1); + if (pb[len] == cur[len]) { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + break; + } + } + } + if (pb[len] < cur[len]) { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } else { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } + pos++; + _cyclicBufferPos++; + cur++; + { + UInt32 num = (UInt32)(distances - _distances); + *_distances = num - 1; + _distances += num; + limit -= num; + } + } while (limit > 0 && --size != 0); + *posRes = pos; + return limit; +} + +#endif + +void BtGetMatches(CMatchFinderMt* p, UInt32* distances) { + UInt32 numProcessed = 0; + UInt32 curPos = 2; + UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + distances[1] = p->hashNumAvail; + while (curPos < limit) { + if (p->hashBufPos == p->hashBufPosLimit) { + MatchFinderMt_GetNextBlock_Hash(p); + distances[1] = numProcessed + p->hashNumAvail; + if (p->hashNumAvail >= p->numHashBytes) + continue; + for (; p->hashNumAvail != 0; p->hashNumAvail--) + distances[curPos++] = 0; + break; + } + { + UInt32 size = p->hashBufPosLimit - p->hashBufPos; + UInt32 lenLimit = p->matchMaxLen; + UInt32 pos = p->pos; + UInt32 cyclicBufferPos = p->cyclicBufferPos; + if (lenLimit >= p->hashNumAvail) + lenLimit = p->hashNumAvail; + { + UInt32 size2 = p->hashNumAvail - lenLimit + 1; + if (size2 < size) + size = size2; + size2 = p->cyclicBufferSize - cyclicBufferPos; + if (size2 < size) + size = size2; + } +#ifndef MFMT_GM_INLINE + while (curPos < limit && size-- != 0) { + UInt32* startDistances = distances + curPos; + UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], + pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + startDistances + 1, p->numHashBytes - 1) - + 
startDistances); + *startDistances = num - 1; + curPos += num; + cyclicBufferPos++; + pos++; + p->buffer++; + } +#else + { + UInt32 posRes; + curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos), size, &posRes); + p->hashBufPos += posRes - pos; + cyclicBufferPos += posRes - pos; + p->buffer += posRes - pos; + pos = posRes; + } +#endif + + numProcessed += pos - p->pos; + p->hashNumAvail -= pos - p->pos; + p->pos = pos; + if (cyclicBufferPos == p->cyclicBufferSize) + cyclicBufferPos = 0; + p->cyclicBufferPos = cyclicBufferPos; + } + } + distances[0] = curPos; +} + +void BtFillBlock(CMatchFinderMt* p, UInt32 globalBlockIndex) { + CMtSync* sync = &p->hashSync; + if (!sync->needStart) { + CriticalSection_Enter(&sync->cs); + sync->csWasEntered = True; + } + + BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); + + if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) { + UInt32 subValue = p->pos - p->cyclicBufferSize; + MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); + p->pos -= subValue; + } + + if (!sync->needStart) { + CriticalSection_Leave(&sync->cs); + sync->csWasEntered = False; + } +} + +void BtThreadFunc(CMatchFinderMt* mt) { + CMtSync* p = &mt->btSync; + for (;;) { + UInt32 blockIndex = 0; + Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) { + if (p->exit) + return; + if (p->stopWriting) { + p->numProcessedBlocks = blockIndex; + MtSync_StopWriting(&mt->hashSync); + Event_Set(&p->wasStopped); + break; + } + Semaphore_Wait(&p->freeSemaphore); + BtFillBlock(mt, blockIndex++); + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_Construct(CMatchFinderMt* p) { + p->hashBuf = 0; + MtSync_Construct(&p->hashSync); + MtSync_Construct(&p->btSync); +} + +void MatchFinderMt_FreeMem(CMatchFinderMt* p, ISzAlloc* alloc) { + 
alloc->Free(alloc, p->hashBuf); + p->hashBuf = 0; +} + +void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc) { + MtSync_Destruct(&p->hashSync); + MtSync_Destruct(&p->btSync); + MatchFinderMt_FreeMem(p, alloc); +} + +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + +static unsigned MY_STD_CALL HashThreadFunc2(void* p) { + HashThreadFunc((CMatchFinderMt*)p); + return 0; +} +static unsigned MY_STD_CALL BtThreadFunc2(void* p) { + Byte allocaDummy[0x180]; + (void)allocaDummy; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; + BtThreadFunc((CMatchFinderMt*)p); + return 0; +} + +SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc) { + CMatchFinder* mf = p->MatchFinder; + p->historySize = historySize; + if (kMtBtBlockSize <= matchMaxLen * 4) + return SZ_ERROR_PARAM; + if (p->hashBuf == 0) { + p->hashBuf = (UInt32*)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + if (p->hashBuf == 0) + return SZ_ERROR_MEM; + p->btBuf = p->hashBuf + kHashBufferSize; + } + keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); + keepAddBufferAfter += kMtHashBlockSize; + if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) + return SZ_ERROR_MEM; + + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); + return SZ_OK; +} + +/* Call it after ReleaseStream / SetStream */ +void MatchFinderMt_Init(CMatchFinderMt* p) { + CMatchFinder* mf = p->MatchFinder; + p->btBufPos = p->btBufPosLimit = 0; + p->hashBufPos = p->hashBufPosLimit = 0; + MatchFinder_Init(mf); + p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); + p->btNumAvailBytes = 0; + p->lzPos = p->historySize + 1; + + p->hash = mf->hash; + p->fixedHashSize = 
mf->fixedHashSize; + p->crc = mf->crc; + + p->son = mf->son; + p->matchMaxLen = mf->matchMaxLen; + p->numHashBytes = mf->numHashBytes; + p->pos = mf->pos; + p->buffer = mf->buffer; + p->cyclicBufferPos = mf->cyclicBufferPos; + p->cyclicBufferSize = mf->cyclicBufferSize; + p->cutValue = mf->cutValue; +} + +/* ReleaseStream is required to finish multithreading */ +void MatchFinderMt_ReleaseStream(CMatchFinderMt* p) { + MtSync_StopWriting(&p->btSync); + /* p->MatchFinder->ReleaseStream(); */ +} + +void MatchFinderMt_Normalize(CMatchFinderMt* p) { + MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); + p->lzPos = p->historySize + 1; +} + +void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt* p) { + UInt32 blockIndex; + MtSync_GetNextBlock(&p->btSync); + blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); + p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; + p->btBufPosLimit += p->btBuf[p->btBufPos++]; + p->btNumAvailBytes = p->btBuf[p->btBufPos++]; + if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) + MatchFinderMt_Normalize(p); +} + +const Byte* MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt* p) { + return p->pointerToCurPos; +} + +#define GET_NEXT_BLOCK_IF_REQUIRED \ + if (p->btBufPos == p->btBufPosLimit) \ + MatchFinderMt_GetNextBlock_Bt(p); + +UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt* p) { + GET_NEXT_BLOCK_IF_REQUIRED; + return p->btNumAvailBytes; +} + +Byte MatchFinderMt_GetIndexByte(CMatchFinderMt* p, Int32 index) { + return p->pointerToCurPos[index]; +} + +UInt32* MixMatches2(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { + UInt32 hash2Value, curMatch2; + UInt32* hash = p->hash; + const Byte* cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH2_CALC + + curMatch2 = hash[hash2Value]; + hash[hash2Value] = lzPos; + + if (curMatch2 >= matchMinPos) + if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { + *distances++ = 2; + *distances++ = lzPos - curMatch2 - 1; 
+ } + return distances; +} + +UInt32* MixMatches3(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { + UInt32 hash2Value, hash3Value, curMatch2, curMatch3; + UInt32* hash = p->hash; + const Byte* cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH3_CALC + + curMatch2 = hash[hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + + hash[hash2Value] = + hash[kFix3HashSize + hash3Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { + distances[0] = 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { + *distances++ = 3; + *distances++ = lzPos - curMatch3 - 1; + } + return distances; +} + +/* +UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH4_CALC + + curMatch2 = hash[ hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + curMatch4 = hash[kFix4HashSize + hash4Value]; + + hash[ hash2Value] = + hash[kFix3HashSize + hash3Value] = + hash[kFix4HashSize + hash4Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + { + distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 
4 : 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch3 - 1; + if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) + { + distances[0] = 4; + return distances + 2; + } + distances[0] = 3; + distances += 2; + } + + if (curMatch4 >= matchMinPos) + if ( + cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && + cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] + ) + { + *distances++ = 4; + *distances++ = lzPos - curMatch4 - 1; + } + return distances; +} +*/ + +#define INCREASE_LZ_POS \ + p->lzPos++; \ + p->pointerToCurPos++; + +UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt* p, UInt32* distances) { + const UInt32* btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + p->btNumAvailBytes--; + { + UInt32 i; + for (i = 0; i < len; i += 2) { + *distances++ = *btBuf++; + *distances++ = *btBuf++; + } + } + INCREASE_LZ_POS + return len; +} + +UInt32 MatchFinderMt_GetMatches(CMatchFinderMt* p, UInt32* distances) { + const UInt32* btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + + if (len == 0) { + if (p->btNumAvailBytes-- >= 4) + len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + } else { + /* Condition: there are matches in btBuf with length < p->numHashBytes */ + UInt32* distances2; + p->btNumAvailBytes--; + distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + do { + *distances2++ = *btBuf++; + *distances2++ = *btBuf++; + } while ((len -= 2) != 0); + len = (UInt32)(distances2 - (distances)); + } + INCREASE_LZ_POS + return len; +} + +#define SKIP_HEADER2 \ + do { \ + GET_NEXT_BLOCK_IF_REQUIRED +#define SKIP_HEADER(n) \ + SKIP_HEADER2 if (p->btNumAvailBytes-- >= (n)) { \ + const Byte* cur = p->pointerToCurPos; \ + UInt32* hash = p->hash; +#define SKIP_FOOTER \ + } \ + INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; \ 
+ } \ + while (--num != 0) \ + ; + +void MatchFinderMt0_Skip(CMatchFinderMt* p, UInt32 num) { + SKIP_HEADER2 { + p->btNumAvailBytes--; + SKIP_FOOTER + } + + void MatchFinderMt2_Skip(CMatchFinderMt * p, UInt32 num) { + SKIP_HEADER(2) + UInt32 hash2Value; + MT_HASH2_CALC + hash[hash2Value] = p->lzPos; + SKIP_FOOTER + } + + void MatchFinderMt3_Skip(CMatchFinderMt * p, UInt32 num) { + SKIP_HEADER(3) + UInt32 hash2Value, hash3Value; + MT_HASH3_CALC + hash[kFix3HashSize + hash3Value] = + hash[hash2Value] = + p->lzPos; + SKIP_FOOTER + } + + /* +void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER(4) + UInt32 hash2Value, hash3Value, hash4Value; + MT_HASH4_CALC + hash[kFix4HashSize + hash4Value] = + hash[kFix3HashSize + hash3Value] = + hash[ hash2Value] = + p->lzPos; + SKIP_FOOTER +} +*/ + + void MatchFinderMt_CreateVTable(CMatchFinderMt * p, IMatchFinder * vTable) { + vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + switch (p->MatchFinder->numHashBytes) { + case 2: + p->GetHeadsFunc = GetHeads2; + p->MixMatchesFunc = (Mf_Mix_Matches)0; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + break; + case 3: + p->GetHeadsFunc = GetHeads3; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + break; + default: + /* case 4: */ + p->GetHeadsFunc = p->MatchFinder->bigHash ? 
GetHeads4b : GetHeads4; + /* p->GetHeadsFunc = GetHeads4; */ + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + break; + /* + default: + p->GetHeadsFunc = GetHeads5; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; + break; + */ + } + } +} diff --git a/3rdparty/lzma/Linux/LzFindMt.h b/3rdparty/lzma/Linux/LzFindMt.h index 7451707..bc76421 100644 --- a/3rdparty/lzma/Linux/LzFindMt.h +++ b/3rdparty/lzma/Linux/LzFindMt.h @@ -1,98 +1,98 @@ -/* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZFINDMT_H -#define __LZFINDMT_H - -#include "Threads.h" -#include "LzFind.h" - -namespace crnlib { - -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - -typedef struct _CMtSync { - Bool wasCreated; - Bool needStart; - Bool exit; - Bool stopWriting; - - CThread thread; - CAutoResetEvent canStart; - CAutoResetEvent wasStarted; - CAutoResetEvent wasStopped; - CSemaphore freeSemaphore; - CSemaphore filledSemaphore; - Bool csWasInitialized; - Bool csWasEntered; - CCriticalSection cs; - UInt32 numProcessedBlocks; -} CMtSync; - -typedef UInt32* (*Mf_Mix_Matches)(void* p, UInt32 matchMinPos, UInt32* distances); - -/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ -#define kMtCacheLineDummy 128 - -typedef void (*Mf_GetHeads)(const Byte* buffer, UInt32 pos, - UInt32* hash, UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc); - -typedef struct _CMatchFinderMt { - /* LZ */ - const Byte* pointerToCurPos; - UInt32* btBuf; - UInt32 btBufPos; - UInt32 btBufPosLimit; - UInt32 lzPos; - UInt32 btNumAvailBytes; - - UInt32* hash; - UInt32 fixedHashSize; - UInt32 historySize; - const UInt32* crc; - - Mf_Mix_Matches 
MixMatchesFunc; - - /* LZ + BT */ - CMtSync btSync; - Byte btDummy[kMtCacheLineDummy]; - - /* BT */ - UInt32* hashBuf; - UInt32 hashBufPos; - UInt32 hashBufPosLimit; - UInt32 hashNumAvail; - - CLzRef* son; - UInt32 matchMaxLen; - UInt32 numHashBytes; - UInt32 pos; - Byte* buffer; - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be historySize + 1 */ - UInt32 cutValue; - - /* BT + Hash */ - CMtSync hashSync; - /* Byte hashDummy[kMtCacheLineDummy]; */ - - /* Hash */ - Mf_GetHeads GetHeadsFunc; - CMatchFinder* MatchFinder; -} CMatchFinderMt; - -void MatchFinderMt_Construct(CMatchFinderMt* p); -void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc); -SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc); -void MatchFinderMt_CreateVTable(CMatchFinderMt* p, IMatchFinder* vTable); -void MatchFinderMt_ReleaseStream(CMatchFinderMt* p); -} - -#endif +/* LzFindMt.h -- multithreaded Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZFINDMT_H +#define __LZFINDMT_H + +#include "Threads.h" +#include "LzFind.h" + +namespace crnlib { + +#define kMtHashBlockSize (1 << 13) +#define kMtHashNumBlocks (1 << 3) +#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) + +#define kMtBtBlockSize (1 << 14) +#define kMtBtNumBlocks (1 << 6) +#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +typedef struct _CMtSync { + Bool wasCreated; + Bool needStart; + Bool exit; + Bool stopWriting; + + CThread thread; + CAutoResetEvent canStart; + CAutoResetEvent wasStarted; + CAutoResetEvent wasStopped; + CSemaphore freeSemaphore; + CSemaphore filledSemaphore; + Bool csWasInitialized; + Bool csWasEntered; + CCriticalSection cs; + UInt32 numProcessedBlocks; +} CMtSync; + +typedef UInt32* (*Mf_Mix_Matches)(void* p, UInt32 matchMinPos, UInt32* distances); + +/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ +#define kMtCacheLineDummy 128 
+ +typedef void (*Mf_GetHeads)(const Byte* buffer, UInt32 pos, + UInt32* hash, UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc); + +typedef struct _CMatchFinderMt { + /* LZ */ + const Byte* pointerToCurPos; + UInt32* btBuf; + UInt32 btBufPos; + UInt32 btBufPosLimit; + UInt32 lzPos; + UInt32 btNumAvailBytes; + + UInt32* hash; + UInt32 fixedHashSize; + UInt32 historySize; + const UInt32* crc; + + Mf_Mix_Matches MixMatchesFunc; + + /* LZ + BT */ + CMtSync btSync; + Byte btDummy[kMtCacheLineDummy]; + + /* BT */ + UInt32* hashBuf; + UInt32 hashBufPos; + UInt32 hashBufPosLimit; + UInt32 hashNumAvail; + + CLzRef* son; + UInt32 matchMaxLen; + UInt32 numHashBytes; + UInt32 pos; + Byte* buffer; + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be historySize + 1 */ + UInt32 cutValue; + + /* BT + Hash */ + CMtSync hashSync; + /* Byte hashDummy[kMtCacheLineDummy]; */ + + /* Hash */ + Mf_GetHeads GetHeadsFunc; + CMatchFinder* MatchFinder; +} CMatchFinderMt; + +void MatchFinderMt_Construct(CMatchFinderMt* p); +void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc); +SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc); +void MatchFinderMt_CreateVTable(CMatchFinderMt* p, IMatchFinder* vTable); +void MatchFinderMt_ReleaseStream(CMatchFinderMt* p); +} + +#endif diff --git a/3rdparty/lzma/Linux/LzHash.h b/3rdparty/lzma/Linux/LzHash.h index 9a3d7d2..420b624 100644 --- a/3rdparty/lzma/Linux/LzHash.h +++ b/3rdparty/lzma/Linux/LzHash.h @@ -1,63 +1,63 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZHASH_H -#define __LZHASH_H - -#define kHash2Size (1 << 10) -#define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) - -#define kFix3HashSize (kHash2Size) -#define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define 
HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; \ - } - -#define HASH4_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; \ - } - -#define HASH5_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ - hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ - hash4Value &= (kHash4Size - 1); \ - } - -/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - -#define MT_HASH2_CALC \ - hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - } - -#define MT_HASH4_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); \ - } - -#endif +/* LzHash.h -- HASH functions for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZHASH_H +#define __LZHASH_H + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +#define kFix5HashSize (kHash2Size + 
kHash3Size + kHash4Size) + +#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); + +#define HASH3_CALC \ + { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; \ + } + +#define HASH4_CALC \ + { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; \ + } + +#define HASH5_CALC \ + { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ + hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ + hash4Value &= (kHash4Size - 1); \ + } + +/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ +#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + +#define MT_HASH2_CALC \ + hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC \ + { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + } + +#define MT_HASH4_CALC \ + { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); \ + } + +#endif diff --git a/3rdparty/lzma/Linux/LzmaDec.cpp b/3rdparty/lzma/Linux/LzmaDec.cpp index 4e84e64..5fb575e 100644 --- a/3rdparty/lzma/Linux/LzmaDec.cpp +++ b/3rdparty/lzma/Linux/LzmaDec.cpp @@ -1,946 +1,946 @@ -/* LzmaDec.c -- LZMA Decoder -2008-11-06 : Igor Pavlov : Public domain */ - -#include "LzmaDec.h" - -#include - 
-namespace crnlib { - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE \ - if (range < kTopValue) { \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0(p) \ - ttt = *(p); \ - NORMALIZE; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) \ - IF_BIT_0(p) { \ - UPDATE_0(p); \ - i = (i + i); \ - A0; \ - } \ - else { \ - UPDATE_1(p); \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT(p, i) GET_BIT2(p, i, ;, ;) - -#define TREE_GET_BIT(probs, i) \ - { GET_BIT((probs + i), i); } -#define TREE_DECODE(probs, limit, i) \ - { \ - i = 1; \ - do { \ - TREE_GET_BIT(probs, i); \ - } while (i < limit); \ - i -= limit; \ - } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { \ - i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; \ - } -#endif - -#define NORMALIZE_CHECK \ - if (range < kTopValue) { \ - if (buf >= bufLimit) \ - return DUMMY_ERROR; \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0_CHECK(p) \ - ttt = *(p); \ - NORMALIZE_CHECK; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK \ - range -= bound; \ - code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) \ - IF_BIT_0_CHECK(p) { \ - UPDATE_0_CHECK; \ - i = (i + i); \ - A0; \ - } \ 
- else { \ - UPDATE_1_CHECK; \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { \ - i = 1; \ - do { \ - GET_BIT_CHECK(probs + i, i) \ - } while (i < limit); \ - i -= limit; \ - } - -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - -#define kNumStates 12 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 768 - -#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << 
((p)->lc + (p)->lp))) - -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG -#endif - - static const Byte kLiteralNextStates[kNumStates * 2] = - { - 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, - 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; - -#define LZMA_DIC_MIN (1 << 12) - -/* First LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization -Out: - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker - = kMatchSpecLenStart + 2 : State Init Marker -*/ - -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { - CLzmaProb* probs = p->probs; - - unsigned state = p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; - unsigned lc = p->prop.lc; - - Byte* dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte* buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = processedPos & pbMask; - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (checkDicSize != 0 || processedPos != 0) - prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); - - if (state < kNumLitStates) { - symbol = 1; - do { - GET_BIT(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0)]; - unsigned offs = 0x100; - symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - dic[dicPos++] = (Byte)symbol; - processedPos++; - - state = kLiteralNextStates[state]; - /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */ - continue; - } - else { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else { - UPDATE_1(prob); - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) { - UPDATE_0(prob); - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - distance = rep1; - } - else { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - distance = rep2; - } - else { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 
8 : 11; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0(probLen) { - UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = (1 << kLenNumLowBits); - } - else { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) { - UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = (1 << kLenNumMidBits); - } - else { - UPDATE_1(probLen); - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, limit, len); - len += offset; - } - - if (state >= kNumStates) { - UInt32 distance; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) { - unsigned posSlot = (unsigned)distance; - int numDirectBits = (int)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) { - distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; - { - UInt32 mask = 1; - unsigned i = 1; - do { - GET_BIT2(prob + i, i, ;, distance |= mask); - mask <<= 1; - } while (--numDirectBits != 0); - } - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } while (--numDirectBits != 0); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; - GET_BIT2(prob + i, i, ;, distance |= 1); - GET_BIT2(prob + i, i, ;, distance |= 2); - GET_BIT2(prob + i, i, ;, distance |= 4); - GET_BIT2(prob + i, i, ;, distance |= 8); - } - if (distance == (UInt32)0xFFFFFFFF) { - len += 
kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; - if (checkDicSize == 0) { - if (distance >= processedPos) - return SZ_ERROR_DATA; - } else if (distance >= checkDicSize) - return SZ_ERROR_DATA; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; - /* state = kLiteralNextStates[state]; */ - } - - len += kMatchMinLen; - - if (limit == dicPos) - return SZ_ERROR_DATA; - { - SizeT rem = limit - dicPos; - unsigned curLen = ((rem < len) ? (unsigned)rem : len); - SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); - - processedPos += curLen; - - len -= curLen; - if (pos + curLen <= dicBufSize) { - Byte* dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte* lim = dest + curLen; - dicPos += curLen; - do - *(dest) = (Byte) * (dest + src); - while (++dest != lim); - } else { - do { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } while (--curLen != 0); - } - } - } - } while (dicPos < limit && buf < bufLimit); - NORMALIZE; - p->buf = buf; - p->range = range; - p->code = code; - p->remainLen = len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; - p->state = state; - - return SZ_OK; -} - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec* p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) { - Byte* dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; - UInt32 rep0 = p->reps[0]; - if (limit - dicPos < len) - len = (unsigned)(limit - dicPos); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - - p->processedPos += len; - p->remainLen -= len; - while (len-- != 0) { - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { - do { - SizeT limit2 = limit; - if (p->checkDicSize == 0) { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - } - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); - if (p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - LzmaDec_WriteRem(p, limit); - } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - if (p->remainLen > kMatchSpecLenStart) { - p->remainLen = kMatchSpecLenStart; - } - return 0; -} - -typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec* p, const Byte* buf, SizeT inSize) { - UInt32 range = p->range; - UInt32 code = p->code; - const Byte* bufLimit = buf + inSize; - CLzmaProb* probs = p->probs; - unsigned state = p->state; - ELzmaDummy res; - - { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += (LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) { - unsigned symbol = 1; - do { - GET_BIT_CHECK(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + - ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else { - UPDATE_1_CHECK; - } - } - else { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - } - else { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - } - else { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = 1 << kLenNumLowBits; - } - else { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; - } - else { - UPDATE_1_CHECK; - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) { - unsigned posSlot; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) { - int numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } while (--numDirectBits != 0); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; - do { - GET_BIT_CHECK(prob + i, i); - } while (--numDirectBits != 0); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - -static void LzmaDec_InitRc(CLzmaDec* p, const Byte* data) { - p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); - p->range = 0xFFFFFFFF; - p->needFlush = 0; -} - -void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState) { - p->needFlush = 1; - p->remainLen = 0; - p->tempBufSize = 0; - - if (initDic) { - p->processedPos = 0; - p->checkDicSize = 0; - p->needInitState = 1; - } - if (initState) - p->needInitState = 1; -} - -void LzmaDec_Init(CLzmaDec* p) { - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - -static void LzmaDec_InitStateReal(CLzmaDec* p) { - UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); - UInt32 i; - CLzmaProb* probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} - -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, const Byte* src, SizeT* srcLen, - ELzmaFinishMode finishMode, ELzmaStatus* status) { - SizeT inSize = *srcLen; - (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); - - *status = LZMA_STATUS_NOT_SPECIFIED; - - while 
(p->remainLen != kMatchSpecLenStart) { - int checkEndMarkNow; - - if (p->needFlush != 0) { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - - LzmaDec_InitRc(p, p->tempBuf); - p->tempBufSize = 0; - } - - checkEndMarkNow = 0; - if (p->dicPos >= dicLimit) { - if (p->remainLen == 0 && p->code == 0) { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->needInitState) - LzmaDec_InitStateReal(p); - - if (p->tempBufSize == 0) { - SizeT processed; - const Byte* bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } else { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); - if (dummyRes == DUMMY_ERROR) { - (*srcLen) += lookAhead; - 
*status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); - (*srcLen) += lookAhead; - src += lookAhead; - inSize -= lookAhead; - p->tempBufSize = 0; - } - } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; -} - -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) { - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } else { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->probs); - p->probs = 0; -} - -static void LzmaDec_FreeDict(CLzmaDec* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->dic); - p->dic = 0; -} - -void LzmaDec_Free(CLzmaDec* p, ISzAlloc* alloc) { - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size) { - UInt32 dicSize; - Byte d; - - if (size 
< LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); - - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - - p->lc = d % 9; - d /= 9; - p->pb = d / 5; - p->lp = d % 5; - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec* p, const CLzmaProps* propNew, ISzAlloc* alloc) { - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (p->probs == 0 || numProbs != p->numProbs) { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb*)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; - if (p->probs == 0) - return SZ_ERROR_MEM; - } - return SZ_OK; -} - -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { - CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDec_Allocate(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - dicBufSize = propNew.dicSize; - if (p->dic == 0 || dicBufSize != p->dicBufSize) { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte*)alloc->Alloc(alloc, dicBufSize); - if (p->dic == 0) { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc) { - CLzmaDec p; - SRes res; - SizeT inSize = *srcLen; - SizeT outSize = *destLen; - *srcLen = *destLen = 0; - if (inSize < RC_INIT_SIZE) - return SZ_ERROR_INPUT_EOF; - - 
LzmaDec_Construct(&p); - res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); - if (res != 0) - return res; - p.dic = dest; - p.dicBufSize = outSize; - - LzmaDec_Init(&p); - - *srcLen = inSize; - res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - - (*destLen) = p.dicPos; - LzmaDec_FreeProbs(&p, alloc); - return res; -} -} +/* LzmaDec.c -- LZMA Decoder +2008-11-06 : Igor Pavlov : Public domain */ + +#include "LzmaDec.h" + +#include + +namespace crnlib { + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_INIT_SIZE 5 + +#define NORMALIZE \ + if (range < kTopValue) { \ + range <<= 8; \ + code = (code << 8) | (*buf++); \ + } + +#define IF_BIT_0(p) \ + ttt = *(p); \ + NORMALIZE; \ + bound = (range >> kNumBitModelTotalBits) * ttt; \ + if (code < bound) +#define UPDATE_0(p) \ + range = bound; \ + *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) \ + range -= bound; \ + code -= bound; \ + *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) \ + IF_BIT_0(p) { \ + UPDATE_0(p); \ + i = (i + i); \ + A0; \ + } \ + else { \ + UPDATE_1(p); \ + i = (i + i) + 1; \ + A1; \ + } +#define GET_BIT(p, i) GET_BIT2(p, i, ;, ;) + +#define TREE_GET_BIT(probs, i) \ + { GET_BIT((probs + i), i); } +#define TREE_DECODE(probs, limit, i) \ + { \ + i = 1; \ + do { \ + TREE_GET_BIT(probs, i); \ + } while (i < limit); \ + i -= limit; \ + } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { \ + i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + 
TREE_GET_BIT(probs, i); \ + i -= 0x40; \ + } +#endif + +#define NORMALIZE_CHECK \ + if (range < kTopValue) { \ + if (buf >= bufLimit) \ + return DUMMY_ERROR; \ + range <<= 8; \ + code = (code << 8) | (*buf++); \ + } + +#define IF_BIT_0_CHECK(p) \ + ttt = *(p); \ + NORMALIZE_CHECK; \ + bound = (range >> kNumBitModelTotalBits) * ttt; \ + if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK \ + range -= bound; \ + code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) \ + IF_BIT_0_CHECK(p) { \ + UPDATE_0_CHECK; \ + i = (i + i); \ + A0; \ + } \ + else { \ + UPDATE_1_CHECK; \ + i = (i + i) + 1; \ + A1; \ + } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { \ + i = 1; \ + do { \ + GET_BIT_CHECK(probs + i, i) \ + } while (i < limit); \ + i -= limit; \ + } + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenChoice 0 +#define LenChoice2 (LenChoice + 1) +#define LenLow (LenChoice2 + 1) +#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) +#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define kNumStates 12 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define IsMatch 0 +#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 
+ kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRep0Long (IsRepG2 + kNumStates) +#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) +#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) +#define LenCoder (Align + kAlignTableSize) +#define RepLenCoder (LenCoder + kNumLenProbs) +#define Literal (RepLenCoder + kNumLenProbs) + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + +#if Literal != LZMA_BASE_SIZE +StopCompilingDueBUG +#endif + + static const Byte kLiteralNextStates[kNumStates * 2] = + { + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, + 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; + +#define LZMA_DIC_MIN (1 << 12) + +/* First LZMA-symbol is always decoded. +And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +Out: + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : Flush marker + = kMatchSpecLenStart + 2 : State Init Marker +*/ + +static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { + CLzmaProb* probs = p->probs; + + unsigned state = p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; + unsigned lc = p->prop.lc; + + Byte* dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte* buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do { + CLzmaProb* prob; + UInt32 bound; + unsigned ttt; + unsigned posState = processedPos & pbMask; + + prob = probs + IsMatch + 
(state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (checkDicSize != 0 || processedPos != 0) + prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + + if (state < kNumLitStates) { + symbol = 1; + do { + GET_BIT(prob + symbol, symbol) + } while (symbol < 0x100); + } else { + unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + unsigned offs = 0x100; + symbol = 1; + do { + unsigned bit; + CLzmaProb* probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + } while (symbol < 0x100); + } + dic[dicPos++] = (Byte)symbol; + processedPos++; + + state = kLiteralNextStates[state]; + /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */ + continue; + } + else { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else { + UPDATE_1(prob); + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) { + UPDATE_0(prob); + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) { + UPDATE_0(prob); + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) { + UPDATE_0(prob); + distance = rep1; + } + else { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) { + UPDATE_0(prob); + distance = rep2; + } + else { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 
8 : 11; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb* probLen = prob + LenChoice; + IF_BIT_0(probLen) { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = (1 << kLenNumLowBits); + } + else { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = (1 << kLenNumMidBits); + } + else { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, limit, len); + len += offset; + } + + if (state >= kNumStates) { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) { + unsigned posSlot = (unsigned)distance; + int numDirectBits = (int)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) { + distance <<= numDirectBits; + prob = probs + SpecPos + distance - posSlot - 1; + { + UInt32 mask = 1; + unsigned i = 1; + do { + GET_BIT2(prob + i, i, ;, distance |= mask); + mask <<= 1; + } while (--numDirectBits != 0); + } + } else { + numDirectBits -= kNumAlignBits; + do { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } while (--numDirectBits != 0); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + GET_BIT2(prob + i, i, ;, distance |= 1); + GET_BIT2(prob + i, i, ;, distance |= 2); + GET_BIT2(prob + i, i, ;, distance |= 4); + GET_BIT2(prob + i, i, ;, distance |= 8); + } + if (distance == (UInt32)0xFFFFFFFF) { + len += 
kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + if (checkDicSize == 0) { + if (distance >= processedPos) + return SZ_ERROR_DATA; + } else if (distance >= checkDicSize) + return SZ_ERROR_DATA; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + /* state = kLiteralNextStates[state]; */ + } + + len += kMatchMinLen; + + if (limit == dicPos) + return SZ_ERROR_DATA; + { + SizeT rem = limit - dicPos; + unsigned curLen = ((rem < len) ? (unsigned)rem : len); + SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); + + processedPos += curLen; + + len -= curLen; + if (pos + curLen <= dicBufSize) { + Byte* dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte* lim = dest + curLen; + dicPos += curLen; + do + *(dest) = (Byte) * (dest + src); + while (++dest != lim); + } else { + do { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } while (--curLen != 0); + } + } + } + } while (dicPos < limit && buf < bufLimit); + NORMALIZE; + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = state; + + return SZ_OK; +} + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec* p, SizeT limit) { + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) { + Byte* dic = p->dic; + SizeT dicPos = p->dicPos; + SizeT dicBufSize = p->dicBufSize; + unsigned len = p->remainLen; + UInt32 rep0 = p->reps[0]; + if (limit - dicPos < len) + len = (unsigned)(limit - dicPos); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += len; + p->remainLen -= len; + while (len-- != 0) { + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { + do { + SizeT limit2 = limit; + if (p->checkDicSize == 0) { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + } + RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + if (p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + LzmaDec_WriteRem(p, limit); + } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + if (p->remainLen > kMatchSpecLenStart) { + p->remainLen = kMatchSpecLenStart; + } + return 0; +} + +typedef enum { + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec* p, const Byte* buf, SizeT inSize) { + UInt32 range = p->range; + UInt32 code = p->code; + const Byte* bufLimit = buf + inSize; + CLzmaProb* probs = p->probs; + unsigned state = p->state; + ELzmaDummy res; + + { + CLzmaProb* prob; + UInt32 bound; + unsigned ttt; + unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += (LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) { + unsigned symbol = 1; + do { + GET_BIT_CHECK(prob + symbol, symbol) + } while (symbol < 0x100); + } else { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do { + unsigned bit; + CLzmaProb* probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + } while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else { + UPDATE_1_CHECK; + } + } + else { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK; + } + else { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) { + UPDATE_0_CHECK; + } + else { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb* probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) { + UPDATE_0_CHECK; + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = 1 << kLenNumLowBits; + } + else { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) { + UPDATE_0_CHECK; + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumMidBits; + } + else { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) { + int numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + } else { + numDirectBits -= kNumAlignBits; + do { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } while (--numDirectBits != 0); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + do { + GET_BIT_CHECK(prob + i, i); + } while (--numDirectBits != 0); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + +static void LzmaDec_InitRc(CLzmaDec* p, const Byte* data) { + p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); + p->range = 0xFFFFFFFF; + p->needFlush = 0; +} + +void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState) { + p->needFlush = 1; + p->remainLen = 0; + p->tempBufSize = 0; + + if (initDic) { + p->processedPos = 0; + p->checkDicSize = 0; + p->needInitState = 1; + } + if (initState) + p->needInitState = 1; +} + +void LzmaDec_Init(CLzmaDec* p) { + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + +static void LzmaDec_InitStateReal(CLzmaDec* p) { + UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); + UInt32 i; + CLzmaProb* probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + p->needInitState = 0; +} + +SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, const Byte* src, SizeT* srcLen, + ELzmaFinishMode finishMode, ELzmaStatus* status) { + SizeT inSize = *srcLen; + (*srcLen) = 0; + LzmaDec_WriteRem(p, dicLimit); + + *status = LZMA_STATUS_NOT_SPECIFIED; + + while 
(p->remainLen != kMatchSpecLenStart) { + int checkEndMarkNow; + + if (p->needFlush != 0) { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize < RC_INIT_SIZE) { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + + LzmaDec_InitRc(p, p->tempBuf); + p->tempBufSize = 0; + } + + checkEndMarkNow = 0; + if (p->dicPos >= dicLimit) { + if (p->remainLen == 0 && p->code == 0) { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->needInitState) + LzmaDec_InitStateReal(p); + + if (p->tempBufSize == 0) { + SizeT processed; + const Byte* bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (SizeT)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } else { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { + int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); + if (dummyRes == DUMMY_ERROR) { + (*srcLen) += lookAhead; + 
*status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); + (*srcLen) += lookAhead; + src += lookAhead; + inSize -= lookAhead; + p->tempBufSize = 0; + } + } + if (p->code == 0) + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; +} + +SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) { + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } else { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->probs); + p->probs = 0; +} + +static void LzmaDec_FreeDict(CLzmaDec* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->dic); + p->dic = 0; +} + +void LzmaDec_Free(CLzmaDec* p, ISzAlloc* alloc) { + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size) { + UInt32 dicSize; + Byte d; + + if (size 
< LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = d % 9; + d /= 9; + p->pb = d / 5; + p->lp = d % 5; + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec* p, const CLzmaProps* propNew, ISzAlloc* alloc) { + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (p->probs == 0 || numProbs != p->numProbs) { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb*)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); + p->numProbs = numProbs; + if (p->probs == 0) + return SZ_ERROR_MEM; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + dicBufSize = propNew.dicSize; + if (p->dic == 0 || dicBufSize != p->dicBufSize) { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte*)alloc->Alloc(alloc, dicBufSize); + if (p->dic == 0) { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, + const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus* status, ISzAlloc* alloc) { + CLzmaDec p; + SRes res; + SizeT inSize = *srcLen; + SizeT outSize = *destLen; + *srcLen = *destLen = 0; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + + 
LzmaDec_Construct(&p); + res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); + if (res != 0) + return res; + p.dic = dest; + p.dicBufSize = outSize; + + LzmaDec_Init(&p); + + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + + (*destLen) = p.dicPos; + LzmaDec_FreeProbs(&p, alloc); + return res; +} +} diff --git a/3rdparty/lzma/Linux/LzmaDec.h b/3rdparty/lzma/Linux/LzmaDec.h index 40f9bce..6bbff12 100644 --- a/3rdparty/lzma/Linux/LzmaDec.h +++ b/3rdparty/lzma/Linux/LzmaDec.h @@ -1,221 +1,221 @@ -/* LzmaDec.h -- LZMA Decoder -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZMADEC_H -#define __LZMADEC_H - -#include "LzmaTypes.h" - -namespace crnlib { - -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, - but memory usage for CLzmaDec::probs will be doubled in that case */ - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - -/* ---------- LZMA Properties ---------- */ - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaProps { - unsigned lc, lp, pb; - UInt32 dicSize; -} CLzmaProps; - -/* LzmaProps_Decode - decodes properties -Returns: - SZ_OK - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size); - -/* ---------- LZMA Decoder state ---------- */ - -/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. 
- Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ - -#define LZMA_REQUIRED_INPUT_MAX 20 - -typedef struct -{ - CLzmaProps prop; - CLzmaProb* probs; - Byte* dic; - const Byte* buf; - UInt32 range, code; - SizeT dicPos; - SizeT dicBufSize; - UInt32 processedPos; - UInt32 checkDicSize; - unsigned state; - UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; - UInt32 numProbs; - unsigned tempBufSize; - Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; -} CLzmaDec; - -#define LzmaDec_Construct(p) \ - { \ - (p)->dic = 0; \ - (p)->probs = 0; \ - } - -void LzmaDec_Init(CLzmaDec* p); - -/* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ - -typedef enum { - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ -} ELzmaFinishMode; - -/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! - - You must use LZMA_FINISH_END, when you know that current output buffer - covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. - - If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, - and output value of destLen will be less than output buffer size limit. - You can check status result also. - - You can use multiple checks to test data integrity after full decompression: - 1) Check Result and "status" variable. - 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. - 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. - You must use correct finish mode in that case. */ - -typedef enum { - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ -} ELzmaStatus; - -/* ELzmaStatus is used only as output value for function call */ - -/* ---------- Interfaces ---------- */ - -/* There are 3 levels of interfaces: - 1) Dictionary Interface - 2) Buffer Interface - 3) One Call Interface - You can select any of these interfaces, but don't mix functions from different - groups for same object. */ - -/* There are two variants to allocate state for Dictionary Interface: - 1) LzmaDec_Allocate / LzmaDec_Free - 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs - You can use variant 2, if you set dictionary buffer manually. - For Buffer Interface you must always use variant 1. - -LzmaDec_Allocate* can return: - SZ_OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc); - -SRes LzmaDec_Allocate(CLzmaDec* state, const Byte* prop, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); - -/* ---------- Dictionary Interface ---------- */ - -/* You can use it, if you want to eliminate the overhead for data copying from - dictionary to some other external buffer. - You must work with CLzmaDec variables directly in this interface. - - STEPS: - LzmaDec_Constr() - LzmaDec_Allocate() - for (each new stream) - { - LzmaDec_Init() - while (it needs more decompression) - { - LzmaDec_DecodeToDic() - use data from CLzmaDec::dic and update CLzmaDec::dicPos - } - } - LzmaDec_Free() -*/ - -/* LzmaDec_DecodeToDic - - The decoding to internal dictionary buffer (CLzmaDec::dic). - You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! 
- -finishMode: - It has meaning only if the decoding reaches output limit (dicLimit). - LZMA_FINISH_ANY - Decode just dicLimit bytes. - LZMA_FINISH_END - Stream must be finished after dicLimit. - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error -*/ - -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -/* ---------- Buffer Interface ---------- */ - -/* It's zlib-like interface. - See LzmaDec_DecodeToDic description for information about STEPS and return results, - but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need - to work with CLzmaDec variables manually. - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). -*/ - -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -/* ---------- One Call Interface ---------- */ - -/* LzmaDecode - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
-*/ - -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc); -} - -#endif +/* LzmaDec.h -- LZMA Decoder +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZMADEC_H +#define __LZMADEC_H + +#include "LzmaTypes.h" + +namespace crnlib { + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb UInt16 +#endif + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps { + unsigned lc, lp, pb; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size); + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + CLzmaProps prop; + CLzmaProb* probs; + Byte* dic; + const Byte* buf; + UInt32 range, code; + SizeT dicPos; + SizeT dicBufSize; + UInt32 processedPos; + UInt32 checkDicSize; + unsigned state; + UInt32 reps[4]; + unsigned remainLen; + int needFlush; + int needInitState; + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_Construct(p) \ + { \ + (p)->dic = 0; \ + (p)->probs = 0; \ + } + +void LzmaDec_Init(CLzmaDec* p); + +/* There are two types of LZMA streams: + 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. + 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. 
*/ + +typedef enum { + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum { + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. 
+ +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc); +void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc); + +SRes LzmaDec_Allocate(CLzmaDec* state, const Byte* prop, unsigned propsSize, ISzAlloc* alloc); +void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Constr() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, + const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. 
+ +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, + const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, + const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus* status, ISzAlloc* alloc); +} + +#endif diff --git a/3rdparty/lzma/Linux/LzmaEnc.cpp b/3rdparty/lzma/Linux/LzmaEnc.cpp index a322799..943695c 100644 --- a/3rdparty/lzma/Linux/LzmaEnc.cpp +++ b/3rdparty/lzma/Linux/LzmaEnc.cpp @@ -1,2084 +1,2084 @@ -/* LzmaEnc.c -- LZMA Encoder -2008-10-04 : Igor Pavlov : Public domain */ - -#include - -/* #define SHOW_STAT */ -/* #define SHOW_STAT2 */ - -#if defined(SHOW_STAT) || defined(SHOW_STAT2) -#include -#endif - -#include "LzmaEnc.h" - -#include "LzFind.h" -#ifdef COMPRESS_MF_MT -#include "LzFindMt.h" -#endif - -namespace crnlib { - -#ifdef SHOW_STAT -static int ttt = 0; -#endif - -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) - -#define kBlockSize (9 << 10) -#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) - -#define kNumMaxDirectBits 
(31) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 -#define kProbInitValue (kBitModelTotal >> 1) - -#define kNumMoveReducingBits 4 -#define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) - -void LzmaEncProps_Init(CLzmaEncProps* p) { - p->level = 5; - p->dictSize = p->mc = 0; - p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; - p->writeEndMark = 0; -} - -void LzmaEncProps_Normalize(CLzmaEncProps* p) { - int level = p->level; - if (level < 0) - level = 5; - p->level = level; - if (p->dictSize == 0) - p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); - if (p->lc < 0) - p->lc = 3; - if (p->lp < 0) - p->lp = 0; - if (p->pb < 0) - p->pb = 2; - if (p->algo < 0) - p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) - p->fb = (level < 7 ? 32 : 64); - if (p->btMode < 0) - p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) - p->numHashBytes = 4; - if (p->mc == 0) - p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); - if (p->numThreads < 0) - p->numThreads = ((p->btMode && p->algo) ? 
2 : 1); -} - -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2) { - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - return props.dictSize; -} - -/* #define LZMA_LOG_BSR */ -/* Define it for Intel's CPU */ - -#ifdef LZMA_LOG_BSR - -#define kDicLogSizeMaxCompress 30 - -#define BSR2_RET(pos, res) \ - { \ - unsigned long i; \ - _BitScanReverse(&i, (pos)); \ - res = (i + i) + ((pos >> (i - 1)) & 1); \ - } - -UInt32 GetPosSlot1(UInt32 pos) { - UInt32 res; - BSR2_RET(pos, res); - return res; -} -#define GetPosSlot2(pos, res) \ - { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) \ - { \ - if (pos < 2) \ - res = pos; \ - else \ - BSR2_RET(pos, res); \ - } - -#else - -#define kNumLogBits (9 + (int)sizeof(size_t) / 2) -#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) - -void LzmaEnc_FastPosInit(Byte* g_FastPos) { - int c = 2, slotFast; - g_FastPos[0] = 0; - g_FastPos[1] = 1; - - for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) { - UInt32 k = (1 << ((slotFast >> 1) - 1)); - UInt32 j; - for (j = 0; j < k; j++, c++) - g_FastPos[c] = (Byte)slotFast; - } -} - -#define BSR2_RET(pos, res) \ - { \ - UInt32 i = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> i] + (i * 2); \ - } -/* -#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? 
\ - p->g_FastPos[pos >> 6] + 12 : \ - p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } -*/ - -#define GetPosSlot1(pos) p->g_FastPos[pos] -#define GetPosSlot2(pos, res) \ - { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) \ - { \ - if (pos < kNumFullDistances) \ - res = p->g_FastPos[pos]; \ - else \ - BSR2_RET(pos, res); \ - } - -#endif - -#define LZMA_NUM_REPS 4 - -typedef unsigned CState; - -typedef struct _COptimal { - UInt32 price; - - CState state; - int prev1IsChar; - int prev2; - - UInt32 posPrev2; - UInt32 backPrev2; - - UInt32 posPrev; - UInt32 backPrev; - UInt32 backs[LZMA_NUM_REPS]; -} COptimal; - -#define kNumOpts (1 << 12) - -#define kNumLenToPosStates 4 -#define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 -#define kDicLogSizeMax 32 -#define kDistTableSizeMax (kDicLogSizeMax * 2) - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) -#define kAlignMask (kAlignTableSize - 1) - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - -#define kNumFullDistances (1 << (kEndPosModelIndex / 2)) - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - -#define LZMA_PB_MAX 4 -#define LZMA_LC_MAX 8 -#define LZMA_LP_MAX 4 - -#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define LZMA_MATCH_LEN_MIN 2 -#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) - -#define kNumStates 12 - -typedef struct -{ - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; - CLzmaProb 
high[kLenNumHighSymbols]; -} CLenEnc; - -typedef struct -{ - CLenEnc p; - UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - UInt32 tableSize; - UInt32 counters[LZMA_NUM_PB_STATES_MAX]; -} CLenPriceEnc; - -typedef struct _CRangeEnc { - UInt32 range; - Byte cache; - UInt64 low; - UInt64 cacheSize; - Byte* buf; - Byte* bufLim; - Byte* bufBase; - ISeqOutStream* outStream; - UInt64 processed; - SRes res; -} CRangeEnc; - -typedef struct _CSeqInStreamBuf { - ISeqInStream funcTable; - const Byte* data; - SizeT rem; -} CSeqInStreamBuf; - -static SRes MyRead(void* pp, void* data, size_t* size) { - size_t curSize = *size; - CSeqInStreamBuf* p = (CSeqInStreamBuf*)pp; - if (p->rem < curSize) - curSize = p->rem; - memcpy(data, p->data, curSize); - p->rem -= curSize; - p->data += curSize; - *size = curSize; - return SZ_OK; -} - -typedef struct -{ - CLzmaProb* litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; -} CSaveState; - -typedef struct _CLzmaEnc { - IMatchFinder matchFinder; - void* matchFinderObj; - -#ifdef COMPRESS_MF_MT - Bool mtMode; - CMatchFinderMt matchFinderMt; -#endif - - CMatchFinder matchFinderBase; - -#ifdef COMPRESS_MF_MT - Byte pad[128]; -#endif - - UInt32 optimumEndIndex; - UInt32 optimumCurrentIndex; - - UInt32 longestMatchLength; - UInt32 numPairs; - UInt32 numAvail; - COptimal opt[kNumOpts]; - -#ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; -#endif - - UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 
2 + 1]; - UInt32 numFastBytes; - UInt32 additionalOffset; - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; - - UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - UInt32 alignPrices[kAlignTableSize]; - UInt32 alignPriceCount; - - UInt32 distTableSize; - - unsigned lc, lp, pb; - unsigned lpMask, pbMask; - - CLzmaProb* litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - unsigned lclp; - - Bool fastMode; - - CRangeEnc rc; - - Bool writeEndMark; - UInt64 nowPos64; - UInt32 matchPriceCount; - Bool finished; - Bool multiThread; - - SRes result; - UInt32 dictSize; - UInt32 matchFinderCycles; - - ISeqInStream* inStream; - CSeqInStreamBuf seqBufInStream; - - CSaveState saveState; -} CLzmaEnc; - -void LzmaEnc_SaveState(CLzmaEncHandle pp) { - CLzmaEnc* p = (CLzmaEnc*)pp; - CSaveState* dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, 
p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); -} - -void LzmaEnc_RestoreState(CLzmaEncHandle pp) { - CLzmaEnc* dest = (CLzmaEnc*)pp; - const CSaveState* p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps* props2) { - CLzmaEnc* p = (CLzmaEnc*)pp; - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - - if (props.lc > LZMA_LC_MAX || - props.lp > LZMA_LP_MAX || - props.pb > LZMA_PB_MAX || - props.dictSize > (1U << kDicLogSizeMaxCompress) || - props.dictSize > (1 << 30)) - return SZ_ERROR_PARAM; - p->dictSize = props.dictSize; - p->matchFinderCycles = props.mc; - { - unsigned fb = props.fb; - if (fb < 5) - fb = 5; - if (fb > LZMA_MATCH_LEN_MAX) - fb = LZMA_MATCH_LEN_MAX; - p->numFastBytes = fb; - } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; - p->fastMode = (props.algo == 0); - p->matchFinderBase.btMode = 
props.btMode; - { - UInt32 numHashBytes = 4; - if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; - } - p->matchFinderBase.numHashBytes = numHashBytes; - } - - p->matchFinderBase.cutValue = props.mc; - - p->writeEndMark = props.writeEndMark; - -#ifdef COMPRESS_MF_MT - /* - if (newMultiThread != _multiThread) - { - ReleaseMatchFinder(); - _multiThread = newMultiThread; - } - */ - p->multiThread = (props.numThreads > 1); -#endif - - return SZ_OK; -} - -static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const int kShortRepNextStates[kNumStates] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsCharState(s) ((s) < 7) - -#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len)-2 : kNumLenToPosStates - 1) - -#define kInfinityPrice (1 << 30) - -static void RangeEnc_Construct(CRangeEnc* p) { - p->outStream = 0; - p->bufBase = 0; -} - -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) - -#define RC_BUF_SIZE (1 << 16) -static int RangeEnc_Alloc(CRangeEnc* p, ISzAlloc* alloc) { - if (p->bufBase == 0) { - p->bufBase = (Byte*)alloc->Alloc(alloc, RC_BUF_SIZE); - if (p->bufBase == 0) - return 0; - p->bufLim = p->bufBase + RC_BUF_SIZE; - } - return 1; -} - -static void RangeEnc_Free(CRangeEnc* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->bufBase); - p->bufBase = 0; -} - -static void RangeEnc_Init(CRangeEnc* p) { - /* Stream.Init(); */ - p->low = 0; - p->range = 0xFFFFFFFF; - p->cacheSize = 1; - p->cache = 0; - - p->buf = p->bufBase; - - p->processed = 0; - p->res = SZ_OK; -} - -static void RangeEnc_FlushStream(CRangeEnc* p) { - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != p->outStream->Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; - p->processed += num; - p->buf = p->bufBase; -} - -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc* p) { - if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) { - Byte temp = p->cache; - do { - Byte* buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); - temp = 0xFF; - } while (--p->cacheSize != 0); - p->cache = (Byte)((UInt32)p->low >> 24); - } - p->cacheSize++; - p->low = (UInt32)p->low << 8; -} - -static void RangeEnc_FlushData(CRangeEnc* p) { - int i; - for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); -} - -static void RangeEnc_EncodeDirectBits(CRangeEnc* p, UInt32 value, int numBits) { - do { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } while (numBits != 0); -} - -static void 
RangeEnc_EncodeBit(CRangeEnc* p, CLzmaProb* prob, UInt32 symbol) { - UInt32 ttt = *prob; - UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } else { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; - } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } -} - -static void LitEnc_Encode(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol) { - symbol |= 0x100; - do { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); -} - -static void LitEnc_EncodeMatched(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol, UInt32 matchByte) { - UInt32 offs = 0x100; - symbol |= 0x100; - do { - matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); -} - -void LzmaEnc_InitPriceTables(UInt32* ProbPrices) { - UInt32 i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) { - const int kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = i; - UInt32 bitCount = 0; - int j; - for (j = 0; j < kCyclesBits; j++) { - w = w * w; - bitCount <<= 1; - while (w >= ((UInt32)1 << 16)) { - w >>= 1; - bitCount++; - } - } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); - } -} - -#define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -#define GET_PRICE_0a(prob) 
ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -static UInt32 LitEnc_GetPrice(const CLzmaProb* probs, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - symbol |= 0x100; - do { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); - return price; -} - -static UInt32 LitEnc_GetPriceMatched(const CLzmaProb* probs, UInt32 symbol, UInt32 matchByte, UInt32* ProbPrices) { - UInt32 price = 0; - UInt32 offs = 0x100; - symbol |= 0x100; - do { - matchByte <<= 1; - price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); - return price; -} - -static void RcTree_Encode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0;) { - UInt32 bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - } -} - -static void RcTree_ReverseEncode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = 0; i < numBitLevels; i++) { - UInt32 bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - symbol >>= 1; - } -} - -static UInt32 RcTree_GetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - symbol |= (1 << numBitLevels); - while (symbol != 1) { - price += GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static UInt32 RcTree_ReverseGetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0; i--) { - UInt32 bit = symbol & 1; - symbol >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) | bit; - } - return price; -} - -static void 
LenEnc_Init(CLenEnc* p) { - unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) - p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; - for (i = 0; i < kLenNumHighSymbols; i++) - p->high[i] = kProbInitValue; -} - -static void LenEnc_Encode(CLenEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState) { - if (symbol < kLenNumLowSymbols) { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } else { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } else { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); - } - } -} - -static void LenEnc_SetPrices(CLenEnc* p, UInt32 posState, UInt32 numSymbols, UInt32* prices, UInt32* ProbPrices) { - UInt32 a0 = GET_PRICE_0a(p->choice); - UInt32 a1 = GET_PRICE_1a(p->choice); - UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); - UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); - UInt32 i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) { - if (i >= numSymbols) - return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); - } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); - } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); -} - -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc* p, UInt32 posState, UInt32* ProbPrices) { - 
LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; -} - -static void LenPriceEnc_UpdateTables(CLenPriceEnc* p, UInt32 numPosStates, UInt32* ProbPrices) { - UInt32 posState; - for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void LenEnc_Encode2(CLenPriceEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32* ProbPrices) { - LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void MovePos(CLzmaEnc* p, UInt32 num) { -#ifdef SHOW_STAT - ttt += num; - printf("\n MovePos %d", num); -#endif - if (num != 0) { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); - } -} - -static UInt32 ReadMatchDistances(CLzmaEnc* p, UInt32* numDistancePairsRes) { - UInt32 lenRes = 0, numPairs; - p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); -#ifdef SHOW_STAT - printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); - ttt++; - { - UInt32 i; - for (i = 0; i < numPairs; i += 2) - printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); - } -#endif - if (numPairs > 0) { - lenRes = p->matches[numPairs - 2]; - if (lenRes == p->numFastBytes) { - const Byte* pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - UInt32 distance = p->matches[numPairs - 1] + 1; - UInt32 numAvail = p->numAvail; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - { - const Byte* pby2 = pby - distance; - for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) - ; - } - } - } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; -} - -#define MakeAsChar(p) \ - (p)->backPrev = (UInt32)(-1); \ - (p)->prev1IsChar = False; -#define MakeAsShortRep(p) \ - (p)->backPrev 
= 0; \ - (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) - -static UInt32 GetRepLen1Price(CLzmaEnc* p, UInt32 state, UInt32 posState) { - return GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} - -static UInt32 GetPureRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 state, UInt32 posState) { - UInt32 price; - if (repIndex == 0) { - price = GET_PRICE_0(p->isRepG0[state]); - price += GET_PRICE_1(p->isRep0Long[state][posState]); - } else { - price = GET_PRICE_1(p->isRepG0[state]); - if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); - else { - price += GET_PRICE_1(p->isRepG1[state]); - price += GET_PRICE(p->isRepG2[state], repIndex - 2); - } - } - return price; -} - -static UInt32 GetRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) { - return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} - -static UInt32 Backward(CLzmaEnc* p, UInt32* backRes, UInt32 cur) { - UInt32 posMem = p->opt[cur].posPrev; - UInt32 backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do { - if (p->opt[cur].prev1IsChar) { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem] - .posPrev = posMem - 1; - if (p->opt[cur].prev2) { - p->opt[posMem - 1].prev1IsChar = False; - p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; - } - } - { - UInt32 posPrev = posMem; - UInt32 backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; - } - } while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; -} - -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos)&p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) - -static UInt32 GetOptimum(CLzmaEnc* p, UInt32 position, UInt32* backRes) { - 
UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; - UInt32 matchPrice, repMatchPrice, normalMatchPrice; - UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; - UInt32* matches; - const Byte* data; - Byte curByte, matchByte; - if (p->optimumEndIndex != p->optimumCurrentIndex) { - const COptimal* opt = &p->opt[p->optimumCurrentIndex]; - UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) { - *backRes = (UInt32)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 lenTest; - const Byte* data2; - reps[i] = p->reps[i]; - data2 = data - (reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) { - repLens[i] = 0; - continue; - } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) - ; - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; - } - if (repLens[repMaxIndex] >= p->numFastBytes) { - UInt32 lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) { - *backRes = (UInt32)-1; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb* probs = 
LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - if (matchByte == curByte) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); - } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; - COptimal* opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; - } - } while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); - if (len <= mainLen) { - UInt32 offs = 0; - while (len > matches[offs]) - offs += 2; - for (;; len++) { - COptimal* opt; - UInt32 distance = matches[offs + 1]; - - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else { - UInt32 slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; - } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - if (len == matches[offs]) { - offs += 2; - if (offs == numPairs) - break; - } - } - } - - cur = 0; - -#ifdef SHOW_STAT2 - if (position >= 0) { - unsigned i; - printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) - printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); - } -#endif - - for (;;) { - UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; - UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; - Bool nextIsChar; - Byte curByte, matchByte; - const Byte* data; - COptimal* curOpt; - COptimal* nextOpt; - - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); - - newLen = ReadMatchDistances(p, &numPairs); - if (newLen >= p->numFastBytes) { - p->numPairs = numPairs; - p->longestMatchLength = newLen; - return Backward(p, backRes, cur); - } - position++; - curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) { - posPrev--; - if (curOpt->prev2) { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } else - state = 
p->opt[posPrev].state; - if (posPrev == cur - 1) { - if (IsShortRep(curOpt)) - state = kShortRepNextStates[state]; - else - state = kLiteralNextStates[state]; - } else { - UInt32 pos; - const COptimal* prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; - } else { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) { - UInt32 i; - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; - } else { - UInt32 i; - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i - 1]; - } - } - curOpt->state = (CState)state; - - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - posState = (position & p->pbMask); - - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - { - const CLzmaProb* probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? 
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - nextOpt = &p->opt[cur + 1]; - - if (curAnd1Price < nextOpt->price) { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; - } - - matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) { - nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; - } - } - numAvailFull = p->numAvail; - { - UInt32 temp = kNumOpts - 1 - cur; - if (temp < numAvailFull) - numAvailFull = temp; - } - - if (numAvailFull < 2) - continue; - numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - - if (!nextIsChar && matchByte != curByte) /* speed optimization */ - { - /* try Literal + rep0 */ - UInt32 temp; - UInt32 lenTest2; - const Byte* data2 = data - (reps[0] + 1); - UInt32 limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) - ; - lenTest2 = temp - 1; - if (lenTest2 >= 2) { - UInt32 state2 = kLiteralNextStates[state]; - UInt32 posStateNext = (position + 1) & p->pbMask; - UInt32 nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 curAndLenPrice; - COptimal* opt; - UInt32 offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - 
opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; - } - } - } - } - - startLen = 2; /* speed optimization */ - { - UInt32 repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) { - UInt32 lenTest; - UInt32 lenTestTemp; - UInt32 price; - const Byte* data2 = data - (reps[repIndex] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) - ; - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; - COptimal* opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; - } - } while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; - - /* if (_maxMode) */ - { - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) - ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) { - UInt32 state2 = kRepNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = - price + p->repLenEnc.prices[posState][lenTest - 2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 
curAndLenPrice; - COptimal* opt; - UInt32 offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; - } - } - } - } - } - } - /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ - if (newLen > numAvail) { - newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) - ; - matches[numPairs] = newLen; - numPairs += 2; - } - if (newLen >= startLen) { - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 offs, curBack, posSlot; - UInt32 lenTest; - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; - - offs = 0; - while (startLen > matches[offs]) - offs += 2; - curBack = matches[offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen;; lenTest++) { - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(lenTest); - COptimal* opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - - if (/*_maxMode && */ lenTest == matches[offs]) { - /* Try Match + Literal + Rep0 */ - const Byte* data2 = data - (curBack + 1); - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = 
numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) - ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) { - UInt32 state2 = kMatchNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 offset = cur + lenTest + 1 + lenTest2; - UInt32 curAndLenPrice; - COptimal* opt; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; - } - } - } - offs += 2; - if (offs == numPairs) - break; - curBack = matches[offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); - } - } - } - } -} - -#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) - -static UInt32 GetOptimumFast(CLzmaEnc* p, UInt32* backRes) { - UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; - const Byte* data; - const UInt32* matches; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - *backRes = (UInt32)-1; - if (numAvail < 2) - return 1; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = 
p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - - repLen = repIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len; - const Byte* data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (len = 2; len < numAvail && data[len] == data2[len]; len++) - ; - if (len >= p->numFastBytes) { - *backRes = i; - MovePos(p, len - 1); - return len; - } - if (len > repLen) { - repIndex = i; - repLen = len; - } - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - - mainDist = 0; /* for GCC */ - if (mainLen >= 2) { - mainDist = matches[numPairs - 1]; - while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) { - if (!ChangePair(matches[numPairs - 3], mainDist)) - break; - numPairs -= 2; - mainLen = matches[numPairs - 2]; - mainDist = matches[numPairs - 1]; - } - if (mainLen == 2 && mainDist >= 0x80) - mainLen = 1; - } - - if (repLen >= 2 && ((repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) { - *backRes = repIndex; - MovePos(p, repLen - 1); - return repLen; - } - - if (mainLen < 2 || numAvail <= 2) - return 1; - - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) { - UInt32 newDistance = matches[p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; - } - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len, limit; - const Byte* data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - limit = mainLen 
- 1; - for (len = 2; len < limit && data[len] == data2[len]; len++) - ; - if (len >= limit) - return 1; - } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); - return mainLen; -} - -static void WriteEndMarker(CLzmaEnc* p, UInt32 posState) { - UInt32 len; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); -} - -static SRes CheckErrors(CLzmaEnc* p) { - if (p->result != SZ_OK) - return p->result; - if (p->rc.res != SZ_OK) - p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) - p->result = SZ_ERROR_READ; - if (p->result != SZ_OK) - p->finished = True; - return p->result; -} - -static SRes Flush(CLzmaEnc* p, UInt32 nowPos) { - /* ReleaseMFStream(); */ - p->finished = True; - if (p->writeEndMark) - WriteEndMarker(p, nowPos & p->pbMask); - RangeEnc_FlushData(&p->rc); - RangeEnc_FlushStream(&p->rc); - return CheckErrors(p); -} - -static void FillAlignPrices(CLzmaEnc* p) { - UInt32 i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); - p->alignPriceCount = 0; -} - -static void FillDistancesPrices(CLzmaEnc* p) { - UInt32 tempPrices[kNumFullDistances]; - UInt32 i, lenToPosState; - for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { - UInt32 posSlot = GetPosSlot1(i); - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - 
posSlot - 1, footerBits, i - base, p->ProbPrices); - } - - for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { - UInt32 posSlot; - const CLzmaProb* encoder = p->posSlotEncoder[lenToPosState]; - UInt32* posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); - - { - UInt32* distancesPrices = p->distancesPrices[lenToPosState]; - UInt32 i; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; - } - } - p->matchPriceCount = 0; -} - -void LzmaEnc_Construct(CLzmaEnc* p) { - RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); -#ifdef COMPRESS_MF_MT - MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; -#endif - - { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); - } - -#ifndef LZMA_LOG_BSR - LzmaEnc_FastPosInit(p->g_FastPos); -#endif - - LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc) { - void* p; - p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); - if (p != 0) - LzmaEnc_Construct((CLzmaEnc*)p); - return p; -} - -void LzmaEnc_FreeLits(CLzmaEnc* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->litProbs); - alloc->Free(alloc, p->saveState.litProbs); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -void LzmaEnc_Destruct(CLzmaEnc* p, ISzAlloc* alloc, ISzAlloc* allocBig) { -#ifdef COMPRESS_MF_MT - MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); -#endif - MatchFinder_Free(&p->matchFinderBase, allocBig); - 
LzmaEnc_FreeLits(p, alloc); - RangeEnc_Free(&p->rc, alloc); -} - -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig) { - LzmaEnc_Destruct((CLzmaEnc*)p, alloc, allocBig); - alloc->Free(alloc, p); -} - -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc* p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) { - UInt32 nowPos32, startPos32; - if (p->inStream != 0) { - p->matchFinderBase.stream = p->inStream; - p->matchFinder.Init(p->matchFinderObj); - p->inStream = 0; - } - - if (p->finished) - return p->result; - RINOK(CheckErrors(p)); - - nowPos32 = (UInt32)p->nowPos64; - startPos32 = nowPos32; - - if (p->nowPos64 == 0) { - UInt32 numPairs; - Byte curByte; - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - return Flush(p, nowPos32); - ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; - curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); - LitEnc_Encode(&p->rc, p->litProbs, curByte); - p->additionalOffset--; - nowPos32++; - } - - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - for (;;) { - UInt32 pos, len, posState; - - if (p->fastMode) - len = GetOptimumFast(p, &pos); - else - len = GetOptimum(p, nowPos32, &pos); - -#ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); -#endif - - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (UInt32)-1) { - Byte curByte; - CLzmaProb* probs; - const Byte* data; - - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; - probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) - LitEnc_Encode(&p->rc, probs, curByte); - else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; - } else { - RangeEnc_EncodeBit(&p->rc, 
&p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); - } else { - UInt32 distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); - else { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; - p->reps[0] = distance; - } - if (len == 1) - p->state = kShortRepNextStates[p->state]; - else { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - p->state = kRepNextStates[p->state]; - } - } else { - UInt32 posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); - - if (posSlot >= kStartPosModelIndex) { - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - UInt32 posReduced = pos - base; - - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); - else { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; - } - } - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - p->reps[1] = p->reps[0]; - p->reps[0] = pos; - p->matchPriceCount++; - } - } - p->additionalOffset -= len; - 
nowPos32 += len; - if (p->additionalOffset == 0) { - UInt32 processed; - if (!p->fastMode) { - if (p->matchPriceCount >= (1 << 7)) - FillDistancesPrices(p); - if (p->alignPriceCount >= kAlignTableSize) - FillAlignPrices(p); - } - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; - if (useLimits) { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) - break; - } else if (processed >= (1 << 15)) { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); - } - } - } - p->nowPos64 += nowPos32 - startPos32; - return Flush(p, nowPos32); -} - -#define kBigHashDicLimit ((UInt32)1 << 24) - -static SRes LzmaEnc_Alloc(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - UInt32 beforeSize = kNumOpts; - Bool btMode; - if (!RangeEnc_Alloc(&p->rc, alloc)) - return SZ_ERROR_MEM; - btMode = (p->matchFinderBase.btMode != 0); -#ifdef COMPRESS_MF_MT - p->mtMode = (p->multiThread && !p->fastMode && btMode); -#endif - - { - unsigned lclp = p->lc + p->lp; - if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) { - LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - if (p->litProbs == 0 || p->saveState.litProbs == 0) { - LzmaEnc_FreeLits(p, alloc); - return SZ_ERROR_MEM; - } - p->lclp = lclp; - } - } - - p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); - - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; - -#ifdef COMPRESS_MF_MT - if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); - p->matchFinderObj = &p->matchFinderMt; - MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); - } else -#endif - { - if 
(!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) - return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); - } - return SZ_OK; -} - -void LzmaEnc_Init(CLzmaEnc* p) { - UInt32 i; - p->state = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; - - RangeEnc_Init(&p->rc); - - for (i = 0; i < kNumStates; i++) { - UInt32 j; - for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { - p->isMatch[i][j] = kProbInitValue; - p->isRep0Long[i][j] = kProbInitValue; - } - p->isRep[i] = kProbInitValue; - p->isRepG0[i] = kProbInitValue; - p->isRepG1[i] = kProbInitValue; - p->isRepG2[i] = kProbInitValue; - } - - { - UInt32 num = 0x300 << (p->lp + p->lc); - for (i = 0; i < num; i++) - p->litProbs[i] = kProbInitValue; - } - - { - for (i = 0; i < kNumLenToPosStates; i++) { - CLzmaProb* probs = p->posSlotEncoder[i]; - UInt32 j; - for (j = 0; j < (1 << kNumPosSlotBits); j++) - probs[j] = kProbInitValue; - } - } - { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) - p->posEncoders[i] = kProbInitValue; - } - - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); - - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; - p->additionalOffset = 0; - - p->pbMask = (1 << p->pb) - 1; - p->lpMask = (1 << p->lp) - 1; -} - -void LzmaEnc_InitPrices(CLzmaEnc* p) { - if (!p->fastMode) { - FillDistancesPrices(p); - FillAlignPrices(p); - } - - p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); -} - -static SRes LzmaEnc_AllocAndInit(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - UInt32 i; - for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) - if 
(p->dictSize <= ((UInt32)1 << i)) - break; - p->distTableSize = i * 2; - - p->finished = False; - p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - p->nowPos64 = 0; - return SZ_OK; -} - -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream* inStream, ISeqOutStream* outStream, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - p->inStream = inStream; - p->rc.outStream = outStream; - return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); -} - -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream* inStream, UInt32 keepWindowSize, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - p->inStream = inStream; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -static void LzmaEnc_SetInputBuf(CLzmaEnc* p, const Byte* src, SizeT srcLen) { - p->seqBufInStream.funcTable.Read = MyRead; - p->seqBufInStream.data = src; - p->seqBufInStream.rem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte* src, SizeT srcLen, - UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->inStream = &p->seqBufInStream.funcTable; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -void LzmaEnc_Finish(CLzmaEncHandle pp) { -#ifdef COMPRESS_MF_MT - CLzmaEnc* p = (CLzmaEnc*)pp; - if (p->mtMode) - MatchFinderMt_ReleaseStream(&p->matchFinderMt); -#else - pp = pp; -#endif -} - -typedef struct _CSeqOutStreamBuf { - ISeqOutStream funcTable; - Byte* data; - SizeT rem; - Bool overflow; -} CSeqOutStreamBuf; - -static size_t MyWrite(void* pp, const void* data, size_t size) { - CSeqOutStreamBuf* p = (CSeqOutStreamBuf*)pp; - if (p->rem < size) { - size = p->rem; - p->overflow = True; - } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; - return size; -} - -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { - const 
CLzmaEnc* p = (CLzmaEnc*)pp; - return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); -} - -const Byte* LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { - const CLzmaEnc* p = (CLzmaEnc*)pp; - return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; -} - -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, - Byte* dest, size_t* destLen, UInt32 desiredPackSize, UInt32* unpackSize) { - CLzmaEnc* p = (CLzmaEnc*)pp; - UInt64 nowPos64; - SRes res; - CSeqOutStreamBuf outStream; - - outStream.funcTable.Write = MyWrite; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = False; - p->finished = False; - p->result = SZ_OK; - - if (reInit) - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - nowPos64 = p->nowPos64; - RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.funcTable; - - res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); - - *unpackSize = (UInt32)(p->nowPos64 - nowPos64); - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - - return res; -} - -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream* outStream, ISeqInStream* inStream, ICompressProgress* progress, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - SRes res = SZ_OK; - -#ifdef COMPRESS_MF_MT - Byte allocaDummy[0x300]; - (void)allocaDummy; - int i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)i; -#endif - - RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig)); - - for (;;) { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); - if (res != SZ_OK || p->finished != 0) - break; - if (progress != 0) { - res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); - if (res != SZ_OK) { - res = SZ_ERROR_PROGRESS; - break; - } - } - } - LzmaEnc_Finish(pp); - return res; -} - -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte* props, SizeT* size) { - CLzmaEnc* p = (CLzmaEnc*)pp; - int i; - UInt32 dictSize = p->dictSize; - 
if (*size < LZMA_PROPS_SIZE) - return SZ_ERROR_PARAM; - *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - for (i = 11; i <= 30; i++) { - if (dictSize <= ((UInt32)2 << i)) { - dictSize = (2 << i); - break; - } - if (dictSize <= ((UInt32)3 << i)) { - dictSize = (3 << i); - break; - } - } - - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; -} - -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { - SRes res; - CLzmaEnc* p = (CLzmaEnc*)pp; - - CSeqOutStreamBuf outStream; - - LzmaEnc_SetInputBuf(p, src, srcLen); - - outStream.funcTable.Write = MyWrite; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = writeEndMark; - res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, - progress, alloc, allocBig); - - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - return res; -} - -SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)LzmaEnc_Create(alloc); - SRes res; - if (p == 0) - return SZ_ERROR_MEM; - - res = LzmaEnc_SetProps(p, props); - if (res == SZ_OK) { - res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); - if (res == SZ_OK) - res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, - writeEndMark, progress, alloc, allocBig); - } - - LzmaEnc_Destroy(p, alloc, allocBig); - return res; -} -} +/* LzmaEnc.c -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ + +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifdef 
COMPRESS_MF_MT +#include "LzFindMt.h" +#endif + +namespace crnlib { + +#ifdef SHOW_STAT +static int ttt = 0; +#endif + +#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) + +#define kBlockSize (9 << 10) +#define kUnpackBlockSize (1 << 18) +#define kMatchArraySize (1 << 21) +#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) + +#define kNumMaxDirectBits (31) + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +#define kBitPrice (1 << kNumBitPriceShiftBits) + +void LzmaEncProps_Init(CLzmaEncProps* p) { + p->level = 5; + p->dictSize = p->mc = 0; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps* p) { + int level = p->level; + if (level < 0) + level = 5; + p->level = level; + if (p->dictSize == 0) + p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); + if (p->lc < 0) + p->lc = 3; + if (p->lp < 0) + p->lp = 0; + if (p->pb < 0) + p->pb = 2; + if (p->algo < 0) + p->algo = (level < 5 ? 0 : 1); + if (p->fb < 0) + p->fb = (level < 7 ? 32 : 64); + if (p->btMode < 0) + p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) + p->numHashBytes = 4; + if (p->mc == 0) + p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numThreads < 0) + p->numThreads = ((p->btMode && p->algo) ? 
2 : 1); +} + +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2) { + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + +/* #define LZMA_LOG_BSR */ +/* Define it for Intel's CPU */ + +#ifdef LZMA_LOG_BSR + +#define kDicLogSizeMaxCompress 30 + +#define BSR2_RET(pos, res) \ + { \ + unsigned long i; \ + _BitScanReverse(&i, (pos)); \ + res = (i + i) + ((pos >> (i - 1)) & 1); \ + } + +UInt32 GetPosSlot1(UInt32 pos) { + UInt32 res; + BSR2_RET(pos, res); + return res; +} +#define GetPosSlot2(pos, res) \ + { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) \ + { \ + if (pos < 2) \ + res = pos; \ + else \ + BSR2_RET(pos, res); \ + } + +#else + +#define kNumLogBits (9 + (int)sizeof(size_t) / 2) +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +void LzmaEnc_FastPosInit(Byte* g_FastPos) { + int c = 2, slotFast; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + + for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) { + UInt32 k = (1 << ((slotFast >> 1) - 1)); + UInt32 j; + for (j = 0; j < k; j++, c++) + g_FastPos[c] = (Byte)slotFast; + } +} + +#define BSR2_RET(pos, res) \ + { \ + UInt32 i = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> i] + (i * 2); \ + } +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? 
\ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) \ + { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) \ + { \ + if (pos < kNumFullDistances) \ + res = p->g_FastPos[pos]; \ + else \ + BSR2_RET(pos, res); \ + } + +#endif + +#define LZMA_NUM_REPS 4 + +typedef unsigned CState; + +typedef struct _COptimal { + UInt32 price; + + CState state; + int prev1IsChar; + int prev2; + + UInt32 posPrev2; + UInt32 backPrev2; + + UInt32 posPrev; + UInt32 backPrev; + UInt32 backs[LZMA_NUM_REPS]; +} COptimal; + +#define kNumOpts (1 << 12) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +#define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) + +#define kNumFullDistances (1 << (kEndPosModelIndex / 2)) + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb UInt16 +#endif + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + +typedef struct +{ + CLzmaProb choice; + CLzmaProb choice2; + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; + CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + CLzmaProb 
high[kLenNumHighSymbols]; +} CLenEnc; + +typedef struct +{ + CLenEnc p; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + UInt32 tableSize; + UInt32 counters[LZMA_NUM_PB_STATES_MAX]; +} CLenPriceEnc; + +typedef struct _CRangeEnc { + UInt32 range; + Byte cache; + UInt64 low; + UInt64 cacheSize; + Byte* buf; + Byte* bufLim; + Byte* bufBase; + ISeqOutStream* outStream; + UInt64 processed; + SRes res; +} CRangeEnc; + +typedef struct _CSeqInStreamBuf { + ISeqInStream funcTable; + const Byte* data; + SizeT rem; +} CSeqInStreamBuf; + +static SRes MyRead(void* pp, void* data, size_t* size) { + size_t curSize = *size; + CSeqInStreamBuf* p = (CSeqInStreamBuf*)pp; + if (p->rem < curSize) + curSize = p->rem; + memcpy(data, p->data, curSize); + p->rem -= curSize; + p->data += curSize; + *size = curSize; + return SZ_OK; +} + +typedef struct +{ + CLzmaProb* litProbs; + + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + UInt32 reps[LZMA_NUM_REPS]; + UInt32 state; +} CSaveState; + +typedef struct _CLzmaEnc { + IMatchFinder matchFinder; + void* matchFinderObj; + +#ifdef COMPRESS_MF_MT + Bool mtMode; + CMatchFinderMt matchFinderMt; +#endif + + CMatchFinder matchFinderBase; + +#ifdef COMPRESS_MF_MT + Byte pad[128]; +#endif + + UInt32 optimumEndIndex; + UInt32 optimumCurrentIndex; + + UInt32 longestMatchLength; + UInt32 numPairs; + UInt32 numAvail; + COptimal opt[kNumOpts]; + +#ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; +#endif + + UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 
2 + 1]; + UInt32 numFastBytes; + UInt32 additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + UInt32 state; + + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + UInt32 alignPrices[kAlignTableSize]; + UInt32 alignPriceCount; + + UInt32 distTableSize; + + unsigned lc, lp, pb; + unsigned lpMask, pbMask; + + CLzmaProb* litProbs; + + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + unsigned lclp; + + Bool fastMode; + + CRangeEnc rc; + + Bool writeEndMark; + UInt64 nowPos64; + UInt32 matchPriceCount; + Bool finished; + Bool multiThread; + + SRes result; + UInt32 dictSize; + UInt32 matchFinderCycles; + + ISeqInStream* inStream; + CSeqInStreamBuf seqBufInStream; + + CSaveState saveState; +} CLzmaEnc; + +void LzmaEnc_SaveState(CLzmaEncHandle pp) { + CLzmaEnc* p = (CLzmaEnc*)pp; + CSaveState* dest = &p->saveState; + int i; + dest->lenEnc = p->lenEnc; + dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + for (i = 0; i < kNumStates; i++) { + memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); + memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); + } + for (i = 0; i < kNumLenToPosStates; i++) + memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); + memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); + memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); + memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); + memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); + memcpy(dest->posEncoders, 
p->posEncoders, sizeof(p->posEncoders)); + memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); + memcpy(dest->reps, p->reps, sizeof(p->reps)); + memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); +} + +void LzmaEnc_RestoreState(CLzmaEncHandle pp) { + CLzmaEnc* dest = (CLzmaEnc*)pp; + const CSaveState* p = &dest->saveState; + int i; + dest->lenEnc = p->lenEnc; + dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + for (i = 0; i < kNumStates; i++) { + memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); + memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); + } + for (i = 0; i < kNumLenToPosStates; i++) + memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); + memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); + memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); + memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); + memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); + memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); + memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); + memcpy(dest->reps, p->reps, sizeof(p->reps)); + memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); +} + +SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps* props2) { + CLzmaEnc* p = (CLzmaEnc*)pp; + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + + if (props.lc > LZMA_LC_MAX || + props.lp > LZMA_LP_MAX || + props.pb > LZMA_PB_MAX || + props.dictSize > (1U << kDicLogSizeMaxCompress) || + props.dictSize > (1 << 30)) + return SZ_ERROR_PARAM; + p->dictSize = props.dictSize; + p->matchFinderCycles = props.mc; + { + unsigned fb = props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = props.lc; + p->lp = props.lp; + p->pb = props.pb; + p->fastMode = (props.algo == 0); + p->matchFinderBase.btMode = 
props.btMode; + { + UInt32 numHashBytes = 4; + if (props.btMode) { + if (props.numHashBytes < 2) + numHashBytes = 2; + else if (props.numHashBytes < 4) + numHashBytes = props.numHashBytes; + } + p->matchFinderBase.numHashBytes = numHashBytes; + } + + p->matchFinderBase.cutValue = props.mc; + + p->writeEndMark = props.writeEndMark; + +#ifdef COMPRESS_MF_MT + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); +#endif + + return SZ_OK; +} + +static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const int kShortRepNextStates[kNumStates] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsCharState(s) ((s) < 7) + +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len)-2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +static void RangeEnc_Construct(CRangeEnc* p) { + p->outStream = 0; + p->bufBase = 0; +} + +#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) + +#define RC_BUF_SIZE (1 << 16) +static int RangeEnc_Alloc(CRangeEnc* p, ISzAlloc* alloc) { + if (p->bufBase == 0) { + p->bufBase = (Byte*)alloc->Alloc(alloc, RC_BUF_SIZE); + if (p->bufBase == 0) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} + +static void RangeEnc_Free(CRangeEnc* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->bufBase); + p->bufBase = 0; +} + +static void RangeEnc_Init(CRangeEnc* p) { + /* Stream.Init(); */ + p->low = 0; + p->range = 0xFFFFFFFF; + p->cacheSize = 1; + p->cache = 0; + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} + +static void RangeEnc_FlushStream(CRangeEnc* p) { + size_t num; + if (p->res != SZ_OK) + return; + num = p->buf - p->bufBase; + if (num != p->outStream->Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + p->processed += num; + p->buf = p->bufBase; +} + +static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc* p) { + if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) { + Byte temp = p->cache; + do { + Byte* buf = p->buf; + *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + temp = 0xFF; + } while (--p->cacheSize != 0); + p->cache = (Byte)((UInt32)p->low >> 24); + } + p->cacheSize++; + p->low = (UInt32)p->low << 8; +} + +static void RangeEnc_FlushData(CRangeEnc* p) { + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + +static void RangeEnc_EncodeDirectBits(CRangeEnc* p, UInt32 value, int numBits) { + do { + p->range >>= 1; + p->low += p->range & (0 - ((value >> --numBits) & 1)); + if (p->range < kTopValue) { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } + } while (numBits != 0); +} + +static void 
RangeEnc_EncodeBit(CRangeEnc* p, CLzmaProb* prob, UInt32 symbol) { + UInt32 ttt = *prob; + UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; + if (symbol == 0) { + p->range = newBound; + ttt += (kBitModelTotal - ttt) >> kNumMoveBits; + } else { + p->low += newBound; + p->range -= newBound; + ttt -= ttt >> kNumMoveBits; + } + *prob = (CLzmaProb)ttt; + if (p->range < kTopValue) { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } +} + +static void LitEnc_Encode(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol) { + symbol |= 0x100; + do { + RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + symbol <<= 1; + } while (symbol < 0x10000); +} + +static void LitEnc_EncodeMatched(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol, UInt32 matchByte) { + UInt32 offs = 0x100; + symbol |= 0x100; + do { + matchByte <<= 1; + RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } while (symbol < 0x10000); +} + +void LzmaEnc_InitPriceTables(UInt32* ProbPrices) { + UInt32 i; + for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) { + const int kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = i; + UInt32 bitCount = 0; + int j; + for (j = 0; j < kCyclesBits; j++) { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) { + w >>= 1; + bitCount++; + } + } + ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + } +} + +#define GET_PRICE(prob, symbol) \ + p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICEa(prob, symbol) \ + ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define GET_PRICE_0a(prob) 
ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +static UInt32 LitEnc_GetPrice(const CLzmaProb* probs, UInt32 symbol, UInt32* ProbPrices) { + UInt32 price = 0; + symbol |= 0x100; + do { + price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); + symbol <<= 1; + } while (symbol < 0x10000); + return price; +} + +static UInt32 LitEnc_GetPriceMatched(const CLzmaProb* probs, UInt32 symbol, UInt32 matchByte, UInt32* ProbPrices) { + UInt32 price = 0; + UInt32 offs = 0x100; + symbol |= 0x100; + do { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } while (symbol < 0x10000); + return price; +} + +static void RcTree_Encode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { + UInt32 m = 1; + int i; + for (i = numBitLevels; i != 0;) { + UInt32 bit; + i--; + bit = (symbol >> i) & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + } +} + +static void RcTree_ReverseEncode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { + UInt32 m = 1; + int i; + for (i = 0; i < numBitLevels; i++) { + UInt32 bit = symbol & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + symbol >>= 1; + } +} + +static UInt32 RcTree_GetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { + UInt32 price = 0; + symbol |= (1 << numBitLevels); + while (symbol != 1) { + price += GET_PRICEa(probs[symbol >> 1], symbol & 1); + symbol >>= 1; + } + return price; +} + +static UInt32 RcTree_ReverseGetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { + UInt32 price = 0; + UInt32 m = 1; + int i; + for (i = numBitLevels; i != 0; i--) { + UInt32 bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) | bit; + } + return price; +} + +static void 
LenEnc_Init(CLenEnc* p) { + unsigned i; + p->choice = p->choice2 = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) + p->mid[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + +static void LenEnc_Encode(CLenEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState) { + if (symbol < kLenNumLowSymbols) { + RangeEnc_EncodeBit(rc, &p->choice, 0); + RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); + } else { + RangeEnc_EncodeBit(rc, &p->choice, 1); + if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) { + RangeEnc_EncodeBit(rc, &p->choice2, 0); + RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); + } else { + RangeEnc_EncodeBit(rc, &p->choice2, 1); + RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); + } + } +} + +static void LenEnc_SetPrices(CLenEnc* p, UInt32 posState, UInt32 numSymbols, UInt32* prices, UInt32* ProbPrices) { + UInt32 a0 = GET_PRICE_0a(p->choice); + UInt32 a1 = GET_PRICE_1a(p->choice); + UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); + UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); + UInt32 i = 0; + for (i = 0; i < kLenNumLowSymbols; i++) { + if (i >= numSymbols) + return; + prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); + } + for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) { + if (i >= numSymbols) + return; + prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); + } + for (; i < numSymbols; i++) + prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); +} + +static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc* p, UInt32 posState, UInt32* ProbPrices) { + 
LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); + p->counters[posState] = p->tableSize; +} + +static void LenPriceEnc_UpdateTables(CLenPriceEnc* p, UInt32 numPosStates, UInt32* ProbPrices) { + UInt32 posState; + for (posState = 0; posState < numPosStates; posState++) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} + +static void LenEnc_Encode2(CLenPriceEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32* ProbPrices) { + LenEnc_Encode(&p->p, rc, symbol, posState); + if (updatePrice) + if (--p->counters[posState] == 0) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} + +static void MovePos(CLzmaEnc* p, UInt32 num) { +#ifdef SHOW_STAT + ttt += num; + printf("\n MovePos %d", num); +#endif + if (num != 0) { + p->additionalOffset += num; + p->matchFinder.Skip(p->matchFinderObj, num); + } +} + +static UInt32 ReadMatchDistances(CLzmaEnc* p, UInt32* numDistancePairsRes) { + UInt32 lenRes = 0, numPairs; + p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); + numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); +#ifdef SHOW_STAT + printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); + ttt++; + { + UInt32 i; + for (i = 0; i < numPairs; i += 2) + printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); + } +#endif + if (numPairs > 0) { + lenRes = p->matches[numPairs - 2]; + if (lenRes == p->numFastBytes) { + const Byte* pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + UInt32 distance = p->matches[numPairs - 1] + 1; + UInt32 numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { + const Byte* pby2 = pby - distance; + for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) + ; + } + } + } + p->additionalOffset++; + *numDistancePairsRes = numPairs; + return lenRes; +} + +#define MakeAsChar(p) \ + (p)->backPrev = (UInt32)(-1); \ + (p)->prev1IsChar = False; +#define MakeAsShortRep(p) \ + (p)->backPrev 
= 0; \ + (p)->prev1IsChar = False; +#define IsShortRep(p) ((p)->backPrev == 0) + +static UInt32 GetRepLen1Price(CLzmaEnc* p, UInt32 state, UInt32 posState) { + return GET_PRICE_0(p->isRepG0[state]) + + GET_PRICE_0(p->isRep0Long[state][posState]); +} + +static UInt32 GetPureRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 state, UInt32 posState) { + UInt32 price; + if (repIndex == 0) { + price = GET_PRICE_0(p->isRepG0[state]); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } else { + price = GET_PRICE_1(p->isRepG0[state]); + if (repIndex == 1) + price += GET_PRICE_0(p->isRepG1[state]); + else { + price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + +static UInt32 GetRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) { + return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + + GetPureRepPrice(p, repIndex, state, posState); +} + +static UInt32 Backward(CLzmaEnc* p, UInt32* backRes, UInt32 cur) { + UInt32 posMem = p->opt[cur].posPrev; + UInt32 backMem = p->opt[cur].backPrev; + p->optimumEndIndex = cur; + do { + if (p->opt[cur].prev1IsChar) { + MakeAsChar(&p->opt[posMem]) + p->opt[posMem] + .posPrev = posMem - 1; + if (p->opt[cur].prev2) { + p->opt[posMem - 1].prev1IsChar = False; + p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; + p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; + } + } + { + UInt32 posPrev = posMem; + UInt32 backCur = backMem; + + backMem = p->opt[posPrev].backPrev; + posMem = p->opt[posPrev].posPrev; + + p->opt[posPrev].backPrev = backCur; + p->opt[posPrev].posPrev = cur; + cur = posPrev; + } + } while (cur != 0); + *backRes = p->opt[0].backPrev; + p->optimumCurrentIndex = p->opt[0].posPrev; + return p->optimumCurrentIndex; +} + +#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos)&p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) + +static UInt32 GetOptimum(CLzmaEnc* p, UInt32 position, UInt32* backRes) { + 
UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; + UInt32 matchPrice, repMatchPrice, normalMatchPrice; + UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; + UInt32* matches; + const Byte* data; + Byte curByte, matchByte; + if (p->optimumEndIndex != p->optimumCurrentIndex) { + const COptimal* opt = &p->opt[p->optimumCurrentIndex]; + UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; + *backRes = opt->backPrev; + p->optimumCurrentIndex = opt->posPrev; + return lenRes; + } + p->optimumCurrentIndex = p->optimumEndIndex = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) { + *backRes = (UInt32)(-1); + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) { + UInt32 lenTest; + const Byte* data2; + reps[i] = p->reps[i]; + data2 = data - (reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) { + repLens[i] = 0; + continue; + } + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) + ; + repLens[i] = lenTest; + if (lenTest > repLens[repMaxIndex]) + repMaxIndex = i; + } + if (repLens[repMaxIndex] >= p->numFastBytes) { + UInt32 lenRes; + *backRes = repMaxIndex; + lenRes = repLens[repMaxIndex]; + MovePos(p, lenRes - 1); + return lenRes; + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) { + *backRes = (UInt32)-1; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb* probs = 
LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsCharState(p->state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAsChar(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) { + UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) { + p->opt[1].price = shortRepPrice; + MakeAsShortRep(&p->opt[1]); + } + } + lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); + + if (lenEnd < 2) { + *backRes = p->opt[1].backPrev; + return 1; + } + + p->opt[1].posPrev = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->opt[0].backs[i] = reps[i]; + + len = lenEnd; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + + for (i = 0; i < LZMA_NUM_REPS; i++) { + UInt32 repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); + do { + UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; + COptimal* opt = &p->opt[repLen]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = i; + opt->prev1IsChar = False; + } + } while (--repLen >= 2); + } + + normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); + if (len <= mainLen) { + UInt32 offs = 0; + while (len > matches[offs]) + offs += 2; + for (;; len++) { + COptimal* opt; + UInt32 distance = matches[offs + 1]; + + UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; + UInt32 lenToPosState = GetLenToPosState(len); + if (distance < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][distance]; + else { + UInt32 slot; + GetPosSlot2(distance, slot); + curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + } + opt = &p->opt[len]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = distance + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + if (len == matches[offs]) { + offs += 2; + if (offs == numPairs) + break; + } + } + } + + cur = 0; + +#ifdef SHOW_STAT2 + if (position >= 0) { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= lenEnd; i++) + printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); + } +#endif + + for (;;) { + UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; + UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; + Bool nextIsChar; + Byte curByte, matchByte; + const Byte* data; + COptimal* curOpt; + COptimal* nextOpt; + + cur++; + if (cur == lenEnd) + return Backward(p, backRes, cur); + + newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) { + p->numPairs = numPairs; + p->longestMatchLength = newLen; + return Backward(p, backRes, cur); + } + position++; + curOpt = &p->opt[cur]; + posPrev = curOpt->posPrev; + if (curOpt->prev1IsChar) { + posPrev--; + if (curOpt->prev2) { + state = p->opt[curOpt->posPrev2].state; + if (curOpt->backPrev2 < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } else + state = p->opt[posPrev].state; + state = kLiteralNextStates[state]; + } else + state = 
p->opt[posPrev].state; + if (posPrev == cur - 1) { + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } else { + UInt32 pos; + const COptimal* prevOpt; + if (curOpt->prev1IsChar && curOpt->prev2) { + posPrev = curOpt->posPrev2; + pos = curOpt->backPrev2; + state = kRepNextStates[state]; + } else { + pos = curOpt->backPrev; + if (pos < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + prevOpt = &p->opt[posPrev]; + if (pos < LZMA_NUM_REPS) { + UInt32 i; + reps[0] = prevOpt->backs[pos]; + for (i = 1; i <= pos; i++) + reps[i] = prevOpt->backs[i - 1]; + for (; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i]; + } else { + UInt32 i; + reps[0] = (pos - LZMA_NUM_REPS); + for (i = 1; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i - 1]; + } + } + curOpt->state = (CState)state; + + curOpt->backs[0] = reps[0]; + curOpt->backs[1] = reps[1]; + curOpt->backs[2] = reps[2]; + curOpt->backs[3] = reps[3]; + + curPrice = curOpt->price; + nextIsChar = False; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + posState = (position & p->pbMask); + + curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); + { + const CLzmaProb* probs = LIT_PROBS(position, *(data - 1)); + curAnd1Price += + (!IsCharState(state) ? 
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + nextOpt = &p->opt[cur + 1]; + + if (curAnd1Price < nextOpt->price) { + nextOpt->price = curAnd1Price; + nextOpt->posPrev = cur; + MakeAsChar(nextOpt); + nextIsChar = True; + } + + matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) { + UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); + if (shortRepPrice <= nextOpt->price) { + nextOpt->price = shortRepPrice; + nextOpt->posPrev = cur; + MakeAsShortRep(nextOpt); + nextIsChar = True; + } + } + numAvailFull = p->numAvail; + { + UInt32 temp = kNumOpts - 1 - cur; + if (temp < numAvailFull) + numAvailFull = temp; + } + + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + if (!nextIsChar && matchByte != curByte) /* speed optimization */ + { + /* try Literal + rep0 */ + UInt32 temp; + UInt32 lenTest2; + const Byte* data2 = data - (reps[0] + 1); + UInt32 limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + + for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) + ; + lenTest2 = temp - 1; + if (lenTest2 >= 2) { + UInt32 state2 = kLiteralNextStates[state]; + UInt32 posStateNext = (position + 1) & p->pbMask; + UInt32 nextRepMatchPrice = curAnd1Price + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + /* for (; lenTest2 >= 2; lenTest2--) */ + { + UInt32 curAndLenPrice; + COptimal* opt; + UInt32 offset = cur + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + 
opt->posPrev = cur + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = False; + } + } + } + } + + startLen = 2; /* speed optimization */ + { + UInt32 repIndex; + for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) { + UInt32 lenTest; + UInt32 lenTestTemp; + UInt32 price; + const Byte* data2 = data - (reps[repIndex] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) + ; + while (lenEnd < cur + lenTest) + p->opt[++lenEnd].price = kInfinityPrice; + lenTestTemp = lenTest; + price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); + do { + UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; + COptimal* opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = repIndex; + opt->prev1IsChar = False; + } + } while (--lenTest >= 2); + lenTest = lenTestTemp; + + if (repIndex == 0) + startLen = lenTest + 1; + + /* if (_maxMode) */ + { + UInt32 lenTest2 = lenTest + 1; + UInt32 limit = lenTest2 + p->numFastBytes; + UInt32 nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) + ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) { + UInt32 state2 = kRepNextStates[state]; + UInt32 posStateNext = (position + lenTest) & p->pbMask; + UInt32 curAndLenCharPrice = + price + p->repLenEnc.prices[posState][lenTest - 2] + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (position + lenTest + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + UInt32 
curAndLenPrice; + COptimal* opt; + UInt32 offset = cur + lenTest + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = repIndex; + } + } + } + } + } + } + /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ + if (newLen > numAvail) { + newLen = numAvail; + for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) + ; + matches[numPairs] = newLen; + numPairs += 2; + } + if (newLen >= startLen) { + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + UInt32 offs, curBack, posSlot; + UInt32 lenTest; + while (lenEnd < cur + newLen) + p->opt[++lenEnd].price = kInfinityPrice; + + offs = 0; + while (startLen > matches[offs]) + offs += 2; + curBack = matches[offs + 1]; + GetPosSlot2(curBack, posSlot); + for (lenTest = /*2*/ startLen;; lenTest++) { + UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; + UInt32 lenToPosState = GetLenToPosState(lenTest); + COptimal* opt; + if (curBack < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; + else + curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; + + opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = curBack + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + + if (/*_maxMode && */ lenTest == matches[offs]) { + /* Try Match + Literal + Rep0 */ + const Byte* data2 = data - (curBack + 1); + UInt32 lenTest2 = lenTest + 1; + UInt32 limit = lenTest2 + p->numFastBytes; + UInt32 nextRepMatchPrice; + if (limit > numAvailFull) + limit = 
numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) + ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) { + UInt32 state2 = kMatchNextStates[state]; + UInt32 posStateNext = (position + lenTest) & p->pbMask; + UInt32 curAndLenCharPrice = curAndLenPrice + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (posStateNext + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + UInt32 offset = cur + lenTest + 1 + lenTest2; + UInt32 curAndLenPrice; + COptimal* opt; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = curBack + LZMA_NUM_REPS; + } + } + } + offs += 2; + if (offs == numPairs) + break; + curBack = matches[offs + 1]; + if (curBack >= kNumFullDistances) + GetPosSlot2(curBack, posSlot); + } + } + } + } +} + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + +static UInt32 GetOptimumFast(CLzmaEnc* p, UInt32* backRes) { + UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + const Byte* data; + const UInt32* matches; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + *backRes = (UInt32)-1; + if (numAvail < 2) + return 1; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = 
p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) { + UInt32 len; + const Byte* data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + ; + if (len >= p->numFastBytes) { + *backRes = i; + MovePos(p, len - 1); + return len; + } + if (len > repLen) { + repIndex = i; + repLen = len; + } + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + + mainDist = 0; /* for GCC */ + if (mainLen >= 2) { + mainDist = matches[numPairs - 1]; + while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) { + if (!ChangePair(matches[numPairs - 3], mainDist)) + break; + numPairs -= 2; + mainLen = matches[numPairs - 2]; + mainDist = matches[numPairs - 1]; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2 && ((repLen + 1 >= mainLen) || + (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || + (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) { + *backRes = repIndex; + MovePos(p, repLen - 1); + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); + if (p->longestMatchLength >= 2) { + UInt32 newDistance = matches[p->numPairs - 1]; + if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || + (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || + (p->longestMatchLength > mainLen + 1) || + (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) + return 1; + } + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) { + UInt32 len, limit; + const Byte* data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen 
- 1; + for (len = 2; len < limit && data[len] == data2[len]; len++) + ; + if (len >= limit) + return 1; + } + *backRes = mainDist + LZMA_NUM_REPS; + MovePos(p, mainLen - 2); + return mainLen; +} + +static void WriteEndMarker(CLzmaEnc* p, UInt32 posState) { + UInt32 len; + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + len = LZMA_MATCH_LEN_MIN; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); + RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); +} + +static SRes CheckErrors(CLzmaEnc* p) { + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + +static SRes Flush(CLzmaEnc* p, UInt32 nowPos) { + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + +static void FillAlignPrices(CLzmaEnc* p) { + UInt32 i; + for (i = 0; i < kAlignTableSize; i++) + p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + p->alignPriceCount = 0; +} + +static void FillDistancesPrices(CLzmaEnc* p) { + UInt32 tempPrices[kNumFullDistances]; + UInt32 i, lenToPosState; + for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { + UInt32 posSlot = GetPosSlot1(i); + UInt32 footerBits = ((posSlot >> 1) - 1); + UInt32 base = ((2 | (posSlot & 1)) << footerBits); + tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - 
posSlot - 1, footerBits, i - base, p->ProbPrices); + } + + for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { + UInt32 posSlot; + const CLzmaProb* encoder = p->posSlotEncoder[lenToPosState]; + UInt32* posSlotPrices = p->posSlotPrices[lenToPosState]; + for (posSlot = 0; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + + { + UInt32* distancesPrices = p->distancesPrices[lenToPosState]; + UInt32 i; + for (i = 0; i < kStartPosModelIndex; i++) + distancesPrices[i] = posSlotPrices[i]; + for (; i < kNumFullDistances; i++) + distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + } + } + p->matchPriceCount = 0; +} + +void LzmaEnc_Construct(CLzmaEnc* p) { + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); +#ifdef COMPRESS_MF_MT + MatchFinderMt_Construct(&p->matchFinderMt); + p->matchFinderMt.MatchFinder = &p->matchFinderBase; +#endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + +#ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); +#endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc) { + void* p; + p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); + if (p != 0) + LzmaEnc_Construct((CLzmaEnc*)p); + return p; +} + +void LzmaEnc_FreeLits(CLzmaEnc* p, ISzAlloc* alloc) { + alloc->Free(alloc, p->litProbs); + alloc->Free(alloc, p->saveState.litProbs); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +void LzmaEnc_Destruct(CLzmaEnc* p, ISzAlloc* alloc, ISzAlloc* allocBig) { +#ifdef COMPRESS_MF_MT + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); +#endif + MatchFinder_Free(&p->matchFinderBase, allocBig); + 
LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig) { + LzmaEnc_Destruct((CLzmaEnc*)p, alloc, allocBig); + alloc->Free(alloc, p); +} + +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc* p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) { + UInt32 nowPos32, startPos32; + if (p->inStream != 0) { + p->matchFinderBase.stream = p->inStream; + p->matchFinder.Init(p->matchFinderObj); + p->inStream = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)); + + nowPos32 = (UInt32)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) { + UInt32 numPairs; + Byte curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); + p->state = kLiteralNextStates[p->state]; + curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + for (;;) { + UInt32 pos, len, posState; + + if (p->fastMode) + len = GetOptimumFast(p, &pos); + else + len = GetOptimum(p, nowPos32, &pos); + +#ifdef SHOW_STAT2 + printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); +#endif + + posState = nowPos32 & p->pbMask; + if (len == 1 && pos == (UInt32)-1) { + Byte curByte; + CLzmaProb* probs; + const Byte* data; + + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + curByte = *data; + probs = LIT_PROBS(nowPos32, *(data - 1)); + if (IsCharState(p->state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); + p->state = kLiteralNextStates[p->state]; + } else { + RangeEnc_EncodeBit(&p->rc, 
&p->isMatch[p->state][posState], 1); + if (pos < LZMA_NUM_REPS) { + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); + if (pos == 0) { + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); + RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + } else { + UInt32 distance = p->reps[pos]; + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); + if (pos == 1) + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + else { + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); + if (pos == 3) + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = distance; + } + if (len == 1) + p->state = kShortRepNextStates[p->state]; + else { + LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + p->state = kRepNextStates[p->state]; + } + } else { + UInt32 posSlot; + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + pos -= LZMA_NUM_REPS; + GetPosSlot(pos, posSlot); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); + + if (posSlot >= kStartPosModelIndex) { + UInt32 footerBits = ((posSlot >> 1) - 1); + UInt32 base = ((2 | (posSlot & 1)) << footerBits); + UInt32 posReduced = pos - base; + + if (posSlot < kEndPosModelIndex) + RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); + else { + RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + p->alignPriceCount++; + } + } + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = pos; + p->matchPriceCount++; + } + } + p->additionalOffset -= len; + 
nowPos32 += len; + if (p->additionalOffset == 0) { + UInt32 processed; + if (!p->fastMode) { + if (p->matchPriceCount >= (1 << 7)) + FillDistancesPrices(p); + if (p->alignPriceCount >= kAlignTableSize) + FillAlignPrices(p); + } + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + if (useLimits) { + if (processed + kNumOpts + 300 >= maxUnpackSize || + RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) + break; + } else if (processed >= (1 << 15)) { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { + UInt32 beforeSize = kNumOpts; + Bool btMode; + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + btMode = (p->matchFinderBase.btMode != 0); +#ifdef COMPRESS_MF_MT + p->mtMode = (p->multiThread && !p->fastMode && btMode); +#endif + + { + unsigned lclp = p->lc + p->lp; + if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); + if (p->litProbs == 0 || p->saveState.litProbs == 0) { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); + + if (beforeSize + p->dictSize < keepWindowSize) + beforeSize = keepWindowSize - p->dictSize; + +#ifdef COMPRESS_MF_MT + if (p->mtMode) { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); + p->matchFinderObj = &p->matchFinderMt; + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } else +#endif + { + if 
(!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &p->matchFinderBase; + MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + } + return SZ_OK; +} + +void LzmaEnc_Init(CLzmaEnc* p) { + UInt32 i; + p->state = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->reps[i] = 0; + + RangeEnc_Init(&p->rc); + + for (i = 0; i < kNumStates; i++) { + UInt32 j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + UInt32 num = 0x300 << (p->lp + p->lc); + for (i = 0; i < num; i++) + p->litProbs[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) { + CLzmaProb* probs = p->posSlotEncoder[i]; + UInt32 j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + p->posEncoders[i] = kProbInitValue; + } + + LenEnc_Init(&p->lenEnc.p); + LenEnc_Init(&p->repLenEnc.p); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + p->optimumEndIndex = 0; + p->optimumCurrentIndex = 0; + p->additionalOffset = 0; + + p->pbMask = (1 << p->pb) - 1; + p->lpMask = (1 << p->lp) - 1; +} + +void LzmaEnc_InitPrices(CLzmaEnc* p) { + if (!p->fastMode) { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { + UInt32 i; + for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) + if 
(p->dictSize <= ((UInt32)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + p->nowPos64 = 0; + return SZ_OK; +} + +static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream* inStream, ISeqOutStream* outStream, + ISzAlloc* alloc, ISzAlloc* allocBig) { + CLzmaEnc* p = (CLzmaEnc*)pp; + p->inStream = inStream; + p->rc.outStream = outStream; + return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, + ISeqInStream* inStream, UInt32 keepWindowSize, + ISzAlloc* alloc, ISzAlloc* allocBig) { + CLzmaEnc* p = (CLzmaEnc*)pp; + p->inStream = inStream; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +static void LzmaEnc_SetInputBuf(CLzmaEnc* p, const Byte* src, SizeT srcLen) { + p->seqBufInStream.funcTable.Read = MyRead; + p->seqBufInStream.data = src; + p->seqBufInStream.rem = srcLen; +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte* src, SizeT srcLen, + UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { + CLzmaEnc* p = (CLzmaEnc*)pp; + LzmaEnc_SetInputBuf(p, src, srcLen); + p->inStream = &p->seqBufInStream.funcTable; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +void LzmaEnc_Finish(CLzmaEncHandle pp) { +#ifdef COMPRESS_MF_MT + CLzmaEnc* p = (CLzmaEnc*)pp; + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); +#else + pp = pp; +#endif +} + +typedef struct _CSeqOutStreamBuf { + ISeqOutStream funcTable; + Byte* data; + SizeT rem; + Bool overflow; +} CSeqOutStreamBuf; + +static size_t MyWrite(void* pp, const void* data, size_t size) { + CSeqOutStreamBuf* p = (CSeqOutStreamBuf*)pp; + if (p->rem < size) { + size = p->rem; + p->overflow = True; + } + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + return size; +} + +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { + const 
CLzmaEnc* p = (CLzmaEnc*)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} + +const Byte* LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { + const CLzmaEnc* p = (CLzmaEnc*)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, + Byte* dest, size_t* destLen, UInt32 desiredPackSize, UInt32* unpackSize) { + CLzmaEnc* p = (CLzmaEnc*)pp; + UInt64 nowPos64; + SRes res; + CSeqOutStreamBuf outStream; + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + nowPos64 = p->nowPos64; + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.funcTable; + + res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + +SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream* outStream, ISeqInStream* inStream, ICompressProgress* progress, + ISzAlloc* alloc, ISzAlloc* allocBig) { + CLzmaEnc* p = (CLzmaEnc*)pp; + SRes res = SZ_OK; + +#ifdef COMPRESS_MF_MT + Byte allocaDummy[0x300]; + (void)allocaDummy; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; +#endif + + RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig)); + + for (;;) { + res = LzmaEnc_CodeOneBlock(p, False, 0, 0); + if (res != SZ_OK || p->finished != 0) + break; + if (progress != 0) { + res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + LzmaEnc_Finish(pp); + return res; +} + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte* props, SizeT* size) { + CLzmaEnc* p = (CLzmaEnc*)pp; + int i; + UInt32 dictSize = p->dictSize; + 
if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + for (i = 11; i <= 30; i++) { + if (dictSize <= ((UInt32)2 << i)) { + dictSize = (2 << i); + break; + } + if (dictSize <= ((UInt32)3 << i)) { + dictSize = (3 << i); + break; + } + } + + for (i = 0; i < 4; i++) + props[1 + i] = (Byte)(dictSize >> (8 * i)); + return SZ_OK; +} + +SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, + int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { + SRes res; + CLzmaEnc* p = (CLzmaEnc*)pp; + + CSeqOutStreamBuf outStream; + + LzmaEnc_SetInputBuf(p, src, srcLen); + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, + progress, alloc, allocBig); + + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + +SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, + const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, + ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { + CLzmaEnc* p = (CLzmaEnc*)LzmaEnc_Create(alloc); + SRes res; + if (p == 0) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, + writeEndMark, progress, alloc, allocBig); + } + + LzmaEnc_Destroy(p, alloc, allocBig); + return res; +} +} diff --git a/3rdparty/lzma/Linux/LzmaEnc.h b/3rdparty/lzma/Linux/LzmaEnc.h index 37b3268..069ccbe 100644 --- a/3rdparty/lzma/Linux/LzmaEnc.h +++ b/3rdparty/lzma/Linux/LzmaEnc.h @@ -1,73 +1,73 @@ -/* LzmaEnc.h -- LZMA Encoder -2008-10-04 : Igor Pavlov : Public 
domain */ - -#ifndef __LZMAENC_H -#define __LZMAENC_H - -#include "LzmaTypes.h" - -namespace crnlib { - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaEncProps { - int level; /* 0 <= level <= 9 */ - UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (1 << 30) for 64-bit version - default = (1 << 24) */ - int lc; /* 0 <= lc <= 8, default = 3 */ - int lp; /* 0 <= lp <= 4, default = 0 */ - int pb; /* 0 <= pb <= 4, default = 2 */ - int algo; /* 0 - fast, 1 - normal, default = 1 */ - int fb; /* 5 <= fb <= 273, default = 32 */ - int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ - unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ - int numThreads; /* 1 or 2, default = 2 */ -} CLzmaEncProps; - -void LzmaEncProps_Init(CLzmaEncProps* p); -void LzmaEncProps_Normalize(CLzmaEncProps* p); -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2); - -/* ---------- CLzmaEncHandle Interface ---------- */ - -/* LzmaEnc_* functions can return the following exit codes: -Returns: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - Write callback error. 
- SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -typedef void* CLzmaEncHandle; - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig); -SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps* props); -SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte* properties, SizeT* size); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream* outStream, ISeqInStream* inStream, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); -SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); - -/* ---------- One Call Interface ---------- */ - -/* LzmaEncode -Return code: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); -} - -#endif +/* LzmaEnc.h -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZMAENC_H +#define __LZMAENC_H + +#include "LzmaTypes.h" + +namespace crnlib { + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps { + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (1 << 30) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int 
btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps* p); +void LzmaEncProps_Normalize(CLzmaEncProps* p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2); + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc_* functions can return the following exit codes: +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - Write callback error. + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +typedef void* CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig); +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps* props); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte* properties, SizeT* size); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream* outStream, ISeqInStream* inStream, + ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, + int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); + +/* ---------- One Call Interface ---------- */ + +/* LzmaEncode +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, + const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int 
writeEndMark, + ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); +} + +#endif diff --git a/3rdparty/lzma/Linux/LzmaLib.cpp b/3rdparty/lzma/Linux/LzmaLib.cpp index 434f172..5c0bb96 100644 --- a/3rdparty/lzma/Linux/LzmaLib.cpp +++ b/3rdparty/lzma/Linux/LzmaLib.cpp @@ -1,52 +1,52 @@ -/* LzmaLib.c -- LZMA library wrapper -2008-08-05 -Igor Pavlov -Public domain */ +/* LzmaLib.c -- LZMA library wrapper +2008-08-05 +Igor Pavlov +Public domain */ -#include "LzmaEnc.h" -#include "LzmaDec.h" -#include "Alloc.h" -#include "LzmaLib.h" - -namespace crnlib { - -static void* SzAlloc(void* p, size_t size) { - p = p; - return MyAlloc(size); -} -static void SzFree(void* p, void* address) { - p = p; - MyFree(address); -} -static ISzAlloc g_Alloc = {SzAlloc, SzFree}; - -MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, - unsigned char* outProps, size_t* outPropsSize, - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* use (1 << N) or (3 << N). 
4 KB < dictSize <= 128 MB */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ) { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - props.level = level; - props.dictSize = dictSize; - props.lc = lc; - props.lp = lp; - props.pb = pb; - props.fb = fb; - props.numThreads = numThreads; - - return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, - NULL, &g_Alloc, &g_Alloc); -} - -MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, - const unsigned char* props, size_t propsSize) { - ELzmaStatus status; - return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); -} -} +#include "LzmaEnc.h" +#include "LzmaDec.h" +#include "Alloc.h" +#include "LzmaLib.h" + +namespace crnlib { + +static void* SzAlloc(void* p, size_t size) { + p = p; + return MyAlloc(size); +} +static void SzFree(void* p, void* address) { + p = p; + MyFree(address); +} +static ISzAlloc g_Alloc = {SzAlloc, SzFree}; + +MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, + unsigned char* outProps, size_t* outPropsSize, + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* use (1 << N) or (3 << N). 
4 KB < dictSize <= 128 MB */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ) { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + props.lc = lc; + props.lp = lp; + props.pb = pb; + props.fb = fb; + props.numThreads = numThreads; + + return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); +} + +MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, + const unsigned char* props, size_t propsSize) { + ELzmaStatus status; + return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); +} +} diff --git a/3rdparty/lzma/Linux/LzmaLib.h b/3rdparty/lzma/Linux/LzmaLib.h index d0cc4a7..4fc2930 100644 --- a/3rdparty/lzma/Linux/LzmaLib.h +++ b/3rdparty/lzma/Linux/LzmaLib.h @@ -1,145 +1,145 @@ -/* LzmaLib.h -- LZMA library interface -2008-08-05 -Igor Pavlov -Public domain */ - -#ifndef __LZMALIB_H -#define __LZMALIB_H - -#include "LzmaTypes.h" - -namespace crnlib { - -#if 0 -#ifdef __cplusplus -#define MY_EXTERN_C extern "C" -#else -#define MY_EXTERN_C extern -#endif - -#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL -#else -#define MY_STDAPI int MY_STD_CALL -#endif - -#define LZMA_PROPS_SIZE 5 - -/* -RAM requirements for LZMA: - for compression: (dictSize * 11.5 + 6 MB) + state_size - for decompression: dictSize + state_size - state_size = (4 + (1.5 << (lc + lp))) KB - by default (lc=3, lp=0), state_size = 16 KB. - -LZMA properties (5 bytes) format - Offset Size Description - 0 1 lc, lp and pb in encoded form. - 1 4 dictSize (little endian). -*/ - -/* -LzmaCompress ------------- - -outPropsSize - - In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. 
- Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. - - LZMA Encoder will use defult values for any parameter, if it is - -1 for any from: level, loc, lp, pb, fb, numThreads - 0 for dictSize - -level - compression level: 0 <= level <= 9; - - level dictSize algo fb - 0: 16 KB 0 32 - 1: 64 KB 0 32 - 2: 256 KB 0 32 - 3: 1 MB 0 32 - 4: 4 MB 0 32 - 5: 16 MB 1 32 - 6: 32 MB 1 32 - 7+: 64 MB 1 64 - - The default value for "level" is 5. - - algo = 0 means fast method - algo = 1 means normal method - -dictSize - The dictionary size in bytes. The maximum value is - 128 MB = (1 << 27) bytes for 32-bit version - 1 GB = (1 << 30) bytes for 64-bit version - The default value is 16 MB = (1 << 24) bytes. - It's recommended to use the dictionary that is larger than 4 KB and - that can be calculated as (1 << N) or (3 << N) sizes. - -lc - The number of literal context bits (high bits of previous literal). - It can be in the range from 0 to 8. The default value is 3. - Sometimes lc=4 gives the gain for big files. - -lp - The number of literal pos bits (low bits of current position for literals). - It can be in the range from 0 to 4. The default value is 0. - The lp switch is intended for periodical data when the period is equal to 2^lp. - For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's - better to set lc=0, if you change lp switch. - -pb - The number of pos bits (low bits of current position). - It can be in the range from 0 to 4. The default value is 2. - The pb switch is intended for periodical data when the period is equal 2^pb. - -fb - Word size (the number of fast bytes). - It can be in the range from 5 to 273. The default value is 32. - Usually, a big number gives a little bit better compression ratio and - slower compression process. - -numThreads - The number of thereads. 1 or 2. The default value is 2. - Fast mode (algo = 0) can use only 1 thread. 
- -Out: - destLen - processed output size -Returns: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, - unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* default = (1 << 24) */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ); - -/* -LzmaUncompress --------------- -In: - dest - output data - destLen - output data size - src - input data - srcLen - input data size -Out: - destLen - processed output size - srcLen - processed input size -Returns: - SZ_OK - OK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation arror - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) -*/ - -MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, SizeT* srcLen, - const unsigned char* props, size_t propsSize); - -#define LZMA_COMPRESS_FUNC_EXPORT "LzmaCompress" -#define LZMA_UNCOMPRESS_FUNC_EXPORT "LzmaUncompress" -} - -#endif +/* LzmaLib.h -- LZMA library interface +2008-08-05 +Igor Pavlov +Public domain */ + +#ifndef __LZMALIB_H +#define __LZMALIB_H + +#include "LzmaTypes.h" + +namespace crnlib { + +#if 0 +#ifdef __cplusplus +#define MY_EXTERN_C extern "C" +#else +#define MY_EXTERN_C extern +#endif + +#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL +#else +#define MY_STDAPI int MY_STD_CALL +#endif + +#define LZMA_PROPS_SIZE 5 + +/* +RAM requirements for LZMA: + for compression: (dictSize * 11.5 + 6 MB) + state_size + for decompression: dictSize + 
state_size + state_size = (4 + (1.5 << (lc + lp))) KB + by default (lc=3, lp=0), state_size = 16 KB. + +LZMA properties (5 bytes) format + Offset Size Description + 0 1 lc, lp and pb in encoded form. + 1 4 dictSize (little endian). +*/ + +/* +LzmaCompress +------------ + +outPropsSize - + In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + + LZMA Encoder will use defult values for any parameter, if it is + -1 for any from: level, loc, lp, pb, fb, numThreads + 0 for dictSize + +level - compression level: 0 <= level <= 9; + + level dictSize algo fb + 0: 16 KB 0 32 + 1: 64 KB 0 32 + 2: 256 KB 0 32 + 3: 1 MB 0 32 + 4: 4 MB 0 32 + 5: 16 MB 1 32 + 6: 32 MB 1 32 + 7+: 64 MB 1 64 + + The default value for "level" is 5. + + algo = 0 means fast method + algo = 1 means normal method + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + +lc - The number of literal context bits (high bits of previous literal). + It can be in the range from 0 to 8. The default value is 3. + Sometimes lc=4 gives the gain for big files. + +lp - The number of literal pos bits (low bits of current position for literals). + It can be in the range from 0 to 4. The default value is 0. + The lp switch is intended for periodical data when the period is equal to 2^lp. + For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's + better to set lc=0, if you change lp switch. + +pb - The number of pos bits (low bits of current position). + It can be in the range from 0 to 4. The default value is 2. 
+ The pb switch is intended for periodical data when the period is equal 2^pb. + +fb - Word size (the number of fast bytes). + It can be in the range from 5 to 273. The default value is 32. + Usually, a big number gives a little bit better compression ratio and + slower compression process. + +numThreads - The number of thereads. 1 or 2. The default value is 2. + Fast mode (algo = 0) can use only 1 thread. + +Out: + destLen - processed output size +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, + unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + +/* +LzmaUncompress +-------------- +In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size +Out: + destLen - processed output size + srcLen - processed input size +Returns: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation arror + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) +*/ + +MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, SizeT* srcLen, + const unsigned char* props, size_t propsSize); + +#define LZMA_COMPRESS_FUNC_EXPORT "LzmaCompress" +#define LZMA_UNCOMPRESS_FUNC_EXPORT "LzmaUncompress" +} + +#endif diff --git a/3rdparty/lzma/Linux/LzmaTypes.h b/3rdparty/lzma/Linux/LzmaTypes.h index 298f1d8..4020f95 100644 --- 
a/3rdparty/lzma/Linux/LzmaTypes.h +++ b/3rdparty/lzma/Linux/LzmaTypes.h @@ -1,214 +1,214 @@ -/* Types.h -- Basic types -2008-11-23 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#include - -#if defined(_WIN32) -#include -#define COMPRESS_MF_MT -#endif - -namespace crnlib { - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - -#ifdef _WIN32 -typedef DWORD WRes; -#else -typedef int WRes; -#endif - -#ifndef RINOK -#define RINOK(x) \ - { \ - int __result__ = (x); \ - if (__result__ != 0) \ - return __result__; \ - } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! 
*/ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int Bool; -#define True 1 -#define False 0 - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_CDECL __cdecl -#define MY_STD_CALL __stdcall -#define MY_FAST_CALL MY_NO_INLINE __fastcall - -#else - -#define MY_CDECL -#define MY_STD_CALL -#define MY_FAST_CALL - -#endif - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) < input(*size)) is allowed */ -} ISeqInStream; - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf); - -typedef struct -{ - size_t (*Write)(void* p, const void* buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -} ISeqOutStream; - -typedef enum { - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ISeekInStream; - -typedef struct -{ - SRes (*Look)(void* p, void** buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
- (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void* p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(void* p, void* buf, size_t* size); - /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ILookInStream; - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size); -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size); - -#define LookToRead_BUF_SIZE (1 << 14) - -typedef struct -{ - ILookInStream s; - ISeekInStream* realStream; - size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead); -void LookToRead_Init(CLookToRead* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead* p); - -typedef struct -{ - SRes (*Progress)(void* p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. 
*/ -} ICompressProgress; - -typedef struct -{ - void* (*Alloc)(void* p, size_t size); - void (*Free)(void* p, void* address); /* address can be 0 */ -} ISzAlloc; - -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) -} - -#endif +/* Types.h -- Basic types +2008-11-23 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#include + +#if defined(_WIN32) +#include +#define COMPRESS_MF_MT +#endif + +namespace crnlib { + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + +#ifdef _WIN32 +typedef DWORD WRes; +#else +typedef int WRes; +#endif + +#ifndef RINOK +#define RINOK(x) \ + { \ + int __result__ = (x); \ + if (__result__ != 0) \ + return __result__; \ + } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! 
*/ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int Bool; +#define True 1 +#define False 0 + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_CDECL __cdecl +#define MY_STD_CALL __stdcall +#define MY_FAST_CALL MY_NO_INLINE __fastcall + +#else + +#define MY_CDECL +#define MY_STD_CALL +#define MY_FAST_CALL + +#endif + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct +{ + SRes (*Read)(void* p, void* buf, size_t* size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +} ISeqInStream; + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size); +SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf); + +typedef struct +{ + size_t (*Write)(void* p, const void* buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +} ISeqOutStream; + +typedef enum { + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + +typedef struct +{ + SRes (*Read)(void* p, void* buf, size_t* size); /* same as ISeqInStream::Read */ + SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); +} ISeekInStream; + +typedef struct +{ + SRes (*Look)(void* p, void** buf, size_t* size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
+ (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(void* p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(void* p, void* buf, size_t* size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); +} ILookInStream; + +SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size); +SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size); + +#define LookToRead_BUF_SIZE (1 << 14) + +typedef struct +{ + ILookInStream s; + ISeekInStream* realStream; + size_t pos; + size_t size; + Byte buf[LookToRead_BUF_SIZE]; +} CLookToRead; + +void LookToRead_CreateVTable(CLookToRead* p, int lookahead); +void LookToRead_Init(CLookToRead* p); + +typedef struct +{ + ISeqInStream s; + ILookInStream* realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook* p); + +typedef struct +{ + ISeqInStream s; + ILookInStream* realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead* p); + +typedef struct +{ + SRes (*Progress)(void* p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
*/ +} ICompressProgress; + +typedef struct +{ + void* (*Alloc)(void* p, size_t size); + void (*Free)(void* p, void* address); /* address can be 0 */ +} ISzAlloc; + +#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) +#define IAlloc_Free(p, a) (p)->Free((p), a) +} + +#endif diff --git a/3rdparty/lzma/Linux/Threads.cpp b/3rdparty/lzma/Linux/Threads.cpp index 3adb203..ec7a42d 100644 --- a/3rdparty/lzma/Linux/Threads.cpp +++ b/3rdparty/lzma/Linux/Threads.cpp @@ -1,124 +1,124 @@ -/* Threads.c -- multithreading library -2008-08-05 -Igor Pavlov -Public domain */ - +/* Threads.c -- multithreading library +2008-08-05 +Igor Pavlov +Public domain */ + #include -#include "crn_core.h" -#include "Threads.h" - -namespace crnlib { - -static WRes GetError() { - DWORD res = GetLastError(); - return (res) ? (WRes)(res) : 1; -} - -WRes HandleToWRes(HANDLE h) { - return (h != 0) ? 0 : GetError(); -} -WRes BOOLToWRes(BOOL v) { - return v ? 0 : GetError(); -} - -static WRes MyCloseHandle(HANDLE* h) { - if (*h != NULL) - if (!CloseHandle(*h)) - return GetError(); - *h = NULL; - return 0; -} - -WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter) { - unsigned threadId; /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - thread->handle = - /* CreateThread(0, 0, startAddress, parameter, 0, &threadId); */ - (HANDLE)_beginthreadex(NULL, 0, startAddress, parameter, 0, &threadId); - /* maybe we must use errno here, but probably GetLastError() is also OK. 
*/ - return HandleToWRes(thread->handle); -} - -WRes WaitObject(HANDLE h) { - return (WRes)WaitForSingleObject(h, INFINITE); -} - -WRes Thread_Wait(CThread* thread) { - if (thread->handle == NULL) - return 1; - return WaitObject(thread->handle); -} - -WRes Thread_Close(CThread* thread) { - return MyCloseHandle(&thread->handle); -} - -WRes Event_Create(CEvent* p, BOOL manualReset, int initialSignaled) { - p->handle = CreateEvent(NULL, manualReset, (initialSignaled ? TRUE : FALSE), NULL); - return HandleToWRes(p->handle); -} - -WRes ManualResetEvent_Create(CManualResetEvent* p, int initialSignaled) { - return Event_Create(p, TRUE, initialSignaled); -} -WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* p) { - return ManualResetEvent_Create(p, 0); -} - -WRes AutoResetEvent_Create(CAutoResetEvent* p, int initialSignaled) { - return Event_Create(p, FALSE, initialSignaled); -} -WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* p) { - return AutoResetEvent_Create(p, 0); -} - -WRes Event_Set(CEvent* p) { - return BOOLToWRes(SetEvent(p->handle)); -} -WRes Event_Reset(CEvent* p) { - return BOOLToWRes(ResetEvent(p->handle)); -} -WRes Event_Wait(CEvent* p) { - return WaitObject(p->handle); -} -WRes Event_Close(CEvent* p) { - return MyCloseHandle(&p->handle); -} - -WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount) { - p->handle = CreateSemaphore(NULL, (LONG)initiallyCount, (LONG)maxCount, NULL); - return HandleToWRes(p->handle); -} - -WRes Semaphore_Release(CSemaphore* p, LONG releaseCount, LONG* previousCount) { - return BOOLToWRes(ReleaseSemaphore(p->handle, releaseCount, previousCount)); -} -WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 releaseCount) { - return Semaphore_Release(p, (LONG)releaseCount, NULL); -} -WRes Semaphore_Release1(CSemaphore* p) { - return Semaphore_ReleaseN(p, 1); -} - -WRes Semaphore_Wait(CSemaphore* p) { - return WaitObject(p->handle); -} -WRes Semaphore_Close(CSemaphore* p) { - return 
MyCloseHandle(&p->handle); -} - -WRes CriticalSection_Init(CCriticalSection* p) { -#ifdef _MSC_VER - /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ - __try { - InitializeCriticalSection(p); - /* InitializeCriticalSectionAndSpinCount(p, 0); */ - } __except (EXCEPTION_EXECUTE_HANDLER) { - return 1; - } -#else - InitializeCriticalSection(p); -#endif - return 0; -} -} +#include "crn_core.h" +#include "Threads.h" + +namespace crnlib { + +static WRes GetError() { + DWORD res = GetLastError(); + return (res) ? (WRes)(res) : 1; +} + +WRes HandleToWRes(HANDLE h) { + return (h != 0) ? 0 : GetError(); +} +WRes BOOLToWRes(BOOL v) { + return v ? 0 : GetError(); +} + +static WRes MyCloseHandle(HANDLE* h) { + if (*h != NULL) + if (!CloseHandle(*h)) + return GetError(); + *h = NULL; + return 0; +} + +WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter) { + unsigned threadId; /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + thread->handle = + /* CreateThread(0, 0, startAddress, parameter, 0, &threadId); */ + (HANDLE)_beginthreadex(NULL, 0, startAddress, parameter, 0, &threadId); + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return HandleToWRes(thread->handle); +} + +WRes WaitObject(HANDLE h) { + return (WRes)WaitForSingleObject(h, INFINITE); +} + +WRes Thread_Wait(CThread* thread) { + if (thread->handle == NULL) + return 1; + return WaitObject(thread->handle); +} + +WRes Thread_Close(CThread* thread) { + return MyCloseHandle(&thread->handle); +} + +WRes Event_Create(CEvent* p, BOOL manualReset, int initialSignaled) { + p->handle = CreateEvent(NULL, manualReset, (initialSignaled ? 
TRUE : FALSE), NULL); + return HandleToWRes(p->handle); +} + +WRes ManualResetEvent_Create(CManualResetEvent* p, int initialSignaled) { + return Event_Create(p, TRUE, initialSignaled); +} +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* p) { + return ManualResetEvent_Create(p, 0); +} + +WRes AutoResetEvent_Create(CAutoResetEvent* p, int initialSignaled) { + return Event_Create(p, FALSE, initialSignaled); +} +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* p) { + return AutoResetEvent_Create(p, 0); +} + +WRes Event_Set(CEvent* p) { + return BOOLToWRes(SetEvent(p->handle)); +} +WRes Event_Reset(CEvent* p) { + return BOOLToWRes(ResetEvent(p->handle)); +} +WRes Event_Wait(CEvent* p) { + return WaitObject(p->handle); +} +WRes Event_Close(CEvent* p) { + return MyCloseHandle(&p->handle); +} + +WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount) { + p->handle = CreateSemaphore(NULL, (LONG)initiallyCount, (LONG)maxCount, NULL); + return HandleToWRes(p->handle); +} + +WRes Semaphore_Release(CSemaphore* p, LONG releaseCount, LONG* previousCount) { + return BOOLToWRes(ReleaseSemaphore(p->handle, releaseCount, previousCount)); +} +WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 releaseCount) { + return Semaphore_Release(p, (LONG)releaseCount, NULL); +} +WRes Semaphore_Release1(CSemaphore* p) { + return Semaphore_ReleaseN(p, 1); +} + +WRes Semaphore_Wait(CSemaphore* p) { + return WaitObject(p->handle); +} +WRes Semaphore_Close(CSemaphore* p) { + return MyCloseHandle(&p->handle); +} + +WRes CriticalSection_Init(CCriticalSection* p) { +#ifdef _MSC_VER + /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ + __try { + InitializeCriticalSection(p); + /* InitializeCriticalSectionAndSpinCount(p, 0); */ + } __except (EXCEPTION_EXECUTE_HANDLER) { + return 1; + } +#else + InitializeCriticalSection(p); +#endif + return 0; +} +} diff --git a/3rdparty/lzma/Linux/Threads.h b/3rdparty/lzma/Linux/Threads.h index 8db3c23..7c1ec0f 
100644 --- a/3rdparty/lzma/Linux/Threads.h +++ b/3rdparty/lzma/Linux/Threads.h @@ -1,65 +1,65 @@ -/* Threads.h -- multithreading library -2008-11-22 : Igor Pavlov : Public domain */ - -#ifndef __7Z_THRESDS_H -#define __7Z_THRESDS_H - -#include "LzmaTypes.h" - -namespace crnlib { - -typedef struct _CThread { - HANDLE handle; -} CThread; - -#define Thread_Construct(thread) (thread)->handle = NULL -#define Thread_WasCreated(thread) ((thread)->handle != NULL) - -typedef unsigned THREAD_FUNC_RET_TYPE; -#define THREAD_FUNC_CALL_TYPE MY_STD_CALL -#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE - -WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter); -WRes Thread_Wait(CThread* thread); -WRes Thread_Close(CThread* thread); - -typedef struct _CEvent { - HANDLE handle; -} CEvent; - -typedef CEvent CAutoResetEvent; -typedef CEvent CManualResetEvent; - -#define Event_Construct(event) (event)->handle = NULL -#define Event_IsCreated(event) ((event)->handle != NULL) - -WRes ManualResetEvent_Create(CManualResetEvent* event, int initialSignaled); -WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* event); -WRes AutoResetEvent_Create(CAutoResetEvent* event, int initialSignaled); -WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* event); -WRes Event_Set(CEvent* event); -WRes Event_Reset(CEvent* event); -WRes Event_Wait(CEvent* event); -WRes Event_Close(CEvent* event); - -typedef struct _CSemaphore { - HANDLE handle; -} CSemaphore; - -#define Semaphore_Construct(p) (p)->handle = NULL - -WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount); -WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 num); -WRes Semaphore_Release1(CSemaphore* p); -WRes Semaphore_Wait(CSemaphore* p); -WRes Semaphore_Close(CSemaphore* p); - -typedef CRITICAL_SECTION CCriticalSection; - -WRes CriticalSection_Init(CCriticalSection* p); -#define CriticalSection_Delete(p) DeleteCriticalSection(p) 
-#define CriticalSection_Enter(p) EnterCriticalSection(p) -#define CriticalSection_Leave(p) LeaveCriticalSection(p) -} - -#endif +/* Threads.h -- multithreading library +2008-11-22 : Igor Pavlov : Public domain */ + +#ifndef __7Z_THRESDS_H +#define __7Z_THRESDS_H + +#include "LzmaTypes.h" + +namespace crnlib { + +typedef struct _CThread { + HANDLE handle; +} CThread; + +#define Thread_Construct(thread) (thread)->handle = NULL +#define Thread_WasCreated(thread) ((thread)->handle != NULL) + +typedef unsigned THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_CALL_TYPE MY_STD_CALL +#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter); +WRes Thread_Wait(CThread* thread); +WRes Thread_Close(CThread* thread); + +typedef struct _CEvent { + HANDLE handle; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(event) (event)->handle = NULL +#define Event_IsCreated(event) ((event)->handle != NULL) + +WRes ManualResetEvent_Create(CManualResetEvent* event, int initialSignaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* event); +WRes AutoResetEvent_Create(CAutoResetEvent* event, int initialSignaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* event); +WRes Event_Set(CEvent* event); +WRes Event_Reset(CEvent* event); +WRes Event_Wait(CEvent* event); +WRes Event_Close(CEvent* event); + +typedef struct _CSemaphore { + HANDLE handle; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->handle = NULL + +WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 num); +WRes Semaphore_Release1(CSemaphore* p); +WRes Semaphore_Wait(CSemaphore* p); +WRes Semaphore_Close(CSemaphore* p); + +typedef CRITICAL_SECTION CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection* p); +#define 
CriticalSection_Delete(p) DeleteCriticalSection(p) +#define CriticalSection_Enter(p) EnterCriticalSection(p) +#define CriticalSection_Leave(p) LeaveCriticalSection(p) +} + +#endif diff --git a/3rdparty/miniz/LICENSE b/3rdparty/miniz/LICENSE index b6ff45a..1982f4b 100644 --- a/3rdparty/miniz/LICENSE +++ b/3rdparty/miniz/LICENSE @@ -1,22 +1,22 @@ -Copyright 2013-2014 RAD Game Tools and Valve Software -Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/crnlib/crn_arealist.cpp b/crnlib/crn_arealist.cpp index fc05372..237349e 100644 --- a/crnlib/crn_arealist.cpp +++ b/crnlib/crn_arealist.cpp @@ -1,6 +1,26 @@ -// File: crn_arealist.cpp - 2D shape algebra (currently unused) -// See Copyright Notice and license at the end of inc/crnlib.h -// Ported from the PowerView DOS image viewer, a product I wrote back in 1993. Not currently used in the open source release of crnlib. +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_arealist.h" @@ -8,7 +28,6 @@ namespace crnlib { - static void area_fatal_error(const char*, const char* pMsg, ...) { va_list args; @@ -28,7 +47,7 @@ namespace crnlib static Area* delete_area(Area_List* Plist, Area* Parea) { - Area* p, * q; + Area *p, *q; #ifdef RECT_DEBUG if ((Parea == Plist->Phead) || (Parea == Plist->Ptail)) @@ -71,7 +90,7 @@ namespace crnlib static Area* insert_area_before(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) { - Area* p, * Pnew_area = alloc_area(Plist); + Area *p, *Pnew_area = alloc_area(Plist); p = Parea->Pprev; @@ -92,7 +111,7 @@ namespace crnlib static Area* insert_area_after(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) { - Area* p, * Pnew_area = alloc_area(Plist); + Area *p, *Pnew_area = alloc_area(Plist); p = Parea->Pnext; @@ -180,8 +199,12 @@ namespace crnlib for (i = 0; i < Plist->total_areas; i++) { - Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == nullptr) ? nullptr : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; - Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == nullptr) ? nullptr : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == nullptr) + ? 
nullptr + : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == nullptr) + ? nullptr + : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; Pnew_list->Phead[i].x1 += x_ofs; Pnew_list->Phead[i].y1 += y_ofs; @@ -226,8 +249,12 @@ namespace crnlib { for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) ? nullptr : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) ? nullptr : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) + ? nullptr + : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) + ? nullptr + : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; Pdst_list->Phead[i].x1 += x_ofs; Pdst_list->Phead[i].y1 += y_ofs; @@ -239,8 +266,12 @@ namespace crnlib { for (i = 0; i < Psrc_list->total_areas; i++) { - Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) ? nullptr : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; - Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) ? nullptr : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == nullptr) + ? nullptr + : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == nullptr) + ? 
nullptr + : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; } } } @@ -484,7 +515,8 @@ namespace crnlib if ((y2 == Parea->y1 - 1) || (y1 == Parea->y2 + 1)) { delete_area(Plist, Parea); - Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), CRNLIB_TRUE); + Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), + CRNLIB_TRUE); return; } } @@ -493,7 +525,8 @@ namespace crnlib if ((x2 == Parea->x1 - 1) || (x1 == Parea->x2 + 1)) { delete_area(Plist, Parea); - Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, CRNLIB_TRUE); + Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, + CRNLIB_TRUE); return; } } @@ -669,5 +702,4 @@ namespace crnlib return Pnew_list; } - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_arealist.h b/crnlib/crn_arealist.h index 317d7b9..773f4b7 100644 --- a/crnlib/crn_arealist.h +++ b/crnlib/crn_arealist.h @@ -1,12 +1,34 @@ -// File: crn_arealist.h - 2D shape algebra -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" namespace crnlib { - struct Area { + struct Area + { struct Area *Pprev, *Pnext; int x1, y1, x2, y2; diff --git a/crnlib/crn_assert.cpp b/crnlib/crn_assert.cpp index 686dcb7..a4991f4 100644 --- a/crnlib/crn_assert.cpp +++ b/crnlib/crn_assert.cpp @@ -1,5 +1,25 @@ -// File: crn_assert.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" diff --git a/crnlib/crn_assert.h b/crnlib/crn_assert.h index d6cfa14..780d21f 100644 --- a/crnlib/crn_assert.h +++ b/crnlib/crn_assert.h @@ -1,5 +1,26 @@ -// File: crn_assert.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" @@ -20,10 +41,11 @@ CRN_EXPORT void crnlib_fail(const char* pExp, const char* pFile, unsigned line); #define CRNLIB_VERIFY(_exp) (void)((!!(_exp)) || (crnlib_assert(#_exp, __FILE__, __LINE__), 0)) -#define CRNLIB_FAIL(msg) \ - do { \ - crnlib_fail(#msg, __FILE__, __LINE__); \ - } while (0) +#define CRNLIB_FAIL(msg) \ + do \ + { \ + crnlib_fail(#msg, __FILE__, __LINE__); \ + } while (0) #define CRNLIB_ASSERT_OPEN_RANGE(x, l, h) CRNLIB_ASSERT((x >= l) && (x < h)) #define CRNLIB_ASSERT_CLOSED_RANGE(x, l, h) CRNLIB_ASSERT((x >= l) && (x <= h)) @@ -32,14 +54,20 @@ CRN_EXPORT void trace(const char* pFmt, va_list args); CRN_EXPORT void trace(const char* pFmt, ...); // Borrowed from boost libraries. 
-template +template struct crnlib_assume_failure; -template <> -struct crnlib_assume_failure { - enum { blah = 1 }; +template<> +struct crnlib_assume_failure +{ + enum + { + blah = 1 + }; +}; +template +struct crnlib_assume_try +{ }; -template -struct crnlib_assume_try {}; #define CRNLIB_JOINER_FINAL(a, b) a##b #define CRNLIB_JOINER(a, b) CRNLIB_JOINER_FINAL(a, b) @@ -47,23 +75,27 @@ struct crnlib_assume_try {}; #define CRNLIB_ASSUME(p) typedef crnlib_assume_try)> CRNLIB_JOIN(crnlib_assume_typedef, __COUNTER__) #ifdef NDEBUG -template -inline T crnlib_assert_range(T i, T) { - return i; +template +inline T crnlib_assert_range(T i, T) +{ + return i; } -template -inline T crnlib_assert_range_incl(T i, T) { - return i; +template +inline T crnlib_assert_range_incl(T i, T) +{ + return i; } #else -template -inline T crnlib_assert_range(T i, T m) { - CRNLIB_ASSERT((i >= 0) && (i < m)); - return i; +template +inline T crnlib_assert_range(T i, T m) +{ + CRNLIB_ASSERT((i >= 0) && (i < m)); + return i; } -template -inline T crnlib_assert_range_incl(T i, T m) { - CRNLIB_ASSERT((i >= 0) && (i <= m)); - return i; +template +inline T crnlib_assert_range_incl(T i, T m) +{ + CRNLIB_ASSERT((i >= 0) && (i <= m)); + return i; } #endif diff --git a/crnlib/crn_atomics.h b/crnlib/crn_atomics.h index c97c9d2..3f6734d 100644 --- a/crnlib/crn_atomics.h +++ b/crnlib/crn_atomics.h @@ -1,4 +1,26 @@ -// File: crn_atomics.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #ifndef CRN_ATOMICS_H #define CRN_ATOMICS_H @@ -11,16 +33,18 @@ #endif #if defined(__GNUC__) && CRNLIB_PLATFORM_PC -extern __inline__ __attribute__((__always_inline__, __gnu_inline__)) void crnlib_yield_processor() { - __asm__ __volatile__("pause"); +extern __inline__ __attribute__((__always_inline__, __gnu_inline__)) void crnlib_yield_processor() +{ + __asm__ __volatile__("pause"); } #else -CRN_FORCE_INLINE void crnlib_yield_processor() { +CRN_FORCE_INLINE void crnlib_yield_processor() +{ #if CRNLIB_USE_MSVC_INTRINSICS #if CRNLIB_PLATFORM_PC_X64 - _mm_pause(); + _mm_pause(); #else - YieldProcessor(); + YieldProcessor(); #endif #else // No implementation @@ -33,152 +57,174 @@ extern "C" __int64 _InterlockedCompareExchange64(__int64 volatile* Destination, #if defined(_MSC_VER) #pragma intrinsic(_InterlockedCompareExchange64) #endif -#endif // CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS +#endif // CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS -namespace crnlib { +namespace crnlib +{ #if CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS -typedef LONG atomic32_t; -typedef LONGLONG atomic64_t; - -// Returns the original value. -inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedCompareExchange(pDest, exchange, comparand); -} - -// Returns the original value. 
-inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); - return _InterlockedCompareExchange64(pDest, exchange, comparand); -} - -// Returns the resulting incremented value. -inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedIncrement(pDest); -} - -// Returns the resulting decremented value. -inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedDecrement(pDest); -} - -// Returns the original value. -inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedExchange(pDest, val); -} - -// Returns the resulting value. -inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedExchangeAdd(pDest, val) + val; -} - -// Returns the original value. -inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return InterlockedExchangeAdd(pDest, val); -} + typedef LONG atomic32_t; + typedef LONGLONG atomic64_t; + + // Returns the original value. + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedCompareExchange(pDest, exchange, comparand); + } + + // Returns the original value. + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); + return _InterlockedCompareExchange64(pDest, exchange, comparand); + } + + // Returns the resulting incremented value. 
+ inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedIncrement(pDest); + } + + // Returns the resulting decremented value. + inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedDecrement(pDest); + } + + // Returns the original value. + inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchange(pDest, val); + } + + // Returns the resulting value. + inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchangeAdd(pDest, val) + val; + } + + // Returns the original value. + inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return InterlockedExchangeAdd(pDest, val); + } #elif CRNLIB_USE_GCC_ATOMIC_BUILTINS -typedef long atomic32_t; -typedef long long atomic64_t; - -// Returns the original value. -inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_val_compare_and_swap(pDest, comparand, exchange); -} - -// Returns the original value. -inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); - return __sync_val_compare_and_swap(pDest, comparand, exchange); -} - -// Returns the resulting incremented value. -inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_add_and_fetch(pDest, 1); -} - -// Returns the resulting decremented value. 
-inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_sub_and_fetch(pDest, 1); -} - -// Returns the original value. -inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_lock_test_and_set(pDest, val); -} - -// Returns the resulting value. -inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_add_and_fetch(pDest, val); -} - -// Returns the original value. -inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return __sync_fetch_and_add(pDest, val); -} + typedef long atomic32_t; + typedef long long atomic64_t; + + // Returns the original value. + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_val_compare_and_swap(pDest, comparand, exchange); + } + + // Returns the original value. + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); + return __sync_val_compare_and_swap(pDest, comparand, exchange); + } + + // Returns the resulting incremented value. + inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_add_and_fetch(pDest, 1); + } + + // Returns the resulting decremented value. + inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_sub_and_fetch(pDest, 1); + } + + // Returns the original value. 
+ inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_lock_test_and_set(pDest, val); + } + + // Returns the resulting value. + inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_add_and_fetch(pDest, val); + } + + // Returns the original value. + inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return __sync_fetch_and_add(pDest, val); + } #else #define CRNLIB_NO_ATOMICS 1 -// Atomic ops not supported - but try to do something reasonable. Assumes no threading at all. -typedef long atomic32_t; -typedef long long atomic64_t; - -inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - atomic32_t cur = *pDest; - if (cur == comparand) - *pDest = exchange; - return cur; -} - -inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); - atomic64_t cur = *pDest; - if (cur == comparand) - *pDest = exchange; - return cur; -} - -inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return (*pDest += 1); -} - -inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return (*pDest -= 1); -} - -inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - atomic32_t cur = *pDest; - *pDest = val; - return cur; -} - -inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - return (*pDest += val); 
-} - -inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { - CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); - atomic32_t cur = *pDest; - *pDest += val; - return cur; -} + // Atomic ops not supported - but try to do something reasonable. Assumes no threading at all. + typedef long atomic32_t; + typedef long long atomic64_t; + + inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + if (cur == comparand) + *pDest = exchange; + return cur; + } + + inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); + atomic64_t cur = *pDest; + if (cur == comparand) + *pDest = exchange; + return cur; + } + + inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest += 1); + } + + inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest -= 1); + } + + inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + *pDest = val; + return cur; + } + + inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + return (*pDest += val); + } + + inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) + { + CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); + atomic32_t cur = *pDest; + *pDest += val; + return cur; + } #endif -} // namespace crnlib +} // namespace crnlib -#endif // CRN_ATOMICS_H +#endif // CRN_ATOMICS_H diff --git a/crnlib/crn_buffer_stream.h b/crnlib/crn_buffer_stream.h index 0ad5724..6889939 100644 --- 
a/crnlib/crn_buffer_stream.h +++ b/crnlib/crn_buffer_stream.h @@ -1,6 +1,28 @@ -// File: crn_buffer_stream.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_data_stream.h" namespace crnlib @@ -8,14 +30,16 @@ namespace crnlib class buffer_stream : public data_stream { public: - buffer_stream(): data_stream(), + buffer_stream() : + data_stream(), m_pBuf(nullptr), m_size(0), m_ofs(0) { } - buffer_stream(void* p, uint size): data_stream(), + buffer_stream(void* p, uint size) : + data_stream(), m_pBuf(nullptr), m_size(0), m_ofs(0) @@ -23,7 +47,8 @@ namespace crnlib open(p, size); } - buffer_stream(const void* p, uint size): data_stream(), + buffer_stream(const void* p, uint size) : + data_stream(), m_pBuf(nullptr), m_size(0), m_ofs(0) @@ -151,7 +176,8 @@ namespace crnlib return len; } - virtual bool flush() { + virtual bool flush() + { if (!m_opened) { return false; @@ -160,7 +186,8 @@ namespace crnlib return true; } - virtual uint64 get_size() { + virtual uint64 get_size() + { if (!m_opened) { return 0; @@ -169,7 +196,8 @@ namespace crnlib return m_size; } - virtual uint64 get_remaining() { + virtual uint64 get_remaining() + { if (!m_opened) { return 0; @@ -180,7 +208,8 @@ namespace crnlib return m_size - m_ofs; } - virtual uint64 get_ofs() { + virtual uint64 get_ofs() + { if (!m_opened) { return 0; @@ -189,7 +218,8 @@ namespace crnlib return m_ofs; } - virtual bool seek(int64 ofs, bool relative) { + virtual bool seek(int64 ofs, bool relative) + { if ((!m_opened) || (!is_seekable())) { return false; @@ -219,4 +249,4 @@ namespace crnlib uint m_ofs; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_cfile_stream.h b/crnlib/crn_cfile_stream.h index 4c9cc61..730c0fc 100644 --- a/crnlib/crn_cfile_stream.h +++ b/crnlib/crn_cfile_stream.h @@ -1,5 +1,26 @@ -// File: crn_cfile_stream.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_data_stream.h" @@ -9,7 +30,8 @@ namespace crnlib class cfile_stream : public data_stream { public: - cfile_stream(): data_stream(), + cfile_stream() : + data_stream(), m_pFile(nullptr), m_size(0), m_ofs(0), @@ -17,7 +39,8 @@ namespace crnlib { } - cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership): data_stream(), + cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) : + data_stream(), m_pFile(nullptr), m_size(0), m_ofs(0), @@ -26,7 +49,8 @@ namespace crnlib open(pFile, pFilename, attribs, has_ownership); } - cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false): data_stream(), + cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) : + data_stream(), m_pFile(nullptr), m_size(0), m_ofs(0), @@ -281,4 +305,4 @@ namespace crnlib uint64 m_size, m_ofs; bool m_has_ownership; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_checksum.cpp b/crnlib/crn_checksum.cpp index fe774a1..f34ede4 100644 --- a/crnlib/crn_checksum.cpp +++ b/crnlib/crn_checksum.cpp @@ -1,4 +1,26 
@@ -// File: crn_checksum.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" namespace crnlib @@ -63,4 +85,4 @@ namespace crnlib return static_cast(~crc); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_checksum.h b/crnlib/crn_checksum.h index 9179284..78ac585 100644 --- a/crnlib/crn_checksum.h +++ b/crnlib/crn_checksum.h @@ -1,4 +1,26 @@ -// File: crn_checksum.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" @@ -12,4 +34,4 @@ namespace crnlib const uint cInitCRC16 = 0; CRN_EXPORT uint16 crc16(const void* pBuf, size_t len, uint16 crc = cInitCRC16); -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_clusterizer.h b/crnlib/crn_clusterizer.h index c1f92db..6d2d04d 100644 --- a/crnlib/crn_clusterizer.h +++ b/crnlib/crn_clusterizer.h @@ -1,15 +1,37 @@ -// File: crn_clusterizer.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_matrix.h" namespace crnlib { - template + template class clusterizer { public: - clusterizer(): + clusterizer() : m_overall_variance(0.0f), m_split_index(0), m_heap_size(0), @@ -94,7 +116,9 @@ namespace crnlib m_heap[1] = m_heap[m_heap_size]; m_heap_size--; if (m_heap_size) + { down_heap(1); + } split_node(worst_node_index); total_leaves++; @@ -252,7 +276,8 @@ namespace crnlib } } - uint find_best_codebook_entry_fs(const VectorType& v) const { + uint find_best_codebook_entry_fs(const VectorType& v) const + { float best_dist = math::cNearlyInfinite; uint best_index = 0; @@ -273,7 +298,7 @@ namespace crnlib return best_index; } - void retrieve_clusters(uint max_clusters, crnlib::vector >& clusters) const + void retrieve_clusters(uint max_clusters, crnlib::vector>& clusters) const { clusters.resize(0); clusters.reserve(max_clusters); @@ -311,7 +336,7 @@ namespace crnlib struct vq_node { - vq_node(): + vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), @@ -500,12 +525,13 @@ namespace crnlib } } - VectorType axis; //(1.0f); + VectorType axis; //(1.0f); if (N == 1) { axis.set(1.0f); } - else { + else + { for (uint i = 0; i < N; i++) { axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / math::maximum(N - 1, 1))); @@ -846,4 +872,4 @@ namespace crnlib } } }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_color.h b/crnlib/crn_color.h index 2536424..456543c 100644 --- a/crnlib/crn_color.h +++ b/crnlib/crn_color.h @@ -1,5 +1,25 @@ -// File: crn_color.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -7,7 +27,7 @@ namespace crnlib { - template + template struct color_quad_component_traits { enum @@ -19,7 +39,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -31,7 +51,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -43,7 +63,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -55,7 +75,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -67,7 +87,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -79,7 +99,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -91,7 +111,7 @@ namespace crnlib }; }; - template <> + template<> struct color_quad_component_traits { enum @@ -103,10 +123,10 @@ namespace crnlib }; }; - template + template class color_quad : public helpers::rel_ops> { - template + template static inline parameter_type clamp(T v) { parameter_type result = static_cast(v); @@ -125,7 +145,7 @@ namespace crnlib } #if defined(CRN_CC_MSVC) - template <> + template<> static inline parameter_type clamp(int v) { if (!component_traits::cFloat) @@ -158,9 +178,13 @@ namespace crnlib typedef parameter_type 
parameter_t; typedef color_quad_component_traits component_traits; - enum { cNumComps = 4 }; + enum + { + cNumComps = 4 + }; - union { + union + { struct { component_type r; @@ -178,13 +202,14 @@ namespace crnlib { } - inline color_quad(eClear): + inline color_quad(eClear) : r(0), g(0), b(0), a(0) { } - inline color_quad(const color_quad& other): - r(other.r), g(other.g), b(other.b), a(other.a) { + inline color_quad(const color_quad& other) : + r(other.r), g(other.g), b(other.b), a(other.a) + { } explicit inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) @@ -207,8 +232,8 @@ namespace crnlib set_noclamp_rgba(red, green, blue, alpha); } - template - inline color_quad(const color_quad& other): + template + inline color_quad(const color_quad& other) : r(static_cast(clamp(other.r))), g(static_cast(clamp(other.g))), b(static_cast(clamp(other.b))), a(static_cast(clamp(other.a))) { } @@ -238,7 +263,7 @@ namespace crnlib return *this; } - template + template inline color_quad& operator=(const color_quad& other) { r = static_cast(clamp(other.r)); @@ -254,7 +279,8 @@ namespace crnlib return *this; } - inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { + inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) + { y = clamp(y); alpha = clamp(alpha); r = static_cast(y); @@ -427,7 +453,7 @@ namespace crnlib { return true; } - else if (!(c[i] == rhs.c[i])) + else if (c[i] != rhs.c[i]) { return false; } @@ -505,7 +531,8 @@ namespace crnlib return result; } - friend color_quad operator*(parameter_type v, const color_quad& rhs) { + friend color_quad operator*(parameter_type v, const color_quad& rhs) + { color_quad result(rhs); result *= v; return result; @@ -594,12 +621,15 @@ namespace crnlib { return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); } - }; // class color_quad + }; // class color_quad - template + template 
struct scalar_type> { - enum { cFlag = true }; + enum + { + cFlag = true + }; static inline void construct(color_quad* p) { } @@ -691,9 +721,9 @@ namespace crnlib //const uint cGWeight = 24;//73; //const uint cBWeight = 1;//3; - const uint cRWeight = 8; //24; - const uint cGWeight = 25; //73; - const uint cBWeight = 1; //3; + const uint cRWeight = 8; //24; + const uint cGWeight = 25; //73; + const uint cBWeight = 1; //3; inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { @@ -813,12 +843,13 @@ namespace crnlib rgb.a = 255; } - } // namespace color + } // namespace color // This class purposely trades off speed for extremely flexibility. It can handle any component swizzle, any pixel type from 1-4 components and 1-32 bits/component, // any pixel size between 1-16 bytes/pixel, any pixel stride, any color_quad data type (signed/unsigned/float 8/16/32 bits/component), and scaled/non-scaled components. // On the downside, it's freaking slow. 
- class pixel_packer { + class pixel_packer + { public: pixel_packer() { @@ -878,7 +909,7 @@ namespace crnlib return m_rgb_is_luma; } - template + template const void* unpack(const void* p, color_quad_type& color, bool rescale = true) const { const uint8* pSrc = static_cast(p); @@ -943,7 +974,7 @@ namespace crnlib return pSrc + m_pixel_stride; } - template + template void* pack(const color_quad_type& color, void* p, bool rescale = true) const { uint8* pDst = static_cast(p); @@ -975,7 +1006,8 @@ namespace crnlib n = math::minimum(static_cast(floor(t + .5f)), mx); } } - else if (rescale) { + else if (rescale) + { if (color_quad_type::component_traits::cSigned) { n = math::maximum(static_cast(color[i]), 0); @@ -987,7 +1019,8 @@ namespace crnlib const uint32 h = static_cast(color_quad_type::component_traits::cMax); n = static_cast((static_cast(n) * mx + (h >> 1)) / h); } - else { + else + { if (color_quad_type::component_traits::cSigned) { n = math::minimum(static_cast(math::maximum(static_cast(color[i]), 0)), mx); @@ -1175,4 +1208,4 @@ namespace crnlib bool m_rgb_is_luma; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_colorized_console.cpp b/crnlib/crn_colorized_console.cpp index 3388e8a..a600748 100644 --- a/crnlib/crn_colorized_console.cpp +++ b/crnlib/crn_colorized_console.cpp @@ -1,5 +1,25 @@ -// File: crn_colorized_console.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_colorized_console.h" @@ -134,4 +154,4 @@ namespace crnlib } #endif -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_colorized_console.h b/crnlib/crn_colorized_console.h index 61777c9..ae9459d 100644 --- a/crnlib/crn_colorized_console.h +++ b/crnlib/crn_colorized_console.h @@ -1,5 +1,25 @@ -// File: crn_colorized_console.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -18,5 +38,4 @@ namespace crnlib private: static bool console_output_func(eConsoleMessageType type, const char* pMsg, void* pData); }; - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_command_line_params.cpp b/crnlib/crn_command_line_params.cpp index ebd3b3a..95ede28 100644 --- a/crnlib/crn_command_line_params.cpp +++ b/crnlib/crn_command_line_params.cpp @@ -1,5 +1,25 @@ -// File: crn_command_line_params.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_command_line_params.h" @@ -522,4 +542,4 @@ namespace crnlib } return it->second.m_values[value_index]; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_command_line_params.h b/crnlib/crn_command_line_params.h index f3713db..3550b92 100644 --- a/crnlib/crn_command_line_params.h +++ b/crnlib/crn_command_line_params.h @@ -1,6 +1,28 @@ -// File: crn_command_line_params.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_value.h" #include #include "crn_export.h" @@ -16,7 +38,7 @@ namespace crnlib public: struct param_value { - inline param_value(): + inline param_value() : m_index(0), m_modifier(0) { @@ -37,7 +59,8 @@ namespace crnlib static bool split_params(const char* p, dynamic_string_array& params); - struct param_desc { + struct param_desc + { const char* m_pName; uint m_num_values; bool m_support_listing_file; @@ -105,4 +128,4 @@ namespace crnlib static bool load_string_file(const char* pFilename, dynamic_string_array& strings); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp index fd06725..6b4f5b9 100644 --- a/crnlib/crn_comp.cpp +++ b/crnlib/crn_comp.cpp @@ -1,5 +1,26 @@ -// File: crn_comp.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_console.h" #include "crn_comp.h" @@ -8,1317 +29,1634 @@ #define CRNLIB_CREATE_DEBUG_IMAGES 0 #define CRNLIB_ENABLE_DEBUG_MESSAGES 0 -namespace crnlib { +namespace crnlib +{ + crn_comp::crn_comp() : + m_pParams(nullptr) + { + } + + crn_comp::~crn_comp() + { + } + + bool crn_comp::pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) + { + crnlib::vector remapped_endpoints(m_color_endpoints.size()); + + for (uint i = 0; i < m_color_endpoints.size(); i++) + { + remapped_endpoints[remapping[i]] = m_color_endpoints[i]; + } + + const uint component_limits[6] = { 31, 63, 31, 31, 63, 31 }; + + symbol_histogram hist[2]; + hist[0].resize(32); + hist[1].resize(64); -crn_comp::crn_comp() - : m_pParams(nullptr) { -} + crnlib::vector residual_syms; + residual_syms.reserve(m_color_endpoints.size() * 2 * 3); -crn_comp::~crn_comp() { -} + color_quad_u8 prev[2]; + prev[0].clear(); + prev[1].clear(); -bool crn_comp::pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) { - crnlib::vector remapped_endpoints(m_color_endpoints.size()); + int total_residuals = 0; - for (uint i = 0; i < m_color_endpoints.size(); i++) - remapped_endpoints[remapping[i]] = m_color_endpoints[i]; + for (uint endpoint_index = 0; endpoint_index < m_color_endpoints.size(); endpoint_index++) + { + const uint endpoint = remapped_endpoints[endpoint_index]; - const uint component_limits[6] = {31, 63, 31, 31, 63, 31}; + color_quad_u8 cur[2]; + cur[0] = dxt1_block::unpack_color((uint16)(endpoint & 0xFFFF), false); + cur[1] = dxt1_block::unpack_color((uint16)((endpoint >> 16) & 0xFFFF), false); - symbol_histogram hist[2]; - hist[0].resize(32); - hist[1].resize(64); + for (uint j = 0; j < 2; j++) + { + for (uint k = 0; k < 3; k++) + { + int delta = cur[j][k] - prev[j][k]; + total_residuals += delta * delta; + int sym = delta & component_limits[j * 3 + k]; + int table = (k == 1) ? 
1 : 0; + hist[table].inc_freq(sym); + residual_syms.push_back(sym); + } + } + + prev[0] = cur[0]; + prev[1] = cur[1]; + } - crnlib::vector residual_syms; - residual_syms.reserve(m_color_endpoints.size() * 2 * 3); + static_huffman_data_model residual_dm[2]; - color_quad_u8 prev[2]; - prev[0].clear(); - prev[1].clear(); + symbol_codec codec; + codec.start_encoding(1024 * 1024); + + // Transmit residuals + for (uint i = 0; i < 2; i++) + { + if (!residual_dm[i].init(true, hist[i], 15)) + { + return false; + } + + if (!codec.encode_transmit_static_huffman_data_model(residual_dm[i], false)) + { + return false; + } + } - int total_residuals = 0; + for (uint i = 0; i < residual_syms.size(); i++) + { + const uint sym = residual_syms[i]; + const uint table = ((i % 3) == 1) ? 1 : 0; + codec.encode(sym, residual_dm[table]); + } - for (uint endpoint_index = 0; endpoint_index < m_color_endpoints.size(); endpoint_index++) { - const uint endpoint = remapped_endpoints[endpoint_index]; + codec.stop_encoding(false); - color_quad_u8 cur[2]; - cur[0] = dxt1_block::unpack_color((uint16)(endpoint & 0xFFFF), false); - cur[1] = dxt1_block::unpack_color((uint16)((endpoint >> 16) & 0xFFFF), false); + packed_data.swap(codec.get_encoding_buf()); - for (uint j = 0; j < 2; j++) { - for (uint k = 0; k < 3; k++) { - int delta = cur[j][k] - prev[j][k]; - total_residuals += delta * delta; - int sym = delta & component_limits[j * 3 + k]; - int table = (k == 1) ? 
1 : 0; - hist[table].inc_freq(sym); - residual_syms.push_back(sym); - } + return true; } - prev[0] = cur[0]; - prev[1] = cur[1]; - } + bool crn_comp::pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping) + { + crnlib::vector remapped_endpoints(m_color_endpoints.size()); + for (uint i = 0; i < m_color_endpoints.size(); i++) + { + remapped_endpoints[remapping[i]] = (m_color_endpoints[i] & 0x07000000) | (m_color_endpoints[i] >> 3 & 0x001F1F1F); + } - static_huffman_data_model residual_dm[2]; + symbol_histogram hist(32); + for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) + { + for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) + { + hist.inc_freq((e - _e) & 0x1F); + } + } + static_huffman_data_model dm; + dm.init(true, hist, 15); + symbol_codec codec; + codec.start_encoding(1024 * 1024); + codec.encode_transmit_static_huffman_data_model(dm, false); + for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) + { + for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) + { + codec.encode((e - _e) & 0x1F, dm); + } + } + codec.stop_encoding(false); + packed_data.swap(codec.get_encoding_buf()); + return true; + } - symbol_codec codec; - codec.start_encoding(1024 * 1024); + bool crn_comp::pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) + { + crnlib::vector remapped_endpoints(m_alpha_endpoints.size()); - // Transmit residuals - for (uint i = 0; i < 2; i++) { - if (!residual_dm[i].init(true, hist[i], 15)) - return false; + for (uint i = 0; i < m_alpha_endpoints.size(); i++) + { + remapped_endpoints[remapping[i]] = m_alpha_endpoints[i]; + } - if (!codec.encode_transmit_static_huffman_data_model(residual_dm[i], false)) - return false; - } + symbol_histogram hist; + hist.resize(256); - for (uint i = 0; i < residual_syms.size(); i++) { - const uint sym = 
residual_syms[i]; - const uint table = ((i % 3) == 1) ? 1 : 0; - codec.encode(sym, residual_dm[table]); - } + crnlib::vector residual_syms; + residual_syms.reserve(m_alpha_endpoints.size() * 2 * 3); - codec.stop_encoding(false); + uint prev[2]; + utils::zero_object(prev); - packed_data.swap(codec.get_encoding_buf()); + int total_residuals = 0; - return true; -} + for (uint endpoint_index = 0; endpoint_index < m_alpha_endpoints.size(); endpoint_index++) + { + const uint endpoint = remapped_endpoints[endpoint_index]; -bool crn_comp::pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping) { - crnlib::vector remapped_endpoints(m_color_endpoints.size()); - for (uint i = 0; i < m_color_endpoints.size(); i++) - remapped_endpoints[remapping[i]] = (m_color_endpoints[i] & 0x07000000) | (m_color_endpoints[i] >> 3 & 0x001F1F1F); + uint cur[2]; + cur[0] = dxt5_block::unpack_endpoint(endpoint, 0); + cur[1] = dxt5_block::unpack_endpoint(endpoint, 1); - symbol_histogram hist(32); - for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) { - for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) - hist.inc_freq((e - _e) & 0x1F); - } - static_huffman_data_model dm; - dm.init(true, hist, 15); - symbol_codec codec; - codec.start_encoding(1024 * 1024); - codec.encode_transmit_static_huffman_data_model(dm, false); - for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) { - for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) - codec.encode((e - _e) & 0x1F, dm); - } - codec.stop_encoding(false); - packed_data.swap(codec.get_encoding_buf()); - return true; -} + for (uint j = 0; j < 2; j++) + { + int delta = cur[j] - prev[j]; + total_residuals += delta * delta; -bool crn_comp::pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) { - crnlib::vector 
remapped_endpoints(m_alpha_endpoints.size()); + int sym = delta & 255; - for (uint i = 0; i < m_alpha_endpoints.size(); i++) - remapped_endpoints[remapping[i]] = m_alpha_endpoints[i]; + hist.inc_freq(sym); - symbol_histogram hist; - hist.resize(256); + residual_syms.push_back(sym); + } - crnlib::vector residual_syms; - residual_syms.reserve(m_alpha_endpoints.size() * 2 * 3); + prev[0] = cur[0]; + prev[1] = cur[1]; + } - uint prev[2]; - utils::zero_object(prev); + static_huffman_data_model residual_dm; - int total_residuals = 0; + symbol_codec codec; + codec.start_encoding(1024 * 1024); - for (uint endpoint_index = 0; endpoint_index < m_alpha_endpoints.size(); endpoint_index++) { - const uint endpoint = remapped_endpoints[endpoint_index]; + // Transmit residuals + if (!residual_dm.init(true, hist, 15)) + { + return false; + } - uint cur[2]; - cur[0] = dxt5_block::unpack_endpoint(endpoint, 0); - cur[1] = dxt5_block::unpack_endpoint(endpoint, 1); + if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) + { + return false; + } - for (uint j = 0; j < 2; j++) { - int delta = cur[j] - prev[j]; - total_residuals += delta * delta; + for (uint i = 0; i < residual_syms.size(); i++) + { + const uint sym = residual_syms[i]; + codec.encode(sym, residual_dm); + } - int sym = delta & 255; + codec.stop_encoding(false); - hist.inc_freq(sym); + packed_data.swap(codec.get_encoding_buf()); - residual_syms.push_back(sym); + return true; } - prev[0] = cur[0]; - prev[1] = cur[1]; - } - - static_huffman_data_model residual_dm; - - symbol_codec codec; - codec.start_encoding(1024 * 1024); - - // Transmit residuals - if (!residual_dm.init(true, hist, 15)) - return false; - - if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) - return false; - - for (uint i = 0; i < residual_syms.size(); i++) { - const uint sym = residual_syms[i]; - codec.encode(sym, residual_dm); - } - - codec.stop_encoding(false); - - packed_data.swap(codec.get_encoding_buf()); - - 
return true; -} - -bool crn_comp::pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { - crnlib::vector remapped_selectors(m_color_selectors.size()); - for (uint i = 0; i < m_color_selectors.size(); i++) - remapped_selectors[remapping[i]] = m_color_selectors[i]; - symbol_histogram hist(16); - for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { - for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) - hist.inc_freq(selector & 0xF); - } - static_huffman_data_model dm; - dm.init(true, hist, 15); - symbol_codec codec; - codec.start_encoding(1024 * 1024); - codec.encode_transmit_static_huffman_data_model(dm, false); - for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { - for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) - codec.encode(selector & 0xF, dm); - } - codec.stop_encoding(false); - packed_data.swap(codec.get_encoding_buf()); - return true; -} - -bool crn_comp::pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { - crnlib::vector remapped_selectors(m_alpha_selectors.size()); - for (uint i = 0; i < m_alpha_selectors.size(); i++) - remapped_selectors[remapping[i]] = m_alpha_selectors[i]; - symbol_histogram hist(64); - for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { - for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6) - hist.inc_freq(selector & 0x3F); - } - static_huffman_data_model dm; - dm.init(true, hist, 15); - symbol_codec codec; - codec.start_encoding(1024 * 1024); - codec.encode_transmit_static_huffman_data_model(dm, false); - for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { - for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 
8; c; c--, selector >>= 6) - codec.encode(selector & 0x3F, dm); - } - codec.stop_encoding(false); - packed_data.swap(codec.get_encoding_buf()); - return true; -} - -bool crn_comp::pack_blocks( - uint group, - bool clear_histograms, - symbol_codec* pCodec, - const crnlib::vector* pColor_endpoint_remap, - const crnlib::vector* pColor_selector_remap, - const crnlib::vector* pAlpha_endpoint_remap, - const crnlib::vector* pAlpha_selector_remap - ) { - if (!pCodec) { - m_reference_hist.resize(256); - if (clear_histograms) - m_reference_hist.set_all(0); - - if (pColor_endpoint_remap) { - m_endpoint_index_hist[0].resize(pColor_endpoint_remap->size()); - if (clear_histograms) - m_endpoint_index_hist[0].set_all(0); + bool crn_comp::pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) + { + crnlib::vector remapped_selectors(m_color_selectors.size()); + for (uint i = 0; i < m_color_selectors.size(); i++) + { + remapped_selectors[remapping[i]] = m_color_selectors[i]; + } + symbol_histogram hist(16); + for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) + { + for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) + { + hist.inc_freq(selector & 0xF); + } + } + static_huffman_data_model dm; + dm.init(true, hist, 15); + symbol_codec codec; + codec.start_encoding(1024 * 1024); + codec.encode_transmit_static_huffman_data_model(dm, false); + for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) + { + for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) + { + codec.encode(selector & 0xF, dm); + } + } + codec.stop_encoding(false); + packed_data.swap(codec.get_encoding_buf()); + return true; } - if (pColor_selector_remap) { - m_selector_index_hist[0].resize(pColor_selector_remap->size()); - if (clear_histograms) - m_selector_index_hist[0].set_all(0); + bool 
crn_comp::pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) + { + crnlib::vector remapped_selectors(m_alpha_selectors.size()); + for (uint i = 0; i < m_alpha_selectors.size(); i++) + { + remapped_selectors[remapping[i]] = m_alpha_selectors[i]; + } + symbol_histogram hist(64); + for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) + { + for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6) + { + hist.inc_freq(selector & 0x3F); + } + } + static_huffman_data_model dm; + dm.init(true, hist, 15); + symbol_codec codec; + codec.start_encoding(1024 * 1024); + codec.encode_transmit_static_huffman_data_model(dm, false); + for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) + { + for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6) + { + codec.encode(selector & 0x3F, dm); + } + } + codec.stop_encoding(false); + packed_data.swap(codec.get_encoding_buf()); + return true; } - if (pAlpha_endpoint_remap) { - m_endpoint_index_hist[1].resize(pAlpha_endpoint_remap->size()); - if (clear_histograms) - m_endpoint_index_hist[1].set_all(0); + bool crn_comp::pack_blocks( + uint group, + bool clear_histograms, + symbol_codec* pCodec, + const crnlib::vector* pColor_endpoint_remap, + const crnlib::vector* pColor_selector_remap, + const crnlib::vector* pAlpha_endpoint_remap, + const crnlib::vector* pAlpha_selector_remap) + { + if (!pCodec) + { + m_reference_hist.resize(256); + if (clear_histograms) + { + m_reference_hist.set_all(0); + } + + if (pColor_endpoint_remap) + { + m_endpoint_index_hist[0].resize(pColor_endpoint_remap->size()); + if (clear_histograms) + { + m_endpoint_index_hist[0].set_all(0); + } + } + + if (pColor_selector_remap) + { + m_selector_index_hist[0].resize(pColor_selector_remap->size()); + if (clear_histograms) + { + m_selector_index_hist[0].set_all(0); + 
} + } + + if (pAlpha_endpoint_remap) + { + m_endpoint_index_hist[1].resize(pAlpha_endpoint_remap->size()); + if (clear_histograms) + { + m_endpoint_index_hist[1].set_all(0); + } + } + + if (pAlpha_selector_remap) + { + m_selector_index_hist[1].resize(pAlpha_selector_remap->size()); + if (clear_histograms) + { + m_selector_index_hist[1].set_all(0); + } + } + } + + uint endpoint_index[cNumComps] = {}; + const crnlib::vector* endpoint_remap[cNumComps] = {}; + const crnlib::vector* selector_remap[cNumComps] = {}; + for (uint c = 0; c < cNumComps; c++) + { + if (m_has_comp[c]) + { + endpoint_remap[c] = c ? pAlpha_endpoint_remap : pColor_endpoint_remap; + selector_remap[c] = c ? pAlpha_selector_remap : pColor_selector_remap; + } + } + + uint block_width = m_levels[group].block_width; + for (uint by = 0, b = m_levels[group].first_block, bEnd = b + m_levels[group].num_blocks; b < bEnd; by++) + { + for (uint bx = 0; bx < block_width; bx++, b++) + { + const bool secondary_etc_subblock = m_has_subblocks && bx & 1; + if (!(by & 1) && !(bx & 1)) + { + uint8 reference_group = m_endpoint_indices[b].reference | m_endpoint_indices[b + block_width].reference << 2 | + m_endpoint_indices[b + 1].reference << 4 | m_endpoint_indices[b + block_width + 1].reference << 6; + if (pCodec) + { + pCodec->encode(reference_group, m_reference_dm); + } + else + { + m_reference_hist.inc_freq(reference_group); + } + } + for (uint c = 0, cEnd = secondary_etc_subblock ? cAlpha0 : cNumComps; c < cEnd; c++) + { + if (endpoint_remap[c]) + { + uint index = (*endpoint_remap[c])[m_endpoint_indices[b].component[c]]; + if (secondary_etc_subblock ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) + { + int sym = index - endpoint_index[c]; + if (sym < 0) + { + sym += endpoint_remap[c]->size(); + } + if (!pCodec) + { + m_endpoint_index_hist[c ? 1 : 0].inc_freq(sym); + } + else + { + pCodec->encode(sym, m_endpoint_index_dm[c ? 
1 : 0]); + } + } + endpoint_index[c] = index; + } + } + for (uint c = 0, cEnd = secondary_etc_subblock ? 0 : cNumComps; c < cEnd; c++) + { + if (selector_remap[c]) + { + uint index = (*selector_remap[c])[m_selector_indices[b].component[c]]; + if (!pCodec) + { + m_selector_index_hist[c ? 1 : 0].inc_freq(index); + } + else + { + pCodec->encode(index, m_selector_index_dm[c ? 1 : 0]); + } + } + } + } + } + return true; } - if (pAlpha_selector_remap) { - m_selector_index_hist[1].resize(pAlpha_selector_remap->size()); - if (clear_histograms) - m_selector_index_hist[1].set_all(0); + bool crn_comp::alias_images() + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + const uint width = math::maximum(1U, m_pParams->m_width >> level_index); + const uint height = math::maximum(1U, m_pParams->m_height >> level_index); + if (!m_pParams->m_pImages[face_index][level_index]) + { + return false; + } + m_images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); + } + } + + image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); + if (conv_type != image_utils::cConversion_Invalid) + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + image_u8 cooked_image(m_images[face_index][level_index]); + image_utils::convert_image(cooked_image, conv_type); + m_images[face_index][level_index].swap(cooked_image); + } + } + } + + m_levels.resize(m_pParams->m_levels); + m_total_blocks = 0; + for (uint level = 0; level < m_pParams->m_levels; level++) + { + uint blockHeight = ((math::maximum(1U, m_pParams->m_height >> level) + 7) & ~7) >> 2; + m_levels[level].block_width = ((math::maximum(1U, m_pParams->m_width >> level) + 7) & ~7) >> 
(m_has_subblocks ? 1 : 2); + m_levels[level].first_block = m_total_blocks; + m_levels[level].num_blocks = m_pParams->m_faces * m_levels[level].block_width * blockHeight; + m_total_blocks += m_levels[level].num_blocks; + } + + return true; } - } - - uint endpoint_index[cNumComps] = {}; - const crnlib::vector* endpoint_remap[cNumComps] = {}; - const crnlib::vector* selector_remap[cNumComps] = {}; - for (uint c = 0; c < cNumComps; c++) { - if (m_has_comp[c]) { - endpoint_remap[c] = c ? pAlpha_endpoint_remap : pColor_endpoint_remap; - selector_remap[c] = c ? pAlpha_selector_remap : pColor_selector_remap; + + void crn_comp::clear() + { + m_pParams = nullptr; + + for (uint f = 0; f < cCRNMaxFaces; f++) + { + for (uint l = 0; l < cCRNMaxLevels; l++) + { + m_images[f][l].clear(); + } + } + + utils::zero_object(m_has_comp); + m_has_etc_color_blocks = false; + m_has_subblocks = false; + + m_levels.clear(); + + m_total_blocks = 0; + m_color_endpoints.clear(); + m_alpha_endpoints.clear(); + m_color_selectors.clear(); + m_alpha_selectors.clear(); + m_endpoint_indices.clear(); + m_selector_indices.clear(); + + utils::zero_object(m_crn_header); + + m_comp_data.clear(); + + m_hvq.clear(); + + m_reference_hist.clear(); + m_reference_dm.clear(); + for (uint i = 0; i < 2; i++) + { + m_endpoint_remaping[i].clear(); + m_endpoint_index_hist[i].clear(); + m_endpoint_index_dm[i].clear(); + m_selector_remaping[i].clear(); + m_selector_index_hist[i].clear(); + m_selector_index_dm[i].clear(); + } + + for (uint i = 0; i < cCRNMaxLevels; i++) + { + m_packed_blocks[i].clear(); + } + + m_packed_data_models.clear(); + + m_packed_color_endpoints.clear(); + m_packed_color_selectors.clear(); + m_packed_alpha_endpoints.clear(); + m_packed_alpha_selectors.clear(); } - } - - uint block_width = m_levels[group].block_width; - for (uint by = 0, b = m_levels[group].first_block, bEnd = b + m_levels[group].num_blocks; b < bEnd; by++) { - for (uint bx = 0; bx < block_width; bx++, b++) { - const bool 
secondary_etc_subblock = m_has_subblocks && bx & 1; - if (!(by & 1) && !(bx & 1)) { - uint8 reference_group = m_endpoint_indices[b].reference | m_endpoint_indices[b + block_width].reference << 2 | - m_endpoint_indices[b + 1].reference << 4 | m_endpoint_indices[b + block_width + 1].reference << 6; - if (pCodec) - pCodec->encode(reference_group, m_reference_dm); + + bool crn_comp::quantize_images() + { + dxt_hc::params params; + + params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating; + params.m_adaptive_tile_color_psnr_derating = m_pParams->m_crn_adaptive_tile_color_psnr_derating; + + if (m_pParams->m_flags & cCRNCompFlagManualPaletteSizes) + { + params.m_color_endpoint_codebook_size = math::clamp(m_pParams->m_crn_color_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_color_selector_codebook_size = math::clamp(m_pParams->m_crn_color_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_alpha_endpoint_codebook_size = math::clamp(m_pParams->m_crn_alpha_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_alpha_selector_codebook_size = math::clamp(m_pParams->m_crn_alpha_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + } else - m_reference_hist.inc_freq(reference_group); - } - for (uint c = 0, cEnd = secondary_etc_subblock ? cAlpha0 : cNumComps; c < cEnd; c++) { - if (endpoint_remap[c]) { - uint index = (*endpoint_remap[c])[m_endpoint_indices[b].component[c]]; - if (secondary_etc_subblock ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) { - int sym = index - endpoint_index[c]; - if (sym < 0) - sym += endpoint_remap[c]->size(); - if (!pCodec) - m_endpoint_index_hist[c ? 1 : 0].inc_freq(sym); - else - pCodec->encode(sym, m_endpoint_index_dm[c ? 1 : 0]); - } - endpoint_index[c] = index; - } - } - for (uint c = 0, cEnd = secondary_etc_subblock ? 
0 : cNumComps; c < cEnd; c++) { - if (selector_remap[c]) { - uint index = (*selector_remap[c])[m_selector_indices[b].component[c]]; - if (!pCodec) - m_selector_index_hist[c ? 1 : 0].inc_freq(index); - else - pCodec->encode(index, m_selector_index_dm[c ? 1 : 0]); - } - } - } - } - return true; -} - -bool crn_comp::alias_images() { - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - const uint width = math::maximum(1U, m_pParams->m_width >> level_index); - const uint height = math::maximum(1U, m_pParams->m_height >> level_index); - if (!m_pParams->m_pImages[face_index][level_index]) - return false; - m_images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); - } - } - - image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); - if (conv_type != image_utils::cConversion_Invalid) { - for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - image_u8 cooked_image(m_images[face_index][level_index]); - image_utils::convert_image(cooked_image, conv_type); - m_images[face_index][level_index].swap(cooked_image); - } - } - } - - m_levels.resize(m_pParams->m_levels); - m_total_blocks = 0; - for (uint level = 0; level < m_pParams->m_levels; level++) { - uint blockHeight = ((math::maximum(1U, m_pParams->m_height >> level) + 7) & ~7) >> 2; - m_levels[level].block_width = ((math::maximum(1U, m_pParams->m_width >> level) + 7) & ~7) >> (m_has_subblocks ? 
1 : 2); - m_levels[level].first_block = m_total_blocks; - m_levels[level].num_blocks = m_pParams->m_faces * m_levels[level].block_width * blockHeight; - m_total_blocks += m_levels[level].num_blocks; - } - - return true; -} - -void crn_comp::clear() { - m_pParams = nullptr; - - for (uint f = 0; f < cCRNMaxFaces; f++) - for (uint l = 0; l < cCRNMaxLevels; l++) - m_images[f][l].clear(); - - utils::zero_object(m_has_comp); - m_has_etc_color_blocks = false; - m_has_subblocks = false; - - m_levels.clear(); - - m_total_blocks = 0; - m_color_endpoints.clear(); - m_alpha_endpoints.clear(); - m_color_selectors.clear(); - m_alpha_selectors.clear(); - m_endpoint_indices.clear(); - m_selector_indices.clear(); - - utils::zero_object(m_crn_header); - - m_comp_data.clear(); - - m_hvq.clear(); - - m_reference_hist.clear(); - m_reference_dm.clear(); - for (uint i = 0; i < 2; i++) { - m_endpoint_remaping[i].clear(); - m_endpoint_index_hist[i].clear(); - m_endpoint_index_dm[i].clear(); - m_selector_remaping[i].clear(); - m_selector_index_hist[i].clear(); - m_selector_index_dm[i].clear(); - } - - for (uint i = 0; i < cCRNMaxLevels; i++) - m_packed_blocks[i].clear(); - - m_packed_data_models.clear(); - - m_packed_color_endpoints.clear(); - m_packed_color_selectors.clear(); - m_packed_alpha_endpoints.clear(); - m_packed_alpha_selectors.clear(); -} - -bool crn_comp::quantize_images() { - dxt_hc::params params; - - params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating; - params.m_adaptive_tile_color_psnr_derating = m_pParams->m_crn_adaptive_tile_color_psnr_derating; - - if (m_pParams->m_flags & cCRNCompFlagManualPaletteSizes) { - params.m_color_endpoint_codebook_size = math::clamp(m_pParams->m_crn_color_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); - params.m_color_selector_codebook_size = math::clamp(m_pParams->m_crn_color_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); - params.m_alpha_endpoint_codebook_size = 
math::clamp(m_pParams->m_crn_alpha_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); - params.m_alpha_selector_codebook_size = math::clamp(m_pParams->m_crn_alpha_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); - } else { - uint max_codebook_entries = ((m_pParams->m_width + 3) / 4) * ((m_pParams->m_height + 3) / 4); - - max_codebook_entries = math::clamp(max_codebook_entries, cCRNMinPaletteSize, cCRNMaxPaletteSize); - - float quality = math::clamp((float)m_pParams->m_quality_level / cCRNMaxQualityLevel, 0.0f, 1.0f); - float color_quality_power_mul = 1.0f; - float alpha_quality_power_mul = 1.0f; - if (m_has_etc_color_blocks) { - color_quality_power_mul = m_has_subblocks ? 1.31f : 0.7f; - params.m_adaptive_tile_color_psnr_derating = m_has_subblocks ? 5.0f : 2.0f; - } - if (m_pParams->m_format == cCRNFmtDXT5_CCxY) { - color_quality_power_mul = 3.5f; - alpha_quality_power_mul = .35f; - params.m_adaptive_tile_color_psnr_derating = 5.0f; - } else if (m_pParams->m_format == cCRNFmtDXT5) { - color_quality_power_mul = .75f; - } else if (m_pParams->m_format == cCRNFmtETC2A) { - alpha_quality_power_mul = .9f; - } + { + uint max_codebook_entries = ((m_pParams->m_width + 3) / 4) * ((m_pParams->m_height + 3) / 4); + + max_codebook_entries = math::clamp(max_codebook_entries, cCRNMinPaletteSize, cCRNMaxPaletteSize); + + float quality = math::clamp((float)m_pParams->m_quality_level / cCRNMaxQualityLevel, 0.0f, 1.0f); + float color_quality_power_mul = 1.0f; + float alpha_quality_power_mul = 1.0f; + if (m_has_etc_color_blocks) + { + color_quality_power_mul = m_has_subblocks ? 1.31f : 0.7f; + params.m_adaptive_tile_color_psnr_derating = m_has_subblocks ? 
5.0f : 2.0f; + } + if (m_pParams->m_format == cCRNFmtDXT5_CCxY) + { + color_quality_power_mul = 3.5f; + alpha_quality_power_mul = .35f; + params.m_adaptive_tile_color_psnr_derating = 5.0f; + } + else if (m_pParams->m_format == cCRNFmtDXT5) + { + color_quality_power_mul = .75f; + } + else if (m_pParams->m_format == cCRNFmtETC2A) + { + alpha_quality_power_mul = .9f; + } + + float color_endpoint_quality = powf(quality, 1.8f * color_quality_power_mul); + float color_selector_quality = powf(quality, 1.65f * color_quality_power_mul); + params.m_color_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(64, cCRNMinPaletteSize), (float)max_codebook_entries, color_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_color_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(96, cCRNMinPaletteSize), (float)max_codebook_entries, color_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + + float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul); + float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul); + params.m_alpha_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_alpha_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + } - float color_endpoint_quality = powf(quality, 1.8f * color_quality_power_mul); - float color_selector_quality = powf(quality, 1.65f * color_quality_power_mul); - params.m_color_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(64, cCRNMinPaletteSize), (float)max_codebook_entries, color_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); - 
params.m_color_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(96, cCRNMinPaletteSize), (float)max_codebook_entries, color_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); - - float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul); - float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul); - params.m_alpha_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); - params.m_alpha_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); - } - - if (m_pParams->m_flags & cCRNCompFlagDebugging) { - console::debug("Color endpoints: %u", params.m_color_endpoint_codebook_size); - console::debug("Color selectors: %u", params.m_color_selector_codebook_size); - console::debug("Alpha endpoints: %u", params.m_alpha_endpoint_codebook_size); - console::debug("Alpha selectors: %u", params.m_alpha_selector_codebook_size); - } - - params.m_hierarchical = (m_pParams->m_flags & cCRNCompFlagHierarchical) != 0; - params.m_perceptual = (m_pParams->m_flags & cCRNCompFlagPerceptual) != 0; - - params.m_pProgress_func = m_pParams->m_pProgress_func; - params.m_pProgress_func_data = m_pParams->m_pProgress_func_data; - - switch (m_pParams->m_format) { - case cCRNFmtDXT1: { - params.m_format = cDXT1; - m_has_comp[cColor] = true; - break; - } - case cCRNFmtDXT3: { - m_has_comp[cAlpha0] = true; - return false; - } - case cCRNFmtDXT5: { - params.m_format = cDXT5; - params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; - m_has_comp[cColor] = true; - m_has_comp[cAlpha0] = true; - break; - } - case cCRNFmtDXT5_CCxY: { - params.m_format = cDXT5; - params.m_alpha_component_indices[0] = 3; - 
m_has_comp[cColor] = true; - m_has_comp[cAlpha0] = true; - params.m_perceptual = false; - - //params.m_adaptive_tile_color_alpha_weighting_ratio = 1.0f; - params.m_adaptive_tile_color_alpha_weighting_ratio = 1.5f; - break; - } - case cCRNFmtDXT5_xGBR: - case cCRNFmtDXT5_AGBR: - case cCRNFmtDXT5_xGxR: { - params.m_format = cDXT5; - params.m_alpha_component_indices[0] = 3; - m_has_comp[cColor] = true; - m_has_comp[cAlpha0] = true; - params.m_perceptual = false; - break; - } - case cCRNFmtDXN_XY: { - params.m_format = cDXN_XY; - params.m_alpha_component_indices[0] = 0; - params.m_alpha_component_indices[1] = 1; - m_has_comp[cAlpha0] = true; - m_has_comp[cAlpha1] = true; - params.m_perceptual = false; - break; - } - case cCRNFmtDXN_YX: { - params.m_format = cDXN_YX; - params.m_alpha_component_indices[0] = 1; - params.m_alpha_component_indices[1] = 0; - m_has_comp[cAlpha0] = true; - m_has_comp[cAlpha1] = true; - params.m_perceptual = false; - break; - } - case cCRNFmtDXT5A: { - params.m_format = cDXT5A; - params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; - m_has_comp[cAlpha0] = true; - params.m_perceptual = false; - break; - } - case cCRNFmtETC1: { - params.m_format = cETC1; - m_has_comp[cColor] = true; - break; - } - case cCRNFmtETC2: { - params.m_format = cETC2; - m_has_comp[cColor] = true; - break; - } - case cCRNFmtETC2A: { - params.m_format = cETC2A; - params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; - m_has_comp[cColor] = true; - m_has_comp[cAlpha0] = true; - break; - } - case cCRNFmtETC1S: { - params.m_format = cETC1S; - m_has_comp[cColor] = true; - break; - } - case cCRNFmtETC2AS: { - params.m_format = cETC2AS; - params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; - m_has_comp[cColor] = true; - m_has_comp[cAlpha0] = true; - break; - } - default: { - return false; - } - } - params.m_debugging = (m_pParams->m_flags & cCRNCompFlagDebugging) != 0; - params.m_pTask_pool = &m_task_pool; - - params.m_num_levels = 
m_pParams->m_levels; - for (uint i = 0; i < m_pParams->m_levels; i++) { - params.m_levels[i].m_first_block = m_levels[i].first_block; - params.m_levels[i].m_num_blocks = m_levels[i].num_blocks; - params.m_levels[i].m_block_width = m_levels[i].block_width; - params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i)); - } - params.m_num_faces = m_pParams->m_faces; - params.m_num_blocks = m_total_blocks; - color_quad_u8 (*blocks)[16] = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8)); - for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) { - for (uint face = 0; face < m_pParams->m_faces; face++) { - image_u8& image = m_images[face][level]; - uint width = image.get_width(); - uint height = image.get_height(); - uint blockWidth = ((width + 7) & ~7) >> 2; - uint blockHeight = ((height + 7) & ~7) >> 2; - for (uint by = 0; by < blockHeight; by++) { - for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) { - for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) { - for (uint y = math::minimum(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++) - blocks[b][t] = image(math::minimum(x0 + dx, width - 1), y); - } - } - } - } - } - bool result = m_hvq.compress(blocks, m_endpoint_indices, m_selector_indices, m_color_endpoints, m_alpha_endpoints, m_color_selectors, m_alpha_selectors, params); - crnlib_free(blocks); - - return result; -} - -struct optimize_color_params { - struct unpacked_endpoint { - color_quad_u8 low, high; - }; - const unpacked_endpoint* unpacked_endpoints; - const uint* hist; - uint16 n; - uint16 selected; - float weight; - struct result { - crnlib::vector endpoint_remapping; - crnlib::vector packed_endpoints; - uint total_bits; - } *pResult; -}; - -static void sort_color_endpoints(crnlib::vector& remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, uint16 n) { - remapping.resize(n); - crnlib::vector endpoints(n); - crnlib::vector indices(n); - for (uint16 i = 0; i < n; 
i++) { - endpoints[i] = unpacked_endpoints[i]; - indices[i] = i; - } - optimize_color_params::unpacked_endpoint selected_endpoint = {color_quad_u8(0), color_quad_u8(0)}; - for (uint16 left = n; left;) { - uint16 selected_index = 0; - uint min_error = cUINT32_MAX; - for (uint16 i = 0; i < left; i++) { - optimize_color_params::unpacked_endpoint& endpoint = endpoints[i]; - uint error = color::elucidian_distance(endpoint.low, selected_endpoint.low, false) + color::elucidian_distance(endpoint.high, selected_endpoint.high, false); - if (error < min_error) { - min_error = error; - selected_index = i; - } - } - selected_endpoint = endpoints[selected_index]; - remapping[indices[selected_index]] = n - left; - left--; - endpoints[selected_index] = endpoints[left]; - indices[selected_index] = indices[left]; - } -} - -static void remap_color_endpoints(uint16* remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) { - struct Node { - uint index, frequency, front_similarity, back_similarity; - optimize_color_params::unpacked_endpoint e; - Node() { utils::zero_object(*this); } - }; - crnlib::vector remaining(n); - for (uint16 i = 0; i < n; i++) { - remaining[i].index = i; - remaining[i].e = unpacked_endpoints[i]; - } - crnlib::vector chosen(n << 1); - uint remaining_count = n, chosen_front = n, chosen_back = chosen_front; - chosen[chosen_front] = selected; - optimize_color_params::unpacked_endpoint front_e = remaining[selected].e, back_e = front_e; - bool front_updated = true, back_updated = true; - remaining[selected] = remaining[--remaining_count]; - const uint* frequency = hist + selected * n; - - for (uint similarity_base = (uint)(4000 * (1.0f + weight)), frequency_normalizer = 0; remaining_count;) { - uint64 best_value = 0; - uint best_index = 0; - for (uint i = 0; i < remaining_count; i++) { - Node& node = remaining[i]; - node.frequency += frequency[node.index]; - if (front_updated) - 
node.front_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, front_e.low, false) + color::elucidian_distance(node.e.high, front_e.high, false)); - if (back_updated) - node.back_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, back_e.low, false) + color::elucidian_distance(node.e.high, back_e.high, false)); - uint64 value = math::maximum(node.front_similarity, node.back_similarity) * (node.frequency + frequency_normalizer) + 1; - if (value > best_value || (value == best_value && node.index < selected)) { - best_value = value; - best_index = i; - selected = node.index; - } - } - frequency = hist + selected * n; - uint frequency_front = 0, frequency_back = 0; - for (int front = chosen_front, back = chosen_back, scale = back - front; scale > 0; front++, back--, scale -= 2) { - frequency_front += scale * frequency[chosen[front]]; - frequency_back += scale * frequency[chosen[back]]; - } - front_updated = back_updated = false; - Node& best_node = remaining[best_index]; - frequency_normalizer = best_node.frequency << 3; - if ((uint64)best_node.front_similarity * frequency_front > (uint64)best_node.back_similarity * frequency_back) { - chosen[--chosen_front] = selected; - front_e = best_node.e; - front_updated = true; - } else { - chosen[++chosen_back] = selected; - back_e = best_node.e; - back_updated = true; - } - best_node = remaining[--remaining_count]; - } - - for (uint16 i = chosen_front; i <= chosen_back; i++) - remapping[chosen[i]] = i - chosen_front; -} - -void crn_comp::optimize_color_endpoints_task(uint64 data, void* pData_ptr) { - optimize_color_params* pParams = reinterpret_cast(pData_ptr); - crnlib::vector& remapping = pParams->pResult->endpoint_remapping; - uint16 n = pParams->n; - remapping.resize(n); - - if (data) { - remap_color_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); - } else { - 
sort_color_endpoints(remapping, pParams->unpacked_endpoints, n); - optimize_color_selectors(); - } - - m_has_etc_color_blocks ? pack_color_endpoints_etc(pParams->pResult->packed_endpoints, remapping) : pack_color_endpoints(pParams->pResult->packed_endpoints, remapping); - uint total_bits = pParams->pResult->packed_endpoints.size() << 3; - - crnlib::vector hist(n); - for (uint level = 0; level < m_levels.size(); level++) { - for (uint endpoint_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { - uint index = remapping[m_endpoint_indices[b].component[cColor]]; - if (m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) { - int sym = index - endpoint_index; - hist[sym < 0 ? sym + n : sym]++; - } - endpoint_index = index; - } - } - - static_huffman_data_model dm; - dm.init(true, n, hist.get_ptr(), 16); - const uint8* code_sizes = dm.get_code_sizes(); - for (uint16 s = 0; s < n; s++) - total_bits += hist[s] * code_sizes[s]; - - symbol_codec codec; - codec.start_encoding(64 * 1024); - codec.encode_enable_simulation(true); - codec.encode_transmit_static_huffman_data_model(dm, false); - codec.stop_encoding(false); - total_bits += codec.encode_get_total_bits_written(); - - pParams->pResult->total_bits = total_bits; - - crnlib_delete(pParams); -} - -void crn_comp::optimize_color_selectors() { - crnlib::vector& remapping = m_selector_remaping[cColor]; - uint16 n = m_color_selectors.size(); - remapping.resize(n); - - uint8 d[] = {0, 5, 14, 10}; - - uint8 D4[0x100]; - for (uint16 i = 0; i < 0x100; i++) - D4[i] = d[(i ^ i >> 4) & 3] + d[(i >> 2 ^ i >> 6) & 3]; - uint8 D8[0x10000]; - for (uint32 i = 0; i < 0x10000; i++) - D8[i] = D4[(i >> 8 & 0xF0) | (i >> 4 & 0xF)] + D4[(i >> 4 & 0xF0) | (i & 0xF)]; - - crnlib::vector selectors(n); - crnlib::vector indices(n); - for (uint16 i = 0; i < n; i++) { - selectors[i] = m_color_selectors[i]; - indices[i] = i; - } - uint32 selected_selector = 0; - 
for (uint16 left = n; left;) { - uint16 selected_index = 0; - uint min_error = cUINT32_MAX; - for (uint16 i = 0; i < left; i++) { - uint32 selector = selectors[i]; - uint8 d0 = D8[(selector >> 16 & 0xFF00) | (selected_selector >> 24 & 0xFF)]; - uint8 d1 = D8[(selector >> 8 & 0xFF00) | (selected_selector >> 16 & 0xFF)]; - uint8 d2 = D8[(selector & 0xFF00) | (selected_selector >> 8 & 0xFF)]; - uint8 d3 = D8[(selector << 8 & 0xFF00) | (selected_selector & 0xFF)]; - uint error = d0 + d1 + d2 + d3; - if (error < min_error) { - min_error = error; - selected_index = i; - } - } - selected_selector = selectors[selected_index]; - remapping[indices[selected_index]] = n - left; - left--; - selectors[selected_index] = selectors[left]; - indices[selected_index] = indices[left]; - } - - pack_color_selectors(m_packed_color_selectors, remapping); -} - -void crn_comp::optimize_color() { - uint16 n = m_color_endpoints.size(); - crnlib::vector hist(n * n); - crnlib::vector sum(n); - for (uint i, i_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i_prev = i) { - i = m_endpoint_indices[b].color; - if ((m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) && i != i_prev) { - hist[i * n + i_prev]++; - hist[i_prev * n + i]++; - sum[i]++; - sum[i_prev]++; - } - } - uint16 selected = 0; - uint best_sum = 0; - for (uint16 i = 0; i < n; i++) { - if (best_sum < sum[i]) { - best_sum = sum[i]; - selected = i; - } - } - crnlib::vector unpacked_endpoints(n); - for (uint16 i = 0; i < n; i++) { - unpacked_endpoints[i].low.m_u32 = m_has_etc_color_blocks ? m_color_endpoints[i] & 0xFFFFFF : dxt1_block::unpack_color(m_color_endpoints[i] & 0xFFFF, true).m_u32; - unpacked_endpoints[i].high.m_u32 = m_has_etc_color_blocks ? 
m_color_endpoints[i] >> 24 : dxt1_block::unpack_color(m_color_endpoints[i] >> 16, true).m_u32; - } - - optimize_color_params::result remapping_trial[4]; - float weights[4] = {0, 0, 1.0f / 6.0f, 0.5f}; - for (uint i = 0; i < 4; i++) { - optimize_color_params* pParams = crnlib_new(); - pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); - pParams->hist = hist.get_ptr(); - pParams->n = n; - pParams->selected = selected; - pParams->weight = weights[i]; - pParams->pResult = remapping_trial + i; - m_task_pool.queue_object_task(this, &crn_comp::optimize_color_endpoints_task, i, pParams); - } - m_task_pool.join(); - - for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) { - if (remapping_trial[i].total_bits < best_bits) { - m_packed_color_endpoints.swap(remapping_trial[i].packed_endpoints); - m_endpoint_remaping[cColor].swap(remapping_trial[i].endpoint_remapping); - best_bits = remapping_trial[i].total_bits; - } - } -} - -struct optimize_alpha_params { - struct unpacked_endpoint { - uint8 low, high; - }; - const unpacked_endpoint* unpacked_endpoints; - const uint* hist; - uint16 n; - uint16 selected; - float weight; - struct result { - crnlib::vector endpoint_remapping; - crnlib::vector packed_endpoints; - uint total_bits; - } *pResult; -}; - -static void sort_alpha_endpoints(crnlib::vector& remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, uint16 n) { - remapping.resize(n); - crnlib::vector endpoints(n); - crnlib::vector indices(n); - for (uint16 i = 0; i < n; i++) { - endpoints[i] = unpacked_endpoints[i]; - indices[i] = i; - } - optimize_alpha_params::unpacked_endpoint selected_endpoint = {0, 0}; - for (uint16 left = n; left;) { - uint16 selected_index = 0; - uint min_error = cUINT32_MAX; - for (uint16 i = 0; i < left; i++) { - optimize_alpha_params::unpacked_endpoint& endpoint = endpoints[i]; - uint error = math::square(endpoint.low - selected_endpoint.low) + math::square(endpoint.high - selected_endpoint.high); - if (error < 
min_error) { - min_error = error; - selected_index = i; - } - } - selected_endpoint = endpoints[selected_index]; - remapping[indices[selected_index]] = n - left; - left--; - endpoints[selected_index] = endpoints[left]; - indices[selected_index] = indices[left]; - } -} - -static void remap_alpha_endpoints(uint16* remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) { - const uint* frequency = hist + selected * n; - crnlib::vector chosen, remaining; - crnlib::vector total_frequency(n); - chosen.push_back(selected); - for (uint16 i = 0; i < n; i++) { - if (i != selected) { - remaining.push_back(i); - total_frequency[i] = frequency[i]; + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + console::debug("Color endpoints: %u", params.m_color_endpoint_codebook_size); + console::debug("Color selectors: %u", params.m_color_selector_codebook_size); + console::debug("Alpha endpoints: %u", params.m_alpha_endpoint_codebook_size); + console::debug("Alpha selectors: %u", params.m_alpha_selector_codebook_size); + } + + params.m_hierarchical = (m_pParams->m_flags & cCRNCompFlagHierarchical) != 0; + params.m_perceptual = (m_pParams->m_flags & cCRNCompFlagPerceptual) != 0; + + params.m_pProgress_func = m_pParams->m_pProgress_func; + params.m_pProgress_func_data = m_pParams->m_pProgress_func_data; + + switch (m_pParams->m_format) + { + case cCRNFmtDXT1: + { + params.m_format = cDXT1; + m_has_comp[cColor] = true; + break; + } + case cCRNFmtDXT3: + { + m_has_comp[cAlpha0] = true; + return false; + } + case cCRNFmtDXT5: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + break; + } + case cCRNFmtDXT5_CCxY: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = 3; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + + 
//params.m_adaptive_tile_color_alpha_weighting_ratio = 1.0f; + params.m_adaptive_tile_color_alpha_weighting_ratio = 1.5f; + break; + } + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = 3; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtDXN_XY: + { + params.m_format = cDXN_XY; + params.m_alpha_component_indices[0] = 0; + params.m_alpha_component_indices[1] = 1; + m_has_comp[cAlpha0] = true; + m_has_comp[cAlpha1] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtDXN_YX: + { + params.m_format = cDXN_YX; + params.m_alpha_component_indices[0] = 1; + params.m_alpha_component_indices[1] = 0; + m_has_comp[cAlpha0] = true; + m_has_comp[cAlpha1] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtDXT5A: + { + params.m_format = cDXT5A; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtETC1: + { + params.m_format = cETC1; + m_has_comp[cColor] = true; + break; + } + case cCRNFmtETC2: + { + params.m_format = cETC2; + m_has_comp[cColor] = true; + break; + } + case cCRNFmtETC2A: + { + params.m_format = cETC2A; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + break; + } + case cCRNFmtETC1S: + { + params.m_format = cETC1S; + m_has_comp[cColor] = true; + break; + } + case cCRNFmtETC2AS: + { + params.m_format = cETC2AS; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + break; + } + default: + { + return false; + } + } + params.m_debugging = (m_pParams->m_flags & cCRNCompFlagDebugging) != 0; + params.m_pTask_pool = &m_task_pool; + + params.m_num_levels = m_pParams->m_levels; + for (uint i = 0; i < m_pParams->m_levels; i++) + { + 
params.m_levels[i].m_first_block = m_levels[i].first_block; + params.m_levels[i].m_num_blocks = m_levels[i].num_blocks; + params.m_levels[i].m_block_width = m_levels[i].block_width; + params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i)); + } + params.m_num_faces = m_pParams->m_faces; + params.m_num_blocks = m_total_blocks; + color_quad_u8(*blocks)[16] = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8)); + for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) + { + for (uint face = 0; face < m_pParams->m_faces; face++) + { + image_u8& image = m_images[face][level]; + uint width = image.get_width(); + uint height = image.get_height(); + uint blockWidth = ((width + 7) & ~7) >> 2; + uint blockHeight = ((height + 7) & ~7) >> 2; + for (uint by = 0; by < blockHeight; by++) + { + for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) + { + for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) + { + for (uint y = math::minimum(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++) + { + blocks[b][t] = image(math::minimum(x0 + dx, width - 1), y); + } + } + } + } + } + } + bool result = m_hvq.compress(blocks, m_endpoint_indices, m_selector_indices, m_color_endpoints, m_alpha_endpoints, m_color_selectors, m_alpha_selectors, params); + crnlib_free(blocks); + + return result; } - } - for (uint similarity_base = (uint)(1000 * (1.0f + weight)), total_frequency_normalizer = 0; remaining.size();) { - const optimize_alpha_params::unpacked_endpoint& e_front = unpacked_endpoints[chosen.front()]; - const optimize_alpha_params::unpacked_endpoint& e_back = unpacked_endpoints[chosen.back()]; - uint16 selected_index = 0; - uint64 best_value = 0, selected_similarity_front = 0, selected_similarity_back = 0; - for (uint16 i = 0; i < remaining.size(); i++) { - uint remaining_index = remaining[i]; - const optimize_alpha_params::unpacked_endpoint& e_remaining = unpacked_endpoints[remaining_index]; - uint error_front = 
math::square(e_remaining.low - e_front.low) + math::square(e_remaining.high - e_front.high); - uint error_back = math::square(e_remaining.low - e_back.low) + math::square(e_remaining.high - e_back.high); - uint64 similarity_front = similarity_base - math::minimum(error_front, 1000); - uint64 similarity_back = similarity_base - math::minimum(error_back, 1000); - uint64 value = math::maximum(similarity_front, similarity_back) * (total_frequency[remaining_index] + total_frequency_normalizer) + 1; - if (value > best_value) { - best_value = value; - selected_index = i; - selected_similarity_front = similarity_front; - selected_similarity_back = similarity_back; - } + + struct optimize_color_params + { + struct unpacked_endpoint + { + color_quad_u8 low, high; + }; + const unpacked_endpoint* unpacked_endpoints; + const uint* hist; + uint16 n; + uint16 selected; + float weight; + struct result + { + crnlib::vector endpoint_remapping; + crnlib::vector packed_endpoints; + uint total_bits; + } * pResult; + }; + + static void sort_color_endpoints(crnlib::vector& remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, uint16 n) + { + remapping.resize(n); + crnlib::vector endpoints(n); + crnlib::vector indices(n); + for (uint16 i = 0; i < n; i++) + { + endpoints[i] = unpacked_endpoints[i]; + indices[i] = i; + } + optimize_color_params::unpacked_endpoint selected_endpoint = { color_quad_u8(0), color_quad_u8(0) }; + for (uint16 left = n; left;) + { + uint16 selected_index = 0; + uint min_error = cUINT32_MAX; + for (uint16 i = 0; i < left; i++) + { + optimize_color_params::unpacked_endpoint& endpoint = endpoints[i]; + uint error = color::elucidian_distance(endpoint.low, selected_endpoint.low, false) + color::elucidian_distance(endpoint.high, selected_endpoint.high, false); + if (error < min_error) + { + min_error = error; + selected_index = i; + } + } + selected_endpoint = endpoints[selected_index]; + remapping[indices[selected_index]] = n - left; + left--; + 
endpoints[selected_index] = endpoints[left]; + indices[selected_index] = indices[left]; + } } - selected = remaining[selected_index]; - frequency = hist + selected * n; - total_frequency_normalizer = total_frequency[selected]; - uint frequency_front = 0, frequency_back = 0; - for (int front = 0, back = chosen.size() - 1, scale = back; scale > 0; front++, back--, scale -= 2) { - frequency_front += scale * frequency[chosen[front]]; - frequency_back += scale * frequency[chosen[back]]; + + static void remap_color_endpoints(uint16* remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) + { + struct Node + { + uint index, frequency, front_similarity, back_similarity; + optimize_color_params::unpacked_endpoint e; + Node() + { + utils::zero_object(*this); + } + }; + crnlib::vector remaining(n); + for (uint16 i = 0; i < n; i++) + { + remaining[i].index = i; + remaining[i].e = unpacked_endpoints[i]; + } + crnlib::vector chosen(n << 1); + uint remaining_count = n, chosen_front = n, chosen_back = chosen_front; + chosen[chosen_front] = selected; + optimize_color_params::unpacked_endpoint front_e = remaining[selected].e, back_e = front_e; + bool front_updated = true, back_updated = true; + remaining[selected] = remaining[--remaining_count]; + const uint* frequency = hist + selected * n; + + for (uint similarity_base = (uint)(4000 * (1.0f + weight)), frequency_normalizer = 0; remaining_count;) + { + uint64 best_value = 0; + uint best_index = 0; + for (uint i = 0; i < remaining_count; i++) + { + Node& node = remaining[i]; + node.frequency += frequency[node.index]; + if (front_updated) + { + node.front_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, front_e.low, false) + color::elucidian_distance(node.e.high, front_e.high, false)); + } + if (back_updated) + { + node.back_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, 
back_e.low, false) + color::elucidian_distance(node.e.high, back_e.high, false)); + } + uint64 value = math::maximum(node.front_similarity, node.back_similarity) * (node.frequency + frequency_normalizer) + 1; + if (value > best_value || (value == best_value && node.index < selected)) + { + best_value = value; + best_index = i; + selected = node.index; + } + } + frequency = hist + selected * n; + uint frequency_front = 0, frequency_back = 0; + for (int front = chosen_front, back = chosen_back, scale = back - front; scale > 0; front++, back--, scale -= 2) + { + frequency_front += scale * frequency[chosen[front]]; + frequency_back += scale * frequency[chosen[back]]; + } + front_updated = back_updated = false; + Node& best_node = remaining[best_index]; + frequency_normalizer = best_node.frequency << 3; + if ((uint64)best_node.front_similarity * frequency_front > (uint64)best_node.back_similarity * frequency_back) + { + chosen[--chosen_front] = selected; + front_e = best_node.e; + front_updated = true; + } + else + { + chosen[++chosen_back] = selected; + back_e = best_node.e; + back_updated = true; + } + best_node = remaining[--remaining_count]; + } + + for (uint16 i = chosen_front; i <= chosen_back; i++) + { + remapping[chosen[i]] = i - chosen_front; + } } - if (selected_similarity_front * frequency_front > selected_similarity_back * frequency_back) { - chosen.push_front(selected); - } else { - chosen.push_back(selected); + + void crn_comp::optimize_color_endpoints_task(uint64 data, void* pData_ptr) + { + optimize_color_params* pParams = reinterpret_cast(pData_ptr); + crnlib::vector& remapping = pParams->pResult->endpoint_remapping; + uint16 n = pParams->n; + remapping.resize(n); + + if (data) + { + remap_color_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); + } + else + { + sort_color_endpoints(remapping, pParams->unpacked_endpoints, n); + optimize_color_selectors(); + } + + m_has_etc_color_blocks ? 
pack_color_endpoints_etc(pParams->pResult->packed_endpoints, remapping) : pack_color_endpoints(pParams->pResult->packed_endpoints, remapping); + uint total_bits = pParams->pResult->packed_endpoints.size() << 3; + + crnlib::vector hist(n); + for (uint level = 0; level < m_levels.size(); level++) + { + for (uint endpoint_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) + { + uint index = remapping[m_endpoint_indices[b].component[cColor]]; + if (m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) + { + int sym = index - endpoint_index; + hist[sym < 0 ? sym + n : sym]++; + } + endpoint_index = index; + } + } + + static_huffman_data_model dm; + dm.init(true, n, hist.get_ptr(), 16); + const uint8* code_sizes = dm.get_code_sizes(); + for (uint16 s = 0; s < n; s++) + { + total_bits += hist[s] * code_sizes[s]; + } + + symbol_codec codec; + codec.start_encoding(64 * 1024); + codec.encode_enable_simulation(true); + codec.encode_transmit_static_huffman_data_model(dm, false); + codec.stop_encoding(false); + total_bits += codec.encode_get_total_bits_written(); + + pParams->pResult->total_bits = total_bits; + + crnlib_delete(pParams); } - remaining.erase(remaining.begin() + selected_index); - for (uint16 i = 0; i < remaining.size(); i++) - total_frequency[remaining[i]] += frequency[remaining[i]]; - } - for (uint16 i = 0; i < n; i++) - remapping[chosen[i]] = i; -} - -void crn_comp::optimize_alpha_endpoints_task(uint64 data, void* pData_ptr) { - optimize_alpha_params* pParams = reinterpret_cast(pData_ptr); - crnlib::vector& remapping = pParams->pResult->endpoint_remapping; - uint16 n = pParams->n; - remapping.resize(n); - - if (data) { - remap_alpha_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); - } else { - sort_alpha_endpoints(remapping, pParams->unpacked_endpoints, n); - optimize_alpha_selectors(); - } - - 
pack_alpha_endpoints(pParams->pResult->packed_endpoints, remapping); - uint total_bits = pParams->pResult->packed_endpoints.size() << 3; - - crnlib::vector hist(n); - bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; - for (uint level = 0; level < m_levels.size(); level++) { - for (uint alpha0_index = 0, alpha1_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { - if (hasAlpha0) { - uint index = remapping[m_endpoint_indices[b].component[cAlpha0]]; - if (!m_endpoint_indices[b].reference) { - int sym = index - alpha0_index; - hist[sym < 0 ? sym + n : sym]++; - } - alpha0_index = index; - } - if (hasAlpha1) { - uint index = remapping[m_endpoint_indices[b].component[cAlpha1]]; - if (!m_endpoint_indices[b].reference) { - int sym = index - alpha1_index; - hist[sym < 0 ? sym + n : sym]++; - } - alpha1_index = index; - } + + void crn_comp::optimize_color_selectors() + { + crnlib::vector& remapping = m_selector_remaping[cColor]; + uint16 n = m_color_selectors.size(); + remapping.resize(n); + + uint8 d[] = { 0, 5, 14, 10 }; + + uint8 D4[0x100]; + for (uint16 i = 0; i < 0x100; i++) + { + D4[i] = d[(i ^ i >> 4) & 3] + d[(i >> 2 ^ i >> 6) & 3]; + } + uint8 D8[0x10000]; + for (uint32 i = 0; i < 0x10000; i++) + { + D8[i] = D4[(i >> 8 & 0xF0) | (i >> 4 & 0xF)] + D4[(i >> 4 & 0xF0) | (i & 0xF)]; + } + + crnlib::vector selectors(n); + crnlib::vector indices(n); + for (uint16 i = 0; i < n; i++) + { + selectors[i] = m_color_selectors[i]; + indices[i] = i; + } + uint32 selected_selector = 0; + for (uint16 left = n; left;) + { + uint16 selected_index = 0; + uint min_error = cUINT32_MAX; + for (uint16 i = 0; i < left; i++) + { + uint32 selector = selectors[i]; + uint8 d0 = D8[(selector >> 16 & 0xFF00) | (selected_selector >> 24 & 0xFF)]; + uint8 d1 = D8[(selector >> 8 & 0xFF00) | (selected_selector >> 16 & 0xFF)]; + uint8 d2 = D8[(selector & 0xFF00) | (selected_selector >> 8 & 0xFF)]; + uint8 d3 = D8[(selector << 8 & 
0xFF00) | (selected_selector & 0xFF)]; + uint error = d0 + d1 + d2 + d3; + if (error < min_error) + { + min_error = error; + selected_index = i; + } + } + selected_selector = selectors[selected_index]; + remapping[indices[selected_index]] = n - left; + left--; + selectors[selected_index] = selectors[left]; + indices[selected_index] = indices[left]; + } + + pack_color_selectors(m_packed_color_selectors, remapping); } - } - - static_huffman_data_model dm; - dm.init(true, n, hist.get_ptr(), 16); - const uint8* code_sizes = dm.get_code_sizes(); - for (uint16 s = 0; s < n; s++) - total_bits += hist[s] * code_sizes[s]; - - symbol_codec codec; - codec.start_encoding(64 * 1024); - codec.encode_enable_simulation(true); - codec.encode_transmit_static_huffman_data_model(dm, false); - codec.stop_encoding(false); - total_bits += codec.encode_get_total_bits_written(); - - pParams->pResult->total_bits = total_bits; - - crnlib_delete(pParams); -} - -void crn_comp::optimize_alpha_selectors() { - crnlib::vector& remapping = m_selector_remaping[cAlpha0]; - uint16 n = m_alpha_selectors.size(); - remapping.resize(n); - - uint8 d[] = {0, 2, 3, 3, 5, 5, 4, 4}; - - uint8 D6[0x1000]; - for (uint16 i = 0; i < 0x1000; i++) - D6[i] = d[(i ^ i >> 6) & 7] + d[(i >> 3 ^ i >> 9) & 7]; - - crnlib::vector selectors(n); - crnlib::vector indices(n); - for (uint16 i = 0; i < n; i++) { - selectors[i] = m_alpha_selectors[i]; - indices[i] = i; - } - uint64 selected_selector = 0; - for (uint16 left = n; left;) { - uint16 selected_index = 0; - uint min_error = cUINT32_MAX; - for (uint16 i = 0; i < left; i++) { - uint error = 0; - for (uint64 selector = selectors[i] << 6, delta_selector = selected_selector, j = 0; j < 8; j++, selector >>= 6, delta_selector >>= 6) - error += D6[(selector & 0xFC0) | (delta_selector & 0x3F)]; - if (error < min_error) { - min_error = error; - selected_index = i; - } + + void crn_comp::optimize_color() + { + uint16 n = m_color_endpoints.size(); + crnlib::vector hist(n * n); + 
crnlib::vector sum(n); + for (uint i, i_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i_prev = i) + { + i = m_endpoint_indices[b].color; + if ((m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) && i != i_prev) + { + hist[i * n + i_prev]++; + hist[i_prev * n + i]++; + sum[i]++; + sum[i_prev]++; + } + } + uint16 selected = 0; + uint best_sum = 0; + for (uint16 i = 0; i < n; i++) + { + if (best_sum < sum[i]) + { + best_sum = sum[i]; + selected = i; + } + } + crnlib::vector unpacked_endpoints(n); + for (uint16 i = 0; i < n; i++) + { + unpacked_endpoints[i].low.m_u32 = m_has_etc_color_blocks ? m_color_endpoints[i] & 0xFFFFFF : dxt1_block::unpack_color(m_color_endpoints[i] & 0xFFFF, true).m_u32; + unpacked_endpoints[i].high.m_u32 = m_has_etc_color_blocks ? m_color_endpoints[i] >> 24 : dxt1_block::unpack_color(m_color_endpoints[i] >> 16, true).m_u32; + } + + optimize_color_params::result remapping_trial[4]; + float weights[4] = { 0, 0, 1.0f / 6.0f, 0.5f }; + for (uint i = 0; i < 4; i++) + { + optimize_color_params* pParams = crnlib_new(); + pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); + pParams->hist = hist.get_ptr(); + pParams->n = n; + pParams->selected = selected; + pParams->weight = weights[i]; + pParams->pResult = remapping_trial + i; + m_task_pool.queue_object_task(this, &crn_comp::optimize_color_endpoints_task, i, pParams); + } + m_task_pool.join(); + + for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) + { + if (remapping_trial[i].total_bits < best_bits) + { + m_packed_color_endpoints.swap(remapping_trial[i].packed_endpoints); + m_endpoint_remaping[cColor].swap(remapping_trial[i].endpoint_remapping); + best_bits = remapping_trial[i].total_bits; + } + } } - selected_selector = selectors[selected_index]; - remapping[indices[selected_index]] = n - left; - left--; - selectors[selected_index] = selectors[left]; - indices[selected_index] = indices[left]; - } - - 
pack_alpha_selectors(m_packed_alpha_selectors, remapping); -} - -void crn_comp::optimize_alpha() { - uint16 n = m_alpha_endpoints.size(); - crnlib::vector hist(n * n); - crnlib::vector sum(n); - bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; - for (uint i0, i1, i0_prev = 0, i1_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i0_prev = i0, i1_prev = i1) { - i0 = m_endpoint_indices[b].alpha0; - i1 = m_endpoint_indices[b].alpha1; - if (!m_endpoint_indices[b].reference) { - if (hasAlpha0 && i0 != i0_prev) { - hist[i0 * n + i0_prev]++; - hist[i0_prev * n + i0]++; - sum[i0]++; - sum[i0_prev]++; - } - if (hasAlpha1 && i1 != i1_prev) { - hist[i1 * n + i1_prev]++; - hist[i1_prev * n + i1]++; - sum[i1]++; - sum[i1_prev]++; - } + + struct optimize_alpha_params + { + struct unpacked_endpoint + { + uint8 low, high; + }; + const unpacked_endpoint* unpacked_endpoints; + const uint* hist; + uint16 n; + uint16 selected; + float weight; + struct result + { + crnlib::vector endpoint_remapping; + crnlib::vector packed_endpoints; + uint total_bits; + } * pResult; + }; + + static void sort_alpha_endpoints(crnlib::vector& remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, uint16 n) + { + remapping.resize(n); + crnlib::vector endpoints(n); + crnlib::vector indices(n); + for (uint16 i = 0; i < n; i++) + { + endpoints[i] = unpacked_endpoints[i]; + indices[i] = i; + } + optimize_alpha_params::unpacked_endpoint selected_endpoint = { 0, 0 }; + for (uint16 left = n; left;) + { + uint16 selected_index = 0; + uint min_error = cUINT32_MAX; + for (uint16 i = 0; i < left; i++) + { + optimize_alpha_params::unpacked_endpoint& endpoint = endpoints[i]; + uint error = math::square(endpoint.low - selected_endpoint.low) + math::square(endpoint.high - selected_endpoint.high); + if (error < min_error) + { + min_error = error; + selected_index = i; + } + } + selected_endpoint = endpoints[selected_index]; + remapping[indices[selected_index]] = n - left; + 
left--; + endpoints[selected_index] = endpoints[left]; + indices[selected_index] = indices[left]; + } } - } - uint16 selected = 0; - uint best_sum = 0; - for (uint16 i = 0; i < n; i++) { - if (best_sum < sum[i]) { - best_sum = sum[i]; - selected = i; + + static void remap_alpha_endpoints(uint16* remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) + { + const uint* frequency = hist + selected * n; + crnlib::vector chosen, remaining; + crnlib::vector total_frequency(n); + chosen.push_back(selected); + for (uint16 i = 0; i < n; i++) + { + if (i != selected) + { + remaining.push_back(i); + total_frequency[i] = frequency[i]; + } + } + for (uint similarity_base = (uint)(1000 * (1.0f + weight)), total_frequency_normalizer = 0; remaining.size();) + { + const optimize_alpha_params::unpacked_endpoint& e_front = unpacked_endpoints[chosen.front()]; + const optimize_alpha_params::unpacked_endpoint& e_back = unpacked_endpoints[chosen.back()]; + uint16 selected_index = 0; + uint64 best_value = 0, selected_similarity_front = 0, selected_similarity_back = 0; + for (uint16 i = 0; i < remaining.size(); i++) + { + uint remaining_index = remaining[i]; + const optimize_alpha_params::unpacked_endpoint& e_remaining = unpacked_endpoints[remaining_index]; + uint error_front = math::square(e_remaining.low - e_front.low) + math::square(e_remaining.high - e_front.high); + uint error_back = math::square(e_remaining.low - e_back.low) + math::square(e_remaining.high - e_back.high); + uint64 similarity_front = similarity_base - math::minimum(error_front, 1000); + uint64 similarity_back = similarity_base - math::minimum(error_back, 1000); + uint64 value = math::maximum(similarity_front, similarity_back) * (total_frequency[remaining_index] + total_frequency_normalizer) + 1; + if (value > best_value) + { + best_value = value; + selected_index = i; + selected_similarity_front = similarity_front; + 
selected_similarity_back = similarity_back; + } + } + selected = remaining[selected_index]; + frequency = hist + selected * n; + total_frequency_normalizer = total_frequency[selected]; + uint frequency_front = 0, frequency_back = 0; + for (int front = 0, back = chosen.size() - 1, scale = back; scale > 0; front++, back--, scale -= 2) + { + frequency_front += scale * frequency[chosen[front]]; + frequency_back += scale * frequency[chosen[back]]; + } + if (selected_similarity_front * frequency_front > selected_similarity_back * frequency_back) + { + chosen.push_front(selected); + } + else + { + chosen.push_back(selected); + } + remaining.erase(remaining.begin() + selected_index); + for (uint16 i = 0; i < remaining.size(); i++) + { + total_frequency[remaining[i]] += frequency[remaining[i]]; + } + } + for (uint16 i = 0; i < n; i++) + { + remapping[chosen[i]] = i; + } } - } - crnlib::vector unpacked_endpoints(n); - for (uint16 i = 0; i < n; i++) { - unpacked_endpoints[i].low = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 0); - unpacked_endpoints[i].high = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 1); - } - - optimize_alpha_params::result remapping_trial[4]; - float weights[4] = {0, 0, 1.0f / 6.0f, 0.5f}; - for (uint i = 0; i < 4; i++) { - optimize_alpha_params* pParams = crnlib_new(); - pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); - pParams->hist = hist.get_ptr(); - pParams->n = n; - pParams->selected = selected; - pParams->weight = weights[i]; - pParams->pResult = remapping_trial + i; - m_task_pool.queue_object_task(this, &crn_comp::optimize_alpha_endpoints_task, i, pParams); - } - m_task_pool.join(); - - for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) { - if (remapping_trial[i].total_bits < best_bits) { - m_packed_alpha_endpoints.swap(remapping_trial[i].packed_endpoints); - m_endpoint_remaping[cAlpha0].swap(remapping_trial[i].endpoint_remapping); - best_bits = remapping_trial[i].total_bits; + + void 
crn_comp::optimize_alpha_endpoints_task(uint64 data, void* pData_ptr) + { + optimize_alpha_params* pParams = reinterpret_cast(pData_ptr); + crnlib::vector& remapping = pParams->pResult->endpoint_remapping; + uint16 n = pParams->n; + remapping.resize(n); + + if (data) + { + remap_alpha_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); + } + else + { + sort_alpha_endpoints(remapping, pParams->unpacked_endpoints, n); + optimize_alpha_selectors(); + } + + pack_alpha_endpoints(pParams->pResult->packed_endpoints, remapping); + uint total_bits = pParams->pResult->packed_endpoints.size() << 3; + + crnlib::vector hist(n); + bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; + for (uint level = 0; level < m_levels.size(); level++) + { + for (uint alpha0_index = 0, alpha1_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) + { + if (hasAlpha0) + { + uint index = remapping[m_endpoint_indices[b].component[cAlpha0]]; + if (!m_endpoint_indices[b].reference) + { + int sym = index - alpha0_index; + hist[sym < 0 ? sym + n : sym]++; + } + alpha0_index = index; + } + if (hasAlpha1) + { + uint index = remapping[m_endpoint_indices[b].component[cAlpha1]]; + if (!m_endpoint_indices[b].reference) + { + int sym = index - alpha1_index; + hist[sym < 0 ? 
sym + n : sym]++; + } + alpha1_index = index; + } + } + } + + static_huffman_data_model dm; + dm.init(true, n, hist.get_ptr(), 16); + const uint8* code_sizes = dm.get_code_sizes(); + for (uint16 s = 0; s < n; s++) + { + total_bits += hist[s] * code_sizes[s]; + } + + symbol_codec codec; + codec.start_encoding(64 * 1024); + codec.encode_enable_simulation(true); + codec.encode_transmit_static_huffman_data_model(dm, false); + codec.stop_encoding(false); + total_bits += codec.encode_get_total_bits_written(); + + pParams->pResult->total_bits = total_bits; + + crnlib_delete(pParams); } - } -} -bool crn_comp::pack_data_models() { - symbol_codec codec; - codec.start_encoding(1024 * 1024); + void crn_comp::optimize_alpha_selectors() + { + crnlib::vector& remapping = m_selector_remaping[cAlpha0]; + uint16 n = m_alpha_selectors.size(); + remapping.resize(n); + + uint8 d[] = { 0, 2, 3, 3, 5, 5, 4, 4 }; + + uint8 D6[0x1000]; + for (uint16 i = 0; i < 0x1000; i++) + { + D6[i] = d[(i ^ i >> 6) & 7] + d[(i >> 3 ^ i >> 9) & 7]; + } - if (!codec.encode_transmit_static_huffman_data_model(m_reference_dm, false)) - return false; + crnlib::vector selectors(n); + crnlib::vector indices(n); + for (uint16 i = 0; i < n; i++) + { + selectors[i] = m_alpha_selectors[i]; + indices[i] = i; + } + uint64 selected_selector = 0; + for (uint16 left = n; left;) + { + uint16 selected_index = 0; + uint min_error = cUINT32_MAX; + for (uint16 i = 0; i < left; i++) + { + uint error = 0; + for (uint64 selector = selectors[i] << 6, delta_selector = selected_selector, j = 0; j < 8; j++, selector >>= 6, delta_selector >>= 6) + { + error += D6[(selector & 0xFC0) | (delta_selector & 0x3F)]; + } + if (error < min_error) + { + min_error = error; + selected_index = i; + } + } + selected_selector = selectors[selected_index]; + remapping[indices[selected_index]] = n - left; + left--; + selectors[selected_index] = selectors[left]; + indices[selected_index] = indices[left]; + } - for (uint i = 0; i < 2; i++) { - if 
(m_endpoint_index_dm[i].get_total_syms()) { - if (!codec.encode_transmit_static_huffman_data_model(m_endpoint_index_dm[i], false)) - return false; + pack_alpha_selectors(m_packed_alpha_selectors, remapping); } - if (m_selector_index_dm[i].get_total_syms()) { - if (!codec.encode_transmit_static_huffman_data_model(m_selector_index_dm[i], false)) - return false; + void crn_comp::optimize_alpha() + { + uint16 n = m_alpha_endpoints.size(); + crnlib::vector hist(n * n); + crnlib::vector sum(n); + bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; + for (uint i0, i1, i0_prev = 0, i1_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i0_prev = i0, i1_prev = i1) + { + i0 = m_endpoint_indices[b].alpha0; + i1 = m_endpoint_indices[b].alpha1; + if (!m_endpoint_indices[b].reference) + { + if (hasAlpha0 && i0 != i0_prev) + { + hist[i0 * n + i0_prev]++; + hist[i0_prev * n + i0]++; + sum[i0]++; + sum[i0_prev]++; + } + if (hasAlpha1 && i1 != i1_prev) + { + hist[i1 * n + i1_prev]++; + hist[i1_prev * n + i1]++; + sum[i1]++; + sum[i1_prev]++; + } + } + } + uint16 selected = 0; + uint best_sum = 0; + for (uint16 i = 0; i < n; i++) + { + if (best_sum < sum[i]) + { + best_sum = sum[i]; + selected = i; + } + } + crnlib::vector unpacked_endpoints(n); + for (uint16 i = 0; i < n; i++) + { + unpacked_endpoints[i].low = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 0); + unpacked_endpoints[i].high = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 1); + } + + optimize_alpha_params::result remapping_trial[4]; + float weights[4] = { 0, 0, 1.0f / 6.0f, 0.5f }; + for (uint i = 0; i < 4; i++) + { + optimize_alpha_params* pParams = crnlib_new(); + pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); + pParams->hist = hist.get_ptr(); + pParams->n = n; + pParams->selected = selected; + pParams->weight = weights[i]; + pParams->pResult = remapping_trial + i; + m_task_pool.queue_object_task(this, &crn_comp::optimize_alpha_endpoints_task, i, pParams); + } + 
m_task_pool.join(); + + for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) + { + if (remapping_trial[i].total_bits < best_bits) + { + m_packed_alpha_endpoints.swap(remapping_trial[i].packed_endpoints); + m_endpoint_remaping[cAlpha0].swap(remapping_trial[i].endpoint_remapping); + best_bits = remapping_trial[i].total_bits; + } + } } - } - - codec.stop_encoding(false); - - m_packed_data_models.swap(codec.get_encoding_buf()); - - return true; -} -void crn_comp::append_vec(crnlib::vector& a, const void* p, uint size) { - if (size) { - uint ofs = a.size(); - a.resize(ofs + size); + bool crn_comp::pack_data_models() + { + symbol_codec codec; + codec.start_encoding(1024 * 1024); - memcpy(&a[ofs], p, size); - } -} + if (!codec.encode_transmit_static_huffman_data_model(m_reference_dm, false)) + { + return false; + } -void crn_comp::append_vec(crnlib::vector& a, const crnlib::vector& b) { - if (!b.empty()) { - uint ofs = a.size(); - a.resize(ofs + b.size()); + for (uint i = 0; i < 2; i++) + { + if (m_endpoint_index_dm[i].get_total_syms()) + { + if (!codec.encode_transmit_static_huffman_data_model(m_endpoint_index_dm[i], false)) + { + return false; + } + } + + if (m_selector_index_dm[i].get_total_syms()) + { + if (!codec.encode_transmit_static_huffman_data_model(m_selector_index_dm[i], false)) + { + return false; + } + } + } - memcpy(&a[ofs], &b[0], b.size()); - } -} + codec.stop_encoding(false); -bool crn_comp::create_comp_data() { - utils::zero_object(m_crn_header); + m_packed_data_models.swap(codec.get_encoding_buf()); - m_crn_header.m_width = static_cast(m_pParams->m_width); - m_crn_header.m_height = static_cast(m_pParams->m_height); - m_crn_header.m_levels = static_cast(m_pParams->m_levels); - m_crn_header.m_faces = static_cast(m_pParams->m_faces); - m_crn_header.m_format = static_cast(m_pParams->m_format); - m_crn_header.m_userdata0 = m_pParams->m_userdata0; - m_crn_header.m_userdata1 = m_pParams->m_userdata1; + return true; + } - m_comp_data.clear(); - 
m_comp_data.reserve(2 * 1024 * 1024); - append_vec(m_comp_data, &m_crn_header, sizeof(m_crn_header)); - // tack on the rest of the variable size m_level_ofs array - m_comp_data.resize(m_comp_data.size() + sizeof(m_crn_header.m_level_ofs[0]) * (m_pParams->m_levels - 1)); + void crn_comp::append_vec(crnlib::vector& a, const void* p, uint size) + { + if (size) + { + uint ofs = a.size(); + a.resize(ofs + size); - if (m_packed_color_endpoints.size()) { - m_crn_header.m_color_endpoints.m_num = static_cast(m_color_endpoints.size()); - m_crn_header.m_color_endpoints.m_size = m_packed_color_endpoints.size(); - m_crn_header.m_color_endpoints.m_ofs = m_comp_data.size(); - append_vec(m_comp_data, m_packed_color_endpoints); - } + memcpy(&a[ofs], p, size); + } + } - if (m_packed_color_selectors.size()) { - m_crn_header.m_color_selectors.m_num = static_cast(m_color_selectors.size()); - m_crn_header.m_color_selectors.m_size = m_packed_color_selectors.size(); - m_crn_header.m_color_selectors.m_ofs = m_comp_data.size(); - append_vec(m_comp_data, m_packed_color_selectors); - } + void crn_comp::append_vec(crnlib::vector& a, const crnlib::vector& b) + { + if (!b.empty()) + { + uint ofs = a.size(); + a.resize(ofs + b.size()); - if (m_packed_alpha_endpoints.size()) { - m_crn_header.m_alpha_endpoints.m_num = static_cast(m_alpha_endpoints.size()); - m_crn_header.m_alpha_endpoints.m_size = m_packed_alpha_endpoints.size(); - m_crn_header.m_alpha_endpoints.m_ofs = m_comp_data.size(); - append_vec(m_comp_data, m_packed_alpha_endpoints); - } + memcpy(&a[ofs], &b[0], b.size()); + } + } - if (m_packed_alpha_selectors.size()) { - m_crn_header.m_alpha_selectors.m_num = static_cast(m_alpha_selectors.size()); - m_crn_header.m_alpha_selectors.m_size = m_packed_alpha_selectors.size(); - m_crn_header.m_alpha_selectors.m_ofs = m_comp_data.size(); - append_vec(m_comp_data, m_packed_alpha_selectors); - } + bool crn_comp::create_comp_data() + { + utils::zero_object(m_crn_header); + + m_crn_header.m_width = 
static_cast(m_pParams->m_width); + m_crn_header.m_height = static_cast(m_pParams->m_height); + m_crn_header.m_levels = static_cast(m_pParams->m_levels); + m_crn_header.m_faces = static_cast(m_pParams->m_faces); + m_crn_header.m_format = static_cast(m_pParams->m_format); + m_crn_header.m_userdata0 = m_pParams->m_userdata0; + m_crn_header.m_userdata1 = m_pParams->m_userdata1; + + m_comp_data.clear(); + m_comp_data.reserve(2 * 1024 * 1024); + append_vec(m_comp_data, &m_crn_header, sizeof(m_crn_header)); + // tack on the rest of the variable size m_level_ofs array + m_comp_data.resize(m_comp_data.size() + sizeof(m_crn_header.m_level_ofs[0]) * (m_pParams->m_levels - 1)); + + if (m_packed_color_endpoints.size()) + { + m_crn_header.m_color_endpoints.m_num = static_cast(m_color_endpoints.size()); + m_crn_header.m_color_endpoints.m_size = m_packed_color_endpoints.size(); + m_crn_header.m_color_endpoints.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_color_endpoints); + } - m_crn_header.m_tables_ofs = m_comp_data.size(); - m_crn_header.m_tables_size = m_packed_data_models.size(); - append_vec(m_comp_data, m_packed_data_models); + if (m_packed_color_selectors.size()) + { + m_crn_header.m_color_selectors.m_num = static_cast(m_color_selectors.size()); + m_crn_header.m_color_selectors.m_size = m_packed_color_selectors.size(); + m_crn_header.m_color_selectors.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_color_selectors); + } - uint level_ofs[cCRNMaxLevels]; - for (uint i = 0; i < m_levels.size(); i++) { - level_ofs[i] = m_comp_data.size(); - append_vec(m_comp_data, m_packed_blocks[i]); - } + if (m_packed_alpha_endpoints.size()) + { + m_crn_header.m_alpha_endpoints.m_num = static_cast(m_alpha_endpoints.size()); + m_crn_header.m_alpha_endpoints.m_size = m_packed_alpha_endpoints.size(); + m_crn_header.m_alpha_endpoints.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_alpha_endpoints); + } - crnd::crn_header& dst_header = 
*(crnd::crn_header*)&m_comp_data[0]; - // don't change the m_comp_data vector - or dst_header will be invalidated! + if (m_packed_alpha_selectors.size()) + { + m_crn_header.m_alpha_selectors.m_num = static_cast(m_alpha_selectors.size()); + m_crn_header.m_alpha_selectors.m_size = m_packed_alpha_selectors.size(); + m_crn_header.m_alpha_selectors.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_alpha_selectors); + } - memcpy(&dst_header, &m_crn_header, sizeof(dst_header)); + m_crn_header.m_tables_ofs = m_comp_data.size(); + m_crn_header.m_tables_size = m_packed_data_models.size(); + append_vec(m_comp_data, m_packed_data_models); - for (uint i = 0; i < m_levels.size(); i++) - dst_header.m_level_ofs[i] = level_ofs[i]; + uint level_ofs[cCRNMaxLevels]; + for (uint i = 0; i < m_levels.size(); i++) + { + level_ofs[i] = m_comp_data.size(); + append_vec(m_comp_data, m_packed_blocks[i]); + } - const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_levels.size() - 1); + crnd::crn_header& dst_header = *(crnd::crn_header*)&m_comp_data[0]; + // don't change the m_comp_data vector - or dst_header will be invalidated! 
- dst_header.m_sig = crnd::crn_header::cCRNSigValue; + memcpy(&dst_header, &m_crn_header, sizeof(dst_header)); - dst_header.m_data_size = m_comp_data.size(); - dst_header.m_data_crc16 = crc16(&m_comp_data[actual_header_size], m_comp_data.size() - actual_header_size); + for (uint i = 0; i < m_levels.size(); i++) + { + dst_header.m_level_ofs[i] = level_ofs[i]; + } - dst_header.m_header_size = actual_header_size; - dst_header.m_header_crc16 = crc16(&dst_header.m_data_size, actual_header_size - (uint)((uint8*)&dst_header.m_data_size - (uint8*)&dst_header)); + const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_levels.size() - 1); - return true; -} - -bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subphase_total) { - if (!m_pParams->m_pProgress_func) - return true; + dst_header.m_sig = crnd::crn_header::cCRNSigValue; + + dst_header.m_data_size = m_comp_data.size(); + dst_header.m_data_crc16 = crc16(&m_comp_data[actual_header_size], m_comp_data.size() - actual_header_size); + + dst_header.m_header_size = actual_header_size; + dst_header.m_header_crc16 = crc16(&dst_header.m_data_size, actual_header_size - (uint)((uint8*)&dst_header.m_data_size - (uint8*)&dst_header)); + + return true; + } + + bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subphase_total) + { + if (!m_pParams->m_pProgress_func) + { + return true; + } #if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) - return true; + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + return true; + } #endif - return (*m_pParams->m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_pParams->m_pProgress_func_data) != 0; -} - -bool crn_comp::compress_internal() { - if (!alias_images()) - return false; - if (!quantize_images()) - return false; - - m_reference_hist.clear(); - for (uint i = 0; i < 2; i++) { - m_endpoint_remaping[i].clear(); - 
m_endpoint_index_hist[i].clear(); - m_endpoint_index_dm[i].clear(); - m_selector_remaping[i].clear(); - m_selector_index_hist[i].clear(); - m_selector_index_dm[i].clear(); - } - - if (m_has_comp[cColor]) - optimize_color(); - - if (m_has_comp[cAlpha0]) - optimize_alpha(); - - for (uint pass = 0; pass < 2; pass++) { - for (uint level = 0; level < m_levels.size(); level++) { - symbol_codec codec; - codec.start_encoding(2 * 1024 * 1024); - - if (!pack_blocks( - level, - !pass && !level, pass ? &codec : nullptr, - m_has_comp[cColor] ? &m_endpoint_remaping[cColor] : nullptr, m_has_comp[cColor] ? &m_selector_remaping[cColor] : nullptr, - m_has_comp[cAlpha0] ? &m_endpoint_remaping[cAlpha0] : nullptr, m_has_comp[cAlpha0] ? &m_selector_remaping[cAlpha0] : nullptr)) { - return false; - } - - codec.stop_encoding(false); - - if (pass) - m_packed_blocks[level].swap(codec.get_encoding_buf()); + return (*m_pParams->m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_pParams->m_pProgress_func_data) != 0; } - if (!pass) { - m_reference_dm.init(true, m_reference_hist, 16); + bool crn_comp::compress_internal() + { + if (!alias_images()) + { + return false; + } + if (!quantize_images()) + { + return false; + } - for (uint i = 0; i < 2; i++) { - if (m_endpoint_index_hist[i].size()) - m_endpoint_index_dm[i].init(true, m_endpoint_index_hist[i], 16); + m_reference_hist.clear(); + for (uint i = 0; i < 2; i++) + { + m_endpoint_remaping[i].clear(); + m_endpoint_index_hist[i].clear(); + m_endpoint_index_dm[i].clear(); + m_selector_remaping[i].clear(); + m_selector_index_hist[i].clear(); + m_selector_index_dm[i].clear(); + } - if (m_selector_index_hist[i].size()) - m_selector_index_dm[i].init(true, m_selector_index_hist[i], 16); - } - } - } + if (m_has_comp[cColor]) + { + optimize_color(); + } - if (!pack_data_models()) - return false; + if (m_has_comp[cAlpha0]) + { + optimize_alpha(); + } - if (!create_comp_data()) - return false; + for (uint pass = 0; 
pass < 2; pass++) + { + for (uint level = 0; level < m_levels.size(); level++) + { + symbol_codec codec; + codec.start_encoding(2 * 1024 * 1024); + + if (!pack_blocks( + level, + !pass && !level, pass ? &codec : nullptr, + m_has_comp[cColor] ? &m_endpoint_remaping[cColor] : nullptr, m_has_comp[cColor] ? &m_selector_remaping[cColor] : nullptr, + m_has_comp[cAlpha0] ? &m_endpoint_remaping[cAlpha0] : nullptr, m_has_comp[cAlpha0] ? &m_selector_remaping[cAlpha0] : nullptr)) + { + return false; + } + + codec.stop_encoding(false); + + if (pass) + { + m_packed_blocks[level].swap(codec.get_encoding_buf()); + } + } + + if (!pass) + { + m_reference_dm.init(true, m_reference_hist, 16); + + for (uint i = 0; i < 2; i++) + { + if (m_endpoint_index_hist[i].size()) + { + m_endpoint_index_dm[i].init(true, m_endpoint_index_hist[i], 16); + } + + if (m_selector_index_hist[i].size()) + { + m_selector_index_dm[i].init(true, m_selector_index_hist[i], 16); + } + } + } + } - if (!update_progress(24, 1, 1)) - return false; + if (!pack_data_models()) + { + return false; + } - if (m_pParams->m_flags & cCRNCompFlagDebugging) { - crnlib_print_mem_stats(); - } + if (!create_comp_data()) + { + return false; + } - return true; -} + if (!update_progress(24, 1, 1)) + { + return false; + } -bool crn_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { - clear(); + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + crnlib_print_mem_stats(); + } - if (pEffective_bitrate) - *pEffective_bitrate = 0.0f; + return true; + } - m_pParams = &params; - m_has_etc_color_blocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A || params.m_format == cCRNFmtETC1S || params.m_format == cCRNFmtETC2AS; - m_has_subblocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A; + bool crn_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) + { + clear(); - if
((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) - return false; + if (pEffective_bitrate) + { + *pEffective_bitrate = 0.0f; + } - if (!m_task_pool.init(params.m_num_helper_threads)) - return false; + m_pParams = &params; + m_has_etc_color_blocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A || params.m_format == cCRNFmtETC1S || params.m_format == cCRNFmtETC2AS; + m_has_subblocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A; - bool status = compress_internal(); + if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) + { + return false; + } - m_task_pool.deinit(); + if (!m_task_pool.init(params.m_num_helper_threads)) + { + return false; + } + + bool status = compress_internal(); + + m_task_pool.deinit(); - if ((status) && (pEffective_bitrate)) { - uint total_pixels = 0; + if ((status) && (pEffective_bitrate)) + { + uint total_pixels = 0; - for (uint f = 0; f < m_pParams->m_faces; f++) - for (uint l = 0; l < m_pParams->m_levels; l++) - total_pixels += m_images[f][l].get_total_pixels(); + for (uint f = 0; f < m_pParams->m_faces; f++) + { + for (uint l = 0; l < m_pParams->m_levels; l++) + { + total_pixels += m_images[f][l].get_total_pixels(); + } + } - *pEffective_bitrate = (m_comp_data.size() * 8.0f) / total_pixels; - } + *pEffective_bitrate = (m_comp_data.size() * 8.0f) / total_pixels; + } - return status; -} + return status; + } -void crn_comp::compress_deinit() { -} + void crn_comp::compress_deinit() + { + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h index 53b67b3..9117ef2 100644 --- a/crnlib/crn_comp.h +++ b/crnlib/crn_comp.h @@ -1,5 +1,25 @@ -// File: crn_comp.h -// See Copyright Notice and license at the end
of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -18,6 +38,7 @@ namespace crnlib class CRN_EXPORT crn_comp : public itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_comp); + public: crn_comp(); virtual ~crn_comp(); @@ -121,8 +142,7 @@ namespace crnlib const crnlib::vector* pColor_endpoint_remap, const crnlib::vector* pColor_selector_remap, const crnlib::vector* pAlpha_endpoint_remap, - const crnlib::vector* pAlpha_selector_remap - ); + const crnlib::vector* pAlpha_selector_remap); bool alias_images(); void clear(); @@ -145,4 +165,4 @@ namespace crnlib bool compress_internal(); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_console.cpp b/crnlib/crn_console.cpp index 121fa81..ad1219c 100644 --- a/crnlib/crn_console.cpp +++ b/crnlib/crn_console.cpp @@ -1,5 +1,25 @@ -// File: crn_console.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_console.h" #include "crn_data_stream.h" @@ -232,4 +252,4 @@ namespace crnlib va_end(args); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_console.h b/crnlib/crn_console.h index f9b357c..9af8a3e 100644 --- a/crnlib/crn_console.h +++ b/crnlib/crn_console.h @@ -1,5 +1,25 @@ -// File: crn_console.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -25,13 +45,13 @@ namespace crnlib enum eConsoleMessageType { - cDebugConsoleMessage, // debugging messages - cProgressConsoleMessage, // progress messages - cInfoConsoleMessage, // ordinary messages - cConsoleConsoleMessage, // user console output - cMessageConsoleMessage, // high importance messages - cWarningConsoleMessage, // warnings - cErrorConsoleMessage, // errors + cDebugConsoleMessage, // debugging messages + cProgressConsoleMessage, // progress messages + cInfoConsoleMessage, // ordinary messages + cConsoleConsoleMessage, // user console output + cMessageConsoleMessage, // high importance messages + cWarningConsoleMessage, // warnings + cErrorConsoleMessage, // errors cCMTTotal, }; @@ -44,7 +64,10 @@ namespace crnlib CRN_EXPORT static void init(); CRN_EXPORT static void deinit(); - static bool is_initialized() { return m_pMutex != nullptr; } + static bool is_initialized() + { + return m_pMutex != nullptr; + } CRN_EXPORT static void set_default_category(eConsoleMessageType category); CRN_EXPORT static eConsoleMessageType get_default_category(); @@ -68,28 +91,55 @@ namespace crnlib // FIXME: All console state is currently global! 
CRN_EXPORT static void disable_prefixes(); CRN_EXPORT static void enable_prefixes(); - static bool get_prefixes() { return m_prefixes; } - static bool get_at_beginning_of_line() { return m_at_beginning_of_line; } + static bool get_prefixes() + { + return m_prefixes; + } + static bool get_at_beginning_of_line() + { + return m_at_beginning_of_line; + } CRN_EXPORT static void disable_crlf(); CRN_EXPORT static void enable_crlf(); - static bool get_crlf() { return m_crlf; } + static bool get_crlf() + { + return m_crlf; + } - static void disable_output() { m_output_disabled = true; } - static void enable_output() { m_output_disabled = false; } - static bool get_output_disabled() { return m_output_disabled; } + static void disable_output() + { + m_output_disabled = true; + } + static void enable_output() + { + m_output_disabled = false; + } + static bool get_output_disabled() + { + return m_output_disabled; + } - static void set_log_stream(data_stream* pStream) { m_pLog_stream = pStream; } - static data_stream* get_log_stream() { return m_pLog_stream; } + static void set_log_stream(data_stream* pStream) + { + m_pLog_stream = pStream; + } + static data_stream* get_log_stream() + { + return m_pLog_stream; + } - static uint get_num_messages(eConsoleMessageType type) { return m_num_messages[type]; } + static uint get_num_messages(eConsoleMessageType type) + { + return m_num_messages[type]; + } private: static eConsoleMessageType m_default_category; struct console_func { - console_func(console_output_func func = nullptr, void* pData = nullptr): + console_func(console_output_func func = nullptr, void* pData = nullptr) : m_func(func), m_pData(pData) { @@ -137,4 +187,4 @@ namespace crnlib return 0; } #endif -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_core.cpp b/crnlib/crn_core.cpp index 17ced81..c1c35fb 100644 --- a/crnlib/crn_core.cpp +++ b/crnlib/crn_core.cpp @@ -1,9 +1,28 @@ -// File: crn_core.cpp -// See Copyright Notice and license at the end of 
inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" namespace crnlib { - } // namespace crnlib diff --git a/crnlib/crn_core.h b/crnlib/crn_core.h index a9f4a00..628f4a4 100644 --- a/crnlib/crn_core.h +++ b/crnlib/crn_core.h @@ -1,28 +1,48 @@ -// File: crn_core.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_sysdetection.h" #if defined(CRN_CC_MSVC) -#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union -#pragma warning(disable : 4127) // conditional expression is constant -#pragma warning(disable : 4793) // function compiled as native -#pragma warning(disable : 4324) // structure was padded due to __declspec(align()) +#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union +#pragma warning(disable : 4127) // conditional expression is constant +#pragma warning(disable : 4793) // function compiled as native +#pragma warning(disable : 4324) // structure was padded due to __declspec(align()) #endif #if defined(CRN_CC_MSVC) -# define CRN_NEVER_INLINE __declspec(noinline) -# define CRN_FORCE_INLINE __forceinline +#define CRN_NEVER_INLINE __declspec(noinline) +#define CRN_FORCE_INLINE __forceinline #elif defined(CRN_CC_GNU) -# define CRN_NEVER_INLINE __attribute__((noinline)) -# define CRN_FORCE_INLINE inline __attribute__((always_inline)) +#define CRN_NEVER_INLINE __attribute__((noinline)) +#define CRN_FORCE_INLINE inline __attribute__((always_inline)) #else -# define CRN_NEVER_INLINE -# define CRN_FORCE_INLINE inline +#define CRN_NEVER_INLINE +#define CRN_FORCE_INLINE inline #endif - #if defined(WIN32) && !defined(CRNLIB_ANSI_CPLUSPLUS) // MSVC or MinGW, x86 or x64, Win32 API's for threading and Win32 Interlocked API's or GCC built-ins for atomic ops. 
#ifdef NDEBUG diff --git a/crnlib/crn_darwin_pthreads.cpp b/crnlib/crn_darwin_pthreads.cpp index 6d46493..bc957de 100644 --- a/crnlib/crn_darwin_pthreads.cpp +++ b/crnlib/crn_darwin_pthreads.cpp @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include diff --git a/crnlib/crn_darwin_pthreads.h b/crnlib/crn_darwin_pthreads.h index b3f3e54..109bb65 100644 --- a/crnlib/crn_darwin_pthreads.h +++ b/crnlib/crn_darwin_pthreads.h @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" diff --git a/crnlib/crn_data_stream.cpp b/crnlib/crn_data_stream.cpp index 518fa51..735aa62 100644 --- a/crnlib/crn_data_stream.cpp +++ b/crnlib/crn_data_stream.cpp @@ -1,12 +1,32 @@ -// File: crn_data_stream.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_data_stream.h" namespace crnlib { - data_stream::data_stream(): + data_stream::data_stream() : m_attribs(0), m_opened(false), m_error(false), @@ -14,7 +34,7 @@ namespace crnlib { } - data_stream::data_stream(const char* pName, uint attribs): + data_stream::data_stream(const char* pName, uint attribs) : m_name(pName), m_attribs(static_cast(attribs)), m_opened(false), @@ -142,4 +162,4 @@ namespace crnlib return true; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_data_stream.h b/crnlib/crn_data_stream.h index cc207bd..3924405 100644 --- a/crnlib/crn_data_stream.h +++ b/crnlib/crn_data_stream.h @@ -1,5 +1,26 @@ -// File: crn_data_stream.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_export.h" @@ -152,4 +173,4 @@ namespace crnlib } }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_data_stream_serializer.h b/crnlib/crn_data_stream_serializer.h index 6a2c7fb..1f56319 100644 --- a/crnlib/crn_data_stream_serializer.h +++ b/crnlib/crn_data_stream_serializer.h @@ -1,5 +1,25 @@ -// File: data_stream_serializer.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -12,7 +32,7 @@ namespace crnlib class CRN_EXPORT data_stream_serializer { public: - data_stream_serializer(): + data_stream_serializer() : m_pStream(nullptr), m_little_endian(true) { @@ -22,12 +42,12 @@ namespace crnlib m_little_endian(true) { } - data_stream_serializer(data_stream& stream): + data_stream_serializer(data_stream& stream) : m_pStream(&stream), m_little_endian(true) { } - data_stream_serializer(const data_stream_serializer& other): + data_stream_serializer(const data_stream_serializer& other) : m_pStream(other.m_pStream), m_little_endian(other.m_little_endian) { @@ -56,7 +76,7 @@ namespace crnlib bool get_error() { - return m_pStream ? m_pStream->get_error() : false; + return m_pStream != nullptr && m_pStream->get_error(); } bool get_little_endian() const @@ -125,7 +145,7 @@ namespace crnlib return m_pStream->skip(len) == len; } - template + template bool write_object(const T& obj) { if (m_little_endian == c_crnlib_little_endian_platform) @@ -143,7 +163,7 @@ namespace crnlib } } - template + template bool read_object(T& obj) { if (m_little_endian == c_crnlib_little_endian_platform) @@ -166,13 +186,13 @@ namespace crnlib } } - template + template bool write_value(T value) { return write_object(value); } - template + template T read_value(const T& on_error_value = T()) { T result; @@ -183,14 +203,14 @@ namespace crnlib return result; } - template + template bool write_enum(T e) { int val = static_cast(e); return write_object(val); } - template + template T read_enum() { return static_cast(read_value()); @@ -213,8 +233,7 @@ namespace crnlib } val >>= 7; - } - while (val); + } while (val); return true; } @@ -301,7 +320,8 @@ namespace crnlib return false; } - if (len) { + if (len) + { if (!read_chars(str.get_ptr_raw(), len)) { return false; @@ -317,7 +337,7 @@ namespace crnlib return true; } - template + template bool write_vector(const T& vec) { if (!write_uint_vlc(vec.size())) @@ -337,7 +357,7 @@ namespace crnlib return true; }; - 
template + template bool read_vector(T& vec, uint num_expected = UINT_MAX) { uint size; @@ -571,14 +591,14 @@ namespace crnlib return serializer; } - template + template inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const crnlib::vector& vec) { serializer.write_vector(vec); return serializer; } - template + template inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const T* p) { serializer.write_object(*p); @@ -658,18 +678,18 @@ namespace crnlib return serializer; } - template + template inline data_stream_serializer& operator>>(data_stream_serializer& serializer, crnlib::vector& vec) { serializer.read_vector(vec); return serializer; } - template + template inline data_stream_serializer& operator>>(data_stream_serializer& serializer, T* p) { serializer.read_object(*p); return serializer; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dds_comp.cpp b/crnlib/crn_dds_comp.cpp index 430fd89..5abb726 100644 --- a/crnlib/crn_dds_comp.cpp +++ b/crnlib/crn_dds_comp.cpp @@ -1,5 +1,25 @@ -// File: crn_dds_comp.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_dds_comp.h" @@ -8,7 +28,7 @@ namespace crnlib { - dds_comp::dds_comp(): + dds_comp::dds_comp() : m_pParams(nullptr), m_pixel_fmt(PIXEL_FMT_INVALID), m_pQDXT_state(nullptr) @@ -239,7 +259,8 @@ namespace crnlib return true; } - bool dds_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { + bool dds_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) + { if (pEffective_bitrate) { *pEffective_bitrate = 0.0f; @@ -289,4 +310,4 @@ namespace crnlib { clear(); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dds_comp.h b/crnlib/crn_dds_comp.h index 150d3fb..c82f1dd 100644 --- a/crnlib/crn_dds_comp.h +++ b/crnlib/crn_dds_comp.h @@ -1,5 +1,25 @@ -// File: crn_comp.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -13,6 +33,7 @@ namespace crnlib class CRN_EXPORT dds_comp : public itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(dds_comp); + public: dds_comp(); virtual ~dds_comp(); @@ -56,4 +77,4 @@ namespace crnlib bool convert_to_dxt(const crn_comp_params& params); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_decomp.cpp b/crnlib/crn_decomp.cpp index 1d9faf7..bac604c 100644 --- a/crnlib/crn_decomp.cpp +++ b/crnlib/crn_decomp.cpp @@ -1,5 +1,26 @@ -// File: crn_decomp.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" // Include the single-file header library with no defines, which brings in the full CRN decompressor. 
diff --git a/crnlib/crn_dxt.cpp b/crnlib/crn_dxt.cpp index 33b4c36..88f7721 100644 --- a/crnlib/crn_dxt.cpp +++ b/crnlib/crn_dxt.cpp @@ -1,5 +1,25 @@ -// File: crn_dxt.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_dxt.h" @@ -104,7 +124,8 @@ namespace crnlib bool get_dxt_format_has_alpha(dxt_format fmt) { - switch (fmt) { + switch (fmt) + { case cDXT1A: case cDXT3: case cDXT5: @@ -184,7 +205,7 @@ namespace crnlib int gdiff = pDst[1].g - pDst[0].g; - if (color4) //(packed_col0 > packed_col1) + if (color4) //(packed_col0 > packed_col1) { pDst[2].r = static_cast(((2 * col0.r + col1.r) * 22) / 8); pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 80) / 256); @@ -279,7 +300,8 @@ namespace crnlib } } - uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { + uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { if (color0 > color1) { return get_block_colors4_round(pDst, color0, color1); @@ -431,4 +453,4 @@ namespace crnlib return get_block_values6(pDst, l, h); } } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt.h b/crnlib/crn_dxt.h index bde8a2b..65935dd 100644 --- a/crnlib/crn_dxt.h +++ b/crnlib/crn_dxt.h @@ -1,6 +1,28 @@ -// File: crn_dxt.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crnlib.h" #include "crn_color.h" #include "crn_vec.h" @@ -43,8 +65,8 @@ namespace crnlib cDXT5, cDXT5A, - cDXN_XY, // inverted relative to standard ATI2, 360's DXN - cDXN_YX, // standard ATI2, + cDXN_XY, // inverted relative to standard ATI2, 360's DXN + cDXN_YX, // standard ATI2, cETC1, cETC2, @@ -89,7 +111,10 @@ namespace crnlib uint8 m_low_color[2]; uint8 m_high_color[2]; - enum { cNumSelectorBytes = 4 }; + enum + { + cNumSelectorBytes = 4 + }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() @@ -128,7 +153,8 @@ namespace crnlib { return get_low_color() <= get_high_color(); } - inline bool is_non_alpha_block() const { + inline bool is_non_alpha_block() const + { return !is_alpha_block(); } @@ -199,7 +225,10 @@ namespace crnlib struct dxt3_block { - enum { cNumAlphaBytes = 8 }; + enum + { + cNumAlphaBytes = 8 + }; uint8 m_alpha[cNumAlphaBytes]; void set_alpha(uint x, uint y, uint value, bool scaled); @@ -238,7 +267,10 @@ namespace crnlib { uint8 m_endpoints[2]; - enum { cNumSelectorBytes = 6 }; + enum + { + cNumSelectorBytes = 6 + }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() @@ -295,7 +327,9 @@ namespace crnlib uint v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) + { v |= (m_selectors[byte_index + 1] << 8); + } return (v >> bit_ofs) & 7; } @@ -312,7 +346,9 @@ namespace crnlib uint v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) + { v |= (m_selectors[byte_index + 1] << 8); + } v &= (~(7 << bit_ofs)); v |= (val << bit_ofs); @@ -350,7 +386,10 @@ namespace crnlib } } - enum { cMaxSelectorValues = 8 }; + enum + { + cMaxSelectorValues = 8 + }; // Results written to alpha channel. 
static uint get_block_values6(color_quad_u8* pDst, uint l, uint h); @@ -370,7 +409,7 @@ namespace crnlib struct dxt_pixel_block { - color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] + color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] inline void clear() { @@ -380,4 +419,4 @@ namespace crnlib CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_pixel_block); -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt1.cpp b/crnlib/crn_dxt1.cpp index af331ae..80edb61 100644 --- a/crnlib/crn_dxt1.cpp +++ b/crnlib/crn_dxt1.cpp @@ -1,6 +1,26 @@ -// File: crn_dxt1.cpp -// See Copyright Notice and license at the end of inc/crnlib.h -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // Notes: // This class is not optimized for performance on small blocks, unlike typical DXT1 compressors. It's optimized for scalability and quality: // - Very high quality in terms of avg. RMSE or Luma RMSE. 
Goal is to always match or beat every other known offline DXTc compressor: ATI_Compress, squish, NVidia texture tools, nvdxt.exe, etc. @@ -8,6 +28,7 @@ // - Any quality optimization which results in even a tiny improvement is worth it -- as long as it's either a constant or linear slowdown. // Tiny quality improvements can be extremely valuable in large clusters. // - Quality should scale well vs. CPU time cost, i.e. the more time you spend the higher the quality. + #include "crn_core.h" #include "crn_dxt1.h" #include "crn_ryg_dxt.hpp" @@ -15,1805 +36,2212 @@ #include "crn_intersect.h" #include "crn_vec_interval.h" -namespace crnlib { -//----------------------------------------------------------------------------------------------------------------------------------------- - -static const int16 g_fast_probe_table[] = {0, 1, 2, 3}; -static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); - -static const int16 g_normal_probe_table[] = {0, 1, 3, 5, 7}; -static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); - -static const int16 g_better_probe_table[] = {0, 1, 2, 3, 5, 9, 15, 19, 27, 43}; -static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); - -static const int16 g_uber_probe_table[] = {0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 59, 91}; -static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); - -struct unique_color_projection { - unique_color color; - int64 projection; -}; -static struct { - bool operator()(unique_color_projection a, unique_color_projection b) const { return a.projection < b.projection; } -} g_unique_color_projection_sort; - -//----------------------------------------------------------------------------------------------------------------------------------------- - -dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() - : m_pParams(nullptr), - 
m_pResults(nullptr), - m_perceptual(false), - m_num_prev_results(0) { - m_low_coords.reserve(512); - m_high_coords.reserve(512); - - m_unique_colors.reserve(512); - m_temp_unique_colors.reserve(512); - m_unique_packed_colors.reserve(512); - - m_norm_unique_colors.reserve(512); - m_norm_unique_colors_weighted.reserve(512); - - m_lo_cells.reserve(128); - m_hi_cells.reserve(128); - m_num_prev_results = 0; -} - -// All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables. -bool dxt1_endpoint_optimizer::try_average_block_as_solid() { - uint64 tot_r = 0; - uint64 tot_g = 0; - uint64 tot_b = 0; - - uint total_weight = 0; - for (uint i = 0; i < m_unique_colors.size(); i++) { - uint weight = m_unique_colors[i].m_weight; - total_weight += weight; - - tot_r += m_unique_colors[i].m_color.r * static_cast(weight); - tot_g += m_unique_colors[i].m_color.g * static_cast(weight); - tot_b += m_unique_colors[i].m_color.b * static_cast(weight); - } - - const uint half_total_weight = total_weight >> 1; - uint ave_r = static_cast((tot_r + half_total_weight) / total_weight); - uint ave_g = static_cast((tot_g + half_total_weight) / total_weight); - uint ave_b = static_cast((tot_b + half_total_weight) / total_weight); - - uint low_color = (ryg_dxt::OMatch5[ave_r][0] << 11) | (ryg_dxt::OMatch6[ave_g][0] << 5) | ryg_dxt::OMatch5[ave_b][0]; - uint high_color = (ryg_dxt::OMatch5[ave_r][1] << 11) | (ryg_dxt::OMatch6[ave_g][1] << 5) | ryg_dxt::OMatch5[ave_b][1]; - bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - - if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { - low_color = (ryg_dxt::OMatch5_3[ave_r][0] << 11) | (ryg_dxt::OMatch6_3[ave_g][0] << 5) | ryg_dxt::OMatch5_3[ave_b][0]; - high_color = (ryg_dxt::OMatch5_3[ave_r][1] << 11) | (ryg_dxt::OMatch6_3[ave_g][1] << 5) | ryg_dxt::OMatch5_3[ave_b][1]; - improved |= 
evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - } - - if (m_pParams->m_quality == cCRNDXTQualityUber) { - // Try compressing as all-solid using the other (non-average) colors in the block in uber. - for (uint i = 0; i < m_unique_colors.size(); i++) { - uint r = m_unique_colors[i].m_color[0]; - uint g = m_unique_colors[i].m_color[1]; - uint b = m_unique_colors[i].m_color[2]; - if ((r == ave_r) && (g == ave_g) && (b == ave_b)) - continue; - - uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; - uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; - improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - - if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { - low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; - high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; - improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - } - } - } - - return improved; -} - -void dxt1_endpoint_optimizer::compute_vectors(const vec3F& perceptual_weights) { - m_norm_unique_colors.resize(0); - m_norm_unique_colors_weighted.resize(0); - - m_mean_norm_color.clear(); - m_mean_norm_color_weighted.clear(); - - for (uint i = 0; i < m_unique_colors.size(); i++) { - const color_quad_u8& color = m_unique_colors[i].m_color; - const uint weight = m_unique_colors[i].m_weight; - - vec3F norm_color(color.r * 1.0f / 255.0f, color.g * 1.0f / 255.0f, color.b * 1.0f / 255.0f); - vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color)); - - m_norm_unique_colors.push_back(norm_color); - m_norm_unique_colors_weighted.push_back(norm_color_weighted); - - m_mean_norm_color += norm_color * (float)weight; - m_mean_norm_color_weighted += 
norm_color_weighted * (float)weight; - } - - if (m_total_unique_color_weight) { - m_mean_norm_color *= (1.0f / m_total_unique_color_weight); - m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight); - } - - for (uint i = 0; i < m_unique_colors.size(); i++) { - m_norm_unique_colors[i] -= m_mean_norm_color; - m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted; - } -} - -// Compute PCA (principle axis, i.e. direction of largest variance) of input vectors. -void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) { - double cov[6] = {0, 0, 0, 0, 0, 0}; - for (uint i = 0; i < norm_colors.size(); i++) { - const vec3F& v = norm_colors[i]; - float r = v[0]; - float g = v[1]; - float b = v[2]; - if (m_unique_colors[i].m_weight > 1) { - const double weight = m_unique_colors[i].m_weight; - cov[0] += r * r * weight; - cov[1] += r * g * weight; - cov[2] += r * b * weight; - cov[3] += g * g * weight; - cov[4] += g * b * weight; - cov[5] += b * b * weight; - } else { - cov[0] += r * r; - cov[1] += r * g; - cov[2] += r * b; - cov[3] += g * g; - cov[4] += g * b; - cov[5] += b * b; - } - } - double vfr = .9f; - double vfg = 1.0f; - double vfb = .7f; - for (uint iter = 0; iter < 8; iter++) { - double r = vfr * cov[0] + vfg * cov[1] + vfb * cov[2]; - double g = vfr * cov[1] + vfg * cov[3] + vfb * cov[4]; - double b = vfr * cov[2] + vfg * cov[4] + vfb * cov[5]; - double m = math::maximum(fabs(r), fabs(g), fabs(b)); - if (m > 1e-10) { - m = 1.0f / m; - r *= m; - g *= m; - b *= m; - } - double delta = math::square(vfr - r) + math::square(vfg - g) + math::square(vfb - b); - vfr = r; - vfg = g; - vfb = b; - if ((iter > 2) && (delta < 1e-8)) - break; - } - double len = vfr * vfr + vfg * vfg + vfb * vfb; - if (len < 1e-10) { - axis = def; - } else { - len = 1.0f / sqrt(len); - axis.set(static_cast(vfr * len), static_cast(vfg * len), static_cast(vfb * len)); - } -} +namespace crnlib +{ + 
//----------------------------------------------------------------------------------------------------------------------------------------- -static const uint8 g_invTableNull[4] = {0, 1, 2, 3}; -static const uint8 g_invTableAlpha[4] = {1, 0, 2, 3}; -static const uint8 g_invTableColor[4] = {1, 0, 3, 2}; + static const int16 g_fast_probe_table[] = { 0, 1, 2, 3 }; + static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); -// Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input. -void dxt1_endpoint_optimizer::return_solution() { - compute_selectors(); - bool invert_selectors; - - if (m_best_solution.m_alpha_block) - invert_selectors = (m_best_solution.m_coords.m_low_color > m_best_solution.m_coords.m_high_color); - else { - CRNLIB_ASSERT(m_best_solution.m_coords.m_low_color != m_best_solution.m_coords.m_high_color); - - invert_selectors = (m_best_solution.m_coords.m_low_color < m_best_solution.m_coords.m_high_color); - } - - m_pResults->m_alternate_rounding = m_best_solution.m_alternate_rounding; - m_pResults->m_enforce_selector = m_best_solution.m_enforce_selector; - m_pResults->m_enforced_selector = m_best_solution.m_enforced_selector; - m_pResults->m_reordered = invert_selectors; - if (invert_selectors) { - m_pResults->m_low_color = m_best_solution.m_coords.m_high_color; - m_pResults->m_high_color = m_best_solution.m_coords.m_low_color; - } else { - m_pResults->m_low_color = m_best_solution.m_coords.m_low_color; - m_pResults->m_high_color = m_best_solution.m_coords.m_high_color; - } - - const uint8* pInvert_table = g_invTableNull; - if (invert_selectors) - pInvert_table = m_best_solution.m_alpha_block ? g_invTableAlpha : g_invTableColor; - - const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? 
(m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; - - const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); - uint8* pDst_selectors = m_pResults->m_pSelectors; - - if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha)) { - uint32 c = utils::read_le32(pSrc_pixels); - - CRNLIB_ASSERT(c >= alpha_thresh); - - c |= 0xFF000000U; - - unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); - CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); - - uint unique_color_index = it->second; - - uint selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; - - memset(pDst_selectors, selector, m_pParams->m_num_pixels); - } else { - uint8* pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels; - - uint8 prev_selector = 0; - uint32 prev_color = 0; - - do { - uint32 c = utils::read_le32(pSrc_pixels); - pSrc_pixels++; - - uint8 selector = 3; - - if (c >= alpha_thresh) { - c |= 0xFF000000U; - - if (c == prev_color) - selector = prev_selector; - else { - unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); - - CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); - - uint unique_color_index = it->second; - - selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; - - prev_color = c; - prev_selector = selector; - } - } - - *pDst_selectors++ = selector; - - } while (pDst_selectors != pDst_selectors_end); - } - - m_pResults->m_alpha_block = m_best_solution.m_alpha_block; - m_pResults->m_error = m_best_solution.m_error; -} + static const int16 g_normal_probe_table[] = { 0, 1, 3, 5, 7 }; + static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); -// Per-component 1D endpoint optimization. 
+ static const int16 g_better_probe_table[] = { 0, 1, 2, 3, 5, 9, 15, 19, 27, 43 }; + static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); -void dxt1_endpoint_optimizer::compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]) { - uint64 W[4] = {}, WP2[4] = {}, WPP[4] = {}; - for (uint i = 0; i < m_unique_colors.size(); i++) { - uint p = m_unique_colors[i].m_color[comp_index]; - uint w = m_unique_colors[i].m_weight; - uint8 s = m_best_solution.m_selectors[i]; - W[s] += (int64)w; - WP2[s] += (int64)w * p * 2; - WPP[s] += (int64)w * p * p; - } - const uint comp_limit = comp_index == 1 ? 64 : 32; - for (uint8 s = 0; s < 2; s++) { - uint64 best_error = error[s][0] = WPP[s]; - for (uint8 c = 1; c < comp_limit; c++) { - uint8 p = comp_index == 1 ? c << 2 | c >> 4 : c << 3 | c >> 2; - error[s][c] = W[s] * p * p - WP2[s] * p + WPP[s]; - if (error[s][c] < best_error) - best_error = error[s][c]; - } - best_remaining_error[s] = best_error; - } - for (uint8 s = 2; s < 4; s++) { - uint64 best_error = error[s][0] = WPP[s], d = W[s] - WP2[s], dd = W[s] << 1, e = WPP[s] + d; - for (uint p = 1; p < 256; p++, d += dd, e += d) { - error[s][p] = e; - if (e < best_error) - best_error = e; - } - best_remaining_error[s] = best_error; - } - for (uint8 s = 3; s; s--) - best_remaining_error[s - 1] += best_remaining_error[s]; -} - -void dxt1_endpoint_optimizer::optimize_endpoint_comps() { - compute_selectors(); - if (m_best_solution.m_alpha_block || !m_best_solution.m_error) - return; - color_quad_u8 source_low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true)); - color_quad_u8 source_high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true)); - uint64 error[4][256], best_remaining_error[4]; - for (uint comp_index = 0; comp_index < 3; comp_index++) { - uint8 p0 = source_low[comp_index]; - uint8 p1 = source_high[comp_index]; - color_quad_u8 
low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); - color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); - compute_endpoint_component_errors(comp_index, error, best_remaining_error); - uint64 best_error = error[0][low[comp_index]] + error[1][high[comp_index]] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; - if (best_remaining_error[0] >= best_error) - continue; - const uint comp_limit = comp_index == 1 ? 64 : 32; - for (uint8 c0 = 0; c0 < comp_limit; c0++) { - uint64 e0 = error[0][c0]; - if (e0 + best_remaining_error[1] >= best_error) - continue; - low[comp_index] = c0; - uint16 packed_low = dxt1_block::pack_color(low, false); - p0 = comp_index == 1 ? c0 << 2 | c0 >> 4 : c0 << 3 | c0 >> 2; - for (uint8 c1 = 0; c1 < comp_limit; c1++) { - uint64 e = e0 + error[1][c1]; - if (e + best_remaining_error[2] >= best_error) - continue; - p1 = comp_index == 1 ? c1 << 2 | c1 >> 4 : c1 << 3 | c1 >> 2; - e += error[2][(p0 * 2 + p1) / 3]; - if (e + best_remaining_error[3] >= best_error) - continue; - e += error[3][(p0 + p1 * 2) / 3]; - if (e >= best_error) - continue; - high[comp_index] = c1; - if (!evaluate_solution(dxt1_solution_coordinates(packed_low, dxt1_block::pack_color(high, false)))) - continue; - if (!m_best_solution.m_error) - return; - compute_selectors(); - compute_endpoint_component_errors(comp_index, error, best_remaining_error); - best_error = error[0][c0] + error[1][c1] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; - e0 = error[0][c0]; - if (e0 + best_remaining_error[1] >= best_error) - break; - } - } - } -} - -// Voxel adjacency delta coordinations. 
-static const struct adjacent_coords { - int8 x, y, z; -} g_adjacency[26] = { - {-1, -1, -1}, - {0, -1, -1}, - {1, -1, -1}, - {-1, 0, -1}, - {0, 0, -1}, - {1, 0, -1}, - {-1, 1, -1}, - {0, 1, -1}, - - {1, 1, -1}, - {-1, -1, 0}, - {0, -1, 0}, - {1, -1, 0}, - {-1, 0, 0}, - {1, 0, 0}, - {-1, 1, 0}, - {0, 1, 0}, - - {1, 1, 0}, - {-1, -1, 1}, - {0, -1, 1}, - {1, -1, 1}, - {-1, 0, 1}, - {0, 0, 1}, - {1, 0, 1}, - {-1, 1, 1}, - - {0, 1, 1}, - {1, 1, 1}}; - -// Attempt to refine current solution's endpoints given the current selectors using least squares. -bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) { - compute_selectors(); - - static const int w1Tab[4] = {3, 0, 2, 1}; - - static const int prods_0[4] = {0x00, 0x00, 0x02, 0x02}; - static const int prods_1[4] = {0x00, 0x09, 0x01, 0x04}; - static const int prods_2[4] = {0x09, 0x00, 0x04, 0x01}; - - double akku_0 = 0; - double akku_1 = 0; - double akku_2 = 0; - double At1_r, At1_g, At1_b; - double At2_r, At2_g, At2_b; - - At1_r = At1_g = At1_b = 0; - At2_r = At2_g = At2_b = 0; - for (uint i = 0; i < m_unique_colors.size(); i++) { - const color_quad_u8& c = m_unique_colors[i].m_color; - const double weight = m_unique_colors[i].m_weight; - - double r = c.r * weight; - double g = c.g * weight; - double b = c.b * weight; - int step = m_best_solution.m_selectors[i] ^ 1; - - int w1 = w1Tab[step]; - - akku_0 += prods_0[step] * weight; - akku_1 += prods_1[step] * weight; - akku_2 += prods_2[step] * weight; - At1_r += w1 * r; - At1_g += w1 * g; - At1_b += w1 * b; - At2_r += r; - At2_g += g; - At2_b += b; - } - - At2_r = 3 * At2_r - At1_r; - At2_g = 3 * At2_g - At1_g; - At2_b = 3 * At2_b - At1_b; - - double xx = akku_2; - double yy = akku_1; - double xy = akku_0; - - double t = xx * yy - xy * xy; - if (!yy || !xx || (fabs(t) < .0000125f)) - return false; - - double frb = (3.0f * 31.0f / 255.0f) / t; - double fg = frb * (63.0f / 31.0f); - - bool improved = false; - - if (refinement_level == 0) { - uint max16; - 
max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; - max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; - max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; - - uint min16; - min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; - min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; - min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; - - dxt1_solution_coordinates nc((uint16)min16, (uint16)max16); - nc.canonicalize(); - improved |= evaluate_solution(nc); - } else if (refinement_level == 1) { - // Try exploring the local lattice neighbors of the least squares optimized result. - color_quad_u8 e[2]; - - e[0].clear(); - e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); - e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); - e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); - - e[1].clear(); - e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); - e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); - e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); - - for (uint i = 0; i < 2; i++) { - for (int rr = -1; rr <= 1; rr++) { - for (int gr = -1; gr <= 1; gr++) { - for (int br = -1; br <= 1; br++) { - dxt1_solution_coordinates nc; - - color_quad_u8 c[2]; - c[0] = e[0]; - c[1] = e[1]; - - c[i][0] = (uint8)math::clamp(c[i][0] + rr, 0, 31); - c[i][1] = (uint8)math::clamp(c[i][1] + gr, 0, 63); - c[i][2] = (uint8)math::clamp(c[i][2] + br, 0, 31); - - nc.m_low_color = dxt1_block::pack_color(c[0], false); - nc.m_high_color = dxt1_block::pack_color(c[1], false); + static const int16 g_uber_probe_table[] = { 0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 
59, 91 }; + static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); - nc.canonicalize(); - improved |= evaluate_solution(nc); - } + struct unique_color_projection + { + unique_color color; + int64 projection; + }; + + static struct + { + bool operator()(unique_color_projection a, unique_color_projection b) const + { + return a.projection < b.projection; } - } - } - } else { - // Try even harder to explore the local lattice neighbors of the least squares optimized result. - color_quad_u8 e[2]; - e[0].clear(); - e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); - e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); - e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); - - e[1].clear(); - e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); - e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); - e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); - - for (int orr = -1; orr <= 1; orr++) { - for (int ogr = -1; ogr <= 1; ogr++) { - for (int obr = -1; obr <= 1; obr++) { - dxt1_solution_coordinates nc; - - color_quad_u8 c[2]; - c[0] = e[0]; - c[1] = e[1]; - - c[0][0] = (uint8)math::clamp(c[0][0] + orr, 0, 31); - c[0][1] = (uint8)math::clamp(c[0][1] + ogr, 0, 63); - c[0][2] = (uint8)math::clamp(c[0][2] + obr, 0, 31); - - for (int rr = -1; rr <= 1; rr++) { - for (int gr = -1; gr <= 1; gr++) { - for (int br = -1; br <= 1; br++) { - c[1][0] = (uint8)math::clamp(c[1][0] + rr, 0, 31); - c[1][1] = (uint8)math::clamp(c[1][1] + gr, 0, 63); - c[1][2] = (uint8)math::clamp(c[1][2] + br, 0, 31); - - nc.m_low_color = dxt1_block::pack_color(c[0], false); - nc.m_high_color = dxt1_block::pack_color(c[1], false); - nc.canonicalize(); - - improved |= evaluate_solution(nc); - } - } - } - } - } - } - } - - return improved; -} - 
-//----------------------------------------------------------------------------------------------------------------------------------------- - -// Primary endpoint optimization entrypoint. -void dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) { - vec3F orig_low_color(low_color); - vec3F orig_high_color(high_color); - - m_trial_solution.clear(); - - uint num_passes; - const int16* pProbe_table = g_uber_probe_table; - uint probe_range; - float dist_per_trial = .015625f; - - // How many probes, and the distance between each probe depends on the quality level. - switch (m_pParams->m_quality) { - case cCRNDXTQualitySuperFast: - pProbe_table = g_fast_probe_table; - probe_range = cFastProbeTableSize; - dist_per_trial = .027063293f; - num_passes = 1; - break; - case cCRNDXTQualityFast: - pProbe_table = g_fast_probe_table; - probe_range = cFastProbeTableSize; - dist_per_trial = .027063293f; - num_passes = 2; - break; - case cCRNDXTQualityNormal: - pProbe_table = g_normal_probe_table; - probe_range = cNormalProbeTableSize; - dist_per_trial = .027063293f; - num_passes = 2; - break; - case cCRNDXTQualityBetter: - pProbe_table = g_better_probe_table; - probe_range = cBetterProbeTableSize; - num_passes = 2; - break; - default: - pProbe_table = g_uber_probe_table; - probe_range = cUberProbeTableSize; - num_passes = 4; - break; - } - - if (m_pParams->m_endpoint_caching) { - // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions. - const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); - for (uint i = 0; i < num_prev_results; i++) - evaluate_solution(m_prev_results[i]); - - if (!m_best_solution.m_error) { - // Got lucky - one of the previous endpoints is optimal. 
- return_solution(); - return; - } - } - - if (m_pParams->m_quality >= cCRNDXTQualityBetter) { - //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution); - //refine_solution(); - - try_median4(orig_low_color, orig_high_color); - } - - uint probe_low[cUberProbeTableSize * 2 + 1]; - uint probe_high[cUberProbeTableSize * 2 + 1]; - - vec3F scaled_principle_axis[2]; - - scaled_principle_axis[1] = m_principle_axis * dist_per_trial; - scaled_principle_axis[1][0] *= 31.0f; - scaled_principle_axis[1][1] *= 63.0f; - scaled_principle_axis[1][2] *= 31.0f; - - scaled_principle_axis[0] = -scaled_principle_axis[1]; - - //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); - //initial_ofs[0] += .5f; - //initial_ofs[1] += .5f; - //initial_ofs[2] += .5f; - - low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f); - low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f); - low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f); - - high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f); - high_color[1] = math::clamp(high_color[1] * 63.0f, 0.0f, 63.0f); - high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f); - - int d[3]; - for (uint c = 0; c < 3; c++) - d[c] = math::float_to_int_round((high_color[c] - low_color[c]) * (c == 0 ? m_perceptual ? 16 : 2 : c == 1 ? m_perceptual ? 
25 : 1 : 2)); - crnlib::vector evaluated_color_projections(m_evaluated_colors.size()); - int64 average_projection = d[0] * (high_color[0] + low_color[0]) * 4 + d[1] * (high_color[1] + low_color[1]) * 2 + d[2] * (high_color[2] + low_color[2]) * 4; - for (uint i = 0; i < m_evaluated_colors.size(); i++) { - int64 delta = d[0] * m_evaluated_colors[i].m_color[0] + d[1] * m_evaluated_colors[i].m_color[1] + d[2] * m_evaluated_colors[i].m_color[2] - average_projection; - evaluated_color_projections[i].projection = delta * m_evaluated_colors[i].m_weight; - evaluated_color_projections[i].color = m_evaluated_colors[i]; - } - std::sort(evaluated_color_projections.begin(), evaluated_color_projections.end(), g_unique_color_projection_sort); - for (uint i = 0, iEnd = m_evaluated_colors.size(); i < iEnd; i++) - m_evaluated_colors[i] = evaluated_color_projections[i & 1 ? i >> 1 : iEnd - 1 - (i >> 1)].color; - - for (uint pass = 0; pass < num_passes; pass++) { - // Now separately sweep or probe the low and high colors along the principle axis, both positively and negatively. - // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution. - // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate" - // to areas with lower error. - // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established. - // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code. 
- if (pass) { - color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); - low_color = vec3F(low.r, low.g, low.b); - color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); - high_color = vec3F(high.r, high.g, high.b); + } g_unique_color_projection_sort; + + //----------------------------------------------------------------------------------------------------------------------------------------- + + dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() : + m_pParams(nullptr), + m_pResults(nullptr), + m_perceptual(false), + m_num_prev_results(0) + { + m_low_coords.reserve(512); + m_high_coords.reserve(512); + + m_unique_colors.reserve(512); + m_temp_unique_colors.reserve(512); + m_unique_packed_colors.reserve(512); + + m_norm_unique_colors.reserve(512); + m_norm_unique_colors_weighted.reserve(512); + + m_lo_cells.reserve(128); + m_hi_cells.reserve(128); + m_num_prev_results = 0; } - const uint64 prev_best_error = m_best_solution.m_error; - if (!prev_best_error) - break; + // All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables. + bool dxt1_endpoint_optimizer::try_average_block_as_solid() + { + uint64 tot_r = 0; + uint64 tot_g = 0; + uint64 tot_b = 0; + + uint total_weight = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + uint weight = m_unique_colors[i].m_weight; + total_weight += weight; + + tot_r += m_unique_colors[i].m_color.r * static_cast(weight); + tot_g += m_unique_colors[i].m_color.g * static_cast(weight); + tot_b += m_unique_colors[i].m_color.b * static_cast(weight); + } - // Sweep low endpoint along principle axis, record positions - int prev_packed_color[2] = {-1, -1}; - uint num_low_trials = 0; - vec3F initial_probe_low_color(low_color + vec3F(.5f)); - for (uint i = 0; i < probe_range; i++) { - const int ls = i ? 
0 : 1; - int x = pProbe_table[i]; + const uint half_total_weight = total_weight >> 1; + uint ave_r = static_cast((tot_r + half_total_weight) / total_weight); + uint ave_g = static_cast((tot_g + half_total_weight) / total_weight); + uint ave_b = static_cast((tot_b + half_total_weight) / total_weight); - for (int s = ls; s < 2; s++) { - vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x); + uint low_color = (ryg_dxt::OMatch5[ave_r][0] << 11) | (ryg_dxt::OMatch6[ave_g][0] << 5) | ryg_dxt::OMatch5[ave_b][0]; + uint high_color = (ryg_dxt::OMatch5[ave_r][1] << 11) | (ryg_dxt::OMatch6[ave_g][1] << 5) | ryg_dxt::OMatch5[ave_b][1]; + bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); - int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); - int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); + if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) + { + low_color = (ryg_dxt::OMatch5_3[ave_r][0] << 11) | (ryg_dxt::OMatch6_3[ave_g][0] << 5) | ryg_dxt::OMatch5_3[ave_b][0]; + high_color = (ryg_dxt::OMatch5_3[ave_r][1] << 11) | (ryg_dxt::OMatch6_3[ave_g][1] << 5) | ryg_dxt::OMatch5_3[ave_b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + } - int packed_color = b | (g << 5U) | (r << 11U); - if (packed_color != prev_packed_color[s]) { - probe_low[num_low_trials++] = packed_color; - prev_packed_color[s] = packed_color; + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + // Try compressing as all-solid using the other (non-average) colors in the block in uber. 
+ for (uint i = 0; i < m_unique_colors.size(); i++) + { + uint r = m_unique_colors[i].m_color[0]; + uint g = m_unique_colors[i].m_color[1]; + uint b = m_unique_colors[i].m_color[2]; + if ((r == ave_r) && (g == ave_g) && (b == ave_b)) + { + continue; + } + + uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; + uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + + if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) + { + low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; + high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + } + } } - } + + return improved; } - prev_packed_color[0] = -1; - prev_packed_color[1] = -1; + void dxt1_endpoint_optimizer::compute_vectors(const vec3F& perceptual_weights) + { + m_norm_unique_colors.resize(0); + m_norm_unique_colors_weighted.resize(0); + + m_mean_norm_color.clear(); + m_mean_norm_color_weighted.clear(); - // Sweep high endpoint along principle axis, record positions - uint num_high_trials = 0; - vec3F initial_probe_high_color(high_color + vec3F(.5f)); - for (uint i = 0; i < probe_range; i++) { - const int ls = i ? 
0 : 1; - int x = pProbe_table[i]; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& color = m_unique_colors[i].m_color; + const uint weight = m_unique_colors[i].m_weight; - for (int s = ls; s < 2; s++) { - vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x); + vec3F norm_color(color.r * 1.0f / 255.0f, color.g * 1.0f / 255.0f, color.b * 1.0f / 255.0f); + vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color)); - int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); - int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); - int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); + m_norm_unique_colors.push_back(norm_color); + m_norm_unique_colors_weighted.push_back(norm_color_weighted); + + m_mean_norm_color += norm_color * (float)weight; + m_mean_norm_color_weighted += norm_color_weighted * (float)weight; + } - int packed_color = b | (g << 5U) | (r << 11U); - if (packed_color != prev_packed_color[s]) { - probe_high[num_high_trials++] = packed_color; - prev_packed_color[s] = packed_color; + if (m_total_unique_color_weight) + { + m_mean_norm_color *= (1.0f / m_total_unique_color_weight); + m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight); + } + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + m_norm_unique_colors[i] -= m_mean_norm_color; + m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted; } - } } - // Now try all unique combinations. - for (uint i = 0; i < num_low_trials; i++) { - for (uint j = 0; j < num_high_trials; j++) { - dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]); - coords.canonicalize(); - evaluate_solution(coords); - } + // Compute PCA (principle axis, i.e. direction of largest variance) of input vectors. 
+ void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) + { + double cov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint i = 0; i < norm_colors.size(); i++) + { + const vec3F& v = norm_colors[i]; + float r = v[0]; + float g = v[1]; + float b = v[2]; + if (m_unique_colors[i].m_weight > 1) + { + const double weight = m_unique_colors[i].m_weight; + cov[0] += r * r * weight; + cov[1] += r * g * weight; + cov[2] += r * b * weight; + cov[3] += g * g * weight; + cov[4] += g * b * weight; + cov[5] += b * b * weight; + } + else + { + cov[0] += r * r; + cov[1] += r * g; + cov[2] += r * b; + cov[3] += g * g; + cov[4] += g * b; + cov[5] += b * b; + } + } + double vfr = .9f; + double vfg = 1.0f; + double vfb = .7f; + for (uint iter = 0; iter < 8; iter++) + { + double r = vfr * cov[0] + vfg * cov[1] + vfb * cov[2]; + double g = vfr * cov[1] + vfg * cov[3] + vfb * cov[4]; + double b = vfr * cov[2] + vfg * cov[4] + vfb * cov[5]; + double m = math::maximum(fabs(r), fabs(g), fabs(b)); + if (m > 1e-10) + { + m = 1.0f / m; + r *= m; + g *= m; + b *= m; + } + double delta = math::square(vfr - r) + math::square(vfg - g) + math::square(vfb - b); + vfr = r; + vfg = g; + vfb = b; + if ((iter > 2) && (delta < 1e-8)) + { + break; + } + } + double len = vfr * vfr + vfg * vfg + vfb * vfb; + if (len < 1e-10) + { + axis = def; + } + else + { + len = 1.0f / sqrt(len); + axis.set(static_cast(vfr * len), static_cast(vfg * len), static_cast(vfb * len)); + } } - if (m_pParams->m_quality >= cCRNDXTQualityNormal) { - // Generate new candidates by exploring the low color's direct lattice neighbors - color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + static const uint8 g_invTableNull[4] = { 0, 1, 2, 3 }; + static const uint8 g_invTableAlpha[4] = { 1, 0, 2, 3 }; + static const uint8 g_invTableColor[4] = { 1, 0, 3, 2 }; - for (int i = 0; i < 26; i++) { - int r = lc.r + g_adjacency[i].x; - if ((r < 0) || (r > 31)) - continue; 
+ // Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input. + void dxt1_endpoint_optimizer::return_solution() + { + compute_selectors(); + bool invert_selectors; - int g = lc.g + g_adjacency[i].y; - if ((g < 0) || (g > 63)) - continue; + if (m_best_solution.m_alpha_block) + { + invert_selectors = (m_best_solution.m_coords.m_low_color > m_best_solution.m_coords.m_high_color); + } + else + { + CRNLIB_ASSERT(m_best_solution.m_coords.m_low_color != m_best_solution.m_coords.m_high_color); - int b = lc.b + g_adjacency[i].z; - if ((b < 0) || (b > 31)) - continue; + invert_selectors = (m_best_solution.m_coords.m_low_color < m_best_solution.m_coords.m_high_color); + } - dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color); - coords.canonicalize(); - evaluate_solution(coords); - } + m_pResults->m_alternate_rounding = m_best_solution.m_alternate_rounding; + m_pResults->m_enforce_selector = m_best_solution.m_enforce_selector; + m_pResults->m_enforced_selector = m_best_solution.m_enforced_selector; + m_pResults->m_reordered = invert_selectors; + if (invert_selectors) + { + m_pResults->m_low_color = m_best_solution.m_coords.m_high_color; + m_pResults->m_high_color = m_best_solution.m_coords.m_low_color; + } + else + { + m_pResults->m_low_color = m_best_solution.m_coords.m_low_color; + m_pResults->m_high_color = m_best_solution.m_coords.m_high_color; + } + + const uint8* pInvert_table = g_invTableNull; + if (invert_selectors) + { + pInvert_table = m_best_solution.m_alpha_block ? g_invTableAlpha : g_invTableColor; + } + + const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? (m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; - if (m_pParams->m_quality == cCRNDXTQualityUber) { - // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis. 
- lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); + const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); + uint8* pDst_selectors = m_pResults->m_pSelectors; - for (int a = 0; a < 3; a++) { - int limit = (a == 1) ? 63 : 31; + if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha)) + { + uint32 c = utils::read_le32(pSrc_pixels); - for (int s = -2; s <= 2; s += 4) { - color_quad_u8 c(lc); - int q = c[a] + s; - if ((q < 0) || (q > limit)) - continue; + CRNLIB_ASSERT(c >= alpha_thresh); - c[a] = (uint8)q; + c |= 0xFF000000U; - dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color); - coords.canonicalize(); - evaluate_solution(coords); - } + unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); + CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); + + uint unique_color_index = it->second; + + uint selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; + + memset(pDst_selectors, selector, m_pParams->m_num_pixels); } - } + else + { + uint8* pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels; - // Generate new candidates by exploring the high color's direct lattice neighbors - color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + uint8 prev_selector = 0; + uint32 prev_color = 0; - for (int i = 0; i < 26; i++) { - int r = hc.r + g_adjacency[i].x; - if ((r < 0) || (r > 31)) - continue; + do + { + uint32 c = utils::read_le32(pSrc_pixels); + pSrc_pixels++; - int g = hc.g + g_adjacency[i].y; - if ((g < 0) || (g > 63)) - continue; + uint8 selector = 3; - int b = hc.b + g_adjacency[i].z; - if ((b < 0) || (b > 31)) - continue; + if (c >= alpha_thresh) + { + c |= 0xFF000000U; - dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false)); - coords.canonicalize(); - evaluate_solution(coords); - } + if (c == prev_color) + { + selector = 
prev_selector; + } + else + { + unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); - if (m_pParams->m_quality == cCRNDXTQualityUber) { - // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis. - hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); + CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); - for (int a = 0; a < 3; a++) { - int limit = (a == 1) ? 63 : 31; + uint unique_color_index = it->second; - for (int s = -2; s <= 2; s += 4) { - color_quad_u8 c(hc); - int q = c[a] + s; - if ((q < 0) || (q > limit)) - continue; + selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; - c[a] = (uint8)q; + prev_color = c; + prev_selector = selector; + } + } - dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false)); - coords.canonicalize(); - evaluate_solution(coords); - } + *pDst_selectors++ = selector; + } while (pDst_selectors != pDst_selectors_end); } - } - } - - if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error))) - break; - if (m_pParams->m_quality >= cCRNDXTQualityUber) { - // Attempt to refine current solution's endpoints given the current selectors using least squares. - refine_solution(1); - } - } - - if (m_pParams->m_quality >= cCRNDXTQualityNormal) { - if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha)) { - bool choose_solid_block = false; - if (m_best_solution.are_selectors_all_equal()) { - // All selectors equal - try various solid-block optimizations - choose_solid_block = try_average_block_as_solid(); - } - - if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) { - // Per-component 1D endpoint optimization. 
- optimize_endpoint_comps(); - } + m_pResults->m_alpha_block = m_best_solution.m_alpha_block; + m_pResults->m_error = m_best_solution.m_error; } - if (m_pParams->m_quality == cCRNDXTQualityUber) { - if (m_best_solution.m_error) { - // The pixels may have already been DXTc compressed by another compressor. - // It's usually possible to recover the endpoints used to previously pack the block. - try_combinatorial_encoding(); - } + // Per-component 1D endpoint optimization. + + void dxt1_endpoint_optimizer::compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]) + { + uint64 W[4] = {}, WP2[4] = {}, WPP[4] = {}; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + uint p = m_unique_colors[i].m_color[comp_index]; + uint w = m_unique_colors[i].m_weight; + uint8 s = m_best_solution.m_selectors[i]; + W[s] += (int64)w; + WP2[s] += (int64)w * p * 2; + WPP[s] += (int64)w * p * p; + } + const uint comp_limit = comp_index == 1 ? 64 : 32; + for (uint8 s = 0; s < 2; s++) + { + uint64 best_error = error[s][0] = WPP[s]; + for (uint8 c = 1; c < comp_limit; c++) + { + uint8 p = comp_index == 1 ? c << 2 | c >> 4 : c << 3 | c >> 2; + error[s][c] = W[s] * p * p - WP2[s] * p + WPP[s]; + if (error[s][c] < best_error) + { + best_error = error[s][c]; + } + } + best_remaining_error[s] = best_error; + } + for (uint8 s = 2; s < 4; s++) + { + uint64 best_error = error[s][0] = WPP[s], d = W[s] - WP2[s], dd = W[s] << 1, e = WPP[s] + d; + for (uint p = 1; p < 256; p++, d += dd, e += d) + { + error[s][p] = e; + if (e < best_error) + { + best_error = e; + } + } + best_remaining_error[s] = best_error; + } + for (uint8 s = 3; s; s--) + { + best_remaining_error[s - 1] += best_remaining_error[s]; + } } - } - - return_solution(); - - if (m_pParams->m_endpoint_caching) { - // Remember result for later reruse. 
- m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; - m_num_prev_results++; - } -} - -void dxt1_endpoint_optimizer::handle_multicolor_block() { - uint num_passes = 1; - vec3F perceptual_weights(1.0f); - - if (m_perceptual) { - // Compute RGB weighting for use in perceptual mode. - // The more saturated the block, the more the weights deviate from (1,1,1). - float ave_redness = 0; - float ave_blueness = 0; - float ave_l = 0; - - for (uint i = 0; i < m_unique_colors.size(); i++) { - const color_quad_u8& c = m_unique_colors[i].m_color; - int l = (c.r + c.g + c.b + 1) / 3; - float scale = (float)m_unique_colors[i].m_weight / math::maximum(1.0f, l); - ave_redness += scale * c.r; - ave_blueness += scale * c.b; - ave_l += l; + + void dxt1_endpoint_optimizer::optimize_endpoint_comps() + { + compute_selectors(); + if (m_best_solution.m_alpha_block || !m_best_solution.m_error) + { + return; + } + color_quad_u8 source_low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true)); + color_quad_u8 source_high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true)); + uint64 error[4][256], best_remaining_error[4]; + for (uint comp_index = 0; comp_index < 3; comp_index++) + { + uint8 p0 = source_low[comp_index]; + uint8 p1 = source_high[comp_index]; + color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + compute_endpoint_component_errors(comp_index, error, best_remaining_error); + uint64 best_error = error[0][low[comp_index]] + error[1][high[comp_index]] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; + if (best_remaining_error[0] >= best_error) + { + continue; + } + const uint comp_limit = comp_index == 1 ? 
64 : 32; + for (uint8 c0 = 0; c0 < comp_limit; c0++) + { + uint64 e0 = error[0][c0]; + if (e0 + best_remaining_error[1] >= best_error) + { + continue; + } + low[comp_index] = c0; + uint16 packed_low = dxt1_block::pack_color(low, false); + p0 = comp_index == 1 ? c0 << 2 | c0 >> 4 : c0 << 3 | c0 >> 2; + for (uint8 c1 = 0; c1 < comp_limit; c1++) + { + uint64 e = e0 + error[1][c1]; + if (e + best_remaining_error[2] >= best_error) + { + continue; + } + p1 = comp_index == 1 ? c1 << 2 | c1 >> 4 : c1 << 3 | c1 >> 2; + e += error[2][(p0 * 2 + p1) / 3]; + if (e + best_remaining_error[3] >= best_error) + { + continue; + } + e += error[3][(p0 + p1 * 2) / 3]; + if (e >= best_error) + { + continue; + } + high[comp_index] = c1; + if (!evaluate_solution(dxt1_solution_coordinates(packed_low, dxt1_block::pack_color(high, false)))) + { + continue; + } + if (!m_best_solution.m_error) + { + return; + } + compute_selectors(); + compute_endpoint_component_errors(comp_index, error, best_remaining_error); + best_error = error[0][c0] + error[1][c1] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; + e0 = error[0][c0]; + if (e0 + best_remaining_error[1] >= best_error) + { + break; + } + } + } + } } - ave_redness /= m_total_unique_color_weight; - ave_blueness /= m_total_unique_color_weight; - ave_l /= m_total_unique_color_weight; - ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f); + // Voxel adjacency delta coordinations. 
+ static const struct adjacent_coords + { + int8 x, y, z; + } g_adjacency[26] = { + { -1, -1, -1 }, + { 0, -1, -1 }, + { 1, -1, -1 }, + { -1, 0, -1 }, + { 0, 0, -1 }, + { 1, 0, -1 }, + { -1, 1, -1 }, + { 0, 1, -1 }, + + { 1, 1, -1 }, + { -1, -1, 0 }, + { 0, -1, 0 }, + { 1, -1, 0 }, + { -1, 0, 0 }, + { 1, 0, 0 }, + { -1, 1, 0 }, + { 0, 1, 0 }, + + { 1, 1, 0 }, + { -1, -1, 1 }, + { 0, -1, 1 }, + { 1, -1, 1 }, + { -1, 0, 1 }, + { 0, 0, 1 }, + { 1, 0, 1 }, + { -1, 1, 1 }, + + { 0, 1, 1 }, + { 1, 1, 1 } + }; + + // Attempt to refine current solution's endpoints given the current selectors using least squares. + bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) + { + compute_selectors(); - float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f / 3.0f), 2.75f); + static const int w1Tab[4] = { 3, 0, 2, 1 }; + + static const int prods_0[4] = { 0x00, 0x00, 0x02, 0x02 }; + static const int prods_1[4] = { 0x00, 0x09, 0x01, 0x04 }; + static const int prods_2[4] = { 0x09, 0x00, 0x04, 0x01 }; + + double akku_0 = 0; + double akku_1 = 0; + double akku_2 = 0; + double At1_r, At1_g, At1_b; + double At2_r, At2_g, At2_b; + + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& c = m_unique_colors[i].m_color; + const double weight = m_unique_colors[i].m_weight; + + double r = c.r * weight; + double g = c.g * weight; + double b = c.b * weight; + int step = m_best_solution.m_selectors[i] ^ 1; + + int w1 = w1Tab[step]; + + akku_0 += prods_0[step] * weight; + akku_1 += prods_1[step] * weight; + akku_2 += prods_2[step] * weight; + At1_r += w1 * r; + At1_g += w1 * g; + At1_b += w1 * b; + At2_r += r; + At2_g += g; + At2_b += b; + } - if (p >= 1.0f) - num_passes = 1; - else { - num_passes = 2; - perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p); - } - } - - for (uint pass_index = 0; pass_index < num_passes; pass_index++) { - 
compute_vectors(perceptual_weights); - compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f)); - m_principle_axis[0] /= perceptual_weights[0]; - m_principle_axis[1] /= perceptual_weights[1]; - m_principle_axis[2] /= perceptual_weights[2]; - m_principle_axis.normalize_in_place(); - if (num_passes > 1) { - // Check for obviously wild principle axes and try to compensate by backing off the component weightings. - if (fabs(m_principle_axis[0]) >= .795f) - perceptual_weights.set(.424f, .6f, .072f); - else if (fabs(m_principle_axis[2]) >= .795f) - perceptual_weights.set(.212f, .6f, .212f); - else - break; - } - } - - // Find bounds of projection onto (potentially skewed) principle axis. - float l = 1e+9; - float h = -1e+9; - - for (uint i = 0; i < m_norm_unique_colors.size(); i++) { - float d = m_norm_unique_colors[i] * m_principle_axis; - l = math::minimum(l, d); - h = math::maximum(h, d); - } - - vec3F low_color(m_mean_norm_color + l * m_principle_axis); - vec3F high_color(m_mean_norm_color + h * m_principle_axis); - - if (!low_color.is_within_bounds(0.0f, 1.0f)) { - // Low color is outside the lattice, so bring it back in by casting a ray. - vec3F coord; - float t; - aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); - intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds); - if (res == intersection::cSuccess) - low_color = coord; - } - - if (!high_color.is_within_bounds(0.0f, 1.0f)) { - // High color is outside the lattice, so bring it back in by casting a ray. - vec3F coord; - float t; - aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); - intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds); - if (res == intersection::cSuccess) - high_color = coord; - } - - // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point. 
- optimize_endpoints(low_color, high_color); -} - -// Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints. -bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) { - vec3F means[4]; - - if (m_unique_colors.size() <= 4) { - for (uint i = 0; i < 4; i++) - means[i] = m_norm_unique_colors[math::minimum(m_norm_unique_colors.size() - 1, i)]; - } else { - means[0] = low_color - m_mean_norm_color; - means[3] = high_color - m_mean_norm_color; - means[1] = vec3F::lerp(means[0], means[3], 1.0f / 3.0f); - means[2] = vec3F::lerp(means[0], means[3], 2.0f / 3.0f); - - fast_random rm; - - const uint cMaxIters = 8; - uint reassign_rover = 0; - float prev_total_dist = math::cNearlyInfinite; - for (uint iter = 0; iter < cMaxIters; iter++) { - vec3F new_means[4]; - float new_weights[4]; - utils::zero_object(new_means); - utils::zero_object(new_weights); - - float total_dist = 0; - - for (uint i = 0; i < m_unique_colors.size(); i++) { - const vec3F& v = m_norm_unique_colors[i]; - - float best_dist = means[0].squared_distance(v); - int best_index = 0; - - for (uint j = 1; j < 4; j++) { - float dist = means[j].squared_distance(v); - if (dist < best_dist) { - best_dist = dist; - best_index = j; - } - } - - total_dist += best_dist; - - new_means[best_index] += v * (float)m_unique_colors[i].m_weight; - new_weights[best_index] += (float)m_unique_colors[i].m_weight; - } - - uint highest_index = 0; - float highest_weight = 0; - bool empty_cell = false; - for (uint j = 0; j < 4; j++) { - if (new_weights[j] > 0.0f) { - means[j] = new_means[j] / new_weights[j]; - if (new_weights[j] > highest_weight) { - highest_weight = new_weights[j]; - highest_index = j; - } - } else - empty_cell = true; - } - - if (!empty_cell) { - if (fabs(total_dist - prev_total_dist) < .00001f) - break; - - prev_total_dist = total_dist; - } else - prev_total_dist = math::cNearlyInfinite; - - if ((empty_cell) 
&& (iter != (cMaxIters - 1))) { - const uint ri = (highest_index + reassign_rover) & 3; - reassign_rover++; - - for (uint j = 0; j < 4; j++) { - if (new_weights[j] == 0.0f) { - means[j] = means[ri]; - means[j] += vec3F::make_random(rm, -.00196f, .00196f); - } - } - } - } - } + At2_r = 3 * At2_r - At1_r; + At2_g = 3 * At2_g - At1_g; + At2_b = 3 * At2_b - At1_b; - bool improved = false; + double xx = akku_2; + double yy = akku_1; + double xy = akku_0; - for (uint i = 0; i < 3; i++) { - for (uint j = i + 1; j < 4; j++) { - const vec3F v0(means[i] + m_mean_norm_color); - const vec3F v1(means[j] + m_mean_norm_color); + double t = xx * yy - xy * xy; + if (!yy || !xx || (fabs(t) < .0000125f)) + { + return false; + } - dxt1_solution_coordinates sc( - color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255), - color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false); + double frb = (3.0f * 31.0f / 255.0f) / t; + double fg = frb * (63.0f / 31.0f); - sc.canonicalize(); - improved |= evaluate_solution(sc); - } - } - - improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 1 : 0); - - return improved; -} - -// Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error, -// and use the candidate if it results in less error than the best found result so far. -bool dxt1_endpoint_optimizer::evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding) { - color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, false); - color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, false); - uint64 rError = c0.r < c1.r ? m_rDist[c0.r].low + m_rDist[c1.r].high : m_rDist[c0.r].high + m_rDist[c1.r].low; - uint64 gError = c0.g < c1.g ? m_gDist[c0.g].low + m_gDist[c1.g].high : m_gDist[c0.g].high + m_gDist[c1.g].low; - uint64 bError = c0.b < c1.b ? 
m_bDist[c0.b].low + m_bDist[c1.b].high : m_bDist[c0.b].high + m_bDist[c1.b].low; - if (rError + gError + bError >= m_best_solution.m_error) - return false; - if (!alternate_rounding) { - solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | coords.m_high_color << 16)); - if (!solution_res.second) - return false; - } - if (m_evaluate_hc) - return m_perceptual ? evaluate_solution_hc_perceptual(coords, alternate_rounding) : evaluate_solution_hc_uniform(coords, alternate_rounding); - if (m_pParams->m_quality >= cCRNDXTQualityBetter) - return evaluate_solution_uber(coords, alternate_rounding); - return evaluate_solution_fast(coords, alternate_rounding); -} - -inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { - if (perceptual) { - return color::color_distance(true, e1, e2, alpha); - } else if (m_pParams->m_grayscale_sampling) { - // Computes error assuming shader will be converting the result to grayscale. 
- int y0 = color::RGB_to_Y(e1); - int y1 = color::RGB_to_Y(e2); - int yd = y0 - y1; - if (alpha) { - int da = (int)e1[3] - (int)e2[3]; - return yd * yd + da * da; - } else { - return yd * yd; + bool improved = false; + + if (refinement_level == 0) + { + uint max16; + max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; + max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; + max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; + + uint min16; + min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; + min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; + min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; + + dxt1_solution_coordinates nc((uint16)min16, (uint16)max16); + nc.canonicalize(); + improved |= evaluate_solution(nc); + } + else if (refinement_level == 1) + { + // Try exploring the local lattice neighbors of the least squares optimized result. 
+ color_quad_u8 e[2]; + + e[0].clear(); + e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); + e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); + e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); + + e[1].clear(); + e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); + e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); + e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); + + for (uint i = 0; i < 2; i++) + { + for (int rr = -1; rr <= 1; rr++) + { + for (int gr = -1; gr <= 1; gr++) + { + for (int br = -1; br <= 1; br++) + { + dxt1_solution_coordinates nc; + + color_quad_u8 c[2]; + c[0] = e[0]; + c[1] = e[1]; + + c[i][0] = (uint8)math::clamp(c[i][0] + rr, 0, 31); + c[i][1] = (uint8)math::clamp(c[i][1] + gr, 0, 63); + c[i][2] = (uint8)math::clamp(c[i][2] + br, 0, 31); + + nc.m_low_color = dxt1_block::pack_color(c[0], false); + nc.m_high_color = dxt1_block::pack_color(c[1], false); + + nc.canonicalize(); + improved |= evaluate_solution(nc); + } + } + } + } + } + else + { + // Try even harder to explore the local lattice neighbors of the least squares optimized result. 
+ color_quad_u8 e[2]; + e[0].clear(); + e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); + e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); + e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); + + e[1].clear(); + e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); + e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); + e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); + + for (int orr = -1; orr <= 1; orr++) + { + for (int ogr = -1; ogr <= 1; ogr++) + { + for (int obr = -1; obr <= 1; obr++) + { + dxt1_solution_coordinates nc; + + color_quad_u8 c[2]; + c[0] = e[0]; + c[1] = e[1]; + + c[0][0] = (uint8)math::clamp(c[0][0] + orr, 0, 31); + c[0][1] = (uint8)math::clamp(c[0][1] + ogr, 0, 63); + c[0][2] = (uint8)math::clamp(c[0][2] + obr, 0, 31); + + for (int rr = -1; rr <= 1; rr++) + { + for (int gr = -1; gr <= 1; gr++) + { + for (int br = -1; br <= 1; br++) + { + c[1][0] = (uint8)math::clamp(c[1][0] + rr, 0, 31); + c[1][1] = (uint8)math::clamp(c[1][1] + gr, 0, 63); + c[1][2] = (uint8)math::clamp(c[1][2] + br, 0, 31); + + nc.m_low_color = dxt1_block::pack_color(c[0], false); + nc.m_high_color = dxt1_block::pack_color(c[1], false); + nc.canonicalize(); + + improved |= evaluate_solution(nc); + } + } + } + } + } + } + } + + return improved; } - } else { - return color::color_distance(false, e1, e2, alpha); - } -} - -bool dxt1_endpoint_optimizer::evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding) { - m_trial_solution.m_coords = coords; - m_trial_solution.m_selectors.resize(m_unique_colors.size()); - m_trial_solution.m_error = m_best_solution.m_error; - m_trial_solution.m_alpha_block = false; - - uint first_block_type = 0; - uint last_block_type = 1; - - if ((m_pParams->m_pixels_have_alpha) || 
(m_pParams->m_force_alpha_blocks)) - first_block_type = 1; - else if (!m_pParams->m_use_alpha_blocks) - last_block_type = 0; - - m_trial_selectors.resize(m_unique_colors.size()); - - color_quad_u8 colors[cDXT1SelectorValues]; - - colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); - colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); - - for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { - uint64 trial_error = 0; - - if (!block_type) { - colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0); - colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 0); - - if (m_perceptual) { - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; - - uint best_error = color_distance(true, c, colors[0], false); - uint best_color_index = 0; - - uint err = color_distance(true, c, colors[1], false); - if (err < best_error) { - best_error = err; - best_color_index = 1; - } - - err = color_distance(true, c, colors[2], false); - if (err < best_error) { - best_error = err; - best_color_index = 2; - } - - err = color_distance(true, c, colors[3], false); - if (err < best_error) { - best_error = err; - best_color_index = 3; - } - - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) + + //----------------------------------------------------------------------------------------------------------------------------------------- + + // Primary endpoint optimization entrypoint. 
+ void dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) + { + vec3F orig_low_color(low_color); + vec3F orig_high_color(high_color); + + m_trial_solution.clear(); + + uint num_passes; + const int16* pProbe_table = g_uber_probe_table; + uint probe_range; + float dist_per_trial = .015625f; + + // How many probes, and the distance between each probe depends on the quality level. + switch (m_pParams->m_quality) + { + case cCRNDXTQualitySuperFast: + pProbe_table = g_fast_probe_table; + probe_range = cFastProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 1; + break; + case cCRNDXTQualityFast: + pProbe_table = g_fast_probe_table; + probe_range = cFastProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 2; + break; + case cCRNDXTQualityNormal: + pProbe_table = g_normal_probe_table; + probe_range = cNormalProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 2; + break; + case cCRNDXTQualityBetter: + pProbe_table = g_better_probe_table; + probe_range = cBetterProbeTableSize; + num_passes = 2; + break; + default: + pProbe_table = g_uber_probe_table; + probe_range = cUberProbeTableSize; + num_passes = 4; break; + } + + if (m_pParams->m_endpoint_caching) + { + // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions. + const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); + for (uint i = 0; i < num_prev_results; i++) + { + evaluate_solution(m_prev_results[i]); + } - m_trial_selectors[unique_color_index] = static_cast(best_color_index); + if (!m_best_solution.m_error) + { + // Got lucky - one of the previous endpoints is optimal. 
+ return_solution(); + return; + } } - } else { - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; - uint best_error = color_distance(false, c, colors[0], false); - uint best_color_index = 0; + if (m_pParams->m_quality >= cCRNDXTQualityBetter) + { + //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution); + //refine_solution(); - uint err = color_distance(false, c, colors[1], false); - if (err < best_error) { - best_error = err; - best_color_index = 1; - } + try_median4(orig_low_color, orig_high_color); + } - err = color_distance(false, c, colors[2], false); - if (err < best_error) { - best_error = err; - best_color_index = 2; - } + uint probe_low[cUberProbeTableSize * 2 + 1]; + uint probe_high[cUberProbeTableSize * 2 + 1]; - err = color_distance(false, c, colors[3], false); - if (err < best_error) { - best_error = err; - best_color_index = 3; - } + vec3F scaled_principle_axis[2]; - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) - break; + scaled_principle_axis[1] = m_principle_axis * dist_per_trial; + scaled_principle_axis[1][0] *= 31.0f; + scaled_principle_axis[1][1] *= 63.0f; + scaled_principle_axis[1][2] *= 31.0f; + + scaled_principle_axis[0] = -scaled_principle_axis[1]; + + //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); + //initial_ofs[0] += .5f; + //initial_ofs[1] += .5f; + //initial_ofs[2] += .5f; - m_trial_selectors[unique_color_index] = static_cast(best_color_index); + low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f); + low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f); + low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f); + + high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f); + high_color[1] = math::clamp(high_color[1] * 63.0f, 
0.0f, 63.0f); + high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f); + + int d[3]; + for (uint c = 0; c < 3; c++) + { + d[c] = math::float_to_int_round((high_color[c] - low_color[c]) * (c == 0 ? m_perceptual ? 16 : 2 : c == 1 ? m_perceptual ? 25 : 1 + : 2)); + } + crnlib::vector evaluated_color_projections(m_evaluated_colors.size()); + int64 average_projection = d[0] * (high_color[0] + low_color[0]) * 4 + d[1] * (high_color[1] + low_color[1]) * 2 + d[2] * (high_color[2] + low_color[2]) * 4; + for (uint i = 0; i < m_evaluated_colors.size(); i++) + { + int64 delta = d[0] * m_evaluated_colors[i].m_color[0] + d[1] * m_evaluated_colors[i].m_color[1] + d[2] * m_evaluated_colors[i].m_color[2] - average_projection; + evaluated_color_projections[i].projection = delta * m_evaluated_colors[i].m_weight; + evaluated_color_projections[i].color = m_evaluated_colors[i]; + } + std::sort(evaluated_color_projections.begin(), evaluated_color_projections.end(), g_unique_color_projection_sort); + for (uint i = 0, iEnd = m_evaluated_colors.size(); i < iEnd; i++) + { + m_evaluated_colors[i] = evaluated_color_projections[i & 1 ? i >> 1 : iEnd - 1 - (i >> 1)].color; } - } - } else { - colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); - if (m_perceptual) { - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + for (uint pass = 0; pass < num_passes; pass++) + { + // Now separately sweep or probe the low and high colors along the principle axis, both positively and negatively. + // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution. 
+ // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate" + // to areas with lower error. + // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established. + // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code. + if (pass) + { + color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + low_color = vec3F(low.r, low.g, low.b); + color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + high_color = vec3F(high.r, high.g, high.b); + } - uint best_error = color_distance(true, c, colors[0], false); - uint best_color_index = 0; + const uint64 prev_best_error = m_best_solution.m_error; + if (!prev_best_error) + { + break; + } - uint err = color_distance(true, c, colors[1], false); - if (err < best_error) { - best_error = err; - best_color_index = 1; - } + // Sweep low endpoint along principle axis, record positions + int prev_packed_color[2] = { -1, -1 }; + uint num_low_trials = 0; + vec3F initial_probe_low_color(low_color + vec3F(.5f)); + for (uint i = 0; i < probe_range; i++) + { + const int ls = i ? 
0 : 1; + int x = pProbe_table[i]; + + for (int s = ls; s < 2; s++) + { + vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x); + + int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); + + int packed_color = b | (g << 5U) | (r << 11U); + if (packed_color != prev_packed_color[s]) + { + probe_low[num_low_trials++] = packed_color; + prev_packed_color[s] = packed_color; + } + } + } - err = color_distance(true, c, colors[2], false); - if (err < best_error) { - best_error = err; - best_color_index = 2; - } + prev_packed_color[0] = -1; + prev_packed_color[1] = -1; + + // Sweep high endpoint along principle axis, record positions + uint num_high_trials = 0; + vec3F initial_probe_high_color(high_color + vec3F(.5f)); + for (uint i = 0; i < probe_range; i++) + { + const int ls = i ? 0 : 1; + int x = pProbe_table[i]; + + for (int s = ls; s < 2; s++) + { + vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x); + + int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); + + int packed_color = b | (g << 5U) | (r << 11U); + if (packed_color != prev_packed_color[s]) + { + probe_high[num_high_trials++] = packed_color; + prev_packed_color[s] = packed_color; + } + } + } - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) - break; + // Now try all unique combinations. 
+ for (uint i = 0; i < num_low_trials; i++) + { + for (uint j = 0; j < num_high_trials; j++) + { + dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]); + coords.canonicalize(); + evaluate_solution(coords); + } + } - m_trial_selectors[unique_color_index] = static_cast(best_color_index); - } - } else { - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + if (m_pParams->m_quality >= cCRNDXTQualityNormal) + { + // Generate new candidates by exploring the low color's direct lattice neighbors + color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + + for (int i = 0; i < 26; i++) + { + int r = lc.r + g_adjacency[i].x; + if ((r < 0) || (r > 31)) + { + continue; + } + + int g = lc.g + g_adjacency[i].y; + if ((g < 0) || (g > 63)) + { + continue; + } + + int b = lc.b + g_adjacency[i].z; + if ((b < 0) || (b > 31)) + { + continue; + } + + dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color); + coords.canonicalize(); + evaluate_solution(coords); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis. + lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); + + for (int a = 0; a < 3; a++) + { + int limit = (a == 1) ? 
63 : 31; + + for (int s = -2; s <= 2; s += 4) + { + color_quad_u8 c(lc); + int q = c[a] + s; + if ((q < 0) || (q > limit)) + { + continue; + } + + c[a] = (uint8)q; + + dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color); + coords.canonicalize(); + evaluate_solution(coords); + } + } + } + + // Generate new candidates by exploring the high color's direct lattice neighbors + color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + + for (int i = 0; i < 26; i++) + { + int r = hc.r + g_adjacency[i].x; + if ((r < 0) || (r > 31)) + { + continue; + } + + int g = hc.g + g_adjacency[i].y; + if ((g < 0) || (g > 63)) + { + continue; + } + + int b = hc.b + g_adjacency[i].z; + if ((b < 0) || (b > 31)) + { + continue; + } + + dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false)); + coords.canonicalize(); + evaluate_solution(coords); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis. + hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); + + for (int a = 0; a < 3; a++) + { + int limit = (a == 1) ? 
63 : 31; + + for (int s = -2; s <= 2; s += 4) + { + color_quad_u8 c(hc); + int q = c[a] + s; + if ((q < 0) || (q > limit)) + { + continue; + } + + c[a] = (uint8)q; + + dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false)); + coords.canonicalize(); + evaluate_solution(coords); + } + } + } + } - uint best_error = color_distance(false, c, colors[0], false); - uint best_color_index = 0; + if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error))) + { + break; + } - uint err = color_distance(false, c, colors[1], false); - if (err < best_error) { - best_error = err; - best_color_index = 1; - } + if (m_pParams->m_quality >= cCRNDXTQualityUber) + { + // Attempt to refine current solution's endpoints given the current selectors using least squares. + refine_solution(1); + } + } - err = color_distance(false, c, colors[2], false); - if (err < best_error) { - best_error = err; - best_color_index = 2; - } + if (m_pParams->m_quality >= cCRNDXTQualityNormal) + { + if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha)) + { + bool choose_solid_block = false; + if (m_best_solution.are_selectors_all_equal()) + { + // All selectors equal - try various solid-block optimizations + choose_solid_block = try_average_block_as_solid(); + } + + if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) + { + // Per-component 1D endpoint optimization. + optimize_endpoint_comps(); + } + } - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) - break; + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + if (m_best_solution.m_error) + { + // The pixels may have already been DXTc compressed by another compressor. + // It's usually possible to recover the endpoints used to previously pack the block. 
+ try_combinatorial_encoding(); + } + } + } - m_trial_selectors[unique_color_index] = static_cast(best_color_index); + return_solution(); + + if (m_pParams->m_endpoint_caching) + { + // Remember result for later reruse. + m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; + m_num_prev_results++; } - } } - if (trial_error < m_trial_solution.m_error) { - m_trial_solution.m_error = trial_error; - m_trial_solution.m_alpha_block = (block_type != 0); - m_trial_solution.m_selectors = m_trial_selectors; - m_trial_solution.m_alternate_rounding = alternate_rounding; - } - } - - m_trial_solution.m_enforce_selector = !m_trial_solution.m_alpha_block && m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color; - if (m_trial_solution.m_enforce_selector) { - uint s; - if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { - m_trial_solution.m_coords.m_low_color++; - s = 1; - } else { - m_trial_solution.m_coords.m_high_color--; - s = 0; - } + void dxt1_endpoint_optimizer::handle_multicolor_block() + { + uint num_passes = 1; + vec3F perceptual_weights(1.0f); + + if (m_perceptual) + { + // Compute RGB weighting for use in perceptual mode. + // The more saturated the block, the more the weights deviate from (1,1,1). 
+ float ave_redness = 0; + float ave_blueness = 0; + float ave_l = 0; + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& c = m_unique_colors[i].m_color; + int l = (c.r + c.g + c.b + 1) / 3; + float scale = (float)m_unique_colors[i].m_weight / math::maximum(1.0f, l); + ave_redness += scale * c.r; + ave_blueness += scale * c.b; + ave_l += l; + } - for (uint i = 0; i < m_unique_colors.size(); i++) - m_trial_solution.m_selectors[i] = static_cast(s); - m_trial_solution.m_enforced_selector = s; - } + ave_redness /= m_total_unique_color_weight; + ave_blueness /= m_total_unique_color_weight; + ave_l /= m_total_unique_color_weight; + ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f); - if (m_trial_solution.m_error < m_best_solution.m_error) { - m_best_solution = m_trial_solution; - return true; - } + float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f / 3.0f), 2.75f); - return false; -} + if (p >= 1.0f) + { + num_passes = 1; + } + else + { + num_passes = 2; + perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p); + } + } -bool dxt1_endpoint_optimizer::evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding) { - m_trial_solution.m_coords = coords; - m_trial_solution.m_selectors.resize(m_unique_colors.size()); - m_trial_solution.m_error = m_best_solution.m_error; - m_trial_solution.m_alpha_block = false; + for (uint pass_index = 0; pass_index < num_passes; pass_index++) + { + compute_vectors(perceptual_weights); + compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f)); + m_principle_axis[0] /= perceptual_weights[0]; + m_principle_axis[1] /= perceptual_weights[1]; + m_principle_axis[2] /= perceptual_weights[2]; + m_principle_axis.normalize_in_place(); + if (num_passes > 1) + { + // Check for obviously wild principle axes and try to compensate by backing off the component weightings. 
+ if (fabs(m_principle_axis[0]) >= .795f) + { + perceptual_weights.set(.424f, .6f, .072f); + } + else if (fabs(m_principle_axis[2]) >= .795f) + { + perceptual_weights.set(.212f, .6f, .212f); + } + else + { + break; + } + } + } - uint first_block_type = 0; - uint last_block_type = 1; + // Find bounds of projection onto (potentially skewed) principle axis. + float l = 1e+9; + float h = -1e+9; - if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) - first_block_type = 1; - else if (!m_pParams->m_use_alpha_blocks) - last_block_type = 0; + for (uint i = 0; i < m_norm_unique_colors.size(); i++) + { + float d = m_norm_unique_colors[i] * m_principle_axis; + l = math::minimum(l, d); + h = math::maximum(h, d); + } - m_trial_selectors.resize(m_unique_colors.size()); + vec3F low_color(m_mean_norm_color + l * m_principle_axis); + vec3F high_color(m_mean_norm_color + h * m_principle_axis); + + if (!low_color.is_within_bounds(0.0f, 1.0f)) + { + // Low color is outside the lattice, so bring it back in by casting a ray. + vec3F coord; + float t; + aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); + intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds); + if (res == intersection::cSuccess) + { + low_color = coord; + } + } - color_quad_u8 colors[cDXT1SelectorValues]; - colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); - colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); + if (!high_color.is_within_bounds(0.0f, 1.0f)) + { + // High color is outside the lattice, so bring it back in by casting a ray. 
+ vec3F coord; + float t; + aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); + intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds); + if (res == intersection::cSuccess) + { + high_color = coord; + } + } - int vr = colors[1].r - colors[0].r; - int vg = colors[1].g - colors[0].g; - int vb = colors[1].b - colors[0].b; - if (m_perceptual) { - vr *= 8; - vg *= 24; - } + // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point. + optimize_endpoints(low_color, high_color); + } - int stops[4]; - stops[0] = colors[0].r * vr + colors[0].g * vg + colors[0].b * vb; - stops[1] = colors[1].r * vr + colors[1].g * vg + colors[1].b * vb; + // Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints. + bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) + { + vec3F means[4]; - int dirr = vr * 2; - int dirg = vg * 2; - int dirb = vb * 2; + if (m_unique_colors.size() <= 4) + { + for (uint i = 0; i < 4; i++) + { + means[i] = m_norm_unique_colors[math::minimum(m_norm_unique_colors.size() - 1, i)]; + } + } + else + { + means[0] = low_color - m_mean_norm_color; + means[3] = high_color - m_mean_norm_color; + means[1] = vec3F::lerp(means[0], means[3], 1.0f / 3.0f); + means[2] = vec3F::lerp(means[0], means[3], 2.0f / 3.0f); + + fast_random rm; + + const uint cMaxIters = 8; + uint reassign_rover = 0; + float prev_total_dist = math::cNearlyInfinite; + for (uint iter = 0; iter < cMaxIters; iter++) + { + vec3F new_means[4]; + float new_weights[4]; + utils::zero_object(new_means); + utils::zero_object(new_weights); + + float total_dist = 0; + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const vec3F& v = m_norm_unique_colors[i]; + + float best_dist = means[0].squared_distance(v); + int best_index = 0; + + for (uint j = 1; j < 4; j++) + { + float dist = 
means[j].squared_distance(v); + if (dist < best_dist) + { + best_dist = dist; + best_index = j; + } + } + + total_dist += best_dist; + + new_means[best_index] += v * (float)m_unique_colors[i].m_weight; + new_weights[best_index] += (float)m_unique_colors[i].m_weight; + } + + uint highest_index = 0; + float highest_weight = 0; + bool empty_cell = false; + for (uint j = 0; j < 4; j++) + { + if (new_weights[j] > 0.0f) + { + means[j] = new_means[j] / new_weights[j]; + if (new_weights[j] > highest_weight) + { + highest_weight = new_weights[j]; + highest_index = j; + } + } + else + { + empty_cell = true; + } + } + + if (!empty_cell) + { + if (fabs(total_dist - prev_total_dist) < .00001f) + { + break; + } + + prev_total_dist = total_dist; + } + else + { + prev_total_dist = math::cNearlyInfinite; + } + + if ((empty_cell) && (iter != (cMaxIters - 1))) + { + const uint ri = (highest_index + reassign_rover) & 3; + reassign_rover++; + + for (uint j = 0; j < 4; j++) + { + if (new_weights[j] == 0.0f) + { + means[j] = means[ri]; + means[j] += vec3F::make_random(rm, -.00196f, .00196f); + } + } + } + } + } - for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { - uint64 trial_error = 0; + bool improved = false; - if (!block_type) { - colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 255U); - colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U); + for (uint i = 0; i < 3; i++) + { + for (uint j = i + 1; j < 4; j++) + { + const vec3F v0(means[i] + m_mean_norm_color); + const vec3F v1(means[j] + m_mean_norm_color); - stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; - stops[3] = colors[3].r * vr + colors[3].g * vg + colors[3].b * vb; + 
dxt1_solution_coordinates sc( + color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255), + color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false); - // 0 2 3 1 - int c0Point = stops[1] + stops[3]; - int halfPoint = stops[3] + stops[2]; - int c3Point = stops[2] + stops[0]; + sc.canonicalize(); + improved |= evaluate_solution(sc); + } + } - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 1 : 0); - int dot = c.r * dirr + c.g * dirg + c.b * dirb; + return improved; + } - uint8 best_color_index; - if (dot < halfPoint) - best_color_index = (dot < c3Point) ? 0 : 2; + // Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error, + // and use the candidate if it results in less error than the best found result so far. + bool dxt1_endpoint_optimizer::evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding) + { + color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, false); + color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, false); + uint64 rError = c0.r < c1.r ? m_rDist[c0.r].low + m_rDist[c1.r].high : m_rDist[c0.r].high + m_rDist[c1.r].low; + uint64 gError = c0.g < c1.g ? m_gDist[c0.g].low + m_gDist[c1.g].high : m_gDist[c0.g].high + m_gDist[c1.g].low; + uint64 bError = c0.b < c1.b ? 
m_bDist[c0.b].low + m_bDist[c1.b].high : m_bDist[c0.b].high + m_bDist[c1.b].low; + if (rError + gError + bError >= m_best_solution.m_error) + { + return false; + } + if (!alternate_rounding) + { + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | coords.m_high_color << 16)); + if (!solution_res.second) + { + return false; + } + } + if (m_evaluate_hc) + { + return m_perceptual ? evaluate_solution_hc_perceptual(coords, alternate_rounding) : evaluate_solution_hc_uniform(coords, alternate_rounding); + } + if (m_pParams->m_quality >= cCRNDXTQualityBetter) + { + return evaluate_solution_uber(coords, alternate_rounding); + } + return evaluate_solution_fast(coords, alternate_rounding); + } + + inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) + { + if (perceptual) + { + return color::color_distance(true, e1, e2, alpha); + } + else if (m_pParams->m_grayscale_sampling) + { + // Computes error assuming shader will be converting the result to grayscale. + int y0 = color::RGB_to_Y(e1); + int y1 = color::RGB_to_Y(e2); + int yd = y0 - y1; + if (alpha) + { + int da = (int)e1[3] - (int)e2[3]; + return yd * yd + da * da; + } + else + { + return yd * yd; + } + } else - best_color_index = (dot < c0Point) ? 
3 : 1; + { + return color::color_distance(false, e1, e2, alpha); + } + } - uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); + bool dxt1_endpoint_optimizer::evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding) + { + m_trial_solution.m_coords = coords; + m_trial_solution.m_selectors.resize(m_unique_colors.size()); + m_trial_solution.m_error = m_best_solution.m_error; + m_trial_solution.m_alpha_block = false; - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) - break; + uint first_block_type = 0; + uint last_block_type = 1; - m_trial_selectors[unique_color_index] = static_cast(best_color_index); - } - } else { - colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); + if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) + { + first_block_type = 1; + } + else if (!m_pParams->m_use_alpha_blocks) + { + last_block_type = 0; + } - stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; + m_trial_selectors.resize(m_unique_colors.size()); + + color_quad_u8 colors[cDXT1SelectorValues]; + + colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); + colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); + + for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) + { + uint64 trial_error = 0; + + if (!block_type) + { + colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0); + colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + 
alternate_rounding) / 3, 0); + + if (m_perceptual) + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(true, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(true, c, colors[1], false); + if (err < best_error) + { + best_error = err; + best_color_index = 1; + } + + err = color_distance(true, c, colors[2], false); + if (err < best_error) + { + best_error = err; + best_color_index = 2; + } + + err = color_distance(true, c, colors[3], false); + if (err < best_error) + { + best_error = err; + best_color_index = 3; + } + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(false, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(false, c, colors[1], false); + if (err < best_error) + { + best_error = err; + best_color_index = 1; + } + + err = color_distance(false, c, colors[2], false); + if (err < best_error) + { + best_error = err; + best_color_index = 2; + } + + err = color_distance(false, c, colors[3], false); + if (err < best_error) + { + best_error = err; + best_color_index = 3; + } + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + } + else + { + colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, 
(colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); + + if (m_perceptual) + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(true, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(true, c, colors[1], false); + if (err < best_error) + { + best_error = err; + best_color_index = 1; + } + + err = color_distance(true, c, colors[2], false); + if (err < best_error) + { + best_error = err; + best_color_index = 2; + } + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(false, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(false, c, colors[1], false); + if (err < best_error) + { + best_error = err; + best_color_index = 1; + } + + err = color_distance(false, c, colors[2], false); + if (err < best_error) + { + best_error = err; + best_color_index = 2; + } + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + } - // 0 2 1 - int c02Point = stops[0] + stops[2]; - int c21Point = stops[2] + stops[1]; + if (trial_error < m_trial_solution.m_error) + { + m_trial_solution.m_error = trial_error; + m_trial_solution.m_alpha_block = (block_type != 0); + m_trial_solution.m_selectors = 
m_trial_selectors; + m_trial_solution.m_alternate_rounding = alternate_rounding; + } + } - for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { - const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + m_trial_solution.m_enforce_selector = !m_trial_solution.m_alpha_block && m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color; + if (m_trial_solution.m_enforce_selector) + { + uint s; + if ((m_trial_solution.m_coords.m_low_color & 31) != 31) + { + m_trial_solution.m_coords.m_low_color++; + s = 1; + } + else + { + m_trial_solution.m_coords.m_high_color--; + s = 0; + } - int dot = c.r * dirr + c.g * dirg + c.b * dirb; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + m_trial_solution.m_selectors[i] = static_cast(s); + } + m_trial_solution.m_enforced_selector = s; + } - uint8 best_color_index; - if (dot < c02Point) - best_color_index = 0; - else if (dot < c21Point) - best_color_index = 2; - else - best_color_index = 1; + if (m_trial_solution.m_error < m_best_solution.m_error) + { + m_best_solution = m_trial_solution; + return true; + } + + return false; + } + + bool dxt1_endpoint_optimizer::evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding) + { + m_trial_solution.m_coords = coords; + m_trial_solution.m_selectors.resize(m_unique_colors.size()); + m_trial_solution.m_error = m_best_solution.m_error; + m_trial_solution.m_alpha_block = false; + + uint first_block_type = 0; + uint last_block_type = 1; + + if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) + { + first_block_type = 1; + } + else if (!m_pParams->m_use_alpha_blocks) + { + last_block_type = 0; + } + + m_trial_selectors.resize(m_unique_colors.size()); + + color_quad_u8 colors[cDXT1SelectorValues]; + colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); + colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); + + int vr = 
colors[1].r - colors[0].r; + int vg = colors[1].g - colors[0].g; + int vb = colors[1].b - colors[0].b; + if (m_perceptual) + { + vr *= 8; + vg *= 24; + } + + int stops[4]; + stops[0] = colors[0].r * vr + colors[0].g * vg + colors[0].b * vb; + stops[1] = colors[1].r * vr + colors[1].g * vg + colors[1].b * vb; + + int dirr = vr * 2; + int dirg = vg * 2; + int dirb = vb * 2; + + for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) + { + uint64 trial_error = 0; + + if (!block_type) + { + colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 255U); + colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U); + + stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; + stops[3] = colors[3].r * vr + colors[3].g * vg + colors[3].b * vb; + + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; + + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + int dot = c.r * dirr + c.g * dirg + c.b * dirb; + + uint8 best_color_index; + if (dot >= halfPoint) + { + best_color_index = (dot < c0Point) ? 3 : 1; + } + else + { + best_color_index = (dot < c3Point) ? 
0 : 2; + } + + uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); + + stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; + + // 0 2 1 + int c02Point = stops[0] + stops[2]; + int c21Point = stops[2] + stops[1]; + + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + int dot = c.r * dirr + c.g * dirg + c.b * dirb; + + uint8 best_color_index; + if (dot < c02Point) + { + best_color_index = 0; + } + else if (dot < c21Point) + { + best_color_index = 2; + } + else + { + best_color_index = 1; + } + + uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); + + trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); + if (trial_error >= m_trial_solution.m_error) + { + break; + } + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } - uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); + if (trial_error < m_trial_solution.m_error) + { + m_trial_solution.m_error = trial_error; + m_trial_solution.m_alpha_block = (block_type != 0); + m_trial_solution.m_selectors = m_trial_selectors; + } + } + + if ((!m_trial_solution.m_alpha_block) && (m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color)) + { + uint s; + if ((m_trial_solution.m_coords.m_low_color & 31) != 31) + { + m_trial_solution.m_coords.m_low_color++; + s 
= 1; + } + else + { + m_trial_solution.m_coords.m_high_color--; + s = 0; + } + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + m_trial_solution.m_selectors[i] = static_cast(s); + } + } - trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); - if (trial_error >= m_trial_solution.m_error) - break; + if (m_trial_solution.m_error < m_best_solution.m_error) + { + m_best_solution = m_trial_solution; + return true; + } - m_trial_selectors[unique_color_index] = static_cast(best_color_index); - } + return false; } - if (trial_error < m_trial_solution.m_error) { - m_trial_solution.m_error = trial_error; - m_trial_solution.m_alpha_block = (block_type != 0); - m_trial_solution.m_selectors = m_trial_selectors; + bool dxt1_endpoint_optimizer::evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding) + { + color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); + color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); + color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); + color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); + uint64 error = 0; + unique_color* color = m_evaluated_colors.get_ptr(); + for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? 
count-- : count = 0) + { + uint e01 = math::minimum(color::color_distance(true, color->m_color, c0, false), color::color_distance(true, color->m_color, c1, false)); + uint e23 = math::minimum(color::color_distance(true, color->m_color, c2, false), color::color_distance(true, color->m_color, c3, false)); + error += math::minimum(e01, e23) * (uint64)color->m_weight; + } + if (error >= m_best_solution.m_error) + { + return false; + } + m_best_solution.m_coords = coords; + m_best_solution.m_error = error; + m_best_solution.m_alpha_block = false; + m_best_solution.m_alternate_rounding = alternate_rounding; + m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; + if (m_best_solution.m_enforce_selector) + { + if ((m_best_solution.m_coords.m_low_color & 31) != 31) + { + m_best_solution.m_coords.m_low_color++; + m_best_solution.m_enforced_selector = 1; + } + else + { + m_best_solution.m_coords.m_high_color--; + m_best_solution.m_enforced_selector = 0; + } + } + return true; } - } - - if ((!m_trial_solution.m_alpha_block) && (m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color)) { - uint s; - if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { - m_trial_solution.m_coords.m_low_color++; - s = 1; - } else { - m_trial_solution.m_coords.m_high_color--; - s = 0; + + bool dxt1_endpoint_optimizer::evaluate_solution_hc_uniform(const dxt1_solution_coordinates& coords, bool alternate_rounding) + { + color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); + color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); + color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); + color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); + uint64 error = 0; + unique_color* color = 
m_evaluated_colors.get_ptr(); + for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) + { + uint e01 = math::minimum(color::color_distance(false, color->m_color, c0, false), color::color_distance(false, color->m_color, c1, false)); + uint e23 = math::minimum(color::color_distance(false, color->m_color, c2, false), color::color_distance(false, color->m_color, c3, false)); + error += math::minimum(e01, e23) * (uint64)color->m_weight; + } + if (error >= m_best_solution.m_error) + { + return false; + } + m_best_solution.m_coords = coords; + m_best_solution.m_error = error; + m_best_solution.m_alpha_block = false; + m_best_solution.m_alternate_rounding = alternate_rounding; + m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; + if (m_best_solution.m_enforce_selector) + { + if ((m_best_solution.m_coords.m_low_color & 31) != 31) + { + m_best_solution.m_coords.m_low_color++; + m_best_solution.m_enforced_selector = 1; + } + else + { + m_best_solution.m_coords.m_high_color--; + m_best_solution.m_enforced_selector = 0; + } + } + return true; } - for (uint i = 0; i < m_unique_colors.size(); i++) - m_trial_solution.m_selectors[i] = static_cast(s); - } - - if (m_trial_solution.m_error < m_best_solution.m_error) { - m_best_solution = m_trial_solution; - return true; - } - - return false; -} - -bool dxt1_endpoint_optimizer::evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding) { - color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); - color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); - color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); - color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + 
alternate_rounding) / 3, 0); - uint64 error = 0; - unique_color* color = m_evaluated_colors.get_ptr(); - for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { - uint e01 = math::minimum(color::color_distance(true, color->m_color, c0, false), color::color_distance(true, color->m_color, c1, false)); - uint e23 = math::minimum(color::color_distance(true, color->m_color, c2, false), color::color_distance(true, color->m_color, c3, false)); - error += math::minimum(e01, e23) * (uint64)color->m_weight; - } - if (error >= m_best_solution.m_error) - return false; - m_best_solution.m_coords = coords; - m_best_solution.m_error = error; - m_best_solution.m_alpha_block = false; - m_best_solution.m_alternate_rounding = alternate_rounding; - m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; - if (m_best_solution.m_enforce_selector) { - if ((m_best_solution.m_coords.m_low_color & 31) != 31) { - m_best_solution.m_coords.m_low_color++; - m_best_solution.m_enforced_selector = 1; - } else { - m_best_solution.m_coords.m_high_color--; - m_best_solution.m_enforced_selector = 0; + void dxt1_endpoint_optimizer::compute_selectors() + { + if (m_evaluate_hc) + { + compute_selectors_hc(); + } } - } - return true; -} - -bool dxt1_endpoint_optimizer::evaluate_solution_hc_uniform(const dxt1_solution_coordinates& coords, bool alternate_rounding) { - color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); - color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); - color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); - color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); - uint64 error = 0; - unique_color* color = m_evaluated_colors.get_ptr(); - 
for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { - uint e01 = math::minimum(color::color_distance(false, color->m_color, c0, false), color::color_distance(false, color->m_color, c1, false)); - uint e23 = math::minimum(color::color_distance(false, color->m_color, c2, false), color::color_distance(false, color->m_color, c3, false)); - error += math::minimum(e01, e23) * (uint64)color->m_weight; - } - if (error >= m_best_solution.m_error) - return false; - m_best_solution.m_coords = coords; - m_best_solution.m_error = error; - m_best_solution.m_alpha_block = false; - m_best_solution.m_alternate_rounding = alternate_rounding; - m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; - if (m_best_solution.m_enforce_selector) { - if ((m_best_solution.m_coords.m_low_color & 31) != 31) { - m_best_solution.m_coords.m_low_color++; - m_best_solution.m_enforced_selector = 1; - } else { - m_best_solution.m_coords.m_high_color--; - m_best_solution.m_enforced_selector = 0; + + void dxt1_endpoint_optimizer::compute_selectors_hc() + { + m_best_solution.m_selectors.resize(m_unique_colors.size()); + if (m_best_solution.m_enforce_selector) + { + memset(m_best_solution.m_selectors.get_ptr(), m_best_solution.m_enforced_selector, m_best_solution.m_selectors.size()); + return; + } + color_quad_u8 c0 = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true); + color_quad_u8 c1 = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true); + color_quad_u8 c2((c0.r * 2 + c1.r + m_best_solution.m_alternate_rounding) / 3, (c0.g * 2 + c1.g + m_best_solution.m_alternate_rounding) / 3, (c0.b * 2 + c1.b + m_best_solution.m_alternate_rounding) / 3, 0); + color_quad_u8 c3((c1.r * 2 + c0.r + m_best_solution.m_alternate_rounding) / 3, (c1.g * 2 + c0.g + m_best_solution.m_alternate_rounding) / 3, (c1.b * 2 + c0.b + m_best_solution.m_alternate_rounding) / 
3, 0); + for (uint i = 0, iEnd = m_unique_colors.size(); i < iEnd; i++) + { + const color_quad_u8& c = m_unique_colors[i].m_color; + uint e0 = color::color_distance(m_perceptual, c, c0, false); + uint e1 = color::color_distance(m_perceptual, c, c1, false); + uint e2 = color::color_distance(m_perceptual, c, c2, false); + uint e3 = color::color_distance(m_perceptual, c, c3, false); + uint e01 = math::minimum(e0, e1); + uint e23 = math::minimum(e2, e3); + m_best_solution.m_selectors[i] = e01 <= e23 ? e01 == e0 ? 0 : 1 : e23 == e2 ? 2 + : 3; + } } - } - return true; -} - -void dxt1_endpoint_optimizer::compute_selectors() { - if (m_evaluate_hc) - compute_selectors_hc(); -} - -void dxt1_endpoint_optimizer::compute_selectors_hc() { - m_best_solution.m_selectors.resize(m_unique_colors.size()); - if (m_best_solution.m_enforce_selector) { - memset(m_best_solution.m_selectors.get_ptr(), m_best_solution.m_enforced_selector, m_best_solution.m_selectors.size()); - return; - } - color_quad_u8 c0 = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true); - color_quad_u8 c1 = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true); - color_quad_u8 c2((c0.r * 2 + c1.r + m_best_solution.m_alternate_rounding) / 3, (c0.g * 2 + c1.g + m_best_solution.m_alternate_rounding) / 3, (c0.b * 2 + c1.b + m_best_solution.m_alternate_rounding) / 3, 0); - color_quad_u8 c3((c1.r * 2 + c0.r + m_best_solution.m_alternate_rounding) / 3, (c1.g * 2 + c0.g + m_best_solution.m_alternate_rounding) / 3, (c1.b * 2 + c0.b + m_best_solution.m_alternate_rounding) / 3, 0); - for (uint i = 0, iEnd = m_unique_colors.size(); i < iEnd; i++) { - const color_quad_u8& c = m_unique_colors[i].m_color; - uint e0 = color::color_distance(m_perceptual, c, c0, false); - uint e1 = color::color_distance(m_perceptual, c, c1, false); - uint e2 = color::color_distance(m_perceptual, c, c2, false); - uint e3 = color::color_distance(m_perceptual, c, c3, false); - uint e01 = math::minimum(e0, e1); - uint e23 
= math::minimum(e2, e3); - m_best_solution.m_selectors[i] = e01 <= e23 ? e01 == e0 ? 0 : 1 : e23 == e2 ? 2 : 3; - } -} - -unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding) { - color_quad_u8 res; - - float r = rounding ? 1.0f : 0.0f; - res[0] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[0], b[0], f)), 0, 255)); - res[1] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[1], b[1], f)), 0, 255)); - res[2] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[2], b[2], f)), 0, 255)); - res[3] = 255; - - return unique_color(res, 1); -} - -// The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc. -// Attempt to recover the endpoints used by that block compressor. -void dxt1_endpoint_optimizer::try_combinatorial_encoding() { - if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) - return; - - m_temp_unique_colors = m_unique_colors; - - if (m_temp_unique_colors.size() == 2) { - // a b c d - // 0.0 1/3 2/3 1.0 - - for (uint k = 0; k < 2; k++) { - for (uint q = 0; q < 2; q++) { - const uint r = q ^ 1; - - // a b - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k)); - - // a c - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k)); - - // a d - - // b c - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); - - // b d - 
m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k)); - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); - - // c d - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k)); - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); - } + + unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding) + { + color_quad_u8 res; + + float r = rounding ? 1.0f : 0.0f; + res[0] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[0], b[0], f)), 0, 255)); + res[1] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[1], b[1], f)), 0, 255)); + res[2] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[2], b[2], f)), 0, 255)); + res[3] = 255; + + return unique_color(res, 1); } - } else if (m_temp_unique_colors.size() == 3) { - // a b c d - // 0.0 1/3 2/3 1.0 - for (uint i = 0; i <= 2; i++) { - for (uint j = 0; j <= 2; j++) { - if (i == j) - continue; + // The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc. + // Attempt to recover the endpoints used by that block compressor. 
+ void dxt1_endpoint_optimizer::try_combinatorial_encoding() + { + if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) + { + return; + } + + m_temp_unique_colors = m_unique_colors; + + if (m_temp_unique_colors.size() == 2) + { + // a b c d + // 0.0 1/3 2/3 1.0 + + for (uint k = 0; k < 2; k++) + { + for (uint q = 0; q < 2; q++) + { + const uint r = q ^ 1; + + // a b + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k)); + + // a c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k)); + + // a d + + // b c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); + + // b d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); + + // c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); + } + } + } + else if (m_temp_unique_colors.size() == 3) + { + // a b c d + // 0.0 1/3 2/3 1.0 + + for (uint i = 0; i <= 2; i++) + { + for (uint j = 0; j <= 2; j++) + { + if (i == j) + { + continue; + } + + // a b c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f)); + + // a b d + 
m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f / 3.0f)); + + // a c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f / 3.0f)); + + // b c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f)); + } + } + } + + m_unique_packed_colors.resize(0); - // a b c - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f)); + for (uint i = 0; i < m_temp_unique_colors.size(); i++) + { + const color_quad_u8& unique_color = m_temp_unique_colors[i].m_color; + const uint16 packed_color = dxt1_block::pack_color(unique_color, true); - // a b d - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f / 3.0f)); + if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end()) + { + continue; + } - // a c d - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f / 3.0f)); + m_unique_packed_colors.push_back(packed_color); + } - // b c d - m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f)); - } + for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) + { + for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) + { + evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j])); + } + } + uint64 error = m_best_solution.m_error; + if (error) + { + m_best_solution.m_error = 1; + } + for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) + { + for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) + { + 
evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]), true); + } + } + if (m_best_solution.m_error) + { + m_best_solution.m_error = error; + } } - } - m_unique_packed_colors.resize(0); + // The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller + // doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.) + bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() + { + results* pOrig_results = m_pResults; - for (uint i = 0; i < m_temp_unique_colors.size(); i++) { - const color_quad_u8& unique_color = m_temp_unique_colors[i].m_color; - const uint16 packed_color = dxt1_block::pack_color(unique_color, true); + uint num_dark_colors = 0; - if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end()) - continue; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + if ((m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4)) + { + num_dark_colors++; + } + } + + if ((!num_dark_colors) || (num_dark_colors == m_unique_colors.size())) + { + return true; + } - m_unique_packed_colors.push_back(packed_color); - } + params trial_params(*m_pParams); + crnlib::vector trial_colors; + trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels); - for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { - for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) - evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j])); - } - uint64 error = m_best_solution.m_error; - if (error) - m_best_solution.m_error = 1; - for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { - for (uint j = i + 1; m_best_solution.m_error && 
j < m_unique_packed_colors.size(); j++) - evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]), true); - } - if (m_best_solution.m_error) - m_best_solution.m_error = error; - -} + trial_params.m_pPixels = trial_colors.get_ptr(); + trial_params.m_pixels_have_alpha = true; -// The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller -// doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.) -bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() { - results* pOrig_results = m_pResults; + for (uint i = 0; i < trial_colors.size(); i++) + { + if ((trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4)) + { + trial_colors[i][3] = 0; + } + } - uint num_dark_colors = 0; + results trial_results; - for (uint i = 0; i < m_unique_colors.size(); i++) - if ((m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4)) - num_dark_colors++; + crnlib::vector trial_selectors(m_pParams->m_num_pixels); + trial_results.m_pSelectors = trial_selectors.get_ptr(); - if ((!num_dark_colors) || (num_dark_colors == m_unique_colors.size())) - return true; + compute_internal(trial_params, trial_results); - params trial_params(*m_pParams); - crnlib::vector trial_colors; - trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels); + CRNLIB_ASSERT(trial_results.m_alpha_block); - trial_params.m_pPixels = trial_colors.get_ptr(); - trial_params.m_pixels_have_alpha = true; + color_quad_u8 c[4]; + dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color); - for (uint i = 0; i < trial_colors.size(); i++) - if ((trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4)) - trial_colors[i][3] = 0; + uint64 trial_error = 0; - results trial_results; + for (uint 
i = 0; i < trial_colors.size(); i++) + { + if (trial_colors[i][3] == 0) + { + CRNLIB_ASSERT(trial_selectors[i] == 3); + } + else + { + CRNLIB_ASSERT(trial_selectors[i] != 3); + } - crnlib::vector trial_selectors(m_pParams->m_num_pixels); - trial_results.m_pSelectors = trial_selectors.get_ptr(); + trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false); + } - compute_internal(trial_params, trial_results); + if (trial_error < pOrig_results->m_error) + { + pOrig_results->m_error = trial_error; - CRNLIB_ASSERT(trial_results.m_alpha_block); + pOrig_results->m_low_color = trial_results.m_low_color; + pOrig_results->m_high_color = trial_results.m_high_color; - color_quad_u8 c[4]; - dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color); + if (pOrig_results->m_pSelectors) + { + memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels); + } - uint64 trial_error = 0; + pOrig_results->m_alpha_block = true; + } - for (uint i = 0; i < trial_colors.size(); i++) { - if (trial_colors[i][3] == 0) { - CRNLIB_ASSERT(trial_selectors[i] == 3); - } else { - CRNLIB_ASSERT(trial_selectors[i] != 3); + return true; } - trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false); - } - - if (trial_error < pOrig_results->m_error) { - pOrig_results->m_error = trial_error; - - pOrig_results->m_low_color = trial_results.m_low_color; - pOrig_results->m_high_color = trial_results.m_high_color; - - if (pOrig_results->m_pSelectors) - memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels); - - pOrig_results->m_alpha_block = true; - } - - return true; -} - -void dxt1_endpoint_optimizer::compute_internal(const params& p, results& r) { - m_pParams = &p; - m_pResults = &r; - m_evaluate_hc = m_pParams->m_quality == cCRNDXTQualityUber && !m_pParams->m_pixels_have_alpha && !m_pParams->m_force_alpha_blocks - && !m_pParams->m_use_alpha_blocks && 
!m_pParams->m_grayscale_sampling; - m_perceptual = m_pParams->m_perceptual && !m_pParams->m_grayscale_sampling; - if (m_unique_color_hash_map.get_table_size() > 8192) - m_unique_color_hash_map.clear(); - else - m_unique_color_hash_map.reset(); - if (m_solutions_tried.get_table_size() > 8192) - m_solutions_tried.clear(); - else - m_solutions_tried.reset(); - m_unique_colors.clear(); - m_norm_unique_colors.clear(); - m_mean_norm_color.clear(); - m_norm_unique_colors_weighted.clear(); - m_mean_norm_color_weighted.clear(); - m_principle_axis.clear(); - m_best_solution.clear(); - - m_total_unique_color_weight = 0; - m_unique_colors.reserve(m_pParams->m_num_pixels); - unique_color color(color_quad_u8(0), 1); - for (uint i = 0; i < m_pParams->m_num_pixels; i++) { - if (!m_pParams->m_pixels_have_alpha || m_pParams->m_pPixels[i].a >= m_pParams->m_dxt1a_alpha_threshold) { - color.m_color.m_u32 = m_pParams->m_pPixels[i].m_u32 | 0xFF000000; - unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(color.m_color.m_u32, m_unique_colors.size())); - if (ins_result.second) { - m_unique_colors.push_back(color); - } else { - m_unique_colors[ins_result.first->second].m_weight++; - } - m_total_unique_color_weight++; - } - } - m_has_transparent_pixels = m_total_unique_color_weight != m_pParams->m_num_pixels; - m_evaluated_colors = m_unique_colors; - - struct { - uint64 weight, weightedColor, weightedSquaredColor; - } rPlane[32] = {}, gPlane[64] = {}, bPlane[32] = {}; - - for (uint i = 0; i < m_unique_colors.size(); i++) { - const unique_color& color = m_unique_colors[i]; - uint8 R = color.m_color.r, r = (R >> 3) + ((R & 7) > (R >> 5) ? 1 : 0); - rPlane[r].weight += color.m_weight; - rPlane[r].weightedColor += (uint64)color.m_weight * R; - rPlane[r].weightedSquaredColor += (uint64)color.m_weight * R * R; - uint8 G = color.m_color.g, g = (G >> 2) + ((G & 3) > (G >> 6) ? 
1 : 0); - gPlane[g].weight += color.m_weight; - gPlane[g].weightedColor += (uint64)color.m_weight * G; - gPlane[g].weightedSquaredColor += (uint64)color.m_weight * G * G; - uint8 B = color.m_color.b, b = (B >> 3) + ((B & 7) > (B >> 5) ? 1 : 0); - bPlane[b].weight += color.m_weight; - bPlane[b].weightedColor += (uint64)color.m_weight * B; - bPlane[b].weightedSquaredColor += (uint64)color.m_weight * B * B; - } - - if (m_perceptual) { - for (uint c = 0; c < 32; c++) { - rPlane[c].weight *= 8; - rPlane[c].weightedColor *= 8; - rPlane[c].weightedSquaredColor *= 8; - } - for (uint c = 0; c < 64; c++) { - gPlane[c].weight *= 25; - gPlane[c].weightedColor *= 25; - gPlane[c].weightedSquaredColor *= 25; + void dxt1_endpoint_optimizer::compute_internal(const params& p, results& r) + { + m_pParams = &p; + m_pResults = &r; + m_evaluate_hc = m_pParams->m_quality == cCRNDXTQualityUber && !m_pParams->m_pixels_have_alpha && !m_pParams->m_force_alpha_blocks && !m_pParams->m_use_alpha_blocks && !m_pParams->m_grayscale_sampling; + m_perceptual = m_pParams->m_perceptual && !m_pParams->m_grayscale_sampling; + if (m_unique_color_hash_map.get_table_size() > 8192) + { + m_unique_color_hash_map.clear(); + } + else + { + m_unique_color_hash_map.reset(); + } + if (m_solutions_tried.get_table_size() > 8192) + { + m_solutions_tried.clear(); + } + else + { + m_solutions_tried.reset(); + } + m_unique_colors.clear(); + m_norm_unique_colors.clear(); + m_mean_norm_color.clear(); + m_norm_unique_colors_weighted.clear(); + m_mean_norm_color_weighted.clear(); + m_principle_axis.clear(); + m_best_solution.clear(); + + m_total_unique_color_weight = 0; + m_unique_colors.reserve(m_pParams->m_num_pixels); + unique_color color(color_quad_u8(0), 1); + for (uint i = 0; i < m_pParams->m_num_pixels; i++) + { + if (!m_pParams->m_pixels_have_alpha || m_pParams->m_pPixels[i].a >= m_pParams->m_dxt1a_alpha_threshold) + { + color.m_color.m_u32 = m_pParams->m_pPixels[i].m_u32 | 0xFF000000; + 
unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(color.m_color.m_u32, m_unique_colors.size())); + if (ins_result.second) + { + m_unique_colors.push_back(color); + } + else + { + m_unique_colors[ins_result.first->second].m_weight++; + } + m_total_unique_color_weight++; + } + } + m_has_transparent_pixels = m_total_unique_color_weight != m_pParams->m_num_pixels; + m_evaluated_colors = m_unique_colors; + + struct + { + uint64 weight, weightedColor, weightedSquaredColor; + } rPlane[32] = {}, gPlane[64] = {}, bPlane[32] = {}; + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const unique_color& color = m_unique_colors[i]; + uint8 R = color.m_color.r, r = (R >> 3) + ((R & 7) > (R >> 5) ? 1 : 0); + rPlane[r].weight += color.m_weight; + rPlane[r].weightedColor += (uint64)color.m_weight * R; + rPlane[r].weightedSquaredColor += (uint64)color.m_weight * R * R; + uint8 G = color.m_color.g, g = (G >> 2) + ((G & 3) > (G >> 6) ? 1 : 0); + gPlane[g].weight += color.m_weight; + gPlane[g].weightedColor += (uint64)color.m_weight * G; + gPlane[g].weightedSquaredColor += (uint64)color.m_weight * G * G; + uint8 B = color.m_color.b, b = (B >> 3) + ((B & 7) > (B >> 5) ? 
1 : 0); + bPlane[b].weight += color.m_weight; + bPlane[b].weightedColor += (uint64)color.m_weight * B; + bPlane[b].weightedSquaredColor += (uint64)color.m_weight * B * B; + } + + if (m_perceptual) + { + for (uint c = 0; c < 32; c++) + { + rPlane[c].weight *= 8; + rPlane[c].weightedColor *= 8; + rPlane[c].weightedSquaredColor *= 8; + } + for (uint c = 0; c < 64; c++) + { + gPlane[c].weight *= 25; + gPlane[c].weightedColor *= 25; + gPlane[c].weightedSquaredColor *= 25; + } + } + + for (uint c = 1; c < 32; c++) + { + rPlane[c].weight += rPlane[c - 1].weight; + rPlane[c].weightedColor += rPlane[c - 1].weightedColor; + rPlane[c].weightedSquaredColor += rPlane[c - 1].weightedSquaredColor; + bPlane[c].weight += bPlane[c - 1].weight; + bPlane[c].weightedColor += bPlane[c - 1].weightedColor; + bPlane[c].weightedSquaredColor += bPlane[c - 1].weightedSquaredColor; + } + + for (uint c = 1; c < 64; c++) + { + gPlane[c].weight += gPlane[c - 1].weight; + gPlane[c].weightedColor += gPlane[c - 1].weightedColor; + gPlane[c].weightedSquaredColor += gPlane[c - 1].weightedSquaredColor; + } + + for (uint c = 0; c < 32; c++) + { + uint8 C = c << 3 | c >> 2; + m_rDist[c].low = rPlane[c].weightedSquaredColor + C * C * rPlane[c].weight - 2 * C * rPlane[c].weightedColor; + m_rDist[c].high = rPlane[31].weightedSquaredColor + C * C * rPlane[31].weight - 2 * C * rPlane[31].weightedColor - m_rDist[c].low; + m_bDist[c].low = bPlane[c].weightedSquaredColor + C * C * bPlane[c].weight - 2 * C * bPlane[c].weightedColor; + m_bDist[c].high = bPlane[31].weightedSquaredColor + C * C * bPlane[31].weight - 2 * C * bPlane[31].weightedColor - m_bDist[c].low; + } + + for (uint c = 0; c < 64; c++) + { + uint8 C = c << 2 | c >> 4; + m_gDist[c].low = gPlane[c].weightedSquaredColor + C * C * gPlane[c].weight - 2 * C * gPlane[c].weightedColor; + m_gDist[c].high = gPlane[63].weightedSquaredColor + C * C * gPlane[63].weight - 2 * C * gPlane[63].weightedColor - m_gDist[c].low; + } + + if (!m_unique_colors.size()) + { 
+ m_pResults->m_low_color = 0; + m_pResults->m_high_color = 0; + m_pResults->m_alpha_block = true; + memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels); + } + else if (m_unique_colors.size() == 1 && !m_has_transparent_pixels) + { + int r = m_unique_colors[0].m_color.r; + int g = m_unique_colors[0].m_color.g; + int b = m_unique_colors[0].m_color.b; + uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; + uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; + evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + if (m_pParams->m_use_alpha_blocks && m_best_solution.m_error) + { + low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; + high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; + evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + } + return_solution(); + } + else + { + handle_multicolor_block(); + } } - } - - for (uint c = 1; c < 32; c++) { - rPlane[c].weight += rPlane[c - 1].weight; - rPlane[c].weightedColor += rPlane[c - 1].weightedColor; - rPlane[c].weightedSquaredColor += rPlane[c - 1].weightedSquaredColor; - bPlane[c].weight += bPlane[c - 1].weight; - bPlane[c].weightedColor += bPlane[c - 1].weightedColor; - bPlane[c].weightedSquaredColor += bPlane[c - 1].weightedSquaredColor; - } - - for (uint c = 1; c < 64; c++) { - gPlane[c].weight += gPlane[c - 1].weight; - gPlane[c].weightedColor += gPlane[c - 1].weightedColor; - gPlane[c].weightedSquaredColor += gPlane[c - 1].weightedSquaredColor; - } - - for (uint c = 0; c < 32; c++) { - uint8 C = c << 3 | c >> 2; - m_rDist[c].low = rPlane[c].weightedSquaredColor + C * C * rPlane[c].weight - 2 * C * rPlane[c].weightedColor; - m_rDist[c].high = rPlane[31].weightedSquaredColor + C * C * rPlane[31].weight - 2 * C * 
rPlane[31].weightedColor - m_rDist[c].low; - m_bDist[c].low = bPlane[c].weightedSquaredColor + C * C * bPlane[c].weight - 2 * C * bPlane[c].weightedColor; - m_bDist[c].high = bPlane[31].weightedSquaredColor + C * C * bPlane[31].weight - 2 * C * bPlane[31].weightedColor - m_bDist[c].low; - } - - for (uint c = 0; c < 64; c++) { - uint8 C = c << 2 | c >> 4; - m_gDist[c].low = gPlane[c].weightedSquaredColor + C * C * gPlane[c].weight - 2 * C * gPlane[c].weightedColor; - m_gDist[c].high = gPlane[63].weightedSquaredColor + C * C * gPlane[63].weight - 2 * C * gPlane[63].weightedColor - m_gDist[c].low; - } - - if (!m_unique_colors.size()) { - m_pResults->m_low_color = 0; - m_pResults->m_high_color = 0; - m_pResults->m_alpha_block = true; - memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels); - } else if (m_unique_colors.size() == 1 && !m_has_transparent_pixels) { - int r = m_unique_colors[0].m_color.r; - int g = m_unique_colors[0].m_color.g; - int b = m_unique_colors[0].m_color.b; - uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; - uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; - evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); - if (m_pParams->m_use_alpha_blocks && m_best_solution.m_error) { - low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; - high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; - evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); + + bool dxt1_endpoint_optimizer::compute(const params& p, results& r) + { + if (!p.m_pPixels) + { + return false; + } + compute_internal(p, r); + if (m_pParams->m_use_alpha_blocks && m_pParams->m_use_transparent_indices_for_black && !m_pParams->m_pixels_have_alpha) + { + return try_alpha_as_black_optimization(); + } + 
return true; } - return_solution(); - } else { - handle_multicolor_block(); - } -} - -bool dxt1_endpoint_optimizer::compute(const params& p, results& r) { - if (!p.m_pPixels) - return false; - compute_internal(p, r); - if (m_pParams->m_use_alpha_blocks && m_pParams->m_use_transparent_indices_for_black && !m_pParams->m_pixels_have_alpha) - return try_alpha_as_black_optimization(); - return true; -} - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt1.h b/crnlib/crn_dxt1.h index 8a9733b..529da42 100644 --- a/crnlib/crn_dxt1.h +++ b/crnlib/crn_dxt1.h @@ -1,5 +1,26 @@ -// File: crn_dxt1.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_dxt.h" @@ -9,19 +30,19 @@ namespace crnlib { struct CRN_EXPORT dxt1_solution_coordinates { - inline dxt1_solution_coordinates(): + inline dxt1_solution_coordinates() : m_low_color(0), m_high_color(0) { } - inline dxt1_solution_coordinates(uint16 l, uint16 h): + inline dxt1_solution_coordinates(uint16 l, uint16 h) : m_low_color(l), m_high_color(h) { } - inline dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true): + inline dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true) : m_low_color(dxt1_block::pack_color(l, scaled)), m_high_color(dxt1_block::pack_color(h, scaled)) { @@ -117,7 +138,7 @@ namespace crnlib inline unique_color() { } - inline unique_color(const color_quad_u8& color, uint weight): + inline unique_color(const color_quad_u8& color, uint weight) : m_color(color), m_weight(weight) { @@ -146,7 +167,7 @@ namespace crnlib struct params { - params(): + params() : m_block_index(0), m_pPixels(nullptr), m_num_pixels(0), @@ -181,7 +202,8 @@ namespace crnlib struct results { - inline results(): m_pSelectors(nullptr) + inline results() : + m_pSelectors(nullptr) { } @@ -213,11 +235,12 @@ namespace crnlib typedef crnlib::hash_map unique_color_hash_map; unique_color_hash_map m_unique_color_hash_map; - unique_color_vec m_unique_colors; // excludes transparent colors! + unique_color_vec m_unique_colors; // excludes transparent colors! 
unique_color_vec m_evaluated_colors; unique_color_vec m_temp_unique_colors; - struct { + struct + { uint64 low, high; } m_rDist[32], m_gDist[64], m_bDist[32]; @@ -239,7 +262,10 @@ namespace crnlib crnlib::vector m_low_coords; crnlib::vector m_high_coords; - enum { cMaxPrevResults = 4 }; + enum + { + cMaxPrevResults = 4 + }; dxt1_solution_coordinates m_prev_results[cMaxPrevResults]; uint m_num_prev_results; @@ -248,7 +274,7 @@ namespace crnlib struct potential_solution { - potential_solution(): + potential_solution() : m_coords(), m_error(cUINT64_MAX), m_alpha_block(false) @@ -311,7 +337,7 @@ namespace crnlib void compute_vectors(const vec3F& perceptual_weights); void return_solution(); void try_combinatorial_encoding(); - void compute_endpoint_component_errors(uint comp_index, uint64(&error)[4][256], uint64(&best_remaining_error)[4]); + void compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]); void optimize_endpoint_comps(); void optimize_endpoints(vec3F& low_color, vec3F& high_color); bool try_alpha_as_black_optimization(); @@ -324,4 +350,4 @@ namespace crnlib inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt5a.cpp b/crnlib/crn_dxt5a.cpp index 5267570..a288136 100644 --- a/crnlib/crn_dxt5a.cpp +++ b/crnlib/crn_dxt5a.cpp @@ -1,5 +1,26 @@ -// File: crn_dxt5a.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_dxt5a.h" #include "crn_ryg_dxt.hpp" @@ -8,9 +29,10 @@ namespace crnlib { - dxt5_endpoint_optimizer::dxt5_endpoint_optimizer(): + dxt5_endpoint_optimizer::dxt5_endpoint_optimizer() : m_pParams(nullptr), - m_pResults(nullptr) { + m_pResults(nullptr) + { m_unique_values.reserve(16); m_unique_value_weights.reserve(16); } @@ -151,7 +173,7 @@ namespace crnlib } } } - else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) + else if (m_pResults->m_first_endpoint <= m_pResults->m_second_endpoint) { std::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); m_pResults->m_reordered = true; @@ -239,4 +261,4 @@ namespace crnlib } } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt5a.h b/crnlib/crn_dxt5a.h index 49abc3b..06d67b1 100644 --- a/crnlib/crn_dxt5a.h +++ b/crnlib/crn_dxt5a.h @@ -1,5 +1,26 @@ -// File: crn_dxt5a.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_dxt.h" @@ -14,7 +35,7 @@ namespace crnlib struct params { - params(): + params() : m_block_index(0), m_pPixels(nullptr), m_num_pixels(0), @@ -44,7 +65,7 @@ namespace crnlib uint8 m_first_endpoint; uint8 m_second_endpoint; - uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha + uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha bool m_reordered; }; @@ -66,4 +87,4 @@ namespace crnlib void evaluate_solution(uint low_endpoint, uint high_endpoint); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_endpoint_refiner.cpp b/crnlib/crn_dxt_endpoint_refiner.cpp index e441942..1b95195 100644 --- a/crnlib/crn_dxt_endpoint_refiner.cpp +++ b/crnlib/crn_dxt_endpoint_refiner.cpp @@ -1,5 +1,25 @@ -// File: crn_dxt_endpoint_refiner.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_dxt_endpoint_refiner.h" @@ -140,7 +160,8 @@ namespace crnlib uint16 solutions[529]; uint solutions_count = 0; - solutions[solutions_count++] = L0 == H0 ? H0 ? H0 - 1 << 8 | L0 : 1 : L0 > H0 ? H0 << 8 | L0 : L0 << 8 | H0; + solutions[solutions_count++] = L0 == H0 ? H0 ? H0 - 1 << 8 | L0 : 1 : L0 > H0 ? H0 << 8 | L0 + : L0 << 8 | H0; uint8 minL = L0 <= 11 ? 0 : L0 - 11, maxL = L0 >= 244 ? 255 : L0 + 11; uint8 minH = H0 <= 11 ? 0 : H0 - 11, maxH = H0 >= 244 ? 255 : H0 + 11; for (uint16 L = minL; L <= maxL; L++) @@ -149,7 +170,8 @@ namespace crnlib { if ((maxH < L || L <= H || H < minL) && (L != L0 || H != H0) && (L != H0 || H != L0)) { - solutions[solutions_count++] = L == H ? H ? H - 1 << 8 | L : 1 : L > H ? H << 8 | L : L << 8 | H; + solutions[solutions_count++] = L == H ? H ? H - 1 << 8 | L : 1 : L > H ? H << 8 | L + : L << 8 | H; } } } @@ -242,8 +264,16 @@ namespace crnlib uint16 H = solutions[i] >> 16; if (L == H) { - L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 : ~L & 0x7E0 ? 0x20 : 0 : !L ? 0x1 : 0; - H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 0x800 : H & 0x7E0 ? 0x20 : 0 : H == 0xFFFF ? 0x1 : 0; + L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 + : ~L & 0x7E0 ? 0x20 + : 0 + : !L ? 0x1 + : 0; + H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 
0x800 + : H & 0x7E0 ? 0x20 + : 0 + : H == 0xFFFF ? 0x1 + : 0; } color_quad_u8 block_colors[4]; dxt1_block::get_block_colors4(block_colors, L, H); @@ -270,4 +300,4 @@ namespace crnlib } } } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_endpoint_refiner.h b/crnlib/crn_dxt_endpoint_refiner.h index fc939bd..7308120 100644 --- a/crnlib/crn_dxt_endpoint_refiner.h +++ b/crnlib/crn_dxt_endpoint_refiner.h @@ -1,5 +1,25 @@ -// File: crn_dxt_endpoint_refiner.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -13,8 +33,9 @@ namespace crnlib public: dxt_endpoint_refiner(); - struct params { - params(): + struct params + { + params() : m_block_index(0), m_pPixels(nullptr), m_num_pixels(0), @@ -59,4 +80,4 @@ namespace crnlib void optimize_dxt1(vec3F low_color, vec3F high_color); void optimize_dxt5(vec3F low_color, vec3F high_color); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_fast.cpp b/crnlib/crn_dxt_fast.cpp index 496c2d9..31acfb5 100644 --- a/crnlib/crn_dxt_fast.cpp +++ b/crnlib/crn_dxt_fast.cpp @@ -1,5 +1,26 @@ -// File: crn_dxt_fast.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // Parts of this module are derived from RYG's excellent public domain DXTx compressor. 
#include "crn_core.h" @@ -49,7 +70,7 @@ namespace crnlib unpack_color(pColors[0], c0); unpack_color(pColors[1], c1); -#if 0 +#if 0 lerp_color(pColors[2], pColors[0], pColors[1], 0x55); lerp_color(pColors[3], pColors[0], pColors[1], 0xAA); #else @@ -114,7 +135,8 @@ namespace crnlib return status; } - static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) { + static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) + { int min[3], max[3]; for (uint ch = 0; ch < 3; ch++) @@ -189,7 +211,7 @@ namespace crnlib double magn = math::maximum(math::maximum(fabs(vfr), fabs(vfg)), fabs(vfb)); int v_r, v_g, v_b; - if (magn < 4.0f) // too small, default to luminance + if (magn < 4.0f) // too small, default to luminance { v_r = 148; v_g = 300; @@ -329,7 +351,8 @@ namespace crnlib return false; } - static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) { + static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) + { color_quad_u8 color[4]; eval_colors(color, min16, max16); @@ -523,10 +546,10 @@ namespace crnlib trial_optimized = true; } - } // t + } // t - } // e - } // axis + } // e + } // axis if (!trial_optimized) { @@ -535,7 +558,7 @@ namespace crnlib optimized = true; - } // for ( ; ; ) + } // for ( ; ; ) return optimized; } @@ -608,7 +631,10 @@ namespace crnlib uint best_l = low16; uint best_h = high16; - enum { cMaxHash = 4 }; + enum + { + cMaxHash = 4 + }; uint64 hash[cMaxHash]; for (uint i = 0; i < cMaxHash; i++) { @@ -678,7 +704,7 @@ namespace crnlib break; } - } // iter + } // iter //uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); //if (end_error > orig_error) DebugBreak(); @@ -967,5 +993,5 @@ namespace crnlib lo = lo_color; hi = hi_color; } - } // namespace dxt_fast -} // namespace 
crnlib + } // namespace dxt_fast +} // namespace crnlib diff --git a/crnlib/crn_dxt_fast.h b/crnlib/crn_dxt_fast.h index 46e972b..65d588d 100644 --- a/crnlib/crn_dxt_fast.h +++ b/crnlib/crn_dxt_fast.h @@ -1,5 +1,25 @@ -// File: crn_dxt_fast.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -18,5 +38,5 @@ namespace crnlib CRN_EXPORT void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index); CRN_EXPORT void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi); - } // namespace dxt_fast -} // namespace crnlib + } // namespace dxt_fast +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 60fdd8a..1d24f4d 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -1,5 +1,26 @@ -// File: crn_dxt_hc.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_dxt_hc.h" #include "crn_image_utils.h" @@ -7,1310 +28,1722 @@ #include "crn_dxt_fast.h" #include "crn_etc.h" -namespace crnlib { - -typedef vec<6, float> vec6F; -typedef vec<16, float> vec16F; - -static uint8 g_tile_map[8][2][2] = { - {{ 0, 0 }, { 0, 0 }}, - {{ 0, 0 }, { 1, 1 }}, - {{ 0, 1 }, { 0, 1 }}, - {{ 0, 0 }, { 1, 2 }}, - {{ 1, 2 }, { 0, 0 }}, - {{ 0, 1 }, { 0, 2 }}, - {{ 1, 0 }, { 2, 0 }}, - {{ 0, 1 }, { 2, 3 }}, -}; - -dxt_hc::dxt_hc() - : m_num_blocks(0), - m_has_color_blocks(false), - m_has_etc_color_blocks(false), - m_has_subblocks(false), - m_num_alpha_blocks(0), - m_main_thread_id(crn_get_current_thread_id()), - m_canceled(false), - m_pTask_pool(nullptr), - m_prev_phase_index(-1), - m_prev_percentage_complete(-1) { -} - -dxt_hc::~dxt_hc() { -} - -void dxt_hc::clear() { - m_blocks = 0; - m_num_blocks = 0; - m_num_alpha_blocks = 0; - m_has_color_blocks = false; - - m_color_clusters.clear(); - m_alpha_clusters.clear(); - - m_canceled = false; - - m_prev_phase_index = -1; - 
m_prev_percentage_complete = -1; - - m_block_weights.clear(); - m_block_encodings.clear(); - for (uint c = 0; c < 3; c++) - m_block_selectors[c].clear(); - m_color_selectors.clear(); - m_alpha_selectors.clear(); - m_color_selectors_used.clear(); - m_alpha_selectors_used.clear(); - m_tile_indices.clear(); - m_endpoint_indices.clear(); - m_selector_indices.clear(); - m_tiles.clear(); - m_num_tiles = 0; -} - -bool dxt_hc::compress( - color_quad_u8 (*blocks)[16], - crnlib::vector& endpoint_indices, - crnlib::vector& selector_indices, - crnlib::vector& color_endpoints, - crnlib::vector& alpha_endpoints, - crnlib::vector& color_selectors, - crnlib::vector& alpha_selectors, - const params& p - ) { - clear(); - m_has_etc_color_blocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A || p.m_format == cETC1S || p.m_format == cETC2AS; - m_has_subblocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A; - m_has_color_blocks = p.m_format == cDXT1 || p.m_format == cDXT5 || m_has_etc_color_blocks; - m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A || p.m_format == cETC2A || p.m_format == cETC2AS ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 
2 : 0; - if (!m_has_color_blocks && !m_num_alpha_blocks) - return false; - m_blocks = blocks; - m_main_thread_id = crn_get_current_thread_id(); - m_pTask_pool = p.m_pTask_pool; - m_params = p; - - uint tile_derating[8] = {0, 1, 1, 2, 2, 2, 2, 3}; - for (uint level = 0; level < p.m_num_levels; level++) { - float adaptive_tile_color_psnr_derating = p.m_adaptive_tile_color_psnr_derating; - if (level && adaptive_tile_color_psnr_derating > .25f) - adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level))); - for (uint e = 0; e < 8; e++) - m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, tile_derating[e] / 3.0f); - } - for (uint e = 0; e < 8; e++) - m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, tile_derating[e] / 3.0f); - for (uint i = 0; i < 256; i++) - m_uint8_to_float[i] = i * 1.0f / 255.0f; - - m_num_blocks = m_params.m_num_blocks; - m_block_weights.resize(m_num_blocks); - m_block_encodings.resize(m_num_blocks); - for (uint c = 0; c < 3; c++) - m_block_selectors[c].resize(m_num_blocks); - m_tile_indices.resize(m_num_blocks); - m_endpoint_indices.resize(m_num_blocks); - m_selector_indices.resize(m_num_blocks); - m_tiles.resize(m_num_blocks); - - for (uint level = 0; level < p.m_num_levels; level++) { - float weight = p.m_levels[level].m_weight; - for (uint b = p.m_levels[level].m_first_block, bEnd = b + p.m_levels[level].m_num_blocks; b < bEnd; b++) - m_block_weights[b] = weight; - } - - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, m_has_subblocks ? 
&dxt_hc::determine_tiles_task_etc : &dxt_hc::determine_tiles_task, i); - m_pTask_pool->join(); - - m_num_tiles = 0; - for (uint t = 0; t < m_tiles.size(); t++) { - if (m_tiles[t].pixels.size()) - m_num_tiles++; - } - - if (m_has_color_blocks) - determine_color_endpoints(); - - if (m_num_alpha_blocks) - determine_alpha_endpoints(); - - if (m_has_color_blocks) - create_color_selector_codebook(); - - if (m_num_alpha_blocks) - create_alpha_selector_codebook(); - - color_endpoints.reserve(color_endpoints.size() + m_color_clusters.size()); - crnlib::vector color_endpoints_remap(m_color_clusters.size()); - hash_map color_endpoints_map; - for (uint i = 0; i < m_color_clusters.size(); i++) { - if (m_color_clusters[i].pixels.size()) { - uint32 endpoint = m_has_etc_color_blocks ? m_color_clusters[i].first_endpoint : - dxt1_block::pack_endpoints(m_color_clusters[i].first_endpoint, m_color_clusters[i].second_endpoint); - hash_map::insert_result insert_result = color_endpoints_map.insert(endpoint, color_endpoints.size()); - if (insert_result.second) { - color_endpoints_remap[i] = color_endpoints.size(); - color_endpoints.push_back(endpoint); - } else { - color_endpoints_remap[i] = insert_result.first->second; - } - } - } - - alpha_endpoints.reserve(alpha_endpoints.size() + m_alpha_clusters.size()); - crnlib::vector alpha_endpoints_remap(m_alpha_clusters.size()); - hash_map alpha_endpoints_map; - for (uint i = 0; i < m_alpha_clusters.size(); i++) { - if (m_alpha_clusters[i].pixels.size()) { - uint32 endpoint = dxt5_block::pack_endpoints(m_alpha_clusters[i].first_endpoint, m_alpha_clusters[i].second_endpoint); - hash_map::insert_result insert_result = alpha_endpoints_map.insert(endpoint, alpha_endpoints.size()); - if (insert_result.second) { - alpha_endpoints_remap[i] = alpha_endpoints.size(); - alpha_endpoints.push_back(endpoint); - } else { - alpha_endpoints_remap[i] = insert_result.first->second; - } - } - } - - color_selectors.reserve(color_selectors.size() + 
m_color_selectors.size()); - crnlib::vector color_selectors_remap(m_color_selectors.size()); - hash_map color_selectors_map; - for (uint i = 0; i < m_color_selectors.size(); i++) { - if (m_color_selectors_used[i]) { - hash_map::insert_result insert_result = color_selectors_map.insert(m_color_selectors[i], color_selectors.size()); - if (insert_result.second) { - color_selectors_remap[i] = color_selectors.size(); - color_selectors.push_back(m_color_selectors[i]); - } else { - color_selectors_remap[i] = insert_result.first->second; - } - } - } - - alpha_selectors.reserve(alpha_selectors.size() + m_alpha_selectors.size()); - crnlib::vector alpha_selectors_remap(m_alpha_selectors.size()); - hash_map alpha_selectors_map; - for (uint i = 0; i < m_alpha_selectors.size(); i++) { - if (m_alpha_selectors_used[i]) { - hash_map::insert_result insert_result = alpha_selectors_map.insert(m_alpha_selectors[i], alpha_selectors.size()); - if (insert_result.second) { - alpha_selectors_remap[i] = alpha_selectors.size(); - alpha_selectors.push_back(m_alpha_selectors[i]); - } else { - alpha_selectors_remap[i] = insert_result.first->second; - } - } - } - - endpoint_indices.resize(m_num_blocks); - selector_indices.resize(m_num_blocks); - for (uint level = 0; level < p.m_num_levels; level++) { - uint first_block = p.m_levels[level].m_first_block; - uint end_block = first_block + p.m_levels[level].m_num_blocks; - uint block_width = p.m_levels[level].m_block_width; - for (uint by = 0, b = first_block; b < end_block; by++) { - for (uint bx = 0; bx < block_width; bx++, b++) { - bool top_match = by != 0; - bool left_match = top_match || bx; - bool diag_match = m_has_subblocks && top_match && bx; - for (uint c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { - uint16 endpoint_index = (c ? 
alpha_endpoints_remap : color_endpoints_remap)[m_endpoint_indices[b].component[c]]; - left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c]; - top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c]; - diag_match = diag_match && endpoint_index == endpoint_indices[b - block_width - 1].component[c]; - endpoint_indices[b].component[c] = endpoint_index; - uint16 selector_index = (c ? alpha_selectors_remap : color_selectors_remap)[m_selector_indices[b].component[c]]; - selector_indices[b].component[c] = selector_index; - } - endpoint_indices[b].reference = m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : left_match ? 1 : top_match ? 2 : diag_match ? 3 : 0; - } +namespace crnlib +{ + typedef vec<6, float> vec6F; + typedef vec<16, float> vec16F; + + static uint8 g_tile_map[8][2][2] = { + { { 0, 0 }, { 0, 0 } }, + { { 0, 0 }, { 1, 1 } }, + { { 0, 1 }, { 0, 1 } }, + { { 0, 0 }, { 1, 2 } }, + { { 1, 2 }, { 0, 0 } }, + { { 0, 1 }, { 0, 2 } }, + { { 1, 0 }, { 2, 0 } }, + { { 0, 1 }, { 2, 3 } }, + }; + + dxt_hc::dxt_hc() : + m_num_blocks(0), + m_has_color_blocks(false), + m_has_etc_color_blocks(false), + m_has_subblocks(false), + m_num_alpha_blocks(0), + m_main_thread_id(crn_get_current_thread_id()), + m_canceled(false), + m_pTask_pool(nullptr), + m_prev_phase_index(-1), + m_prev_percentage_complete(-1) + { } - } - - m_pTask_pool = nullptr; - return true; -} - -vec6F dxt_hc::palettize_color(color_quad_u8* pixels, uint pixels_count) { - uint color[64]; - for (uint i = 0; i < pixels_count; i++) - color[i] = pixels[i][0] << 16 | pixels[i][1] << 8 | pixels[i][2]; - std::sort(color, color + pixels_count); - vec3F vectors[64]; - uint weights[64]; - uint size = 0; - for (uint i = 0; i < pixels_count; i++) { - if (!i || color[i] != color[i - 1]) { - vectors[size][0] = m_params.m_perceptual ? 
m_uint8_to_float[color[i] >> 16] * 0.5f : m_uint8_to_float[color[i] >> 16]; - vectors[size][1] = m_uint8_to_float[color[i] >> 8 & 0xFF]; - vectors[size][2] = m_params.m_perceptual ? m_uint8_to_float[color[i] & 0xFF] * 0.25f : m_uint8_to_float[color[i] & 0xFF]; - weights[size] = 1; - size++; - } else { - weights[size - 1]++; - } - } - vec3F result[2]; - split_vectors(vectors, weights, size, result); - if (result[0].length() > result[1].length()) - std::swap(result[0], result[1]); - return *(vec6F*)result; -} - -vec2F dxt_hc::palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index) { - uint8 alpha[64]; - for (uint p = 0; p < pixels_count; p++) - alpha[p] = pixels[p][comp_index]; - std::sort(alpha, alpha + pixels_count); - vec1F vectors[64]; - uint weights[64]; - uint size = 0; - for (uint i = 0; i < pixels_count; i++) { - if (!i || alpha[i] != alpha[i - 1]) { - vectors[size][0] = m_uint8_to_float[alpha[i]]; - weights[size] = 1; - size++; - } else { - weights[size - 1]++; + + dxt_hc::~dxt_hc() + { } - } - vec1F result[2]; - split_vectors(vectors, weights, size, result); - if (result[0] > result[1]) - std::swap(result[0], result[1]); - return *(vec2F*)result; -} - -void dxt_hc::determine_tiles_task(uint64 data, void*) { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - uint offsets[9] = {0, 16, 32, 48, 0, 32, 64, 96, 64}; - uint8 tiles[8][4] = {{8}, {6, 7}, {4, 5}, {6, 1, 3}, {7, 0, 2}, {4, 2, 3}, {5, 0, 1}, {0, 2, 1, 3}}; - color_quad_u8 tilePixels[128]; - uint8 selectors[64]; - uint tile_error[3][9]; - uint total_error[3][8]; - - etc1_optimizer optimizer; - etc1_optimizer::params params; - params.m_use_color4 = false; - params.m_constrain_against_base_color5 = false; - etc1_optimizer::results results; - results.m_pSelectors = selectors; - int scan[] = {-1, 0, 1}; - int refine[] = {-3, -2, 2, 3}; - - for (uint level = 0; level < m_params.m_num_levels; level++) { - float weight = m_params.m_levels[level].m_weight; - uint width = 
m_params.m_levels[level].m_block_width; - uint height = m_params.m_levels[level].m_num_blocks / width; - uint faceHeight = height / m_params.m_num_faces; - uint h = height * data / num_tasks & ~1; - uint hEnd = height * (data + 1) / num_tasks & ~1; - uint hFace = h % faceHeight; - uint b = m_params.m_levels[level].m_first_block + h * width; - - for (; h < hEnd; h += 2, hFace += 2, b += width) { - uint tile_offset = b; - uint tile_offset_delta = 4; - if (hFace == faceHeight) { - hFace = 0; - } else if (hFace & 2) { - tile_offset_delta = -4; - tile_offset += (width << 1) + tile_offset_delta; - } - for (uint bNext = b + width; b < bNext; b += 2, tile_offset += tile_offset_delta) { - for (int t = 0; t < 64; t += 16) - memcpy(tilePixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 1 : 0)], 64); - for (int t = 0; t < 64; t += 4) - memcpy(tilePixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16); - - for (uint t = 0; t < 9; t++) { - color_quad_u8* pixels = tilePixels + offsets[t]; - uint size = 16 << (t >> 2); - if (m_has_etc_color_blocks) { - params.m_pSrc_pixels = pixels; - params.m_num_src_pixels = results.m_n = size; - optimizer.init(params, results); - params.m_pScan_deltas = scan; - params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); - optimizer.compute(); - if (results.m_error > 375 * params.m_num_src_pixels) { - params.m_pScan_deltas = refine; - params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); - optimizer.compute(); - } - tile_error[cColor][t] = results.m_error; - } else if (m_has_color_blocks) { - uint low16, high16; - dxt_fast::compress_color_block(size, pixels, low16, high16, selectors); - color_quad_u8 block_colors[4]; - dxt1_block::get_block_colors4(block_colors, low16, high16); - uint error = 0; - for (uint p = 0; p < size; p++) { - for (uint8 c = 0; c < 3; c++) { - uint delta = pixels[p][c] - block_colors[selectors[p]][c]; - error += delta * delta; - } - } - tile_error[cColor][t] = error; - } - for 
(uint a = 0; a < m_num_alpha_blocks; a++) { - uint8 component = m_params.m_alpha_component_indices[a]; - dxt5_endpoint_optimizer optimizer; - dxt5_endpoint_optimizer::params params; - dxt5_endpoint_optimizer::results results; - params.m_pPixels = pixels; - params.m_num_pixels = size; - params.m_comp_index = component; - params.m_use_both_block_types = false; - params.m_quality = cCRNDXTQualityNormal; - results.m_pSelectors = selectors; - optimizer.compute(params, results); - uint block_values[cDXT5SelectorValues]; - dxt5_block::get_block_values8(block_values, results.m_first_endpoint, results.m_second_endpoint); - tile_error[cAlpha0 + a][t] = results.m_error; - } - } - - for (uint8 c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { - for (uint8 e = 0; e < 8; e++) { - total_error[c][e] = 0; - for (uint8 t = 0, s = e + 1; s; s >>= 1, t++) - total_error[c][e] += tile_error[c][tiles[e][t]]; - } - } - - float best_quality = 0.0f; - uint best_encoding = 0; - for (uint e = 0; e < 8; e++) { - float quality = 0; - if (m_has_color_blocks) { - double peakSNR = total_error[cColor][e] ? log10(255.0f / sqrt(total_error[cColor][e] / 192.0)) * 20.0f : 999999.0f; - quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f); - if (m_num_alpha_blocks) - quality *= m_params.m_adaptive_tile_color_alpha_weighting_ratio; - } - for (uint a = 0; a < m_num_alpha_blocks; a++) { - double peakSNR = total_error[cAlpha0 + a][e] ? 
log10(255.0f / sqrt(total_error[cAlpha0 + a][e] / 64.0)) * 20.0f : 999999.0f; - quality += (float)math::maximum(peakSNR - m_alpha_derating[e], 0.0f); - } - if (quality > best_quality) { - best_quality = quality; - best_encoding = e; - } - } - - for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) { - tile_details& tile = m_tiles[tile_offset | tile_index]; - uint t = tiles[best_encoding][tile_index]; - tile.pixels.append(tilePixels + offsets[t], 16 << (t >> 2)); - tile.weight = weight; - if (m_has_color_blocks) - tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); - for (uint a = 0; a < m_num_alpha_blocks; a++) - tile.alpha_endpoints[a] = palettize_alpha(tile.pixels.get_ptr(), tile.pixels.size(), m_params.m_alpha_component_indices[a]); - } - - for (uint by = 0; by < 2; by++) { - for (uint bx = 0; bx < 2; bx++) { - m_block_encodings[b + (by ? width : 0) + bx] = best_encoding; - m_tile_indices[b + (by ? width : 0) + bx] = tile_offset | g_tile_map[best_encoding][by][bx]; - } - } - - } + + void dxt_hc::clear() + { + m_blocks = 0; + m_num_blocks = 0; + m_num_alpha_blocks = 0; + m_has_color_blocks = false; + + m_color_clusters.clear(); + m_alpha_clusters.clear(); + + m_canceled = false; + + m_prev_phase_index = -1; + m_prev_percentage_complete = -1; + + m_block_weights.clear(); + m_block_encodings.clear(); + for (uint c = 0; c < 3; c++) + { + m_block_selectors[c].clear(); + } + m_color_selectors.clear(); + m_alpha_selectors.clear(); + m_color_selectors_used.clear(); + m_alpha_selectors_used.clear(); + m_tile_indices.clear(); + m_endpoint_indices.clear(); + m_selector_indices.clear(); + m_tiles.clear(); + m_num_tiles = 0; } - } -} - -void dxt_hc::determine_tiles_task_etc(uint64 data, void*) { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - uint offsets[5] = {0, 8, 16, 24, 16}; - uint8 tiles[3][2] = {{4}, {2, 3}, {0, 1}}; - uint8 tile_map[3][2] = {{ 0, 0 }, { 0, 1 }, { 0, 1 }}; - color_quad_u8 tilePixels[32]; - 
uint8 selectors[32]; - uint tile_error[5]; - uint total_error[3]; - - etc1_optimizer optimizer; - etc1_optimizer::params params; - params.m_use_color4 = false; - params.m_constrain_against_base_color5 = false; - etc1_optimizer::results results; - results.m_pSelectors = selectors; - int scan[] = {-1, 0, 1}; - int refine[] = {-3, -2, 2, 3}; - - for (uint level = 0; level < m_params.m_num_levels; level++) { - float weight = m_params.m_levels[level].m_weight; - uint b = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * data / num_tasks) & ~1; - uint bEnd = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * (data + 1) / num_tasks) & ~1; - for (; b < bEnd; b += 2) { - for (uint p = 0; p < 16; p++) - tilePixels[p] = m_blocks[b >> 1][(p << 2 & 12) | p >> 2]; - memcpy(tilePixels + 16, m_blocks[b >> 1], 64); - for (uint t = 0; t < 5; t++) { - params.m_pSrc_pixels = tilePixels + offsets[t]; - params.m_num_src_pixels = results.m_n = 8 << (t >> 2); - optimizer.init(params, results); - params.m_pScan_deltas = scan; - params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); - optimizer.compute(); - if (results.m_error > 375 * params.m_num_src_pixels) { - params.m_pScan_deltas = refine; - params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); - optimizer.compute(); - } - tile_error[t] = results.m_error; - } - - for (uint8 e = 0; e < 3; e++) { - total_error[e] = 0; - for (uint8 t = 0, s = e + 1; s; s >>= 1, t++) - total_error[e] += tile_error[tiles[e][t]]; - } - - float best_quality = 0.0f; - uint best_encoding = 0; - for (uint e = 0; e < 3; e++) { - float quality = 0; - double peakSNR = total_error[e] ? log10(255.0f / sqrt(total_error[e] / 48.0)) * 20.0f : 999999.0f; - quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f); - if (quality > best_quality) { - best_quality = quality; - best_encoding = e; - } - } - - vec2F alpha_endpoints = m_num_alpha_blocks ? 
palettize_alpha(tilePixels, 16, 3) : vec2F(cClear); - for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) { - tile_details& tile = m_tiles[b | tile_index]; - uint t = tiles[best_encoding][tile_index]; - tile.pixels.append(tilePixels + offsets[t], 8 << (t >> 2)); - tile.weight = weight; - tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); + + bool dxt_hc::compress( + color_quad_u8 (*blocks)[16], + crnlib::vector& endpoint_indices, + crnlib::vector& selector_indices, + crnlib::vector& color_endpoints, + crnlib::vector& alpha_endpoints, + crnlib::vector& color_selectors, + crnlib::vector& alpha_selectors, + const params& p) + { + clear(); + m_has_etc_color_blocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A || p.m_format == cETC1S || p.m_format == cETC2AS; + m_has_subblocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A; + m_has_color_blocks = p.m_format == cDXT1 || p.m_format == cDXT5 || m_has_etc_color_blocks; + m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A || p.m_format == cETC2A || p.m_format == cETC2AS ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 
2 + : 0; + if (!m_has_color_blocks && !m_num_alpha_blocks) + { + return false; + } + m_blocks = blocks; + m_main_thread_id = crn_get_current_thread_id(); + m_pTask_pool = p.m_pTask_pool; + m_params = p; + + uint tile_derating[8] = { 0, 1, 1, 2, 2, 2, 2, 3 }; + for (uint level = 0; level < p.m_num_levels; level++) + { + float adaptive_tile_color_psnr_derating = p.m_adaptive_tile_color_psnr_derating; + if (level && adaptive_tile_color_psnr_derating > .25f) + { + adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level))); + } + for (uint e = 0; e < 8; e++) + { + m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, tile_derating[e] / 3.0f); + } + } + for (uint e = 0; e < 8; e++) + { + m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, tile_derating[e] / 3.0f); + } + for (uint i = 0; i < 256; i++) + { + m_uint8_to_float[i] = i * 1.0f / 255.0f; + } + + m_num_blocks = m_params.m_num_blocks; + m_block_weights.resize(m_num_blocks); + m_block_encodings.resize(m_num_blocks); + for (uint c = 0; c < 3; c++) + { + m_block_selectors[c].resize(m_num_blocks); + } + m_tile_indices.resize(m_num_blocks); + m_endpoint_indices.resize(m_num_blocks); + m_selector_indices.resize(m_num_blocks); + m_tiles.resize(m_num_blocks); + + for (uint level = 0; level < p.m_num_levels; level++) + { + float weight = p.m_levels[level].m_weight; + for (uint b = p.m_levels[level].m_first_block, bEnd = b + p.m_levels[level].m_num_blocks; b < bEnd; b++) + { + m_block_weights[b] = weight; + } + } + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, m_has_subblocks ? 
&dxt_hc::determine_tiles_task_etc : &dxt_hc::determine_tiles_task, i); + } + m_pTask_pool->join(); + + m_num_tiles = 0; + for (uint t = 0; t < m_tiles.size(); t++) + { + if (m_tiles[t].pixels.size()) + { + m_num_tiles++; + } + } + + if (m_has_color_blocks) + { + determine_color_endpoints(); + } + if (m_num_alpha_blocks) - tile.alpha_endpoints[0] = alpha_endpoints; - } - - for (uint bx = 0; bx < 2; bx++) { - m_block_encodings[b | bx] = best_encoding; - m_tile_indices[b | bx] = b | tile_map[best_encoding][bx]; - m_endpoint_indices[b | bx].reference = bx ? best_encoding : 0; - } - if (best_encoding >> 1) - memcpy(m_blocks[b >> 1], tilePixels, 64); - } - } -} - -void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void*) { - const uint num_tasks = m_pTask_pool->get_num_threads() + 1; - dxt1_endpoint_optimizer optimizer; - dxt_endpoint_refiner refiner; - crnlib::vector selectors; - - for (uint cluster_index = (uint)data; cluster_index < m_color_clusters.size(); cluster_index += num_tasks) { - color_cluster& cluster = m_color_clusters[cluster_index]; - if (cluster.pixels.empty()) - continue; - - dxt1_endpoint_optimizer::params params; - params.m_block_index = cluster_index; - params.m_pPixels = cluster.pixels.get_ptr(); - params.m_num_pixels = cluster.pixels.size(); - params.m_pixels_have_alpha = false; - params.m_use_alpha_blocks = false; - params.m_perceptual = m_params.m_perceptual; - params.m_quality = cCRNDXTQualityUber; - params.m_endpoint_caching = false; - - dxt1_endpoint_optimizer::results results; - selectors.resize(params.m_num_pixels); - results.m_pSelectors = selectors.get_ptr(); - - optimizer.compute(params, results); - cluster.first_endpoint = results.m_low_color; - cluster.second_endpoint = results.m_high_color; - color_quad_u8 block_values[4], color_values[4]; - dxt1_block::get_block_colors4(block_values, cluster.first_endpoint, cluster.second_endpoint); - for (uint i = 0; i < 4; i++) - color_values[i] = cluster.color_values[i] = 
block_values[g_dxt1_from_linear[i]]; - for (uint c = 0; results.m_alternate_rounding && c < 3; c++) { - color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + 1) / 3; - color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + 1) / 3; - } + { + determine_alpha_endpoints(); + } - uint endpoint_weight = color::color_distance(m_params.m_perceptual, color_values[0], color_values[3], false) / 2000; - float encoding_weight[8]; - for (uint i = 0; i < 8; i++) - encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f); - - crnlib::vector& blocks = cluster.blocks[cColor]; - for (uint i = 0; i < blocks.size(); i++) { - uint b = blocks[i]; - uint weight = (uint)(math::clamp(endpoint_weight * m_block_weights[b], 1, 2048) * encoding_weight[m_block_encodings[b]]); - uint32 selector = 0; - for (uint p = 0; p < 16; p++) { - uint error_best = cUINT32_MAX; - uint8 s_best = 0; - for (uint8 t = 0; t < 4; t++) { - uint8 s = results.m_reordered ? 3 - g_dxt1_to_linear[t] : g_dxt1_to_linear[t]; - uint error = color::color_distance(m_params.m_perceptual, (color_quad_u8&)m_blocks[b][p], color_values[s], false); - if (error < error_best) { - s_best = s; - error_best = error; - } - } - selector = selector << 2 | s_best; - } - m_block_selectors[cColor][b] = (uint64)selector << 32 | weight; - } + if (m_has_color_blocks) + { + create_color_selector_codebook(); + } - dxt_endpoint_refiner::params refinerParams; - dxt_endpoint_refiner::results refinerResults; - refinerParams.m_perceptual = m_params.m_perceptual; - refinerParams.m_pSelectors = selectors.get_ptr(); - refinerParams.m_pPixels = cluster.pixels.get_ptr(); - refinerParams.m_num_pixels = cluster.pixels.size(); - refinerParams.m_dxt1_selectors = true; - refinerParams.m_error_to_beat = results.m_error; - refinerParams.m_block_index = cluster_index; - if (refiner.refine(refinerParams, refinerResults)) { - cluster.first_endpoint = refinerResults.m_low_color; - cluster.second_endpoint = 
refinerResults.m_high_color; - } - } -} - -void dxt_hc::determine_color_endpoint_codebook_task_etc(uint64 data, void*) { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - uint8 delta[8][2] = { {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183} }; - int scan[] = {-1, 0, 1}; - int refine[] = {-3, -2, 2, 3}; - for (uint iCluster = m_color_clusters.size() * data / num_tasks, iEnd = m_color_clusters.size() * (data + 1) / num_tasks; iCluster < iEnd; iCluster++) { - color_cluster& cluster = m_color_clusters[iCluster]; - if (cluster.pixels.size()) { - etc1_optimizer optimizer; - etc1_optimizer::params params; - params.m_use_color4 = false; - params.m_constrain_against_base_color5 = false; - etc1_optimizer::results results; - crnlib::vector selectors(cluster.pixels.size()); - params.m_pSrc_pixels = cluster.pixels.get_ptr(); - results.m_pSelectors = selectors.get_ptr(); - results.m_n = params.m_num_src_pixels = cluster.pixels.size(); - optimizer.init(params, results); - params.m_pScan_deltas = scan; - params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); - optimizer.compute(); - if (results.m_error > 375 * params.m_num_src_pixels) { - params.m_pScan_deltas = refine; - params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); - optimizer.compute(); - } - color_quad_u8 endpoint; - for (int c = 0; c < 3; c++) - endpoint.c[c] = results.m_block_color_unscaled.c[c] << 3 | results.m_block_color_unscaled.c[c] >> 2; - endpoint.c[3] = results.m_block_inten_table; - cluster.first_endpoint = endpoint.m_u32; - for (uint8 d0 = delta[endpoint.c[3]][0], d1 = delta[endpoint.c[3]][1], c = 0; c < 3; c++) { - uint8 q = endpoint.c[c]; - cluster.color_values[0].c[c] = q <= d1 ? 0 : q - d1; - cluster.color_values[1].c[c] = q <= d0 ? 0 : q - d0; - cluster.color_values[2].c[c] = q >= 255 - d0 ? 255 : q + d0; - cluster.color_values[3].c[c] = q >= 255 - d1 ? 
255 : q + d1; - } - for (int t = 0; t < 4; t++) - cluster.color_values[t].c[3] = 0xFF; - float endpoint_weight = powf(math::minimum((cluster.color_values[3].get_luma() - cluster.color_values[0].get_luma()) / 100.0f, 1.0f), 2.7f); - - crnlib::vector& blocks = cluster.blocks[cColor]; - uint blockSize = m_has_subblocks ? 8 : 16; - for (uint i = 0; i < blocks.size(); i++) { - uint b = blocks[i]; - color_quad_u8* pixels = m_has_subblocks ? ((color_quad_u8(*)[8])m_blocks)[b] : m_blocks[b]; - uint weight = (uint)(math::clamp(0x8000 * endpoint_weight * m_block_weights[b] * (m_block_encodings[b] ? 0.972f : 1.0f), 1, 0xFFFF)); - uint32 selector = 0; - for (uint p = 0; p < blockSize; p++) { - uint error_best = cUINT32_MAX; - uint8 s_best = 0; - for (uint8 s = 0; s < 4; s++) { - uint error = color::color_distance(m_params.m_perceptual, pixels[p], cluster.color_values[s], false); - if (error < error_best) { - s_best = s; - error_best = error; - } - } - selector = selector << 2 | s_best; - } - m_block_selectors[cColor][b] = (uint64)selector << (!m_has_subblocks || (b & 1) ? 
32 : 48) | weight; - } - } - } -} - -void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) { - tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; - const crnlib::vector& codebook = vq->get_codebook(); - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { - if (m_tiles[t].pixels.size()) { - const vec6F& v = m_tiles[t].color_endpoint; - float node_dist = codebook[vq->get_node_index(v)].squared_distance(v); - float best_dist = math::cNearlyInfinite; - uint best_index = 0; - for (uint i = 0; i < codebook.size(); i++) { - const vec6F& c = codebook[i]; - float dist = 0; - float d0 = c[0] - v[0]; dist += d0 * d0; - float d1 = c[1] - v[1]; dist += d1 * d1; - if (dist > node_dist) - continue; - float d2 = c[2] - v[2]; dist += d2 * d2; - float d3 = c[3] - v[3]; dist += d3 * d3; - if (dist > node_dist) - continue; - float d4 = c[4] - v[4]; dist += d4 * d4; - float d5 = c[5] - v[5]; dist += d5 * d5; - if (dist < best_dist) { - best_dist = dist; - best_index = i; - if (best_dist == 0.0f) - break; - } - } - m_tiles[t].cluster_indices[cColor] = best_index; - } - } -} - -void dxt_hc::determine_color_endpoints() { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - crnlib::vector > endpoints; - for (uint t = 0; t < m_tiles.size(); t++) { - if (m_tiles[t].pixels.size()) - endpoints.push_back(std::make_pair(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight))); - } - - struct Node { - std::pair *p, *pEnd; - Node (std::pair* begin, std::pair* end) : p(begin), pEnd(end) {} - bool operator<(const Node& other) const { return *p > *other.p; } - static void sort_task(uint64 data, void* ptr) { std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); } - }; - - crnlib::vector nodes; - Node node(0, endpoints.get_ptr()); - for (uint i = 0; i < num_tasks; i++) { - node.p = node.pEnd; - node.pEnd = endpoints.get_ptr() + 
endpoints.size() * (i + 1) / num_tasks; - if (node.p != node.pEnd) - nodes.push_back(node); - } - - for (uint i = 0; i < nodes.size(); i++) - m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]); - m_pTask_pool->join(); - - std::priority_queue queue; - for (uint i = 0; i < nodes.size(); i++) - queue.push(nodes[i]); - - crnlib::vector vectors; - crnlib::vector weights; - vectors.reserve(endpoints.size()); - weights.reserve(endpoints.size()); - while (queue.size()) { - Node node = queue.top(); - std::pair* endpoint = node.p++; - queue.pop(); - if (node.p != node.pEnd) - queue.push(node); - if (!vectors.size() || endpoint->first != vectors.back()) { - vectors.push_back(endpoint->first); - weights.push_back(endpoint->second); - } else if (weights.back() > UINT_MAX - endpoint->second) { - weights.back() = UINT_MAX; - } else { - weights.back() += endpoint->second; - } - } - - tree_clusterizer vq; - vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_color_endpoint_codebook_size), true, m_pTask_pool); - m_color_clusters.resize(vq.get_codebook_size()); - - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_clusters_task, i, &vq); - m_pTask_pool->join(); - - for (uint t = 0; t < m_num_blocks; t++) { - if (m_tiles[t].pixels.size()) - m_color_clusters[m_tiles[t].cluster_indices[cColor]].pixels.append(m_tiles[t].pixels); - } - - for (uint b = 0; b < m_num_blocks; b++) { - uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColor]; - m_endpoint_indices[b].component[cColor] = cluster_index; - m_color_clusters[cluster_index].blocks[cColor].push_back(b); - if (m_has_subblocks && m_endpoint_indices[b].reference && cluster_index == m_endpoint_indices[b - 1].component[cColor]) { - if (m_endpoint_indices[b].reference >> 1) { - color_quad_u8 mirror[16]; - for (uint p = 0; p < 16; p++) - mirror[p] = m_blocks[b >> 1][(p << 2 & 
12) | p >> 2]; - memcpy(m_blocks[b >> 1], mirror, 64); - } - m_endpoint_indices[b].reference = 0; - } - } - - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, m_has_etc_color_blocks ? &dxt_hc::determine_color_endpoint_codebook_task_etc : &dxt_hc::determine_color_endpoint_codebook_task, i, nullptr); - m_pTask_pool->join(); -} - -void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void*) { - const uint num_tasks = m_pTask_pool->get_num_threads() + 1; - dxt5_endpoint_optimizer optimizer; - dxt_endpoint_refiner refiner; - crnlib::vector selectors; - - for (uint cluster_index = (uint)data; cluster_index < m_alpha_clusters.size(); cluster_index += num_tasks) { - alpha_cluster& cluster = m_alpha_clusters[cluster_index]; - if (cluster.pixels.empty()) - continue; - - dxt5_endpoint_optimizer::params params; - params.m_pPixels = cluster.pixels.get_ptr(); - params.m_num_pixels = cluster.pixels.size(); - params.m_comp_index = 0; - params.m_quality = cCRNDXTQualityUber; - params.m_use_both_block_types = false; - - dxt5_endpoint_optimizer::results results; - selectors.resize(params.m_num_pixels); - results.m_pSelectors = selectors.get_ptr(); - - optimizer.compute(params, results); - cluster.first_endpoint = results.m_first_endpoint; - cluster.second_endpoint = results.m_second_endpoint; - uint block_values[8], alpha_values[8]; - dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); - for (uint i = 0; i < 8; i++) - alpha_values[i] = cluster.alpha_values[i] = block_values[g_dxt5_from_linear[i]]; - int delta = cluster.first_endpoint - cluster.second_endpoint; - uint encoding_weight[8]; - for (uint endpoint_weight = math::clamp(delta * delta >> 3, 1, 2048), i = 0; i < 8; i++) - encoding_weight[i] = (uint)(endpoint_weight * math::lerp(1.15f, 1.0f, i / 7.0f)); - - if (m_has_etc_color_blocks) { - static const int stripped_modifier_table[2][8] = { - {-10, -7, -5, -2, 1, 4, 6, 9}, - 
{-10, -3, -2, -1, 0, 1, 2, 9} - }; - int base_codeword = (results.m_first_endpoint + results.m_second_endpoint + 1) >> 1; - int modifier_index = delta <= 6 ? 13 : 11; - int multiplier = delta <= 6 ? 1 : math::clamp((delta + 12) / 18, 1, 15); - const int* modifier = stripped_modifier_table[modifier_index == 11 ? 0 : 1]; - for (int i = 0; i < 8; i++) - alpha_values[i] = cluster.alpha_values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); - cluster.first_endpoint = base_codeword; - cluster.second_endpoint = multiplier << 4 | modifier_index; - } + if (m_num_alpha_blocks) + { + create_alpha_selector_codebook(); + } + + color_endpoints.reserve(color_endpoints.size() + m_color_clusters.size()); + crnlib::vector color_endpoints_remap(m_color_clusters.size()); + hash_map color_endpoints_map; + for (uint i = 0; i < m_color_clusters.size(); i++) + { + if (m_color_clusters[i].pixels.size()) + { + uint32 endpoint = m_has_etc_color_blocks ? m_color_clusters[i].first_endpoint : dxt1_block::pack_endpoints(m_color_clusters[i].first_endpoint, m_color_clusters[i].second_endpoint); + hash_map::insert_result insert_result = color_endpoints_map.insert(endpoint, color_endpoints.size()); + if (insert_result.second) + { + color_endpoints_remap[i] = color_endpoints.size(); + color_endpoints.push_back(endpoint); + } + else + { + color_endpoints_remap[i] = insert_result.first->second; + } + } + } + + alpha_endpoints.reserve(alpha_endpoints.size() + m_alpha_clusters.size()); + crnlib::vector alpha_endpoints_remap(m_alpha_clusters.size()); + hash_map alpha_endpoints_map; + for (uint i = 0; i < m_alpha_clusters.size(); i++) + { + if (m_alpha_clusters[i].pixels.size()) + { + uint32 endpoint = dxt5_block::pack_endpoints(m_alpha_clusters[i].first_endpoint, m_alpha_clusters[i].second_endpoint); + hash_map::insert_result insert_result = alpha_endpoints_map.insert(endpoint, alpha_endpoints.size()); + if (insert_result.second) + { + alpha_endpoints_remap[i] = alpha_endpoints.size(); 
+ alpha_endpoints.push_back(endpoint); + } + else + { + alpha_endpoints_remap[i] = insert_result.first->second; + } + } + } + + color_selectors.reserve(color_selectors.size() + m_color_selectors.size()); + crnlib::vector color_selectors_remap(m_color_selectors.size()); + hash_map color_selectors_map; + for (uint i = 0; i < m_color_selectors.size(); i++) + { + if (m_color_selectors_used[i]) + { + hash_map::insert_result insert_result = color_selectors_map.insert(m_color_selectors[i], color_selectors.size()); + if (insert_result.second) + { + color_selectors_remap[i] = color_selectors.size(); + color_selectors.push_back(m_color_selectors[i]); + } + else + { + color_selectors_remap[i] = insert_result.first->second; + } + } + } - for (uint a = 0; a < m_num_alpha_blocks; a++) { - uint component_index = m_params.m_alpha_component_indices[a]; - crnlib::vector& blocks = cluster.blocks[cAlpha0 + a]; - for (uint i = 0; i < blocks.size(); i++) { - uint b = blocks[i]; - uint weight = encoding_weight[m_block_encodings[b]]; - uint64 selector = 0; - for (uint p = 0; p < 16; p++) { - uint error_best = cUINT32_MAX; - uint8 s_best = 0; - for (uint8 t = 0; t < 8; t++) { - uint8 s = m_has_etc_color_blocks ? t : results.m_reordered ? 7 - g_dxt5_to_linear[t] : g_dxt5_to_linear[t]; - int delta = m_blocks[m_has_subblocks ? b >> 1 : b][p][component_index] - alpha_values[s]; - uint error = delta >= 0 ? 
delta : -delta; - if (error < error_best) { - s_best = s; - error_best = error; - } - } - selector = selector << 3 | s_best; - } - m_block_selectors[cAlpha0 + a][b] = selector << 16 | weight; - } + alpha_selectors.reserve(alpha_selectors.size() + m_alpha_selectors.size()); + crnlib::vector alpha_selectors_remap(m_alpha_selectors.size()); + hash_map alpha_selectors_map; + for (uint i = 0; i < m_alpha_selectors.size(); i++) + { + if (m_alpha_selectors_used[i]) + { + hash_map::insert_result insert_result = alpha_selectors_map.insert(m_alpha_selectors[i], alpha_selectors.size()); + if (insert_result.second) + { + alpha_selectors_remap[i] = alpha_selectors.size(); + alpha_selectors.push_back(m_alpha_selectors[i]); + } + else + { + alpha_selectors_remap[i] = insert_result.first->second; + } + } + } + + endpoint_indices.resize(m_num_blocks); + selector_indices.resize(m_num_blocks); + for (uint level = 0; level < p.m_num_levels; level++) + { + uint first_block = p.m_levels[level].m_first_block; + uint end_block = first_block + p.m_levels[level].m_num_blocks; + uint block_width = p.m_levels[level].m_block_width; + for (uint by = 0, b = first_block; b < end_block; by++) + { + for (uint bx = 0; bx < block_width; bx++, b++) + { + bool top_match = by != 0; + bool left_match = top_match || bx; + bool diag_match = m_has_subblocks && top_match && bx; + for (uint c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) + { + uint16 endpoint_index = (c ? alpha_endpoints_remap : color_endpoints_remap)[m_endpoint_indices[b].component[c]]; + left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c]; + top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c]; + diag_match = diag_match && endpoint_index == endpoint_indices[b - block_width - 1].component[c]; + endpoint_indices[b].component[c] = endpoint_index; + uint16 selector_index = (c ? 
alpha_selectors_remap : color_selectors_remap)[m_selector_indices[b].component[c]]; + selector_indices[b].component[c] = selector_index; + } + endpoint_indices[b].reference = m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : left_match ? 1 + : top_match ? 2 + : diag_match ? 3 + : 0; + } + } + } + + m_pTask_pool = nullptr; + return true; } - dxt_endpoint_refiner::params refinerParams; - dxt_endpoint_refiner::results refinerResults; - refinerParams.m_perceptual = m_params.m_perceptual; - refinerParams.m_pSelectors = selectors.get_ptr(); - refinerParams.m_pPixels = cluster.pixels.get_ptr(); - refinerParams.m_num_pixels = cluster.pixels.size(); - refinerParams.m_dxt1_selectors = false; - refinerParams.m_error_to_beat = results.m_error; - refinerParams.m_block_index = cluster_index; - cluster.refined_alpha = !m_has_etc_color_blocks && refiner.refine(refinerParams, refinerResults); - if (cluster.refined_alpha) { - cluster.first_endpoint = refinerResults.m_low_color; - cluster.second_endpoint = refinerResults.m_high_color; - dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); - for (uint i = 0; i < 8; i++) - cluster.refined_alpha_values[i] = block_values[g_dxt5_from_linear[i]]; - } else { - memcpy(cluster.refined_alpha_values, cluster.alpha_values, sizeof(cluster.refined_alpha_values)); + vec6F dxt_hc::palettize_color(color_quad_u8* pixels, uint pixels_count) + { + uint color[64]; + for (uint i = 0; i < pixels_count; i++) + { + color[i] = pixels[i][0] << 16 | pixels[i][1] << 8 | pixels[i][2]; + } + std::sort(color, color + pixels_count); + vec3F vectors[64]; + uint weights[64]; + uint size = 0; + for (uint i = 0; i < pixels_count; i++) + { + if (!i || color[i] != color[i - 1]) + { + vectors[size][0] = m_params.m_perceptual ? m_uint8_to_float[color[i] >> 16] * 0.5f : m_uint8_to_float[color[i] >> 16]; + vectors[size][1] = m_uint8_to_float[color[i] >> 8 & 0xFF]; + vectors[size][2] = m_params.m_perceptual ? 
m_uint8_to_float[color[i] & 0xFF] * 0.25f : m_uint8_to_float[color[i] & 0xFF]; + weights[size] = 1; + size++; + } + else + { + weights[size - 1]++; + } + } + vec3F result[2]; + split_vectors(vectors, weights, size, result); + if (result[0].length() > result[1].length()) + { + std::swap(result[0], result[1]); + } + return *(vec6F*)result; } - } -} - -void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) { - tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; - const crnlib::vector& codebook = vq->get_codebook(); - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { - if (m_tiles[t].pixels.size()) { - for (uint a = 0; a < m_num_alpha_blocks; a++) { - const vec2F& v = m_tiles[t].alpha_endpoints[a]; - float best_dist = math::cNearlyInfinite; - uint best_index = 0; - for (uint i = 0; i < codebook.size(); i++) { - float dist = (codebook[i][0] - v[0]) * (codebook[i][0] - v[0]) + (codebook[i][1] - v[1]) * (codebook[i][1] - v[1]); - if (dist < best_dist) { - best_dist = dist; - best_index = i; - if (best_dist == 0.0f) - break; - } - } - m_tiles[t].cluster_indices[cAlpha0 + a] = best_index; - } + + vec2F dxt_hc::palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index) + { + uint8 alpha[64]; + for (uint p = 0; p < pixels_count; p++) + { + alpha[p] = pixels[p][comp_index]; + } + std::sort(alpha, alpha + pixels_count); + vec1F vectors[64]; + uint weights[64]; + uint size = 0; + for (uint i = 0; i < pixels_count; i++) + { + if (!i || alpha[i] != alpha[i - 1]) + { + vectors[size][0] = m_uint8_to_float[alpha[i]]; + weights[size] = 1; + size++; + } + else + { + weights[size - 1]++; + } + } + vec1F result[2]; + split_vectors(vectors, weights, size, result); + if (result[0] > result[1]) + { + std::swap(result[0], result[1]); + } + return *(vec2F*)result; } - } -} - -void dxt_hc::determine_alpha_endpoints() { - uint 
num_tasks = m_pTask_pool->get_num_threads() + 1; - crnlib::vector > endpoints; - for (uint a = 0; a < m_num_alpha_blocks; a++) { - for (uint t = 0; t < m_tiles.size(); t++) { - if (m_tiles[t].pixels.size()) - endpoints.push_back(std::make_pair(m_tiles[t].alpha_endpoints[a], m_tiles[t].pixels.size())); + + void dxt_hc::determine_tiles_task(uint64 data, void*) + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + uint offsets[9] = { 0, 16, 32, 48, 0, 32, 64, 96, 64 }; + uint8 tiles[8][4] = { { 8 }, { 6, 7 }, { 4, 5 }, { 6, 1, 3 }, { 7, 0, 2 }, { 4, 2, 3 }, { 5, 0, 1 }, { 0, 2, 1, 3 } }; + color_quad_u8 tilePixels[128]; + uint8 selectors[64]; + uint tile_error[3][9]; + uint total_error[3][8]; + + etc1_optimizer optimizer; + etc1_optimizer::params params; + params.m_use_color4 = false; + params.m_constrain_against_base_color5 = false; + etc1_optimizer::results results; + results.m_pSelectors = selectors; + int scan[] = { -1, 0, 1 }; + int refine[] = { -3, -2, 2, 3 }; + + for (uint level = 0; level < m_params.m_num_levels; level++) + { + float weight = m_params.m_levels[level].m_weight; + uint width = m_params.m_levels[level].m_block_width; + uint height = m_params.m_levels[level].m_num_blocks / width; + uint faceHeight = height / m_params.m_num_faces; + uint h = height * data / num_tasks & ~1; + uint hEnd = height * (data + 1) / num_tasks & ~1; + uint hFace = h % faceHeight; + uint b = m_params.m_levels[level].m_first_block + h * width; + + for (; h < hEnd; h += 2, hFace += 2, b += width) + { + uint tile_offset = b; + uint tile_offset_delta = 4; + if (hFace == faceHeight) + { + hFace = 0; + } + else if (hFace & 2) + { + tile_offset_delta = -4; + tile_offset += (width << 1) + tile_offset_delta; + } + for (uint bNext = b + width; b < bNext; b += 2, tile_offset += tile_offset_delta) + { + for (int t = 0; t < 64; t += 16) + { + memcpy(tilePixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 
1 : 0)], 64); + } + for (int t = 0; t < 64; t += 4) + { + memcpy(tilePixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16); + } + + for (uint t = 0; t < 9; t++) + { + color_quad_u8* pixels = tilePixels + offsets[t]; + uint size = 16 << (t >> 2); + if (m_has_etc_color_blocks) + { + params.m_pSrc_pixels = pixels; + params.m_num_src_pixels = results.m_n = size; + optimizer.init(params, results); + params.m_pScan_deltas = scan; + params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); + optimizer.compute(); + if (results.m_error > 375 * params.m_num_src_pixels) + { + params.m_pScan_deltas = refine; + params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); + optimizer.compute(); + } + tile_error[cColor][t] = results.m_error; + } + else if (m_has_color_blocks) + { + uint low16, high16; + dxt_fast::compress_color_block(size, pixels, low16, high16, selectors); + color_quad_u8 block_colors[4]; + dxt1_block::get_block_colors4(block_colors, low16, high16); + uint error = 0; + for (uint p = 0; p < size; p++) + { + for (uint8 c = 0; c < 3; c++) + { + uint delta = pixels[p][c] - block_colors[selectors[p]][c]; + error += delta * delta; + } + } + tile_error[cColor][t] = error; + } + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + uint8 component = m_params.m_alpha_component_indices[a]; + dxt5_endpoint_optimizer optimizer; + dxt5_endpoint_optimizer::params params; + dxt5_endpoint_optimizer::results results; + params.m_pPixels = pixels; + params.m_num_pixels = size; + params.m_comp_index = component; + params.m_use_both_block_types = false; + params.m_quality = cCRNDXTQualityNormal; + results.m_pSelectors = selectors; + optimizer.compute(params, results); + uint block_values[cDXT5SelectorValues]; + dxt5_block::get_block_values8(block_values, results.m_first_endpoint, results.m_second_endpoint); + tile_error[cAlpha0 + a][t] = results.m_error; + } + } + + for (uint8 c = m_has_color_blocks ? 
0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) + { + for (uint8 e = 0; e < 8; e++) + { + total_error[c][e] = 0; + for (uint8 t = 0, s = e + 1; s; s >>= 1, t++) + { + total_error[c][e] += tile_error[c][tiles[e][t]]; + } + } + } + + float best_quality = 0.0f; + uint best_encoding = 0; + for (uint e = 0; e < 8; e++) + { + float quality = 0; + if (m_has_color_blocks) + { + double peakSNR = total_error[cColor][e] ? log10(255.0f / sqrt(total_error[cColor][e] / 192.0)) * 20.0f : 999999.0f; + quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f); + if (m_num_alpha_blocks) + { + quality *= m_params.m_adaptive_tile_color_alpha_weighting_ratio; + } + } + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + double peakSNR = total_error[cAlpha0 + a][e] ? log10(255.0f / sqrt(total_error[cAlpha0 + a][e] / 64.0)) * 20.0f : 999999.0f; + quality += (float)math::maximum(peakSNR - m_alpha_derating[e], 0.0f); + } + if (quality > best_quality) + { + best_quality = quality; + best_encoding = e; + } + } + + for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) + { + tile_details& tile = m_tiles[tile_offset | tile_index]; + uint t = tiles[best_encoding][tile_index]; + tile.pixels.append(tilePixels + offsets[t], 16 << (t >> 2)); + tile.weight = weight; + if (m_has_color_blocks) + { + tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); + } + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + tile.alpha_endpoints[a] = palettize_alpha(tile.pixels.get_ptr(), tile.pixels.size(), m_params.m_alpha_component_indices[a]); + } + } + + for (uint by = 0; by < 2; by++) + { + for (uint bx = 0; bx < 2; bx++) + { + m_block_encodings[b + (by ? width : 0) + bx] = best_encoding; + m_tile_indices[b + (by ? 
width : 0) + bx] = tile_offset | g_tile_map[best_encoding][by][bx]; + } + } + } + } + } } - } - - struct Node { - std::pair *p, *pEnd; - Node (std::pair* begin, std::pair* end) : p(begin), pEnd(end) {} - bool operator<(const Node& other) const { return *p > *other.p; } - static void sort_task(uint64 data, void* ptr) { std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); } - }; - - crnlib::vector nodes; - Node node(0, endpoints.get_ptr()); - for (uint i = 0; i < num_tasks; i++) { - node.p = node.pEnd; - node.pEnd = endpoints.get_ptr() + endpoints.size() * (i + 1) / num_tasks; - if (node.p != node.pEnd) - nodes.push_back(node); - } - - for (uint i = 0; i < nodes.size(); i++) - m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]); - m_pTask_pool->join(); - - std::priority_queue queue; - for (uint i = 0; i < nodes.size(); i++) - queue.push(nodes[i]); - - crnlib::vector vectors; - crnlib::vector weights; - vectors.reserve(endpoints.size()); - weights.reserve(endpoints.size()); - while (queue.size()) { - Node node = queue.top(); - std::pair* endpoint = node.p++; - queue.pop(); - if (node.p != node.pEnd) - queue.push(node); - if (!vectors.size() || endpoint->first != vectors.back()) { - vectors.push_back(endpoint->first); - weights.push_back(endpoint->second); - } else if (weights.back() > UINT_MAX - endpoint->second) { - weights.back() = UINT_MAX; - } else { - weights.back() += endpoint->second; + + void dxt_hc::determine_tiles_task_etc(uint64 data, void*) + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + uint offsets[5] = { 0, 8, 16, 24, 16 }; + uint8 tiles[3][2] = { { 4 }, { 2, 3 }, { 0, 1 } }; + uint8 tile_map[3][2] = { { 0, 0 }, { 0, 1 }, { 0, 1 } }; + color_quad_u8 tilePixels[32]; + uint8 selectors[32]; + uint tile_error[5]; + uint total_error[3]; + + etc1_optimizer optimizer; + etc1_optimizer::params params; + params.m_use_color4 = false; + params.m_constrain_against_base_color5 = false; + etc1_optimizer::results results; + results.m_pSelectors = 
selectors; + int scan[] = { -1, 0, 1 }; + int refine[] = { -3, -2, 2, 3 }; + + for (uint level = 0; level < m_params.m_num_levels; level++) + { + float weight = m_params.m_levels[level].m_weight; + uint b = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * data / num_tasks) & ~1; + uint bEnd = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * (data + 1) / num_tasks) & ~1; + for (; b < bEnd; b += 2) + { + for (uint p = 0; p < 16; p++) + { + tilePixels[p] = m_blocks[b >> 1][(p << 2 & 12) | p >> 2]; + } + memcpy(tilePixels + 16, m_blocks[b >> 1], 64); + for (uint t = 0; t < 5; t++) + { + params.m_pSrc_pixels = tilePixels + offsets[t]; + params.m_num_src_pixels = results.m_n = 8 << (t >> 2); + optimizer.init(params, results); + params.m_pScan_deltas = scan; + params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); + optimizer.compute(); + if (results.m_error > 375 * params.m_num_src_pixels) + { + params.m_pScan_deltas = refine; + params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); + optimizer.compute(); + } + tile_error[t] = results.m_error; + } + + for (uint8 e = 0; e < 3; e++) + { + total_error[e] = 0; + for (uint8 t = 0, s = e + 1; s; s >>= 1, t++) + { + total_error[e] += tile_error[tiles[e][t]]; + } + } + + float best_quality = 0.0f; + uint best_encoding = 0; + for (uint e = 0; e < 3; e++) + { + float quality = 0; + double peakSNR = total_error[e] ? log10(255.0f / sqrt(total_error[e] / 48.0)) * 20.0f : 999999.0f; + quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f); + if (quality > best_quality) + { + best_quality = quality; + best_encoding = e; + } + } + + vec2F alpha_endpoints = m_num_alpha_blocks ? 
palettize_alpha(tilePixels, 16, 3) : vec2F(cClear); + for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) + { + tile_details& tile = m_tiles[b | tile_index]; + uint t = tiles[best_encoding][tile_index]; + tile.pixels.append(tilePixels + offsets[t], 8 << (t >> 2)); + tile.weight = weight; + tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); + if (m_num_alpha_blocks) + { + tile.alpha_endpoints[0] = alpha_endpoints; + } + } + + for (uint bx = 0; bx < 2; bx++) + { + m_block_encodings[b | bx] = best_encoding; + m_tile_indices[b | bx] = b | tile_map[best_encoding][bx]; + m_endpoint_indices[b | bx].reference = bx ? best_encoding : 0; + } + if (best_encoding >> 1) + { + memcpy(m_blocks[b >> 1], tilePixels, 64); + } + } + } } - } - - tree_clusterizer vq; - vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_alpha_endpoint_codebook_size), false, m_pTask_pool); - m_alpha_clusters.resize(vq.get_codebook_size()); - - for (uint i = 0; i < num_tasks; i++) - m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &vq); - m_pTask_pool->join(); - - for (uint a = 0; a < m_num_alpha_blocks; a++) { - uint component_index = m_params.m_alpha_component_indices[a]; - for (uint t = 0; t < m_num_blocks; t++) { - crnlib::vector& source = m_tiles[t].pixels; - if (source.size()) { - crnlib::vector& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0 + a]].pixels; - for (uint p = 0; p < source.size(); p++) - destination.push_back(color_quad_u8(source[p][component_index])); - } + + void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void*) + { + const uint num_tasks = m_pTask_pool->get_num_threads() + 1; + dxt1_endpoint_optimizer optimizer; + dxt_endpoint_refiner refiner; + crnlib::vector selectors; + + for (uint cluster_index = (uint)data; cluster_index < m_color_clusters.size(); cluster_index += num_tasks) + { + 
color_cluster& cluster = m_color_clusters[cluster_index]; + if (cluster.pixels.empty()) + { + continue; + } + + dxt1_endpoint_optimizer::params params; + params.m_block_index = cluster_index; + params.m_pPixels = cluster.pixels.get_ptr(); + params.m_num_pixels = cluster.pixels.size(); + params.m_pixels_have_alpha = false; + params.m_use_alpha_blocks = false; + params.m_perceptual = m_params.m_perceptual; + params.m_quality = cCRNDXTQualityUber; + params.m_endpoint_caching = false; + + dxt1_endpoint_optimizer::results results; + selectors.resize(params.m_num_pixels); + results.m_pSelectors = selectors.get_ptr(); + + optimizer.compute(params, results); + cluster.first_endpoint = results.m_low_color; + cluster.second_endpoint = results.m_high_color; + color_quad_u8 block_values[4], color_values[4]; + dxt1_block::get_block_colors4(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 4; i++) + { + color_values[i] = cluster.color_values[i] = block_values[g_dxt1_from_linear[i]]; + } + for (uint c = 0; results.m_alternate_rounding && c < 3; c++) + { + color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + 1) / 3; + color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + 1) / 3; + } + + uint endpoint_weight = color::color_distance(m_params.m_perceptual, color_values[0], color_values[3], false) / 2000; + float encoding_weight[8]; + for (uint i = 0; i < 8; i++) + { + encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f); + } + + crnlib::vector& blocks = cluster.blocks[cColor]; + for (uint i = 0; i < blocks.size(); i++) + { + uint b = blocks[i]; + uint weight = (uint)(math::clamp(endpoint_weight * m_block_weights[b], 1, 2048) * encoding_weight[m_block_encodings[b]]); + uint32 selector = 0; + for (uint p = 0; p < 16; p++) + { + uint error_best = cUINT32_MAX; + uint8 s_best = 0; + for (uint8 t = 0; t < 4; t++) + { + uint8 s = results.m_reordered ? 
3 - g_dxt1_to_linear[t] : g_dxt1_to_linear[t]; + uint error = color::color_distance(m_params.m_perceptual, (color_quad_u8&)m_blocks[b][p], color_values[s], false); + if (error < error_best) + { + s_best = s; + error_best = error; + } + } + selector = selector << 2 | s_best; + } + m_block_selectors[cColor][b] = (uint64)selector << 32 | weight; + } + + dxt_endpoint_refiner::params refinerParams; + dxt_endpoint_refiner::results refinerResults; + refinerParams.m_perceptual = m_params.m_perceptual; + refinerParams.m_pSelectors = selectors.get_ptr(); + refinerParams.m_pPixels = cluster.pixels.get_ptr(); + refinerParams.m_num_pixels = cluster.pixels.size(); + refinerParams.m_dxt1_selectors = true; + refinerParams.m_error_to_beat = results.m_error; + refinerParams.m_block_index = cluster_index; + if (refiner.refine(refinerParams, refinerResults)) + { + cluster.first_endpoint = refinerResults.m_low_color; + cluster.second_endpoint = refinerResults.m_high_color; + } + } } - } - - for (uint b = 0; b < m_num_blocks; b++) { - for (uint a = 0; a < m_num_alpha_blocks; a++) { - uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0 + a]; - m_endpoint_indices[b].component[cAlpha0 + a] = cluster_index; - if (!(m_has_subblocks && b & 1)) - m_alpha_clusters[cluster_index].blocks[cAlpha0 + a].push_back(b); + + void dxt_hc::determine_color_endpoint_codebook_task_etc(uint64 data, void*) + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + uint8 delta[8][2] = { { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, { 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 } }; + int scan[] = { -1, 0, 1 }; + int refine[] = { -3, -2, 2, 3 }; + for (uint iCluster = m_color_clusters.size() * data / num_tasks, iEnd = m_color_clusters.size() * (data + 1) / num_tasks; iCluster < iEnd; iCluster++) + { + color_cluster& cluster = m_color_clusters[iCluster]; + if (cluster.pixels.size()) + { + etc1_optimizer optimizer; + etc1_optimizer::params params; + params.m_use_color4 = false; + 
params.m_constrain_against_base_color5 = false; + etc1_optimizer::results results; + crnlib::vector selectors(cluster.pixels.size()); + params.m_pSrc_pixels = cluster.pixels.get_ptr(); + results.m_pSelectors = selectors.get_ptr(); + results.m_n = params.m_num_src_pixels = cluster.pixels.size(); + optimizer.init(params, results); + params.m_pScan_deltas = scan; + params.m_scan_delta_size = sizeof(scan) / sizeof(*scan); + optimizer.compute(); + if (results.m_error > 375 * params.m_num_src_pixels) + { + params.m_pScan_deltas = refine; + params.m_scan_delta_size = sizeof(refine) / sizeof(*refine); + optimizer.compute(); + } + color_quad_u8 endpoint; + for (int c = 0; c < 3; c++) + { + endpoint.c[c] = results.m_block_color_unscaled.c[c] << 3 | results.m_block_color_unscaled.c[c] >> 2; + } + endpoint.c[3] = results.m_block_inten_table; + cluster.first_endpoint = endpoint.m_u32; + for (uint8 d0 = delta[endpoint.c[3]][0], d1 = delta[endpoint.c[3]][1], c = 0; c < 3; c++) + { + uint8 q = endpoint.c[c]; + cluster.color_values[0].c[c] = q <= d1 ? 0 : q - d1; + cluster.color_values[1].c[c] = q <= d0 ? 0 : q - d0; + cluster.color_values[2].c[c] = q >= 255 - d0 ? 255 : q + d0; + cluster.color_values[3].c[c] = q >= 255 - d1 ? 255 : q + d1; + } + for (int t = 0; t < 4; t++) + { + cluster.color_values[t].c[3] = 0xFF; + } + float endpoint_weight = powf(math::minimum((cluster.color_values[3].get_luma() - cluster.color_values[0].get_luma()) / 100.0f, 1.0f), 2.7f); + + crnlib::vector& blocks = cluster.blocks[cColor]; + uint blockSize = m_has_subblocks ? 8 : 16; + for (uint i = 0; i < blocks.size(); i++) + { + uint b = blocks[i]; + color_quad_u8* pixels = m_has_subblocks ? ((color_quad_u8(*)[8])m_blocks)[b] : m_blocks[b]; + uint weight = (uint)(math::clamp(0x8000 * endpoint_weight * m_block_weights[b] * (m_block_encodings[b] ? 
0.972f : 1.0f), 1, 0xFFFF)); + uint32 selector = 0; + for (uint p = 0; p < blockSize; p++) + { + uint error_best = cUINT32_MAX; + uint8 s_best = 0; + for (uint8 s = 0; s < 4; s++) + { + uint error = color::color_distance(m_params.m_perceptual, pixels[p], cluster.color_values[s], false); + if (error < error_best) + { + s_best = s; + error_best = error; + } + } + selector = selector << 2 | s_best; + } + m_block_selectors[cColor][b] = (uint64)selector << (!m_has_subblocks || (b & 1) ? 32 : 48) | weight; + } + } + } } - } - - for (uint i = 0; i < num_tasks; i++) - m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, nullptr); - m_pTask_pool->join(); -} - -struct color_selector_details { - color_selector_details() { utils::zero_object(*this); } - uint error[16][4]; - bool used; -}; - -void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) { - crnlib::vector& selector_details = *static_cast*>(pData_ptr); - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - uint E2[16][4]; - uint E4[8][16]; - uint E8[4][256]; - for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) { - color_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color]; - color_quad_u8* endpoint_colors = cluster.color_values; - for (uint p = 0; p < 16; p++) { - for (uint s = 0; s < 4; s++) - E2[p][s] = m_has_subblocks ? 
color::color_distance(m_params.m_perceptual, m_blocks[b][p], m_color_clusters[m_endpoint_indices[b << 1 | p >> 3].color].color_values[s], false) : - color::color_distance(m_params.m_perceptual, m_blocks[b][p], endpoint_colors[s], false); + + void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) + { + tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; + const crnlib::vector& codebook = vq->get_codebook(); + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) + { + if (m_tiles[t].pixels.size()) + { + const vec6F& v = m_tiles[t].color_endpoint; + float node_dist = codebook[vq->get_node_index(v)].squared_distance(v); + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + for (uint i = 0; i < codebook.size(); i++) + { + const vec6F& c = codebook[i]; + float dist = 0; + float d0 = c[0] - v[0]; + dist += d0 * d0; + float d1 = c[1] - v[1]; + dist += d1 * d1; + if (dist > node_dist) + { + continue; + } + float d2 = c[2] - v[2]; + dist += d2 * d2; + float d3 = c[3] - v[3]; + dist += d3 * d3; + if (dist > node_dist) + { + continue; + } + float d4 = c[4] - v[4]; + dist += d4 * d4; + float d5 = c[5] - v[5]; + dist += d5 * d5; + if (dist < best_dist) + { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + { + break; + } + } + } + m_tiles[t].cluster_indices[cColor] = best_index; + } + } } - for (uint p = 0; p < 8; p++) { - for (uint s = 0; s < 16; s++) - E4[p][s] = E2[p << 1][s & 3] + E2[p << 1 | 1][s >> 2]; + + void dxt_hc::determine_color_endpoints() + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + crnlib::vector> endpoints; + for (uint t = 0; t < m_tiles.size(); t++) + { + if (m_tiles[t].pixels.size()) + { + endpoints.push_back(std::make_pair(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight))); + } + } + + struct Node + { + std::pair*p, *pEnd; + Node(std::pair* 
begin, std::pair* end) : + p(begin), pEnd(end) + { + } + bool operator<(const Node& other) const + { + return *p > *other.p; + } + static void sort_task(uint64 data, void* ptr) + { + std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); + } + }; + + crnlib::vector nodes; + Node node(0, endpoints.get_ptr()); + for (uint i = 0; i < num_tasks; i++) + { + node.p = node.pEnd; + node.pEnd = endpoints.get_ptr() + endpoints.size() * (i + 1) / num_tasks; + if (node.p != node.pEnd) + { + nodes.push_back(node); + } + } + + for (uint i = 0; i < nodes.size(); i++) + { + m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]); + } + m_pTask_pool->join(); + + std::priority_queue queue; + for (uint i = 0; i < nodes.size(); i++) + { + queue.push(nodes[i]); + } + + crnlib::vector vectors; + crnlib::vector weights; + vectors.reserve(endpoints.size()); + weights.reserve(endpoints.size()); + while (queue.size()) + { + Node node = queue.top(); + std::pair* endpoint = node.p++; + queue.pop(); + if(node.p != node.pEnd) + { + queue.push(node); + } + if (!vectors.size() || endpoint->first != vectors.back()) + { + vectors.push_back(endpoint->first); + weights.push_back(endpoint->second); + } + else if (weights.back() > UINT_MAX - endpoint->second) + { + weights.back() = UINT_MAX; + } + else + { + weights.back() += endpoint->second; + } + } + + tree_clusterizer vq; + vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_color_endpoint_codebook_size), true, m_pTask_pool); + m_color_clusters.resize(vq.get_codebook_size()); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_clusters_task, i, &vq); + } + m_pTask_pool->join(); + + for (uint t = 0; t < m_num_blocks; t++) + { + if (m_tiles[t].pixels.size()) + { + m_color_clusters[m_tiles[t].cluster_indices[cColor]].pixels.append(m_tiles[t].pixels); + } + } + + for (uint b = 0; b < m_num_blocks; b++) + { + 
uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColor]; + m_endpoint_indices[b].component[cColor] = cluster_index; + m_color_clusters[cluster_index].blocks[cColor].push_back(b); + if (m_has_subblocks && m_endpoint_indices[b].reference && cluster_index == m_endpoint_indices[b - 1].component[cColor]) + { + if (m_endpoint_indices[b].reference >> 1) + { + color_quad_u8 mirror[16]; + for (uint p = 0; p < 16; p++) + { + mirror[p] = m_blocks[b >> 1][(p << 2 & 12) | p >> 2]; + } + memcpy(m_blocks[b >> 1], mirror, 64); + } + m_endpoint_indices[b].reference = 0; + } + } + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, m_has_etc_color_blocks ? &dxt_hc::determine_color_endpoint_codebook_task_etc : &dxt_hc::determine_color_endpoint_codebook_task, i, nullptr); + } + m_pTask_pool->join(); } - for (uint p = 0; p < 4; p++) { - for (uint s = 0; s < 256; s++) - E8[p][s] = E4[p << 1][s & 15] + E4[p << 1 | 1][s >> 4]; + + void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void*) + { + const uint num_tasks = m_pTask_pool->get_num_threads() + 1; + dxt5_endpoint_optimizer optimizer; + dxt_endpoint_refiner refiner; + crnlib::vector selectors; + + for (uint cluster_index = (uint)data; cluster_index < m_alpha_clusters.size(); cluster_index += num_tasks) + { + alpha_cluster& cluster = m_alpha_clusters[cluster_index]; + if (cluster.pixels.empty()) + { + continue; + } + + dxt5_endpoint_optimizer::params params; + params.m_pPixels = cluster.pixels.get_ptr(); + params.m_num_pixels = cluster.pixels.size(); + params.m_comp_index = 0; + params.m_quality = cCRNDXTQualityUber; + params.m_use_both_block_types = false; + + dxt5_endpoint_optimizer::results results; + selectors.resize(params.m_num_pixels); + results.m_pSelectors = selectors.get_ptr(); + + optimizer.compute(params, results); + cluster.first_endpoint = results.m_first_endpoint; + cluster.second_endpoint = results.m_second_endpoint; + uint 
block_values[8], alpha_values[8]; + dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 8; i++) + { + alpha_values[i] = cluster.alpha_values[i] = block_values[g_dxt5_from_linear[i]]; + } + int delta = cluster.first_endpoint - cluster.second_endpoint; + uint encoding_weight[8]; + for (uint endpoint_weight = math::clamp(delta * delta >> 3, 1, 2048), i = 0; i < 8; i++) + { + encoding_weight[i] = (uint)(endpoint_weight * math::lerp(1.15f, 1.0f, i / 7.0f)); + } + + if (m_has_etc_color_blocks) + { + static const int stripped_modifier_table[2][8] = { + { -10, -7, -5, -2, 1, 4, 6, 9 }, + { -10, -3, -2, -1, 0, 1, 2, 9 } + }; + int base_codeword = (results.m_first_endpoint + results.m_second_endpoint + 1) >> 1; + int modifier_index = delta <= 6 ? 13 : 11; + int multiplier = delta <= 6 ? 1 : math::clamp((delta + 12) / 18, 1, 15); + const int* modifier = stripped_modifier_table[modifier_index == 11 ? 0 : 1]; + for (int i = 0; i < 8; i++) + { + alpha_values[i] = cluster.alpha_values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); + } + cluster.first_endpoint = base_codeword; + cluster.second_endpoint = multiplier << 4 | modifier_index; + } + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + uint component_index = m_params.m_alpha_component_indices[a]; + crnlib::vector& blocks = cluster.blocks[cAlpha0 + a]; + for (uint i = 0; i < blocks.size(); i++) + { + uint b = blocks[i]; + uint weight = encoding_weight[m_block_encodings[b]]; + uint64 selector = 0; + for (uint p = 0; p < 16; p++) + { + uint error_best = cUINT32_MAX; + uint8 s_best = 0; + for (uint8 t = 0; t < 8; t++) + { + uint8 s = m_has_etc_color_blocks ? t : results.m_reordered ? 7 - g_dxt5_to_linear[t] + : g_dxt5_to_linear[t]; + int delta = m_blocks[m_has_subblocks ? b >> 1 : b][p][component_index] - alpha_values[s]; + uint error = delta >= 0 ? 
delta : -delta; + if (error < error_best) + { + s_best = s; + error_best = error; + } + } + selector = selector << 3 | s_best; + } + m_block_selectors[cAlpha0 + a][b] = selector << 16 | weight; + } + } + + dxt_endpoint_refiner::params refinerParams; + dxt_endpoint_refiner::results refinerResults; + refinerParams.m_perceptual = m_params.m_perceptual; + refinerParams.m_pSelectors = selectors.get_ptr(); + refinerParams.m_pPixels = cluster.pixels.get_ptr(); + refinerParams.m_num_pixels = cluster.pixels.size(); + refinerParams.m_dxt1_selectors = false; + refinerParams.m_error_to_beat = results.m_error; + refinerParams.m_block_index = cluster_index; + cluster.refined_alpha = !m_has_etc_color_blocks && refiner.refine(refinerParams, refinerResults); + if (cluster.refined_alpha) + { + cluster.first_endpoint = refinerResults.m_low_color; + cluster.second_endpoint = refinerResults.m_high_color; + dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 8; i++) + { + cluster.refined_alpha_values[i] = block_values[g_dxt5_from_linear[i]]; + } + } + else + { + memcpy(cluster.refined_alpha_values, cluster.alpha_values, sizeof(cluster.refined_alpha_values)); + } + } } - uint best_index = 0; - for (uint best_error = cUINT32_MAX, s = 0; s < m_color_selectors.size(); s++) { - uint32 selector = m_color_selectors[s]; - uint error = E8[0][selector & 255] + E8[1][selector >> 8 & 255] + E8[2][selector >> 16 & 255] + E8[3][selector >> 24 & 255]; - if (error < best_error) { - best_error = error; - best_index = s; - } + + void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) + { + tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; + const crnlib::vector& codebook = vq->get_codebook(); + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) + { + if (m_tiles[t].pixels.size()) + { + for (uint a 
= 0; a < m_num_alpha_blocks; a++) + { + const vec2F& v = m_tiles[t].alpha_endpoints[a]; + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + for (uint i = 0; i < codebook.size(); i++) + { + float dist = (codebook[i][0] - v[0]) * (codebook[i][0] - v[0]) + (codebook[i][1] - v[1]) * (codebook[i][1] - v[1]); + if (dist < best_dist) + { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + { + break; + } + } + } + m_tiles[t].cluster_indices[cAlpha0 + a] = best_index; + } + } + } } - uint (&total_errors)[16][4] = selector_details[best_index].error; - for (uint p = 0; p < 16; p++) { - for (uint s = 0; s < 4; s++) - total_errors[p][s] += E2[p][s]; + + void dxt_hc::determine_alpha_endpoints() + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + crnlib::vector> endpoints; + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + for (uint t = 0; t < m_tiles.size(); t++) + { + if (m_tiles[t].pixels.size()) + { + endpoints.push_back(std::make_pair(m_tiles[t].alpha_endpoints[a], m_tiles[t].pixels.size())); + } + } + } + + struct Node + { + std::pair*p, *pEnd; + Node(std::pair* begin, std::pair* end) : + p(begin), pEnd(end) + { + } + bool operator<(const Node& other) const + { + return *p > *other.p; + } + static void sort_task(uint64 data, void* ptr) + { + std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); + } + }; + + crnlib::vector nodes; + Node node(0, endpoints.get_ptr()); + for (uint i = 0; i < num_tasks; i++) + { + node.p = node.pEnd; + node.pEnd = endpoints.get_ptr() + endpoints.size() * (i + 1) / num_tasks; + if (node.p != node.pEnd) + { + nodes.push_back(node); + } + } + + for (uint i = 0; i < nodes.size(); i++) + { + m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]); + } + m_pTask_pool->join(); + + std::priority_queue queue; + for (uint i = 0; i < nodes.size(); i++) + { + queue.push(nodes[i]); + } + + crnlib::vector vectors; + crnlib::vector weights; + vectors.reserve(endpoints.size()); + weights.reserve(endpoints.size()); + while 
(queue.size()) + { + Node node = queue.top(); + std::pair* endpoint = node.p++; + queue.pop(); + if (node.p != node.pEnd) + { + queue.push(node); + } + if (!vectors.size() || endpoint->first != vectors.back()) + { + vectors.push_back(endpoint->first); + weights.push_back(endpoint->second); + } + else if (weights.back() > UINT_MAX - endpoint->second) + { + weights.back() = UINT_MAX; + } + else + { + weights.back() += endpoint->second; + } + } + + tree_clusterizer vq; + vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_alpha_endpoint_codebook_size), false, m_pTask_pool); + m_alpha_clusters.resize(vq.get_codebook_size()); + + for (uint i = 0; i < num_tasks; i++) + { + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &vq); + } + m_pTask_pool->join(); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + uint component_index = m_params.m_alpha_component_indices[a]; + for (uint t = 0; t < m_num_blocks; t++) + { + crnlib::vector& source = m_tiles[t].pixels; + if (source.size()) + { + crnlib::vector& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0 + a]].pixels; + for (uint p = 0; p < source.size(); p++) + { + destination.push_back(color_quad_u8(source[p][component_index])); + } + } + } + } + + for (uint b = 0; b < m_num_blocks; b++) + { + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0 + a]; + m_endpoint_indices[b].component[cAlpha0 + a] = cluster_index; + if (!(m_has_subblocks && b & 1)) + { + m_alpha_clusters[cluster_index].blocks[cAlpha0 + a].push_back(b); + } + } + } + + for (uint i = 0; i < num_tasks; i++) + { + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, nullptr); + } + m_pTask_pool->join(); } - selector_details[best_index].used = true; - m_selector_indices[m_has_subblocks ? 
b << 1 : b].color = best_index; - } -} - -struct SelectorNode { - uint64 *p, *pEnd; - SelectorNode (uint64* begin, uint64* end) : p(begin), pEnd(end) {} - bool operator<(const SelectorNode& other) const { return *p > *other.p; } - static void sort_task(uint64 data, void* ptr) { std::sort(((SelectorNode*)ptr)->p, ((SelectorNode*)ptr)->pEnd); } -}; - -void dxt_hc::create_color_selector_codebook() { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - crnlib::vector selectors(m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks); - for (uint i = 0, b = 0, step = m_has_subblocks ? 2 : 1; b < m_num_blocks; b += step) - selectors[i++] = m_block_selectors[cColor][b] + (m_has_subblocks ? m_block_selectors[cColor][b + 1] : 0); - - crnlib::vector nodes; - SelectorNode node(0, selectors.get_ptr()); - for (uint i = 0; i < num_tasks; i++) { - node.p = node.pEnd; - node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; - if (node.p != node.pEnd) - nodes.push_back(node); - } - - for (uint i = 0; i < nodes.size(); i++) - m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); - m_pTask_pool->join(); - - std::priority_queue queue; - for (uint i = 0; i < nodes.size(); i++) - queue.push(nodes[i]); - - float v[4]; - for (uint s = 0; s < 4; s++) - v[s] = (s + 0.5f) * 0.25f; - - crnlib::vector vectors; - crnlib::vector weights; - vectors.reserve(selectors.size()); - weights.reserve(selectors.size()); - for (uint64 prev_selector = 0; queue.size();) { - SelectorNode node = queue.top(); - uint64 selector = *node.p++; - queue.pop(); - if (node.p != node.pEnd) - queue.push(node); - uint weight = (uint)selector; - selector >>= 32; - if (!vectors.size() || selector != prev_selector) { - prev_selector = selector; - vec16F vector; - for (uint p = 0; p < 16; p++, selector >>= 2) - vector[15 - p] = v[selector & 3]; - vectors.push_back(vector); - weights.push_back(weight); - } else if (weights.back() > UINT_MAX - weight) { - weights.back() = UINT_MAX; - } else { - 
weights.back() += weight; + + struct color_selector_details + { + color_selector_details() + { + utils::zero_object(*this); + } + uint error[16][4]; + bool used; + }; + + void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) + { + crnlib::vector& selector_details = *static_cast*>(pData_ptr); + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + uint E2[16][4]; + uint E4[8][16]; + uint E8[4][256]; + for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) + { + color_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color]; + color_quad_u8* endpoint_colors = cluster.color_values; + for (uint p = 0; p < 16; p++) + { + for (uint s = 0; s < 4; s++) + { + E2[p][s] = m_has_subblocks ? color::color_distance(m_params.m_perceptual, m_blocks[b][p], m_color_clusters[m_endpoint_indices[b << 1 | p >> 3].color].color_values[s], false) : color::color_distance(m_params.m_perceptual, m_blocks[b][p], endpoint_colors[s], false); + } + } + for (uint p = 0; p < 8; p++) + { + for (uint s = 0; s < 16; s++) + { + E4[p][s] = E2[p << 1][s & 3] + E2[p << 1 | 1][s >> 2]; + } + } + for (uint p = 0; p < 4; p++) + { + for (uint s = 0; s < 256; s++) + { + E8[p][s] = E4[p << 1][s & 15] + E4[p << 1 | 1][s >> 4]; + } + } + uint best_index = 0; + for (uint best_error = cUINT32_MAX, s = 0; s < m_color_selectors.size(); s++) + { + uint32 selector = m_color_selectors[s]; + uint error = E8[0][selector & 255] + E8[1][selector >> 8 & 255] + E8[2][selector >> 16 & 255] + E8[3][selector >> 24 & 255]; + if (error < best_error) + { + best_error = error; + best_index = s; + } + } + uint(&total_errors)[16][4] = selector_details[best_index].error; + for (uint p = 0; p < 16; p++) + { + for (uint s = 0; s < 4; s++) + { + total_errors[p][s] += E2[p][s]; + } + } + selector_details[best_index].used = true; + m_selector_indices[m_has_subblocks ? 
b << 1 : b].color = best_index; + } } - } - - tree_clusterizer selector_vq; - selector_vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), m_params.m_color_selector_codebook_size, false, m_pTask_pool); - m_color_selectors.resize(selector_vq.get_codebook_size()); - m_color_selectors_used.resize(selector_vq.get_codebook_size()); - for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { - const vec16F& v = selector_vq.get_codebook_entry(i); - m_color_selectors[i] = 0; - for (uint sh = 0, j = 0; j < 16; j++, sh += 2) - m_color_selectors[i] |= (uint)(v[j] * 4.0f) << sh; - } - - crnlib::vector > selector_details(num_tasks); - for (uint t = 0; t < num_tasks; t++) { - selector_details[t].resize(m_color_selectors.size()); - m_pTask_pool->queue_object_task(this, &dxt_hc::create_color_selector_codebook_task, t, &selector_details[t]); - } - m_pTask_pool->join(); - - for (uint t = 1; t < num_tasks; t++) { - for (uint i = 0; i < m_color_selectors.size(); i++) { - for (uint8 p = 0; p < 16; p++) { - for (uint8 s = 0; s < 4; s++) - selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; - } - selector_details[0][i].used = selector_details[0][i].used || selector_details[t][i].used; + + struct SelectorNode + { + uint64 *p, *pEnd; + SelectorNode(uint64* begin, uint64* end) : + p(begin), pEnd(end) + { + } + bool operator<(const SelectorNode& other) const + { + return *p > *other.p; + } + static void sort_task(uint64 data, void* ptr) + { + std::sort(((SelectorNode*)ptr)->p, ((SelectorNode*)ptr)->pEnd); + } + }; + + void dxt_hc::create_color_selector_codebook() + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + crnlib::vector selectors(m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks); + for (uint i = 0, b = 0, step = m_has_subblocks ? 2 : 1; b < m_num_blocks; b += step) + { + selectors[i++] = m_block_selectors[cColor][b] + (m_has_subblocks ? 
m_block_selectors[cColor][b + 1] : 0); + } + + crnlib::vector nodes; + SelectorNode node(0, selectors.get_ptr()); + for (uint i = 0; i < num_tasks; i++) + { + node.p = node.pEnd; + node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; + if (node.p != node.pEnd) + { + nodes.push_back(node); + } + } + + for (uint i = 0; i < nodes.size(); i++) + { + m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); + } + m_pTask_pool->join(); + + std::priority_queue queue; + for (uint i = 0; i < nodes.size(); i++) + { + queue.push(nodes[i]); + } + + float v[4]; + for (uint s = 0; s < 4; s++) + { + v[s] = (s + 0.5f) * 0.25f; + } + + crnlib::vector vectors; + crnlib::vector weights; + vectors.reserve(selectors.size()); + weights.reserve(selectors.size()); + for (uint64 prev_selector = 0; queue.size();) + { + SelectorNode node = queue.top(); + uint64 selector = *node.p++; + queue.pop(); + if (node.p != node.pEnd) + { + queue.push(node); + } + uint weight = (uint)selector; + selector >>= 32; + if (!vectors.size() || selector != prev_selector) + { + prev_selector = selector; + vec16F vector; + for (uint p = 0; p < 16; p++, selector >>= 2) + { + vector[15 - p] = v[selector & 3]; + } + vectors.push_back(vector); + weights.push_back(weight); + } + else if (weights.back() > UINT_MAX - weight) + { + weights.back() = UINT_MAX; + } + else + { + weights.back() += weight; + } + } + + tree_clusterizer selector_vq; + selector_vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), m_params.m_color_selector_codebook_size, false, m_pTask_pool); + m_color_selectors.resize(selector_vq.get_codebook_size()); + m_color_selectors_used.resize(selector_vq.get_codebook_size()); + for (uint i = 0; i < selector_vq.get_codebook_size(); i++) + { + const vec16F& v = selector_vq.get_codebook_entry(i); + m_color_selectors[i] = 0; + for (uint sh = 0, j = 0; j < 16; j++, sh += 2) + { + m_color_selectors[i] |= (uint)(v[j] * 4.0f) << sh; + } + } + + crnlib::vector> 
selector_details(num_tasks); + for (uint t = 0; t < num_tasks; t++) + { + selector_details[t].resize(m_color_selectors.size()); + m_pTask_pool->queue_object_task(this, &dxt_hc::create_color_selector_codebook_task, t, &selector_details[t]); + } + m_pTask_pool->join(); + + for (uint t = 1; t < num_tasks; t++) + { + for (uint i = 0; i < m_color_selectors.size(); i++) + { + for (uint8 p = 0; p < 16; p++) + { + for (uint8 s = 0; s < 4; s++) + { + selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; + } + } + selector_details[0][i].used = selector_details[0][i].used || selector_details[t][i].used; + } + } + + for (uint i = 0; i < m_color_selectors.size(); i++) + { + m_color_selectors_used[i] = selector_details[0][i].used; + uint(&errors)[16][4] = selector_details[0][i].error; + m_color_selectors[i] = 0; + for (uint sh = 0, p = 0; p < 16; p++, sh += 2) + { + uint* e = errors[p]; + uint8 s03 = e[3] < e[0] ? 3 : 0; + uint8 s12 = e[2] < e[1] ? 2 : 1; + m_color_selectors[i] |= (e[s12] < e[s03] ? s12 : s03) << sh; + } + } } - } - - for (uint i = 0; i < m_color_selectors.size(); i++) { - m_color_selectors_used[i] = selector_details[0][i].used; - uint (&errors)[16][4] = selector_details[0][i].error; - m_color_selectors[i] = 0; - for (uint sh = 0, p = 0; p < 16; p++, sh += 2) { - uint* e = errors[p]; - uint8 s03 = e[3] < e[0] ? 3 : 0; - uint8 s12 = e[2] < e[1] ? 2 : 1; - m_color_selectors[i] |= (e[s12] < e[s03] ? s12 : s03) << sh; + + struct alpha_selector_details + { + alpha_selector_details() + { + utils::zero_object(*this); + } + uint error[16][8]; + bool used; + }; + + void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) + { + crnlib::vector& selector_details = *static_cast*>(pData_ptr); + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + uint E3[16][8]; + uint E6[8][64]; + for (uint n = m_has_subblocks ? 
m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) + { + for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) + { + const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0]; + alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[m_has_subblocks ? b << 1 : b].component[c]]; + uint* block_values = cluster.alpha_values; + for (uint p = 0; p < 16; p++) + { + for (uint s = 0; s < 8; s++) + { + int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; + E3[p][s] = delta * delta; + } + } + for (uint p = 0; p < 8; p++) + { + for (uint s = 0; s < 64; s++) + { + E6[p][s] = E3[p << 1][s & 7] + E3[p << 1 | 1][s >> 3]; + } + } + uint best_index = 0; + for (uint best_error = cUINT32_MAX, s = 0; s < m_alpha_selectors.size(); s++) + { + uint64 selector = m_alpha_selectors[s]; + uint error = E6[0][selector & 63]; + error += E6[1][selector >> 6 & 63]; + error += E6[2][selector >> 12 & 63]; + error += E6[3][selector >> 18 & 63]; + error += E6[4][selector >> 24 & 63]; + error += E6[5][selector >> 30 & 63]; + error += E6[6][selector >> 36 & 63]; + error += E6[7][selector >> 42 & 63]; + if (error < best_error) + { + best_error = error; + best_index = s; + } + } + if (cluster.refined_alpha) + { + block_values = cluster.refined_alpha_values; + for (uint p = 0; p < 16; p++) + { + for (uint s = 0; s < 8; s++) + { + int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; + E3[p][s] = delta * delta; + } + } + } + uint(&total_errors)[16][8] = selector_details[best_index].error; + for (uint p = 0; p < 16; p++) + { + for (uint s = 0; s < 8; s++) + { + total_errors[p][s] += E3[p][s]; + } + } + selector_details[best_index].used = true; + m_selector_indices[m_has_subblocks ? 
b << 1 : b].component[c] = best_index; + } + } } - } -} - -struct alpha_selector_details { - alpha_selector_details() { utils::zero_object(*this); } - uint error[16][8]; - bool used; -}; - -void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) { - crnlib::vector& selector_details = *static_cast*>(pData_ptr); - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - uint E3[16][8]; - uint E6[8][64]; - for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) { - for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { - const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0]; - alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[m_has_subblocks ? b << 1 : b].component[c]]; - uint* block_values = cluster.alpha_values; - for (uint p = 0; p < 16; p++) { - for (uint s = 0; s < 8; s++) { - int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; - E3[p][s] = delta * delta; - } - } - for (uint p = 0; p < 8; p++) { - for (uint s = 0; s < 64; s++) - E6[p][s] = E3[p << 1][s & 7] + E3[p << 1 | 1][s >> 3]; - } - uint best_index = 0; - for (uint best_error = cUINT32_MAX, s = 0; s < m_alpha_selectors.size(); s++) { - uint64 selector = m_alpha_selectors[s]; - uint error = E6[0][selector & 63]; - error += E6[1][selector >> 6 & 63]; - error += E6[2][selector >> 12 & 63]; - error += E6[3][selector >> 18 & 63]; - error += E6[4][selector >> 24 & 63]; - error += E6[5][selector >> 30 & 63]; - error += E6[6][selector >> 36 & 63]; - error += E6[7][selector >> 42 & 63]; - if (error < best_error) { - best_error = error; - best_index = s; - } - } - if (cluster.refined_alpha) { - block_values = cluster.refined_alpha_values; - for (uint p = 0; p < 16; p++) { - for (uint s = 0; s < 8; s++) { - int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; - E3[p][s] = delta * delta; - } - } - } - uint (&total_errors)[16][8] = 
selector_details[best_index].error; - for (uint p = 0; p < 16; p++) { + + void dxt_hc::create_alpha_selector_codebook() + { + uint num_tasks = m_pTask_pool->get_num_threads() + 1; + crnlib::vector selectors(m_num_alpha_blocks * (m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks)); + for (uint i = 0, c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) + { + for (uint b = 0, step = m_has_subblocks ? 2 : 1; b < m_num_blocks; b += step) + { + selectors[i++] = m_block_selectors[c][b]; + } + } + + crnlib::vector nodes; + SelectorNode node(0, selectors.get_ptr()); + for (uint i = 0; i < num_tasks; i++) + { + node.p = node.pEnd; + node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; + if (node.p != node.pEnd) + { + nodes.push_back(node); + } + } + + for (uint i = 0; i < nodes.size(); i++) + { + m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); + } + m_pTask_pool->join(); + + std::priority_queue queue; + for (uint i = 0; i < nodes.size(); i++) + { + queue.push(nodes[i]); + } + + float v[8]; for (uint s = 0; s < 8; s++) - total_errors[p][s] += E3[p][s]; - } - selector_details[best_index].used = true; - m_selector_indices[m_has_subblocks ? b << 1 : b].component[c] = best_index; - } - } -} - -void dxt_hc::create_alpha_selector_codebook() { - uint num_tasks = m_pTask_pool->get_num_threads() + 1; - crnlib::vector selectors(m_num_alpha_blocks * (m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks)); - for (uint i = 0, c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { - for (uint b = 0, step = m_has_subblocks ? 
2 : 1; b < m_num_blocks; b += step) - selectors[i++] = m_block_selectors[c][b]; - } - - crnlib::vector nodes; - SelectorNode node(0, selectors.get_ptr()); - for (uint i = 0; i < num_tasks; i++) { - node.p = node.pEnd; - node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; - if (node.p != node.pEnd) - nodes.push_back(node); - } - - for (uint i = 0; i < nodes.size(); i++) - m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); - m_pTask_pool->join(); - - std::priority_queue queue; - for (uint i = 0; i < nodes.size(); i++) - queue.push(nodes[i]); - - float v[8]; - for (uint s = 0; s < 8; s++) - v[s] = (s + 0.5f) * 0.125f; - - crnlib::vector vectors; - crnlib::vector weights; - vectors.reserve(selectors.size()); - weights.reserve(selectors.size()); - for (uint64 prev_selector = 0; queue.size();) { - SelectorNode node = queue.top(); - uint64 selector = *node.p++; - queue.pop(); - if (node.p != node.pEnd) - queue.push(node); - uint weight = (uint16)selector; - selector >>= 16; - if (!vectors.size() || selector != prev_selector) { - prev_selector = selector; - vec16F vector; - for (uint p = 0; p < 16; p++, selector >>= 3) - vector[15 - p] = v[selector & 7]; - vectors.push_back(vector); - weights.push_back(weight); - } else if (weights.back() > UINT_MAX - weight) { - weights.back() = UINT_MAX; - } else { - weights.back() += weight; - } - } - - tree_clusterizer selector_vq; - selector_vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), m_params.m_alpha_selector_codebook_size, false, m_pTask_pool); - m_alpha_selectors.resize(selector_vq.get_codebook_size()); - m_alpha_selectors_used.resize(selector_vq.get_codebook_size()); - for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { - const vec16F& v = selector_vq.get_codebook_entry(i); - m_alpha_selectors[i] = 0; - for (uint sh = 0, j = 0; j < 16; j++, sh += 3) - m_alpha_selectors[i] |= (uint64)(v[j] * 8.0f) << sh; - } - - crnlib::vector > 
selector_details(num_tasks); - for (uint t = 0; t < num_tasks; t++) { - selector_details[t].resize(m_alpha_selectors.size()); - m_pTask_pool->queue_object_task(this, &dxt_hc::create_alpha_selector_codebook_task, t, &selector_details[t]); - } - m_pTask_pool->join(); - - for (uint t = 1; t < num_tasks; t++) { - for (uint i = 0; i < m_alpha_selectors.size(); i++) { - for (uint8 p = 0; p < 16; p++) { - for (uint8 s = 0; s < 8; s++) - selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; - } - selector_details[0][i].used = selector_details[0][i].used || selector_details[t][i].used; - } - } - - for (uint i = 0; i < m_alpha_selectors.size(); i++) { - m_alpha_selectors_used[i] = selector_details[0][i].used; - uint (&errors)[16][8] = selector_details[0][i].error; - m_alpha_selectors[i] = 0; - for (uint sh = 0, p = 0; p < 16; p++, sh += 3) { - uint* e = errors[p]; - uint8 s07 = e[7] < e[0] ? 7 : 0; - uint8 s12 = e[2] < e[1] ? 2 : 1; - uint8 s34 = e[4] < e[3] ? 4 : 3; - uint8 s56 = e[6] < e[5] ? 6 : 5; - uint8 s02 = e[s12] < e[s07] ? s12 : s07; - uint8 s36 = e[s56] < e[s34] ? s56 : s34; - m_alpha_selectors[i] |= (uint64)(e[s36] < e[s02] ? 
s36 : s02) << sh; + { + v[s] = (s + 0.5f) * 0.125f; + } + + crnlib::vector vectors; + crnlib::vector weights; + vectors.reserve(selectors.size()); + weights.reserve(selectors.size()); + for (uint64 prev_selector = 0; queue.size();) + { + SelectorNode node = queue.top(); + uint64 selector = *node.p++; + queue.pop(); + if (node.p != node.pEnd) + { + queue.push(node); + } + uint weight = (uint16)selector; + selector >>= 16; + if (!vectors.size() || selector != prev_selector) + { + prev_selector = selector; + vec16F vector; + for (uint p = 0; p < 16; p++, selector >>= 3) + { + vector[15 - p] = v[selector & 7]; + } + vectors.push_back(vector); + weights.push_back(weight); + } + else if (weights.back() > UINT_MAX - weight) + { + weights.back() = UINT_MAX; + } + else + { + weights.back() += weight; + } + } + + tree_clusterizer selector_vq; + selector_vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), m_params.m_alpha_selector_codebook_size, false, m_pTask_pool); + m_alpha_selectors.resize(selector_vq.get_codebook_size()); + m_alpha_selectors_used.resize(selector_vq.get_codebook_size()); + for (uint i = 0; i < selector_vq.get_codebook_size(); i++) + { + const vec16F& v = selector_vq.get_codebook_entry(i); + m_alpha_selectors[i] = 0; + for (uint sh = 0, j = 0; j < 16; j++, sh += 3) + { + m_alpha_selectors[i] |= (uint64)(v[j] * 8.0f) << sh; + } + } + + crnlib::vector> selector_details(num_tasks); + for (uint t = 0; t < num_tasks; t++) + { + selector_details[t].resize(m_alpha_selectors.size()); + m_pTask_pool->queue_object_task(this, &dxt_hc::create_alpha_selector_codebook_task, t, &selector_details[t]); + } + m_pTask_pool->join(); + + for (uint t = 1; t < num_tasks; t++) + { + for (uint i = 0; i < m_alpha_selectors.size(); i++) + { + for (uint8 p = 0; p < 16; p++) + { + for (uint8 s = 0; s < 8; s++) + { + selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; + } + } + selector_details[0][i].used = selector_details[0][i].used || 
selector_details[t][i].used; + } + } + + for (uint i = 0; i < m_alpha_selectors.size(); i++) + { + m_alpha_selectors_used[i] = selector_details[0][i].used; + uint(&errors)[16][8] = selector_details[0][i].error; + m_alpha_selectors[i] = 0; + for (uint sh = 0, p = 0; p < 16; p++, sh += 3) + { + uint* e = errors[p]; + uint8 s07 = e[7] < e[0] ? 7 : 0; + uint8 s12 = e[2] < e[1] ? 2 : 1; + uint8 s34 = e[4] < e[3] ? 4 : 3; + uint8 s56 = e[6] < e[5] ? 6 : 5; + uint8 s02 = e[s12] < e[s07] ? s12 : s07; + uint8 s36 = e[s56] < e[s34] ? s56 : s34; + m_alpha_selectors[i] |= (uint64)(e[s36] < e[s02] ? s36 : s02) << sh; + } + } } - } -} -bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) { - CRNLIB_ASSERT(crn_get_current_thread_id() == m_main_thread_id); + bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) + { + CRNLIB_ASSERT(crn_get_current_thread_id() == m_main_thread_id); - if (!m_params.m_pProgress_func) - return true; + if (!m_params.m_pProgress_func) + { + return true; + } - const int percentage_complete = (subphase_total > 1) ? ((100 * subphase_index) / (subphase_total - 1)) : 100; - if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete)) - return !m_canceled; + const int percentage_complete = (subphase_total > 1) ? 
((100 * subphase_index) / (subphase_total - 1)) : 100; + if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete)) + { + return !m_canceled; + } - m_prev_percentage_complete = percentage_complete; + m_prev_percentage_complete = percentage_complete; - bool status = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0; - if (!status) { - m_canceled = true; - return false; - } + bool status = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0; + if (!status) + { + m_canceled = true; + return false; + } - return true; -} + return true; + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h index 7e31217..30bb21e 100644 --- a/crnlib/crn_dxt_hc.h +++ b/crnlib/crn_dxt_hc.h @@ -1,6 +1,28 @@ -// File: crn_dxt_hc.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_dxt1.h" #include "crn_dxt5a.h" #include "crn_dxt_endpoint_refiner.h" @@ -26,8 +48,10 @@ namespace crnlib struct endpoint_indices_details { - union { - struct { + union + { + struct + { uint16 color; uint16 alpha0; uint16 alpha1; @@ -43,8 +67,10 @@ namespace crnlib struct selector_indices_details { - union { - struct { + union + { + struct + { uint16 color; uint16 alpha0; uint16 alpha1; @@ -71,7 +97,7 @@ namespace crnlib float m_alpha_derating[8]; float m_uint8_to_float[256]; - color_quad_u8(*m_blocks)[16]; + color_quad_u8 (*m_blocks)[16]; uint m_num_blocks; crnlib::vector m_block_weights; crnlib::vector m_block_encodings; @@ -86,7 +112,7 @@ namespace crnlib struct params { - params(): + params() : m_num_blocks(0), m_num_levels(0), m_num_faces(0), @@ -118,7 +144,8 @@ namespace crnlib uint m_num_levels; uint m_num_faces; - struct { + struct + { uint m_first_block; uint m_num_blocks; uint m_block_width; @@ -147,15 +174,14 @@ namespace crnlib void clear(); bool compress( - color_quad_u8(*blocks)[16], + color_quad_u8 (*blocks)[16], crnlib::vector& endpoint_indices, crnlib::vector& selector_indices, crnlib::vector& color_endpoints, crnlib::vector& alpha_endpoints, crnlib::vector& color_selectors, crnlib::vector& alpha_selectors, - const params& p - ); + const params& p); private: params m_params; @@ -165,7 +191,8 @@ namespace crnlib bool m_has_etc_color_blocks; bool m_has_subblocks; - enum { + enum + { cColor = 0, cAlpha0 = 1, cAlpha1 = 2, @@ -190,7 +217,7 @@ namespace crnlib struct alpha_cluster { - alpha_cluster(): + alpha_cluster() : first_endpoint(0), second_endpoint(0) { @@ -236,4 +263,4 @@ namespace crnlib bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc_common.cpp b/crnlib/crn_dxt_hc_common.cpp index e7f19bc..297765c 100644 --- 
a/crnlib/crn_dxt_hc_common.cpp +++ b/crnlib/crn_dxt_hc_common.cpp @@ -1,44 +1,62 @@ -// File: crn_dxt_hc_common.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_dxt_hc_common.h" namespace crnlib { - chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = - { - {1, {{0, 0, 8, 8, 0}}}, + chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = { + { 1, { { 0, 0, 8, 8, 0 } } }, - {2, {{0, 0, 8, 4, 1}, {0, 4, 8, 4, 2}}}, - {2, {{0, 0, 4, 8, 3}, {4, 0, 4, 8, 4}}}, + { 2, { { 0, 0, 8, 4, 1 }, { 0, 4, 8, 4, 2 } } }, + { 2, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 8, 4 } } }, - {3, {{0, 0, 8, 4, 1}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}, - {3, {{0, 4, 8, 4, 2}, {0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}}}, + { 3, { { 0, 0, 8, 4, 1 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 0, 4, 8, 4, 2 }, { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 } } }, - {3, {{0, 0, 4, 8, 3}, {4, 0, 4, 4, 6}, {4, 4, 4, 4, 8}}}, - {3, {{4, 0, 4, 8, 4}, {0, 0, 4, 4, 5}, {0, 4, 4, 4, 7}}}, + { 3, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 4, 6 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 4, 0, 4, 8, 4 }, { 0, 0, 4, 4, 5 }, { 0, 4, 4, 4, 7 } } }, - {4, {{0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}} + { 4, { { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } } }; - chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = - { + chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = { // 2x2 - {0, 0, 8, 8, 0}, + { 0, 0, 8, 8, 0 }, // 2x1 - {0, 0, 8, 4, 1}, - {0, 4, 8, 4, 2}, + { 0, 0, 8, 4, 1 }, + { 0, 4, 8, 4, 2 }, // 1x2 - {0, 0, 4, 8, 3}, - {4, 0, 4, 8, 4}, + { 0, 0, 4, 8, 3 }, + { 4, 0, 4, 8, 4 }, // 1x1 - {0, 0, 4, 4, 5}, - {4, 0, 4, 4, 6}, - {0, 4, 4, 4, 7}, - {4, 4, 4, 4, 8} + { 0, 0, 4, 4, 5 }, + { 4, 0, 4, 4, 6 }, + { 0, 4, 4, 4, 7 }, + { 4, 4, 4, 4, 8 } }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc_common.h b/crnlib/crn_dxt_hc_common.h index 6e46ecd..07df9ec 100644 --- a/crnlib/crn_dxt_hc_common.h +++ b/crnlib/crn_dxt_hc_common.h @@ -1,5 +1,25 @@ -// File: crn_dxt_hc_common.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * 
Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -42,4 +62,4 @@ namespace crnlib const uint cNumChunkTileLayouts = 9; const uint cFirst4x4ChunkTileLayout = 5; CRN_EXPORT extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dxt_image.cpp b/crnlib/crn_dxt_image.cpp index afc6534..25c3763 100644 --- a/crnlib/crn_dxt_image.cpp +++ b/crnlib/crn_dxt_image.cpp @@ -1,5 +1,26 @@ -// File: crn_dxt_image.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_dxt_image.h" #if CRNLIB_SUPPORT_SQUISH @@ -23,1613 +44,1977 @@ #include "crn_etc.h" #define CRNLIB_USE_RG_ETC1 1 -namespace crnlib { -dxt_image::dxt_image() - : m_pElements(nullptr), - m_width(0), - m_height(0), - m_blocks_x(0), - m_blocks_y(0), - m_total_blocks(0), - m_total_elements(0), - m_num_elements_per_block(0), - m_bytes_per_block(0), - m_format(cDXTInvalid) { - utils::zero_object(m_element_type); - utils::zero_object(m_element_component_index); -} - -dxt_image::dxt_image(const dxt_image& other) - : m_pElements(nullptr) { - *this = other; -} - -dxt_image& dxt_image::operator=(const dxt_image& rhs) { - if (this == &rhs) - return *this; - - clear(); - - m_width = rhs.m_width; - m_height = rhs.m_height; - m_blocks_x = rhs.m_blocks_x; - m_blocks_y = rhs.m_blocks_y; - m_num_elements_per_block = rhs.m_num_elements_per_block; - m_bytes_per_block = rhs.m_bytes_per_block; - m_format = rhs.m_format; - m_total_blocks = rhs.m_total_blocks; - m_total_elements = rhs.m_total_elements; - m_pElements = nullptr; - memcpy(m_element_type, rhs.m_element_type, sizeof(m_element_type)); - memcpy(m_element_component_index, rhs.m_element_component_index, sizeof(m_element_component_index)); - - if (rhs.m_pElements) { - m_elements.resize(m_total_elements); - memcpy(&m_elements[0], 
rhs.m_pElements, sizeof(element) * m_total_elements); - m_pElements = &m_elements[0]; - } - - return *this; -} - -void dxt_image::clear() { - m_elements.clear(); - m_width = 0; - m_height = 0; - m_blocks_x = 0; - m_blocks_y = 0; - m_num_elements_per_block = 0; - m_bytes_per_block = 0; - m_format = cDXTInvalid; - utils::zero_object(m_element_type); - utils::zero_object(m_element_component_index); - m_total_blocks = 0; - m_total_elements = 0; - m_pElements = nullptr; -} - -bool dxt_image::init_internal(dxt_format fmt, uint width, uint height) { - CRNLIB_ASSERT((fmt != cDXTInvalid) && (width > 0) && (height > 0)); - - clear(); - - m_width = width; - m_height = height; - - m_blocks_x = (m_width + 3) >> cDXTBlockShift; - m_blocks_y = (m_height + 3) >> cDXTBlockShift; - - m_num_elements_per_block = 2; - if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A) || (fmt == cETC1) || (fmt == cETC2) || (fmt == cETC1S)) - m_num_elements_per_block = 1; - - m_total_blocks = m_blocks_x * m_blocks_y; - m_total_elements = m_total_blocks * m_num_elements_per_block; - - CRNLIB_ASSUME((uint)cDXT1BytesPerBlock == (uint)cETC1BytesPerBlock); - m_bytes_per_block = cDXT1BytesPerBlock * m_num_elements_per_block; - - m_format = fmt; - - switch (m_format) { - case cDXT1: - case cDXT1A: { - m_element_type[0] = cColorDXT1; - m_element_component_index[0] = -1; - break; - } - case cDXT3: { - m_element_type[0] = cAlphaDXT3; - m_element_type[1] = cColorDXT1; - m_element_component_index[0] = 3; - m_element_component_index[1] = -1; - break; - } - case cDXT5: { - m_element_type[0] = cAlphaDXT5; - m_element_type[1] = cColorDXT1; - m_element_component_index[0] = 3; - m_element_component_index[1] = -1; - break; - } - case cDXT5A: { - m_element_type[0] = cAlphaDXT5; - m_element_component_index[0] = 3; - break; - } - case cDXN_XY: { - m_element_type[0] = cAlphaDXT5; - m_element_type[1] = cAlphaDXT5; - m_element_component_index[0] = 0; - m_element_component_index[1] = 1; - break; +namespace crnlib +{ + 
dxt_image::dxt_image() : + m_pElements(nullptr), + m_width(0), + m_height(0), + m_blocks_x(0), + m_blocks_y(0), + m_total_blocks(0), + m_total_elements(0), + m_num_elements_per_block(0), + m_bytes_per_block(0), + m_format(cDXTInvalid) + { + utils::zero_object(m_element_type); + utils::zero_object(m_element_component_index); } - case cDXN_YX: { - m_element_type[0] = cAlphaDXT5; - m_element_type[1] = cAlphaDXT5; - m_element_component_index[0] = 1; - m_element_component_index[1] = 0; - break; - } - case cETC1: - case cETC1S: { - m_element_type[0] = cColorETC1; - m_element_component_index[0] = -1; - break; - } - case cETC2: { - m_element_type[0] = cColorETC2; - m_element_component_index[0] = -1; - break; + + dxt_image::dxt_image(const dxt_image& other) : + m_pElements(nullptr) + { + *this = other; } - case cETC2A: - case cETC2AS: { - m_element_type[0] = cAlphaETC2; - m_element_type[1] = cColorETC2; - m_element_component_index[0] = 3; - m_element_component_index[1] = -1; - break; + + dxt_image& dxt_image::operator=(const dxt_image& rhs) + { + if (this == &rhs) + { + return *this; + } + + clear(); + + m_width = rhs.m_width; + m_height = rhs.m_height; + m_blocks_x = rhs.m_blocks_x; + m_blocks_y = rhs.m_blocks_y; + m_num_elements_per_block = rhs.m_num_elements_per_block; + m_bytes_per_block = rhs.m_bytes_per_block; + m_format = rhs.m_format; + m_total_blocks = rhs.m_total_blocks; + m_total_elements = rhs.m_total_elements; + m_pElements = nullptr; + memcpy(m_element_type, rhs.m_element_type, sizeof(m_element_type)); + memcpy(m_element_component_index, rhs.m_element_component_index, sizeof(m_element_component_index)); + + if (rhs.m_pElements) + { + m_elements.resize(m_total_elements); + memcpy(&m_elements[0], rhs.m_pElements, sizeof(element) * m_total_elements); + m_pElements = &m_elements[0]; + } + + return *this; } - default: { - CRNLIB_ASSERT(0); - clear(); - return false; + + void dxt_image::clear() + { + m_elements.clear(); + m_width = 0; + m_height = 0; + m_blocks_x = 
0; + m_blocks_y = 0; + m_num_elements_per_block = 0; + m_bytes_per_block = 0; + m_format = cDXTInvalid; + utils::zero_object(m_element_type); + utils::zero_object(m_element_component_index); + m_total_blocks = 0; + m_total_elements = 0; + m_pElements = nullptr; } - } - return true; -} + bool dxt_image::init_internal(dxt_format fmt, uint width, uint height) + { + CRNLIB_ASSERT((fmt != cDXTInvalid) && (width > 0) && (height > 0)); -bool dxt_image::init(dxt_format fmt, uint width, uint height, bool clear_elements) { - if (!init_internal(fmt, width, height)) - return false; + clear(); - m_elements.resize(m_total_elements); - m_pElements = &m_elements[0]; + m_width = width; + m_height = height; - if (clear_elements) - memset(m_pElements, 0, sizeof(element) * m_total_elements); + m_blocks_x = (m_width + 3) >> cDXTBlockShift; + m_blocks_y = (m_height + 3) >> cDXTBlockShift; - return true; -} + m_num_elements_per_block = 2; + if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A) || (fmt == cETC1) || (fmt == cETC2) || (fmt == cETC1S)) + { + m_num_elements_per_block = 1; + } -bool dxt_image::init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy) { - CRNLIB_ASSERT(num_elements && pElements); + m_total_blocks = m_blocks_x * m_blocks_y; + m_total_elements = m_total_blocks * m_num_elements_per_block; - if (!init_internal(fmt, width, height)) - return false; + CRNLIB_ASSUME((uint)cDXT1BytesPerBlock == (uint)cETC1BytesPerBlock); + m_bytes_per_block = cDXT1BytesPerBlock * m_num_elements_per_block; - if (num_elements != m_total_elements) { - clear(); - return false; - } + m_format = fmt; - if (create_copy) { - m_elements.resize(m_total_elements); - m_pElements = &m_elements[0]; + switch (m_format) + { + case cDXT1: + case cDXT1A: + { + m_element_type[0] = cColorDXT1; + m_element_component_index[0] = -1; + break; + } + case cDXT3: + { + m_element_type[0] = cAlphaDXT3; + m_element_type[1] = cColorDXT1; + m_element_component_index[0] = 
3; + m_element_component_index[1] = -1; + break; + } + case cDXT5: + { + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cColorDXT1; + m_element_component_index[0] = 3; + m_element_component_index[1] = -1; + break; + } + case cDXT5A: + { + m_element_type[0] = cAlphaDXT5; + m_element_component_index[0] = 3; + break; + } + case cDXN_XY: + { + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cAlphaDXT5; + m_element_component_index[0] = 0; + m_element_component_index[1] = 1; + break; + } + case cDXN_YX: + { + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cAlphaDXT5; + m_element_component_index[0] = 1; + m_element_component_index[1] = 0; + break; + } + case cETC1: + case cETC1S: + { + m_element_type[0] = cColorETC1; + m_element_component_index[0] = -1; + break; + } + case cETC2: + { + m_element_type[0] = cColorETC2; + m_element_component_index[0] = -1; + break; + } + case cETC2A: + case cETC2AS: + { + m_element_type[0] = cAlphaETC2; + m_element_type[1] = cColorETC2; + m_element_component_index[0] = 3; + m_element_component_index[1] = -1; + break; + } + default: + { + CRNLIB_ASSERT(0); + clear(); + return false; + } + } - memcpy(m_pElements, pElements, m_total_elements * sizeof(element)); - } else - m_pElements = pElements; + return true; + } - return true; -} + bool dxt_image::init(dxt_format fmt, uint width, uint height, bool clear_elements) + { + if (!init_internal(fmt, width, height)) + { + return false; + } -struct init_task_params { - dxt_format m_fmt; - const image_u8* m_pImg; - const dxt_image::pack_params* m_pParams; - crn_thread_id_t m_main_thread; - atomic32_t m_canceled; -}; + m_elements.resize(m_total_elements); + m_pElements = &m_elements[0]; -void dxt_image::init_task(uint64 data, void* pData_ptr) { - const uint thread_index = static_cast(data); - init_task_params* pInit_params = static_cast(pData_ptr); + if (clear_elements) + { + memset(m_pElements, 0, sizeof(element) * m_total_elements); + } - const image_u8& img = 
*pInit_params->m_pImg; - const pack_params& p = *pInit_params->m_pParams; - const bool is_main_thread = (crn_get_current_thread_id() == pInit_params->m_main_thread); + return true; + } - uint block_index = 0; + bool dxt_image::init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy) + { + CRNLIB_ASSERT(num_elements && pElements); - set_block_pixels_context optimizer_context; - int prev_progress_percentage = -1; + if (!init_internal(fmt, width, height)) + { + return false; + } - for (uint block_y = 0; block_y < m_blocks_y; block_y++) { - const uint pixel_ofs_y = block_y * cDXTBlockSize; + if (num_elements != m_total_elements) + { + clear(); + return false; + } - for (uint block_x = 0; block_x < m_blocks_x; block_x++, block_index++) { - if (pInit_params->m_canceled) - return; + if (create_copy) + { + m_elements.resize(m_total_elements); + m_pElements = &m_elements[0]; - if (p.m_pProgress_callback && is_main_thread && ((block_index & 63) == 63)) { - const uint progress_percentage = p.m_progress_start + ((block_index * p.m_progress_range + get_total_blocks() / 2) / get_total_blocks()); - if ((int)progress_percentage != prev_progress_percentage) { - prev_progress_percentage = progress_percentage; - if (!(p.m_pProgress_callback)(progress_percentage, p.m_pProgress_callback_user_data_ptr)) { - atomic_exchange32(&pInit_params->m_canceled, CRNLIB_TRUE); - return; - } + memcpy(m_pElements, pElements, m_total_elements * sizeof(element)); + } + else + { + m_pElements = pElements; } - } - if (p.m_num_helper_threads) { - if ((block_index % (p.m_num_helper_threads + 1)) != thread_index) - continue; - } + return true; + } - color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + struct init_task_params + { + dxt_format m_fmt; + const image_u8* m_pImg; + const dxt_image::pack_params* m_pParams; + crn_thread_id_t m_main_thread; + atomic32_t m_canceled; + }; + + void dxt_image::init_task(uint64 data, void* pData_ptr) + { + const uint 
thread_index = static_cast(data); + init_task_params* pInit_params = static_cast(pData_ptr); + + const image_u8& img = *pInit_params->m_pImg; + const pack_params& p = *pInit_params->m_pParams; + const bool is_main_thread = (crn_get_current_thread_id() == pInit_params->m_main_thread); + + uint block_index = 0; + + set_block_pixels_context optimizer_context; + int prev_progress_percentage = -1; + + for (uint block_y = 0; block_y < m_blocks_y; block_y++) + { + const uint pixel_ofs_y = block_y * cDXTBlockSize; + + for (uint block_x = 0; block_x < m_blocks_x; block_x++, block_index++) + { + if (pInit_params->m_canceled) + { + return; + } + + if (p.m_pProgress_callback && is_main_thread && ((block_index & 63) == 63)) + { + const uint progress_percentage = p.m_progress_start + ((block_index * p.m_progress_range + get_total_blocks() / 2) / get_total_blocks()); + if ((int)progress_percentage != prev_progress_percentage) + { + prev_progress_percentage = progress_percentage; + if (!(p.m_pProgress_callback)(progress_percentage, p.m_pProgress_callback_user_data_ptr)) + { + atomic_exchange32(&pInit_params->m_canceled, CRNLIB_TRUE); + return; + } + } + } + + if (p.m_num_helper_threads) + { + if ((block_index % (p.m_num_helper_threads + 1)) != thread_index) + { + continue; + } + } + + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + + const uint pixel_ofs_x = block_x * cDXTBlockSize; + + for (uint y = 0; y < cDXTBlockSize; y++) + { + const uint iy = math::minimum(pixel_ofs_y + y, img.get_height() - 1); + + for (uint x = 0; x < cDXTBlockSize; x++) + { + const uint ix = math::minimum(pixel_ofs_x + x, img.get_width() - 1); + + pixels[x + y * cDXTBlockSize] = img(ix, iy); + } + } + + set_block_pixels(block_x, block_y, pixels, p, optimizer_context); + } + } + } - const uint pixel_ofs_x = block_x * cDXTBlockSize; +#if CRNLIB_SUPPORT_ATI_COMPRESS + bool dxt_image::init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p) + { + image_u8 tmp_img(img); + for (uint y 
= 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8 c(img(x, y)); + std::swap(c.r, c.b); + tmp_img(x, y) = c; + } + } - for (uint y = 0; y < cDXTBlockSize; y++) { - const uint iy = math::minimum(pixel_ofs_y + y, img.get_height() - 1); + ATI_TC_Texture src_tex; + utils::zero_object(src_tex); + src_tex.dwSize = sizeof(ATI_TC_Texture); + src_tex.dwWidth = tmp_img.get_width(); + src_tex.dwHeight = tmp_img.get_height(); + src_tex.dwPitch = tmp_img.get_pitch_in_bytes(); + src_tex.format = ATI_TC_FORMAT_ARGB_8888; + src_tex.dwDataSize = src_tex.dwPitch * tmp_img.get_height(); + src_tex.pData = (ATI_TC_BYTE*)tmp_img.get_ptr(); + + ATI_TC_Texture dst_tex; + utils::zero_object(dst_tex); + dst_tex.dwSize = sizeof(ATI_TC_Texture); + dst_tex.dwWidth = tmp_img.get_width(); + dst_tex.dwHeight = tmp_img.get_height(); + dst_tex.dwDataSize = get_size_in_bytes(); + dst_tex.pData = (ATI_TC_BYTE*)get_element_ptr(); + + switch (fmt) + { + case cDXT1: + case cDXT1A: + dst_tex.format = ATI_TC_FORMAT_DXT1; + break; + case cDXT3: + dst_tex.format = ATI_TC_FORMAT_DXT3; + break; + case cDXT5: + dst_tex.format = ATI_TC_FORMAT_DXT5; + break; + case cDXT5A: + dst_tex.format = ATI_TC_FORMAT_ATI1N; + break; + case cDXN_XY: + dst_tex.format = ATI_TC_FORMAT_ATI2N_XY; + break; + case cDXN_YX: + dst_tex.format = ATI_TC_FORMAT_ATI2N; + break; + default: + { + CRNLIB_ASSERT(false); + return false; + } + } - for (uint x = 0; x < cDXTBlockSize; x++) { - const uint ix = math::minimum(pixel_ofs_x + x, img.get_width() - 1); + ATI_TC_CompressOptions options; + utils::zero_object(options); + options.dwSize = sizeof(ATI_TC_CompressOptions); - pixels[x + y * cDXTBlockSize] = img(ix, iy); + if (fmt == cDXT1A) + { + options.bDXT1UseAlpha = true; + options.nAlphaThreshold = (ATI_TC_BYTE)p.m_dxt1a_alpha_threshold; + } + options.bDisableMultiThreading = (p.m_num_helper_threads == 0); + switch (p.m_quality) + { + case cCRNDXTQualityFast: + options.nCompressionSpeed = 
ATI_TC_Speed_Fast; + break; + case cCRNDXTQualitySuperFast: + options.nCompressionSpeed = ATI_TC_Speed_SuperFast; + break; + default: + options.nCompressionSpeed = ATI_TC_Speed_Normal; + break; } - } - set_block_pixels(block_x, block_y, pixels, p, optimizer_context); - } - } -} + if (p.m_perceptual) + { + options.bUseChannelWeighting = true; + options.fWeightingRed = .212671f; + options.fWeightingGreen = .715160f; + options.fWeightingBlue = .072169f; + } -#if CRNLIB_SUPPORT_ATI_COMPRESS -bool dxt_image::init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p) { - image_u8 tmp_img(img); - for (uint y = 0; y < img.get_height(); y++) { - for (uint x = 0; x < img.get_width(); x++) { - color_quad_u8 c(img(x, y)); - std::swap(c.r, c.b); - tmp_img(x, y) = c; + ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, nullptr, nullptr, nullptr); + return err == ATI_TC_OK; } - } - - ATI_TC_Texture src_tex; - utils::zero_object(src_tex); - src_tex.dwSize = sizeof(ATI_TC_Texture); - src_tex.dwWidth = tmp_img.get_width(); - src_tex.dwHeight = tmp_img.get_height(); - src_tex.dwPitch = tmp_img.get_pitch_in_bytes(); - src_tex.format = ATI_TC_FORMAT_ARGB_8888; - src_tex.dwDataSize = src_tex.dwPitch * tmp_img.get_height(); - src_tex.pData = (ATI_TC_BYTE*)tmp_img.get_ptr(); - - ATI_TC_Texture dst_tex; - utils::zero_object(dst_tex); - dst_tex.dwSize = sizeof(ATI_TC_Texture); - dst_tex.dwWidth = tmp_img.get_width(); - dst_tex.dwHeight = tmp_img.get_height(); - dst_tex.dwDataSize = get_size_in_bytes(); - dst_tex.pData = (ATI_TC_BYTE*)get_element_ptr(); - - switch (fmt) { - case cDXT1: - case cDXT1A: - dst_tex.format = ATI_TC_FORMAT_DXT1; - break; - case cDXT3: - dst_tex.format = ATI_TC_FORMAT_DXT3; - break; - case cDXT5: - dst_tex.format = ATI_TC_FORMAT_DXT5; - break; - case cDXT5A: - dst_tex.format = ATI_TC_FORMAT_ATI1N; - break; - case cDXN_XY: - dst_tex.format = ATI_TC_FORMAT_ATI2N_XY; - break; - case cDXN_YX: - dst_tex.format = ATI_TC_FORMAT_ATI2N; 
- break; - default: { - CRNLIB_ASSERT(false); - return false; - } - } - - ATI_TC_CompressOptions options; - utils::zero_object(options); - options.dwSize = sizeof(ATI_TC_CompressOptions); - - if (fmt == cDXT1A) { - options.bDXT1UseAlpha = true; - options.nAlphaThreshold = (ATI_TC_BYTE)p.m_dxt1a_alpha_threshold; - } - options.bDisableMultiThreading = (p.m_num_helper_threads == 0); - switch (p.m_quality) { - case cCRNDXTQualityFast: - options.nCompressionSpeed = ATI_TC_Speed_Fast; - break; - case cCRNDXTQualitySuperFast: - options.nCompressionSpeed = ATI_TC_Speed_SuperFast; - break; - default: - options.nCompressionSpeed = ATI_TC_Speed_Normal; - break; - } - - if (p.m_perceptual) { - options.bUseChannelWeighting = true; - options.fWeightingRed = .212671f; - options.fWeightingGreen = .715160f; - options.fWeightingBlue = .072169f; - } - - ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, nullptr, nullptr, nullptr); - return err == ATI_TC_OK; -} #endif -bool dxt_image::init(dxt_format fmt, const image_u8& img, const pack_params& p) { - if (!init(fmt, img.get_width(), img.get_height(), false)) - return false; + bool dxt_image::init(dxt_format fmt, const image_u8& img, const pack_params& p) + { + if (!init(fmt, img.get_width(), img.get_height(), false)) + { + return false; + } #if CRNLIB_SUPPORT_ATI_COMPRESS - if (p.m_compressor == cCRNDXTCompressorATI) - return init_ati_compress(fmt, img, p); + if (p.m_compressor == cCRNDXTCompressorATI) + { + return init_ati_compress(fmt, img, p); + } #endif - task_pool* pPool = p.m_pTask_pool; + task_pool* pPool = p.m_pTask_pool; + + task_pool tmp_pool; + if (!pPool) + { + if (!tmp_pool.init(p.m_num_helper_threads)) + { + return false; + } + pPool = &tmp_pool; + } + + init_task_params init_params; + init_params.m_fmt = fmt; + init_params.m_pImg = &img; + init_params.m_pParams = &p; + init_params.m_main_thread = crn_get_current_thread_id(); + init_params.m_canceled = false; - task_pool tmp_pool; - if (!pPool) { - if 
(!tmp_pool.init(p.m_num_helper_threads)) - return false; - pPool = &tmp_pool; - } + for (uint i = 0; i <= p.m_num_helper_threads; i++) + { + pPool->queue_object_task(this, &dxt_image::init_task, i, &init_params); + } - init_task_params init_params; - init_params.m_fmt = fmt; - init_params.m_pImg = &img; - init_params.m_pParams = &p; - init_params.m_main_thread = crn_get_current_thread_id(); - init_params.m_canceled = false; + pPool->join(); - for (uint i = 0; i <= p.m_num_helper_threads; i++) - pPool->queue_object_task(this, &dxt_image::init_task, i, &init_params); + if (init_params.m_canceled) + { + return false; + } - pPool->join(); + return true; + } - if (init_params.m_canceled) - return false; + bool dxt_image::unpack(image_u8& img) const + { + if (!m_total_elements) + { + return false; + } - return true; -} + img.resize(m_width, m_height); -bool dxt_image::unpack(image_u8& img) const { - if (!m_total_elements) - return false; + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pixels[i].set(0, 0, 0, 255); + } - img.resize(m_width, m_height); + bool all_blocks_valid = true; + for (uint block_y = 0; block_y < m_blocks_y; block_y++) + { + const uint pixel_ofs_y = block_y * cDXTBlockSize; + const uint limit_y = math::minimum(cDXTBlockSize, img.get_height() - pixel_ofs_y); - color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pixels[i].set(0, 0, 0, 255); + for (uint block_x = 0; block_x < m_blocks_x; block_x++) + { + if (!get_block_pixels(block_x, block_y, pixels)) + { + all_blocks_valid = false; + } - bool all_blocks_valid = true; - for (uint block_y = 0; block_y < m_blocks_y; block_y++) { - const uint pixel_ofs_y = block_y * cDXTBlockSize; - const uint limit_y = math::minimum(cDXTBlockSize, img.get_height() - pixel_ofs_y); + const uint pixel_ofs_x = block_x * cDXTBlockSize; - for (uint block_x = 0; block_x < m_blocks_x; block_x++) { - 
if (!get_block_pixels(block_x, block_y, pixels)) - all_blocks_valid = false; + const uint limit_x = math::minimum(cDXTBlockSize, img.get_width() - pixel_ofs_x); - const uint pixel_ofs_x = block_x * cDXTBlockSize; + for (uint y = 0; y < limit_y; y++) + { + const uint iy = pixel_ofs_y + y; - const uint limit_x = math::minimum(cDXTBlockSize, img.get_width() - pixel_ofs_x); + for (uint x = 0; x < limit_x; x++) + { + const uint ix = pixel_ofs_x + x; - for (uint y = 0; y < limit_y; y++) { - const uint iy = pixel_ofs_y + y; + img(ix, iy) = pixels[x + (y << cDXTBlockShift)]; + } + } + } + } - for (uint x = 0; x < limit_x; x++) { - const uint ix = pixel_ofs_x + x; + if (!all_blocks_valid) + { + console::error("dxt_image::unpack: One or more invalid blocks encountered!"); + } - img(ix, iy) = pixels[x + (y << cDXTBlockShift)]; + img.reset_comp_flags(); + img.set_component_valid(0, false); + img.set_component_valid(1, false); + img.set_component_valid(2, false); + for (uint i = 0; i < m_num_elements_per_block; i++) + { + if (m_element_component_index[i] < 0) + { + img.set_component_valid(0, true); + img.set_component_valid(1, true); + img.set_component_valid(2, true); + } + else + { + img.set_component_valid(m_element_component_index[i], true); + } } - } + + img.set_component_valid(3, get_dxt_format_has_alpha(m_format)); + + return true; } - } - - if (!all_blocks_valid) - console::error("dxt_image::unpack: One or more invalid blocks encountered!"); - - img.reset_comp_flags(); - img.set_component_valid(0, false); - img.set_component_valid(1, false); - img.set_component_valid(2, false); - for (uint i = 0; i < m_num_elements_per_block; i++) { - if (m_element_component_index[i] < 0) { - img.set_component_valid(0, true); - img.set_component_valid(1, true); - img.set_component_valid(2, true); - } else - img.set_component_valid(m_element_component_index[i], true); - } - - img.set_component_valid(3, get_dxt_format_has_alpha(m_format)); - - return true; -} - -void 
dxt_image::endian_swap() { - utils::endian_switch_words(reinterpret_cast(m_elements.get_ptr()), m_elements.size_in_bytes() / sizeof(uint16)); -} - -const dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) const { - CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); - return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; -} - -dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) { - CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); - return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; -} - -bool dxt_image::has_alpha() const { - switch (m_format) { - case cDXT1: { - for (uint i = 0; i < m_total_elements; i++) { - const dxt1_block& blk = *(dxt1_block*)&m_pElements[i]; - - if (blk.get_low_color() <= blk.get_high_color()) { - for (uint y = 0; y < cDXTBlockSize; y++) - for (uint x = 0; x < cDXTBlockSize; x++) - if (blk.get_selector(x, y) == 3) - return true; - } - } - - break; + + void dxt_image::endian_swap() + { + utils::endian_switch_words(reinterpret_cast(m_elements.get_ptr()), m_elements.size_in_bytes() / sizeof(uint16)); } - case cDXT1A: - case cDXT3: - case cDXT5: - case cDXT5A: - case cETC2A: - case cETC2AS: - return true; - default: - break; - } - - return false; -} - -color_quad_u8 dxt_image::get_pixel(uint x, uint y) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - - const uint block_x = x >> cDXTBlockShift; - const uint block_y = y >> cDXTBlockShift; - - const element* pElement = reinterpret_cast(&get_element(block_x, block_y, 0)); - - color_quad_u8 result(0, 0, 0, 255); - - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& block = 
*reinterpret_cast(&get_element(block_x, block_y, element_index)); - - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint table_index0 = block.get_inten_table(0); - const uint table_index1 = block.get_inten_table(1); - color_quad_u8 subblock_colors0[4], subblock_colors1[4]; - - if (diff_flag) { - const uint16 base_color5 = block.get_base5_color(); - const uint16 delta_color3 = block.get_delta3_color(); - etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); - } else { - const uint16 base_color4_0 = block.get_base4_color(0); - etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - const uint16 base_color4_1 = block.get_base4_color(1); - etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - const uint bx = x & 3; - const uint by = y & 3; - - const uint selector_index = block.get_selector(bx, by); - if (flip_flag) { - if (by <= 2) - result = subblock_colors0[selector_index]; - else - result = subblock_colors1[selector_index]; - } else { - if (bx <= 2) - result = subblock_colors0[selector_index]; - else - result = subblock_colors1[selector_index]; - } - - break; - } - case cColorDXT1: { - const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - - const uint l = pBlock->get_low_color(); - const uint h = pBlock->get_high_color(); - - color_quad_u8 c0(dxt1_block::unpack_color(static_cast(l), true)); - color_quad_u8 c1(dxt1_block::unpack_color(static_cast(h), true)); - - const uint s = pBlock->get_selector(x & 3, y & 3); - - if (l > h) { - switch (s) { - case 0: - result.set_noclamp_rgb(c0.r, c0.g, c0.b); - break; - case 1: - result.set_noclamp_rgb(c1.r, c1.g, c1.b); - break; - case 2: - result.set_noclamp_rgb((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3); - break; - 
case 3: - result.set_noclamp_rgb((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3); - break; - } - } else { - switch (s) { - case 0: - result.set_noclamp_rgb(c0.r, c0.g, c0.b); - break; - case 1: - result.set_noclamp_rgb(c1.r, c1.g, c1.b); - break; - case 2: - result.set_noclamp_rgb((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U); - break; - case 3: { - if (m_format <= cDXT1A) - result.set_noclamp_rgba(0, 0, 0, 0); - else - result.set_noclamp_rgb(0, 0, 0); - break; + + const dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) const + { + CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); + return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; + } + + dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) + { + CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); + return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; + } + + bool dxt_image::has_alpha() const + { + switch (m_format) + { + case cDXT1: + { + for (uint i = 0; i < m_total_elements; i++) + { + const dxt1_block& blk = *(dxt1_block*)&m_pElements[i]; + + if (blk.get_low_color() <= blk.get_high_color()) + { + for (uint y = 0; y < cDXTBlockSize; y++) + { + for (uint x = 0; x < cDXTBlockSize; x++) + { + if (blk.get_selector(x, y) == 3) + { + return true; + } + } + } + } } - } - } - - break; - } - case cAlphaDXT5: { - const int comp_index = m_element_component_index[element_index]; - - const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - - const uint l = pBlock->get_low_alpha(); - const uint h = pBlock->get_high_alpha(); - - const uint s = pBlock->get_selector(x & 3, y & 3); - - if (l > h) { - switch (s) { - case 0: - result[comp_index] = static_cast(l); - break; 
- case 1: - result[comp_index] = static_cast(h); - break; - case 2: - result[comp_index] = static_cast((l * 6 + h) / 7); - break; - case 3: - result[comp_index] = static_cast((l * 5 + h * 2) / 7); - break; - case 4: - result[comp_index] = static_cast((l * 4 + h * 3) / 7); - break; - case 5: - result[comp_index] = static_cast((l * 3 + h * 4) / 7); - break; - case 6: - result[comp_index] = static_cast((l * 2 + h * 5) / 7); - break; - case 7: - result[comp_index] = static_cast((l + h * 6) / 7); - break; - } - } else { - switch (s) { - case 0: - result[comp_index] = static_cast(l); - break; - case 1: - result[comp_index] = static_cast(h); - break; - case 2: - result[comp_index] = static_cast((l * 4 + h) / 5); - break; - case 3: - result[comp_index] = static_cast((l * 3 + h * 2) / 5); - break; - case 4: - result[comp_index] = static_cast((l * 2 + h * 3) / 5); - break; - case 5: - result[comp_index] = static_cast((l + h * 4) / 5); - break; - case 6: - result[comp_index] = 0; - break; - case 7: - result[comp_index] = 255; - break; - } - } - - break; - } - case cAlphaDXT3: { - const int comp_index = m_element_component_index[element_index]; - - const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - - result[comp_index] = static_cast(pBlock->get_alpha(x & 3, y & 3, true)); - - break; - } - default: - break; + + break; + } + case cDXT1A: + case cDXT3: + case cDXT5: + case cDXT5A: + case cETC2A: + case cETC2AS: + return true; + default: + break; + } + + return false; } - } - return result; -} + color_quad_u8 dxt_image::get_pixel(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + const element* pElement = reinterpret_cast(&get_element(block_x, block_y, 0)); + + color_quad_u8 result(0, 0, 0, 255); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch 
(m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + color_quad_u8 subblock_colors0[4], subblock_colors1[4]; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + const uint bx = x & 3; + const uint by = y & 3; + + const uint selector_index = block.get_selector(bx, by); + if (flip_flag) + { + if (by <= 2) + { + result = subblock_colors0[selector_index]; + } + else + { + result = subblock_colors1[selector_index]; + } + } + else + { + if (bx > 2) + { + result = subblock_colors1[selector_index]; + } + else + { + result = subblock_colors0[selector_index]; + } + } -uint dxt_image::get_pixel_alpha(uint x, uint y, uint element_index) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height) && (element_index < m_num_elements_per_block)); + break; + } + case cColorDXT1: + { + const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_color(); + const uint h = pBlock->get_high_color(); + + color_quad_u8 c0(dxt1_block::unpack_color(static_cast(l), true)); + color_quad_u8 c1(dxt1_block::unpack_color(static_cast(h), true)); + + const uint s = 
pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: + result.set_noclamp_rgb(c0.r, c0.g, c0.b); + break; + case 1: + result.set_noclamp_rgb(c1.r, c1.g, c1.b); + break; + case 2: + result.set_noclamp_rgb((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3); + break; + case 3: + result.set_noclamp_rgb((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3); + break; + } + } + else + { + switch (s) + { + case 0: + result.set_noclamp_rgb(c0.r, c0.g, c0.b); + break; + case 1: + result.set_noclamp_rgb(c1.r, c1.g, c1.b); + break; + case 2: + result.set_noclamp_rgb((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U); + break; + case 3: + { + if (m_format <= cDXT1A) + { + result.set_noclamp_rgba(0, 0, 0, 0); + } + else + { + result.set_noclamp_rgb(0, 0, 0); + } + break; + } + } + } - const uint block_x = x >> cDXTBlockShift; - const uint block_y = y >> cDXTBlockShift; + break; + } + case cAlphaDXT5: + { + const int comp_index = m_element_component_index[element_index]; + + const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_alpha(); + const uint h = pBlock->get_high_alpha(); + + const uint s = pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: + result[comp_index] = static_cast(l); + break; + case 1: + result[comp_index] = static_cast(h); + break; + case 2: + result[comp_index] = static_cast((l * 6 + h) / 7); + break; + case 3: + result[comp_index] = static_cast((l * 5 + h * 2) / 7); + break; + case 4: + result[comp_index] = static_cast((l * 4 + h * 3) / 7); + break; + case 5: + result[comp_index] = static_cast((l * 3 + h * 4) / 7); + break; + case 6: + result[comp_index] = static_cast((l * 2 + h * 5) / 7); + break; + case 7: + result[comp_index] = static_cast((l + h * 6) / 7); + break; + } + } + else + { + switch (s) + { + case 0: + result[comp_index] = static_cast(l); + break; + case 1: + 
result[comp_index] = static_cast(h); + break; + case 2: + result[comp_index] = static_cast((l * 4 + h) / 5); + break; + case 3: + result[comp_index] = static_cast((l * 3 + h * 2) / 5); + break; + case 4: + result[comp_index] = static_cast((l * 2 + h * 3) / 5); + break; + case 5: + result[comp_index] = static_cast((l + h * 4) / 5); + break; + case 6: + result[comp_index] = 0; + break; + case 7: + result[comp_index] = 255; + break; + } + } - switch (m_element_type[element_index]) { - case cColorDXT1: { - if (m_format <= cDXT1A) { - const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + break; + } + case cAlphaDXT3: + { + const int comp_index = m_element_component_index[element_index]; - const uint l = pBlock->get_low_color(); - const uint h = pBlock->get_high_color(); + const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - if (l <= h) { - uint s = pBlock->get_selector(x & 3, y & 3); + result[comp_index] = static_cast(pBlock->get_alpha(x & 3, y & 3, true)); - return (s == 3) ? 
0 : 255; - } else { - return 255; + break; + } + default: + break; + } } - } - break; + return result; } - case cAlphaDXT5: { - const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - - const uint l = pBlock->get_low_alpha(); - const uint h = pBlock->get_high_alpha(); - - const uint s = pBlock->get_selector(x & 3, y & 3); - - if (l > h) { - switch (s) { - case 0: - return l; - case 1: - return h; - case 2: - return (l * 6 + h) / 7; - case 3: - return (l * 5 + h * 2) / 7; - case 4: - return (l * 4 + h * 3) / 7; - case 5: - return (l * 3 + h * 4) / 7; - case 6: - return (l * 2 + h * 5) / 7; - case 7: - return (l + h * 6) / 7; - } - } else { - switch (s) { - case 0: - return l; - case 1: - return h; - case 2: - return (l * 4 + h) / 5; - case 3: - return (l * 3 + h * 2) / 5; - case 4: - return (l * 2 + h * 3) / 5; - case 5: - return (l + h * 4) / 5; - case 6: - return 0; - case 7: - return 255; - } - } - } - case cAlphaDXT3: { - const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - return pBlock->get_alpha(x & 3, y & 3, true); - } - default: - break; - } - - return 255; -} - -void dxt_image::set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual) { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - - const uint block_x = x >> cDXTBlockShift; - const uint block_y = y >> cDXTBlockShift; - - element* pElement = &get_element(block_x, block_y, 0); - - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - switch (m_element_type[element_index]) { - case cColorETC1: { - etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); - - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint table_index0 = block.get_inten_table(0); - const uint table_index1 = block.get_inten_table(1); - color_quad_u8 subblock_colors0[4], subblock_colors1[4]; - - if (diff_flag) { - const 
uint16 base_color5 = block.get_base5_color(); - const uint16 delta_color3 = block.get_delta3_color(); - etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); - } else { - const uint16 base_color4_0 = block.get_base4_color(0); - etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - const uint16 base_color4_1 = block.get_base4_color(1); - etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - const uint bx = x & 3; - const uint by = y & 3; - - color_quad_u8* pColors = subblock_colors1; - if (flip_flag) { - if (by <= 2) - pColors = subblock_colors0; - } else { - if (bx <= 2) - pColors = subblock_colors0; - } - - uint best_error = UINT_MAX; - uint best_selector = 0; - - for (uint i = 0; i < 4; i++) { - uint error = color::color_distance(perceptual, pColors[i], c, false); - if (error < best_error) { - best_error = error; - best_selector = i; - } - } - - block.set_selector(bx, by, best_selector); - break; - } - case cColorDXT1: { - dxt1_block* pDXT1_block = reinterpret_cast(pElement); - - color_quad_u8 colors[cDXT1SelectorValues]; - const uint n = pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - - if ((m_format == cDXT1A) && (c.a < 128)) - pDXT1_block->set_selector(x & 3, y & 3, 3); - else { - uint best_error = UINT_MAX; - uint best_selector = 0; - - for (uint i = 0; i < n; i++) { - uint error = color::color_distance(perceptual, colors[i], c, false); - if (error < best_error) { - best_error = error; - best_selector = i; + uint dxt_image::get_pixel_alpha(uint x, uint y, uint element_index) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height) && (element_index < m_num_elements_per_block)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + switch 
(m_element_type[element_index]) + { + case cColorDXT1: + { + if (m_format <= cDXT1A) + { + const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_color(); + const uint h = pBlock->get_high_color(); + + if (l <= h) + { + uint s = pBlock->get_selector(x & 3, y & 3); + + return (s == 3) ? 0 : 255; + } + else + { + return 255; + } } - } - pDXT1_block->set_selector(x & 3, y & 3, best_selector); + break; + } + case cAlphaDXT5: + { + const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_alpha(); + const uint h = pBlock->get_high_alpha(); + + const uint s = pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: + return l; + case 1: + return h; + case 2: + return (l * 6 + h) / 7; + case 3: + return (l * 5 + h * 2) / 7; + case 4: + return (l * 4 + h * 3) / 7; + case 5: + return (l * 3 + h * 4) / 7; + case 6: + return (l * 2 + h * 5) / 7; + case 7: + return (l + h * 6) / 7; + } + } + else + { + switch (s) + { + case 0: + return l; + case 1: + return h; + case 2: + return (l * 4 + h) / 5; + case 3: + return (l * 3 + h * 2) / 5; + case 4: + return (l * 2 + h * 3) / 5; + case 5: + return (l + h * 4) / 5; + case 6: + return 0; + case 7: + return 255; + } + } } + case cAlphaDXT3: + { + const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); - break; - } - case cAlphaDXT5: { - dxt5_block* pDXT5_block = reinterpret_cast(pElement); + return pBlock->get_alpha(x & 3, y & 3, true); + } + default: + break; + } - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + return 255; + } - const int comp_index = m_element_component_index[element_index]; + void dxt_image::set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + + const uint 
block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + element* pElement = &get_element(block_x, block_y, 0); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColorETC1: + { + etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + color_quad_u8 subblock_colors0[4], subblock_colors1[4]; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + const uint bx = x & 3; + const uint by = y & 3; + + color_quad_u8* pColors = subblock_colors1; + if (flip_flag) + { + if (by <= 2) + { + pColors = subblock_colors0; + } + } + else + { + if (bx <= 2) + { + pColors = subblock_colors0; + } + } + + uint best_error = UINT_MAX; + uint best_selector = 0; + + for (uint i = 0; i < 4; i++) + { + uint error = color::color_distance(perceptual, pColors[i], c, false); + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } + + block.set_selector(bx, by, best_selector); + break; + } + case cColorDXT1: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + + color_quad_u8 colors[cDXT1SelectorValues]; + const uint n = 
pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + if ((m_format == cDXT1A) && (c.a < 128)) + { + pDXT1_block->set_selector(x & 3, y & 3, 3); + } + else + { + uint best_error = UINT_MAX; + uint best_selector = 0; + + for (uint i = 0; i < n; i++) + { + uint error = color::color_distance(perceptual, colors[i], c, false); + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } + + pDXT1_block->set_selector(x & 3, y & 3, best_selector); + } - uint best_error = UINT_MAX; - uint best_selector = 0; + break; + } + case cAlphaDXT5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); - for (uint i = 0; i < cDXT5SelectorValues; i++) { - uint error = labs((int)values[i] - (int)c[comp_index]); // no need to square + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - if (error < best_error) { - best_error = error; - best_selector = i; - } - } + const int comp_index = m_element_component_index[element_index]; - pDXT5_block->set_selector(x & 3, y & 3, best_selector); + uint best_error = UINT_MAX; + uint best_selector = 0; - break; - } - case cAlphaDXT3: { - const int comp_index = m_element_component_index[element_index]; + for (uint i = 0; i < cDXT5SelectorValues; i++) + { + uint error = labs((int)values[i] - (int)c[comp_index]); // no need to square - dxt3_block* pDXT3_block = reinterpret_cast(pElement); + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } - pDXT3_block->set_alpha(x & 3, y & 3, c[comp_index], true); + pDXT5_block->set_selector(x & 3, y & 3, best_selector); - break; - } - default: - break; - } - } // element_index -} + break; + } + case cAlphaDXT3: + { + const int comp_index = m_element_component_index[element_index]; -bool dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const { - bool success = true; - const 
element* pElement = &get_element(block_x, block_y, 0); + dxt3_block* pDXT3_block = reinterpret_cast(pElement); - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + pDXT3_block->set_alpha(x & 3, y & 3, c[comp_index], true); + + break; + } + default: + break; + } + } // element_index + } + + bool dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const + { + bool success = true; + const element* pElement = &get_element(block_x, block_y, 0); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); // Preserve alpha if the format is something weird (like ETC1 for color and DXT5A for alpha) - which isn't currently supported. 
#if CRNLIB_USE_RG_ETC1 - if (!rg_etc1::unpack_etc1_block(&block, (uint32*)pPixels, m_format != cETC1)) - success = false; + if (!rg_etc1::unpack_etc1_block(&block, (uint32*)pPixels, m_format != cETC1)) + { + success = false; + } #else - if (!unpack_etc1(block, pPixels, m_format != cETC1)) - success = false; + if (!unpack_etc1(block, pPixels, m_format != cETC1)) + { + success = false; + } #endif - break; - } - case cColorETC2: { - const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); - if (!rg_etc1::unpack_etc2_color(&block, (uint32*)pPixels, m_format != cETC2)) - success = false; - break; - } - case cAlphaETC2: { - const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); - if (!rg_etc1::unpack_etc2_alpha(&block, (uint32*)pPixels, m_element_component_index[element_index])) - success = false; - break; - } - case cColorDXT1: { - const dxt1_block* pDXT1_block = reinterpret_cast(pElement); + break; + } + case cColorETC2: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + if (!rg_etc1::unpack_etc2_color(&block, (uint32*)pPixels, m_format != cETC2)) + { + success = false; + } + break; + } + case cAlphaETC2: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + if (!rg_etc1::unpack_etc2_alpha(&block, (uint32*)pPixels, m_element_component_index[element_index])) + { + success = false; + } + break; + } + case cColorDXT1: + { + const dxt1_block* pDXT1_block = reinterpret_cast(pElement); - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - uint s = 
pDXT1_block->get_selector(i & 3, i >> 2); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint s = pDXT1_block->get_selector(i & 3, i >> 2); - pPixels[i].r = colors[s].r; - pPixels[i].g = colors[s].g; - pPixels[i].b = colors[s].b; + pPixels[i].r = colors[s].r; + pPixels[i].g = colors[s].g; + pPixels[i].b = colors[s].b; - if (m_format <= cDXT1A) - pPixels[i].a = colors[s].a; - } + if (m_format <= cDXT1A) + { + pPixels[i].a = colors[s].a; + } + } - break; - } - case cAlphaDXT5: { - const dxt5_block* pDXT5_block = reinterpret_cast(pElement); + break; + } + case cAlphaDXT5: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pElement); - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - const int comp_index = m_element_component_index[element_index]; + const int comp_index = m_element_component_index[element_index]; - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - uint s = pDXT5_block->get_selector(i & 3, i >> 2); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint s = pDXT5_block->get_selector(i & 3, i >> 2); - pPixels[i][comp_index] = static_cast(values[s]); - } + pPixels[i][comp_index] = static_cast(values[s]); + } - break; - } - case cAlphaDXT3: { - const dxt3_block* pDXT3_block = reinterpret_cast(pElement); + break; + } + case cAlphaDXT3: + { + const dxt3_block* pDXT3_block = reinterpret_cast(pElement); - const int comp_index = m_element_component_index[element_index]; + const int comp_index = m_element_component_index[element_index]; - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); - pPixels[i][comp_index] = 
static_cast(a); - } + pPixels[i][comp_index] = static_cast(a); + } - break; - } - default: - break; + break; + } + default: + break; + } + } // element_index + return success; } - } // element_index - return success; -} -void dxt_image::set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p) { - set_block_pixels_context context; - set_block_pixels(block_x, block_y, pPixels, p, context); -} + void dxt_image::set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p) + { + set_block_pixels_context context; + set_block_pixels(block_x, block_y, pPixels, p, context); + } -void dxt_image::set_block_pixels( - uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, - set_block_pixels_context& context) { - element* pElement = &get_element(block_x, block_y, 0); + void dxt_image::set_block_pixels( + uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, + set_block_pixels_context& context) + { + element* pElement = &get_element(block_x, block_y, 0); - if (m_format == cETC1) { - etc1_block& dst_block = *reinterpret_cast(pElement); + if (m_format == cETC1) + { + etc1_block& dst_block = *reinterpret_cast(pElement); #if CRNLIB_USE_RG_ETC1 - rg_etc1::etc1_quality etc_quality = rg_etc1::cHighQuality; - if (p.m_quality <= cCRNDXTQualityFast) - etc_quality = rg_etc1::cLowQuality; - else if (p.m_quality <= cCRNDXTQualityNormal) - etc_quality = rg_etc1::cMediumQuality; - - rg_etc1::etc1_pack_params pack_params; - pack_params.m_dithering = p.m_dithering; - //pack_params.m_perceptual = p.m_perceptual; - pack_params.m_quality = etc_quality; - rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); + rg_etc1::etc1_quality etc_quality = rg_etc1::cHighQuality; + if (p.m_quality <= cCRNDXTQualityFast) + { + etc_quality = rg_etc1::cLowQuality; + } + else if (p.m_quality <= cCRNDXTQualityNormal) + { + etc_quality = rg_etc1::cMediumQuality; + } + + 
rg_etc1::etc1_pack_params pack_params; + pack_params.m_dithering = p.m_dithering; + //pack_params.m_perceptual = p.m_perceptual; + pack_params.m_quality = etc_quality; + rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); #else - crn_etc_quality etc_quality = cCRNETCQualitySlow; - if (p.m_quality <= cCRNDXTQualityFast) - etc_quality = cCRNETCQualityFast; - else if (p.m_quality <= cCRNDXTQualityNormal) - etc_quality = cCRNETCQualityMedium; - - crn_etc1_pack_params pack_params; - pack_params.m_perceptual = p.m_perceptual; - pack_params.m_quality = etc_quality; - pack_params.m_dithering = p.m_dithering; - - pack_etc1_block(dst_block, pPixels, pack_params, context.m_etc1_optimizer); + crn_etc_quality etc_quality = cCRNETCQualitySlow; + if (p.m_quality <= cCRNDXTQualityFast) + etc_quality = cCRNETCQualityFast; + else if (p.m_quality <= cCRNDXTQualityNormal) + etc_quality = cCRNETCQualityMedium; + + crn_etc1_pack_params pack_params; + pack_params.m_perceptual = p.m_perceptual; + pack_params.m_quality = etc_quality; + pack_params.m_dithering = p.m_dithering; + + pack_etc1_block(dst_block, pPixels, pack_params, context.m_etc1_optimizer); #endif - } else if (m_format == cETC2) { - etc1_block& dst_block = *reinterpret_cast(pElement); - rg_etc1::etc1_pack_params pack_params; - pack_params.m_dithering = p.m_dithering; - pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality : rg_etc1::cHighQuality; - rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); - - } else if (m_format == cETC2A) { - rg_etc1::etc1_quality etc_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? 
rg_etc1::cMediumQuality : rg_etc1::cHighQuality; - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - if (m_element_type[element_index] == cAlphaETC2) { - rg_etc1::etc2a_pack_params pack_params; - pack_params.m_quality = etc_quality; - pack_params.comp_index = m_element_component_index[element_index]; - rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); - } else { - rg_etc1::etc1_pack_params pack_params; - pack_params.m_dithering = p.m_dithering; - pack_params.m_quality = etc_quality; - rg_etc1::pack_etc1_block(pElement, (uint32*)pPixels, pack_params); - } - } - - } else if (m_format == cETC1S) { - crn_etc1_pack_params pack_params; - pack_params.m_perceptual = p.m_perceptual; - pack_params.m_dithering = p.m_dithering; - pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? cCRNETCQualityMedium : cCRNETCQualitySlow; - pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); - - } else if (m_format == cETC2AS) { - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - if (m_element_type[element_index] == cAlphaETC2) { - rg_etc1::etc2a_pack_params pack_params; - pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality : rg_etc1::cHighQuality; - pack_params.comp_index = m_element_component_index[element_index]; - rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); - } else { - crn_etc1_pack_params pack_params; - pack_params.m_perceptual = p.m_perceptual; - pack_params.m_dithering = p.m_dithering; - pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? 
cCRNETCQualityMedium : cCRNETCQualitySlow; - pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); - } - } - - } else -#if CRNLIB_SUPPORT_SQUISH - if ((p.m_compressor == cCRNDXTCompressorSquish) && ((m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cDXT5A))) { - uint squish_flags = 0; - if ((m_format == cDXT1) || (m_format == cDXT1A)) - squish_flags = squish::kDxt1; - else if (m_format == cDXT3) - squish_flags = squish::kDxt3; - else if (m_format == cDXT5A) - squish_flags = squish::kDxt5A; - else - squish_flags = squish::kDxt5; - - if (p.m_perceptual) - squish_flags |= squish::kColourMetricPerceptual; - else - squish_flags |= squish::kColourMetricUniform; - - if (p.m_quality >= cCRNDXTQualityBetter) - squish_flags |= squish::kColourIterativeClusterFit; - else if (p.m_quality == cCRNDXTQualitySuperFast) - squish_flags |= squish::kColourRangeFit; - - color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; - - memcpy(pixels, pPixels, sizeof(color_quad_u8) * cDXTBlockSize * cDXTBlockSize); - - if (m_format == cDXT1) { - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pixels[i].a = 255; - } else if (m_format == cDXT1A) { - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - if (pixels[i].a < p.m_dxt1a_alpha_threshold) - pixels[i].a = 0; + } + else if (m_format == cETC2) + { + etc1_block& dst_block = *reinterpret_cast(pElement); + rg_etc1::etc1_pack_params pack_params; + pack_params.m_dithering = p.m_dithering; + pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality + : rg_etc1::cHighQuality; + rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); + } + else if (m_format == cETC2A) + { + rg_etc1::etc1_quality etc_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? 
rg_etc1::cMediumQuality + : rg_etc1::cHighQuality; + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + if (m_element_type[element_index] == cAlphaETC2) + { + rg_etc1::etc2a_pack_params pack_params; + pack_params.m_quality = etc_quality; + pack_params.comp_index = m_element_component_index[element_index]; + rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); + } + else + { + rg_etc1::etc1_pack_params pack_params; + pack_params.m_dithering = p.m_dithering; + pack_params.m_quality = etc_quality; + rg_etc1::pack_etc1_block(pElement, (uint32*)pPixels, pack_params); + } + } + } + else if (m_format == cETC1S) + { + crn_etc1_pack_params pack_params; + pack_params.m_perceptual = p.m_perceptual; + pack_params.m_dithering = p.m_dithering; + pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? cCRNETCQualityMedium + : cCRNETCQualitySlow; + pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); + } + else if (m_format == cETC2AS) + { + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + if (m_element_type[element_index] == cAlphaETC2) + { + rg_etc1::etc2a_pack_params pack_params; + pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality + : rg_etc1::cHighQuality; + pack_params.comp_index = m_element_component_index[element_index]; + rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); + } + else + { + crn_etc1_pack_params pack_params; + pack_params.m_perceptual = p.m_perceptual; + pack_params.m_dithering = p.m_dithering; + pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? 
cCRNETCQualityMedium + : cCRNETCQualitySlow; + pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); + } + } + } else - pixels[i].a = 255; - } +#if CRNLIB_SUPPORT_SQUISH + if ((p.m_compressor == cCRNDXTCompressorSquish) && ((m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cDXT5A))) + { + uint squish_flags = 0; + if ((m_format == cDXT1) || (m_format == cDXT1A)) + squish_flags = squish::kDxt1; + else if (m_format == cDXT3) + squish_flags = squish::kDxt3; + else if (m_format == cDXT5A) + squish_flags = squish::kDxt5A; + else + squish_flags = squish::kDxt5; + + if (p.m_perceptual) + squish_flags |= squish::kColourMetricPerceptual; + else + squish_flags |= squish::kColourMetricUniform; + + if (p.m_quality >= cCRNDXTQualityBetter) + squish_flags |= squish::kColourIterativeClusterFit; + else if (p.m_quality == cCRNDXTQualitySuperFast) + squish_flags |= squish::kColourRangeFit; + + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + + memcpy(pixels, pPixels, sizeof(color_quad_u8) * cDXTBlockSize * cDXTBlockSize); + + if (m_format == cDXT1) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pixels[i].a = 255; + } + else if (m_format == cDXT1A) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + if (pixels[i].a < p.m_dxt1a_alpha_threshold) + pixels[i].a = 0; + else + pixels[i].a = 255; + } - squish::Compress(reinterpret_cast(pixels), pElement, squish_flags); - } + squish::Compress(reinterpret_cast(pixels), pElement, squish_flags); + } - else -#endif // CRNLIB_SUPPORT_SQUISH - // RYG doesn't support DXT1A - if ((p.m_compressor == cCRNDXTCompressorRYG) && ((m_format == cDXT1) || (m_format == cDXT5) || (m_format == cDXT5A))) { - color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + else +#endif // CRNLIB_SUPPORT_SQUISH \ + // RYG doesn't support DXT1A + if ((p.m_compressor == cCRNDXTCompressorRYG) && ((m_format == cDXT1) || (m_format == cDXT5) || (m_format == cDXT5A))) + { + 
color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - pixels[i].r = pPixels[i].b; - pixels[i].g = pPixels[i].g; - pixels[i].b = pPixels[i].r; + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pixels[i].r = pPixels[i].b; + pixels[i].g = pPixels[i].g; + pixels[i].b = pPixels[i].r; + + if (m_format != cDXT1) + { + pixels[i].a = pPixels[i].a; + } + else + { + pixels[i].a = 255; + } + } - if (m_format == cDXT1) - pixels[i].a = 255; - else - pixels[i].a = pPixels[i].a; + if (m_format == cDXT5A) + { + ryg_dxt::sCompressDXT5ABlock((sU8*)pElement, (const sU32*)pixels); + } + else + { + ryg_dxt::sCompressDXTBlock((sU8*)pElement, (const sU32*)pixels, m_format == cDXT5, 0); + } + } + else if ((p.m_compressor == cCRNDXTCompressorCRNF) && (m_format != cDXT1A)) + { + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColorDXT1: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + dxt_fast::compress_color_block(pDXT1_block, pPixels, p.m_quality >= cCRNDXTQualityNormal); + + break; + } + case cAlphaDXT5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); + dxt_fast::compress_alpha_block(pDXT5_block, pPixels, m_element_component_index[element_index]); + + break; + } + case cAlphaDXT3: + { + const int comp_index = m_element_component_index[element_index]; + + dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + } + + break; + } + default: + break; + } + } + } + else + { + dxt1_endpoint_optimizer& dxt1_optimizer = context.m_dxt1_optimizer; + dxt5_endpoint_optimizer& dxt5_optimizer = context.m_dxt5_optimizer; + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch 
(m_element_type[element_index]) + { + case cColorDXT1: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + + bool pixels_have_alpha = false; + if (m_format == cDXT1A) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + if (pPixels[i].a < p.m_dxt1a_alpha_threshold) + { + pixels_have_alpha = true; + break; + } + } + } + + dxt1_endpoint_optimizer::results results; + uint8 selectors[cDXTBlockSize * cDXTBlockSize]; + results.m_pSelectors = selectors; + + dxt1_endpoint_optimizer::params params; + params.m_block_index = block_x + block_y * m_blocks_x; + params.m_quality = p.m_quality; + params.m_perceptual = p.m_perceptual; + params.m_grayscale_sampling = p.m_grayscale_sampling; + params.m_pixels_have_alpha = pixels_have_alpha; + params.m_use_alpha_blocks = p.m_use_both_block_types; + params.m_use_transparent_indices_for_black = p.m_use_transparent_indices_for_black; + params.m_dxt1a_alpha_threshold = p.m_dxt1a_alpha_threshold; + params.m_pPixels = pPixels; + params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; + params.m_endpoint_caching = p.m_endpoint_caching; + + if ((m_format != cDXT1) && (m_format != cDXT1A)) + { + params.m_use_alpha_blocks = false; + } + + if (!dxt1_optimizer.compute(params, results)) + { + CRNLIB_ASSERT(0); + break; + } + + pDXT1_block->set_low_color(results.m_low_color); + pDXT1_block->set_high_color(results.m_high_color); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pDXT1_block->set_selector(i & 3, i >> 2, selectors[i]); + } + + break; + } + case cAlphaDXT5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); + + dxt5_endpoint_optimizer::results results; + + uint8 selectors[cDXTBlockSize * cDXTBlockSize]; + results.m_pSelectors = selectors; + + dxt5_endpoint_optimizer::params params; + params.m_block_index = block_x + block_y * m_blocks_x; + params.m_pPixels = pPixels; + params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; + params.m_comp_index = m_element_component_index[element_index]; + 
params.m_quality = p.m_quality; + params.m_use_both_block_types = p.m_use_both_block_types; + + if (!dxt5_optimizer.compute(params, results)) + { + CRNLIB_ASSERT(0); + break; + } + + pDXT5_block->set_low_alpha(results.m_first_endpoint); + pDXT5_block->set_high_alpha(results.m_second_endpoint); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pDXT5_block->set_selector(i & 3, i >> 2, selectors[i]); + } + + break; + } + case cAlphaDXT3: + { + const int comp_index = m_element_component_index[element_index]; + + dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + } + + break; + } + default: + break; + } + } + } } - if (m_format == cDXT5A) - ryg_dxt::sCompressDXT5ABlock((sU8*)pElement, (const sU32*)pixels); - else - ryg_dxt::sCompressDXTBlock((sU8*)pElement, (const sU32*)pixels, m_format == cDXT5, 0); - } else if ((p.m_compressor == cCRNDXTCompressorCRNF) && (m_format != cDXT1A)) { - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - switch (m_element_type[element_index]) { - case cColorDXT1: { - dxt1_block* pDXT1_block = reinterpret_cast(pElement); - dxt_fast::compress_color_block(pDXT1_block, pPixels, p.m_quality >= cCRNDXTQualityNormal); + void dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const + { + const element& block = get_element(block_x, block_y, element_index); + + switch (m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&block); + if (src_block.get_diff_bit()) + { + packed_low_endpoint = src_block.get_base5_color(); + packed_high_endpoint = src_block.get_delta3_color(); + } + else + { + packed_low_endpoint = src_block.get_base4_color(0); + packed_high_endpoint = src_block.get_base4_color(1); + } - break; + 
break; } - case cAlphaDXT5: { - dxt5_block* pDXT5_block = reinterpret_cast(pElement); - dxt_fast::compress_alpha_block(pDXT5_block, pPixels, m_element_component_index[element_index]); + case cColorDXT1: + { + const dxt1_block& block1 = *reinterpret_cast(&block); - break; + packed_low_endpoint = block1.get_low_color(); + packed_high_endpoint = block1.get_high_color(); + + break; } - case cAlphaDXT3: { - const int comp_index = m_element_component_index[element_index]; + case cAlphaDXT5: + { + const dxt5_block& block5 = *reinterpret_cast(&block); - dxt3_block* pDXT3_block = reinterpret_cast(pElement); + packed_low_endpoint = block5.get_low_alpha(); + packed_high_endpoint = block5.get_high_alpha(); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + break; + } + case cAlphaDXT3: + { + packed_low_endpoint = 0; + packed_high_endpoint = 255; - break; + break; } default: - break; - } + break; + } } - } else { - dxt1_endpoint_optimizer& dxt1_optimizer = context.m_dxt1_optimizer; - dxt5_endpoint_optimizer& dxt5_optimizer = context.m_dxt5_optimizer; - for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { - switch (m_element_type[element_index]) { - case cColorDXT1: { - dxt1_block* pDXT1_block = reinterpret_cast(pElement); + int dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled) const + { + uint l = 0, h = 0; + get_block_endpoints(block_x, block_y, element_index, l, h); + + switch (m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + if (src_block.get_diff_bit()) + { + low_endpoint = etc1_block::unpack_color5(static_cast(l), scaled); + etc1_block::unpack_color5(high_endpoint, static_cast(l), static_cast(h), scaled); + } + else + { + low_endpoint = 
etc1_block::unpack_color4(static_cast(l), scaled); + high_endpoint = etc1_block::unpack_color4(static_cast(h), scaled); + } - bool pixels_have_alpha = false; - if (m_format == cDXT1A) { - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - if (pPixels[i].a < p.m_dxt1a_alpha_threshold) { - pixels_have_alpha = true; - break; - } - } - - dxt1_endpoint_optimizer::results results; - uint8 selectors[cDXTBlockSize * cDXTBlockSize]; - results.m_pSelectors = selectors; - - dxt1_endpoint_optimizer::params params; - params.m_block_index = block_x + block_y * m_blocks_x; - params.m_quality = p.m_quality; - params.m_perceptual = p.m_perceptual; - params.m_grayscale_sampling = p.m_grayscale_sampling; - params.m_pixels_have_alpha = pixels_have_alpha; - params.m_use_alpha_blocks = p.m_use_both_block_types; - params.m_use_transparent_indices_for_black = p.m_use_transparent_indices_for_black; - params.m_dxt1a_alpha_threshold = p.m_dxt1a_alpha_threshold; - params.m_pPixels = pPixels; - params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; - params.m_endpoint_caching = p.m_endpoint_caching; - - if ((m_format != cDXT1) && (m_format != cDXT1A)) - params.m_use_alpha_blocks = false; - - if (!dxt1_optimizer.compute(params, results)) { - CRNLIB_ASSERT(0); - break; - } + return -1; + } + case cColorDXT1: + { + uint r, g, b; - pDXT1_block->set_low_color(results.m_low_color); - pDXT1_block->set_high_color(results.m_high_color); + dxt1_block::unpack_color(r, g, b, static_cast(l), scaled); + low_endpoint.r = static_cast(r); + low_endpoint.g = static_cast(g); + low_endpoint.b = static_cast(b); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pDXT1_block->set_selector(i & 3, i >> 2, selectors[i]); + dxt1_block::unpack_color(r, g, b, static_cast(h), scaled); + high_endpoint.r = static_cast(r); + high_endpoint.g = static_cast(g); + high_endpoint.b = static_cast(b); - break; + return -1; } - case cAlphaDXT5: { - dxt5_block* pDXT5_block = reinterpret_cast(pElement); + case 
cAlphaDXT5: + { + const int component = m_element_component_index[element_index]; - dxt5_endpoint_optimizer::results results; + low_endpoint[component] = static_cast(l); + high_endpoint[component] = static_cast(h); - uint8 selectors[cDXTBlockSize * cDXTBlockSize]; - results.m_pSelectors = selectors; + return component; + } + case cAlphaDXT3: + { + const int component = m_element_component_index[element_index]; - dxt5_endpoint_optimizer::params params; - params.m_block_index = block_x + block_y * m_blocks_x; - params.m_pPixels = pPixels; - params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; - params.m_comp_index = m_element_component_index[element_index]; - params.m_quality = p.m_quality; - params.m_use_both_block_types = p.m_use_both_block_types; + low_endpoint[component] = static_cast(l); + high_endpoint[component] = static_cast(h); - if (!dxt5_optimizer.compute(params, results)) { - CRNLIB_ASSERT(0); + return component; + } + default: break; - } + } - pDXT5_block->set_low_alpha(results.m_first_endpoint); - pDXT5_block->set_high_alpha(results.m_second_endpoint); + return 0; + } - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pDXT5_block->set_selector(i & 3, i >> 2, selectors[i]); + uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index) + { + const element& block = get_element(block_x, block_y, element_index); + + switch (m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + const uint table_index0 = src_block.get_inten_table(0); + const uint table_index1 = src_block.get_inten_table(1); + if (src_block.get_diff_bit()) + { + const uint16 base_color5 = src_block.get_base5_color(); + const uint16 delta_color3 = src_block.get_delta3_color(); + if (subblock_index) + { + etc1_block::get_diff_subblock_colors(pColors, base_color5, delta_color3, table_index1); + } + else + { + 
etc1_block::get_diff_subblock_colors(pColors, base_color5, table_index0); + } + } + else + { + if (subblock_index) + { + const uint16 base_color4_1 = src_block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(pColors, base_color4_1, table_index1); + } + else + { + const uint16 base_color4_0 = src_block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(pColors, base_color4_0, table_index0); + } + } - break; + break; } - case cAlphaDXT3: { - const int comp_index = m_element_component_index[element_index]; + case cColorDXT1: + { + const dxt1_block& block1 = *reinterpret_cast(&block); + return dxt1_block::get_block_colors(pColors, static_cast(block1.get_low_color()), static_cast(block1.get_high_color())); + } + case cAlphaDXT5: + { + const dxt5_block& block5 = *reinterpret_cast(&block); + + uint values[cDXT5SelectorValues]; - dxt3_block* pDXT3_block = reinterpret_cast(pElement); + const uint n = dxt5_block::get_block_values(values, block5.get_low_alpha(), block5.get_high_alpha()); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + const int comp_index = m_element_component_index[element_index]; + for (uint i = 0; i < n; i++) + { + pColors[i][comp_index] = static_cast(values[i]); + } - break; + return n; + } + case cAlphaDXT3: + { + const int comp_index = m_element_component_index[element_index]; + for (uint i = 0; i < 16; i++) + { + pColors[i][comp_index] = static_cast((i << 4) | i); + } + + return 16; } default: - break; - } - } - } -} - -void dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const { - const element& block = get_element(block_x, block_y, element_index); - - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& src_block = *reinterpret_cast(&block); - if (src_block.get_diff_bit()) { - packed_low_endpoint = src_block.get_base5_color(); - 
packed_high_endpoint = src_block.get_delta3_color(); - } else { - packed_low_endpoint = src_block.get_base4_color(0); - packed_high_endpoint = src_block.get_base4_color(1); - } - - break; + break; + } + + return 0; } - case cColorDXT1: { - const dxt1_block& block1 = *reinterpret_cast(&block); - packed_low_endpoint = block1.get_low_color(); - packed_high_endpoint = block1.get_high_color(); + uint dxt_image::get_subblock_index(uint x, uint y, uint element_index) const + { + if (m_element_type[element_index] != cColorETC1) + { + return 0; + } - break; - } - case cAlphaDXT5: { - const dxt5_block& block5 = *reinterpret_cast(&block); + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; - packed_low_endpoint = block5.get_low_alpha(); - packed_high_endpoint = block5.get_high_alpha(); + const element& block = get_element(block_x, block_y, element_index); - break; + const etc1_block& src_block = *reinterpret_cast(&block); + if (src_block.get_flip_bit()) + { + return ((y & 3) >= 2) ? 1 : 0; + } + else + { + return ((x & 3) >= 2) ? 1 : 0; + } } - case cAlphaDXT3: { - packed_low_endpoint = 0; - packed_high_endpoint = 255; - break; + uint dxt_image::get_total_subblocks(uint element_index) const + { + return (m_element_type[element_index] == cColorETC1) ? 
2 : 0; } - default: - break; - } -} - -int dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled) const { - uint l = 0, h = 0; - get_block_endpoints(block_x, block_y, element_index, l, h); - - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); - if (src_block.get_diff_bit()) { - low_endpoint = etc1_block::unpack_color5(static_cast(l), scaled); - etc1_block::unpack_color5(high_endpoint, static_cast(l), static_cast(h), scaled); - } else { - low_endpoint = etc1_block::unpack_color4(static_cast(l), scaled); - high_endpoint = etc1_block::unpack_color4(static_cast(h), scaled); - } - - return -1; - } - case cColorDXT1: { - uint r, g, b; - dxt1_block::unpack_color(r, g, b, static_cast(l), scaled); - low_endpoint.r = static_cast(r); - low_endpoint.g = static_cast(g); - low_endpoint.b = static_cast(b); + uint dxt_image::get_selector(uint x, uint y, uint element_index) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); - dxt1_block::unpack_color(r, g, b, static_cast(h), scaled); - high_endpoint.r = static_cast(r); - high_endpoint.g = static_cast(g); - high_endpoint.b = static_cast(b); + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; - return -1; - } - case cAlphaDXT5: { - const int component = m_element_component_index[element_index]; + const element& block = get_element(block_x, block_y, element_index); - low_endpoint[component] = static_cast(l); - high_endpoint[component] = static_cast(h); + switch (m_element_type[element_index]) + { + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&block); + return src_block.get_selector(x & 3, y & 3); + } + case cColorDXT1: + { + const dxt1_block& block1 = *reinterpret_cast(&block); + return block1.get_selector(x & 3, y & 3); + } + case cAlphaDXT5: + { + const 
dxt5_block& block5 = *reinterpret_cast(&block); + return block5.get_selector(x & 3, y & 3); + } + case cAlphaDXT3: + { + const dxt3_block& block3 = *reinterpret_cast(&block); + return block3.get_alpha(x & 3, y & 3, false); + } + default: + break; + } - return component; + return 0; } - case cAlphaDXT3: { - const int component = m_element_component_index[element_index]; - - low_endpoint[component] = static_cast(l); - high_endpoint[component] = static_cast(h); - return component; - } - default: - break; - } - - return 0; -} - -uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index) { - const element& block = get_element(block_x, block_y, element_index); - - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); - const uint table_index0 = src_block.get_inten_table(0); - const uint table_index1 = src_block.get_inten_table(1); - if (src_block.get_diff_bit()) { - const uint16 base_color5 = src_block.get_base5_color(); - const uint16 delta_color3 = src_block.get_delta3_color(); - if (subblock_index) - etc1_block::get_diff_subblock_colors(pColors, base_color5, delta_color3, table_index1); - else - etc1_block::get_diff_subblock_colors(pColors, base_color5, table_index0); - } else { - if (subblock_index) { - const uint16 base_color4_1 = src_block.get_base4_color(1); - etc1_block::get_abs_subblock_colors(pColors, base_color4_1, table_index1); - } else { - const uint16 base_color4_0 = src_block.get_base4_color(0); - etc1_block::get_abs_subblock_colors(pColors, base_color4_0, table_index0); - } - } - - break; - } - case cColorDXT1: { - const dxt1_block& block1 = *reinterpret_cast(&block); - return dxt1_block::get_block_colors(pColors, static_cast(block1.get_low_color()), static_cast(block1.get_high_color())); + void dxt_image::change_dxt1_to_dxt1a() + { + if (m_format == cDXT1) + { + m_format = cDXT1A; + 
} } - case cAlphaDXT5: { - const dxt5_block& block5 = *reinterpret_cast(&block); - uint values[cDXT5SelectorValues]; - - const uint n = dxt5_block::get_block_values(values, block5.get_low_alpha(), block5.get_high_alpha()); - - const int comp_index = m_element_component_index[element_index]; - for (uint i = 0; i < n; i++) - pColors[i][comp_index] = static_cast(values[i]); - - return n; + void dxt_image::flip_col(uint x) + { + const uint other_x = (m_blocks_x - 1) - x; + for (uint y = 0; y < m_blocks_y; y++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp[2] = { get_element(x, y, e), get_element(other_x, y, e) }; + + for (uint i = 0; i < 2; i++) + { + switch (get_element_type(e)) + { + case cColorDXT1: + reinterpret_cast(&tmp[i])->flip_x(); + break; + case cAlphaDXT3: + reinterpret_cast(&tmp[i])->flip_x(); + break; + case cAlphaDXT5: + reinterpret_cast(&tmp[i])->flip_x(); + break; + default: + CRNLIB_ASSERT(0); + break; + } + } + + get_element(x, y, e) = tmp[1]; + get_element(other_x, y, e) = tmp[0]; + } + } } - case cAlphaDXT3: { - const int comp_index = m_element_component_index[element_index]; - for (uint i = 0; i < 16; i++) - pColors[i][comp_index] = static_cast((i << 4) | i); - return 16; + void dxt_image::flip_row(uint y) + { + const uint other_y = (m_blocks_y - 1) - y; + for (uint x = 0; x < m_blocks_x; x++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp[2] = { get_element(x, y, e), get_element(x, other_y, e) }; + + for (uint i = 0; i < 2; i++) + { + switch (get_element_type(e)) + { + case cColorDXT1: + reinterpret_cast(&tmp[i])->flip_y(); + break; + case cAlphaDXT3: + reinterpret_cast(&tmp[i])->flip_y(); + break; + case cAlphaDXT5: + reinterpret_cast(&tmp[i])->flip_y(); + break; + default: + CRNLIB_ASSERT(0); + break; + } + } + + get_element(x, y, e) = tmp[1]; + get_element(x, other_y, e) = tmp[0]; + } + } } - default: - break; - } - return 0; -} - -uint dxt_image::get_subblock_index(uint x, uint y, 
uint element_index) const { - if (m_element_type[element_index] != cColorETC1) - return 0; + bool dxt_image::can_flip(uint axis_index) + { + if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) + { + // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). + return false; + } - const uint block_x = x >> cDXTBlockShift; - const uint block_y = y >> cDXTBlockShift; + uint d; + if (!axis_index) + { + d = m_width; + } + else + { + d = m_height; + } - const element& block = get_element(block_x, block_y, element_index); + if (d & 3) + { + if (d > 4) + { + return false; + } + } - const etc1_block& src_block = *reinterpret_cast(&block); - if (src_block.get_flip_bit()) { - return ((y & 3) >= 2) ? 1 : 0; - } else { - return ((x & 3) >= 2) ? 1 : 0; - } -} + return true; + } -uint dxt_image::get_total_subblocks(uint element_index) const { - return (m_element_type[element_index] == cColorETC1) ? 2 : 0; -} + bool dxt_image::flip_x() + { + if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) + { + // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). 
+ return false; + } -uint dxt_image::get_selector(uint x, uint y, uint element_index) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); + if ((m_width & 3) && (m_width > 4)) + { + return false; + } - const uint block_x = x >> cDXTBlockShift; - const uint block_y = y >> cDXTBlockShift; + if (m_width == 1) + { + return true; + } - const element& block = get_element(block_x, block_y, element_index); + const uint mid_x = m_blocks_x / 2; - switch (m_element_type[element_index]) { - case cColorETC1: { - const etc1_block& src_block = *reinterpret_cast(&block); - return src_block.get_selector(x & 3, y & 3); - } - case cColorDXT1: { - const dxt1_block& block1 = *reinterpret_cast(&block); - return block1.get_selector(x & 3, y & 3); - } - case cAlphaDXT5: { - const dxt5_block& block5 = *reinterpret_cast(&block); - return block5.get_selector(x & 3, y & 3); - } - case cAlphaDXT3: { - const dxt3_block& block3 = *reinterpret_cast(&block); - return block3.get_alpha(x & 3, y & 3, false); - } - default: - break; - } - - return 0; -} - -void dxt_image::change_dxt1_to_dxt1a() { - if (m_format == cDXT1) - m_format = cDXT1A; -} - -void dxt_image::flip_col(uint x) { - const uint other_x = (m_blocks_x - 1) - x; - for (uint y = 0; y < m_blocks_y; y++) { - for (uint e = 0; e < get_elements_per_block(); e++) { - element tmp[2] = {get_element(x, y, e), get_element(other_x, y, e)}; - - for (uint i = 0; i < 2; i++) { - switch (get_element_type(e)) { - case cColorDXT1: - reinterpret_cast(&tmp[i])->flip_x(); - break; - case cAlphaDXT3: - reinterpret_cast(&tmp[i])->flip_x(); - break; - case cAlphaDXT5: - reinterpret_cast(&tmp[i])->flip_x(); - break; - default: - CRNLIB_ASSERT(0); - break; + for (uint x = 0; x < mid_x; x++) + { + flip_col(x); } - } - get_element(x, y, e) = tmp[1]; - get_element(other_x, y, e) = tmp[0]; - } - } -} - -void dxt_image::flip_row(uint y) { - const uint other_y = (m_blocks_y - 1) - y; - for (uint x = 0; x < m_blocks_x; x++) { - for (uint e = 0; e < 
get_elements_per_block(); e++) { - element tmp[2] = {get_element(x, y, e), get_element(x, other_y, e)}; - - for (uint i = 0; i < 2; i++) { - switch (get_element_type(e)) { - case cColorDXT1: - reinterpret_cast(&tmp[i])->flip_y(); - break; - case cAlphaDXT3: - reinterpret_cast(&tmp[i])->flip_y(); - break; - case cAlphaDXT5: - reinterpret_cast(&tmp[i])->flip_y(); - break; - default: - CRNLIB_ASSERT(0); - break; + if (m_blocks_x & 1) + { + const uint w = math::minimum(m_width, 4U); + for (uint y = 0; y < m_blocks_y; y++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp(get_element(mid_x, y, e)); + switch (get_element_type(e)) + { + case cColorDXT1: + reinterpret_cast(&tmp)->flip_x(w, 4); + break; + case cAlphaDXT3: + reinterpret_cast(&tmp)->flip_x(w, 4); + break; + case cAlphaDXT5: + reinterpret_cast(&tmp)->flip_x(w, 4); + break; + default: + CRNLIB_ASSERT(0); + break; + } + get_element(mid_x, y, e) = tmp; + } + } } - } - get_element(x, y, e) = tmp[1]; - get_element(x, other_y, e) = tmp[0]; + return true; } - } -} - -bool dxt_image::can_flip(uint axis_index) { - if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { - // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). - return false; - } - - uint d; - if (axis_index) - d = m_height; - else - d = m_width; - - if (d & 3) { - if (d > 4) - return false; - } - - return true; -} - -bool dxt_image::flip_x() { - if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { - // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). 
- return false; - } - - if ((m_width & 3) && (m_width > 4)) - return false; - - if (m_width == 1) - return true; - - const uint mid_x = m_blocks_x / 2; - - for (uint x = 0; x < mid_x; x++) - flip_col(x); - - if (m_blocks_x & 1) { - const uint w = math::minimum(m_width, 4U); - for (uint y = 0; y < m_blocks_y; y++) { - for (uint e = 0; e < get_elements_per_block(); e++) { - element tmp(get_element(mid_x, y, e)); - switch (get_element_type(e)) { - case cColorDXT1: - reinterpret_cast(&tmp)->flip_x(w, 4); - break; - case cAlphaDXT3: - reinterpret_cast(&tmp)->flip_x(w, 4); - break; - case cAlphaDXT5: - reinterpret_cast(&tmp)->flip_x(w, 4); - break; - default: - CRNLIB_ASSERT(0); - break; - } - get_element(mid_x, y, e) = tmp; - } - } - } - - return true; -} -bool dxt_image::flip_y() { - if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { - // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). - return false; - } - - if ((m_height & 3) && (m_height > 4)) - return false; + bool dxt_image::flip_y() + { + if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) + { + // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). 
+ return false; + } - if (m_height == 1) - return true; + if ((m_height & 3) && (m_height > 4)) + { + return false; + } - const uint mid_y = m_blocks_y / 2; + if (m_height == 1) + { + return true; + } - for (uint y = 0; y < mid_y; y++) - flip_row(y); + const uint mid_y = m_blocks_y / 2; - if (m_blocks_y & 1) { - const uint h = math::minimum(m_height, 4U); - for (uint x = 0; x < m_blocks_x; x++) { - for (uint e = 0; e < get_elements_per_block(); e++) { - element tmp(get_element(x, mid_y, e)); - switch (get_element_type(e)) { - case cColorDXT1: - reinterpret_cast(&tmp)->flip_y(4, h); - break; - case cAlphaDXT3: - reinterpret_cast(&tmp)->flip_y(4, h); - break; - case cAlphaDXT5: - reinterpret_cast(&tmp)->flip_y(4, h); - break; - default: - CRNLIB_ASSERT(0); - break; + for (uint y = 0; y < mid_y; y++) + { + flip_row(y); } - get_element(x, mid_y, e) = tmp; - } - } - } - return true; -} + if (m_blocks_y & 1) + { + const uint h = math::minimum(m_height, 4U); + for (uint x = 0; x < m_blocks_x; x++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp(get_element(x, mid_y, e)); + switch (get_element_type(e)) + { + case cColorDXT1: + reinterpret_cast(&tmp)->flip_y(4, h); + break; + case cAlphaDXT3: + reinterpret_cast(&tmp)->flip_y(4, h); + break; + case cAlphaDXT5: + reinterpret_cast(&tmp)->flip_y(4, h); + break; + default: + CRNLIB_ASSERT(0); + break; + } + get_element(x, mid_y, e) = tmp; + } + } + } -} // namespace crnlib + return true; + } +} // namespace crnlib diff --git a/crnlib/crn_dxt_image.h b/crnlib/crn_dxt_image.h index 79c6cf5..62ae81e 100644 --- a/crnlib/crn_dxt_image.h +++ b/crnlib/crn_dxt_image.h @@ -1,6 +1,28 @@ -// File: crn_dxt_image.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_dxt1.h" #include "crn_dxt5a.h" #include "crn_etc.h" @@ -12,246 +34,312 @@ #define CRNLIB_SUPPORT_ATI_COMPRESS 0 -namespace crnlib { -class task_pool; - -class CRN_EXPORT dxt_image { - public: - dxt_image(); - dxt_image(const dxt_image& other); - dxt_image& operator=(const dxt_image& rhs); - - void clear(); - - inline bool is_valid() const { return m_blocks_x > 0; } - - uint get_width() const { return m_width; } - uint get_height() const { return m_height; } - - uint get_blocks_x() const { return m_blocks_x; } - uint get_blocks_y() const { return m_blocks_y; } - uint get_total_blocks() const { return m_blocks_x * m_blocks_y; } - - uint get_elements_per_block() const { return m_num_elements_per_block; } - uint get_bytes_per_block() const { return m_bytes_per_block; } - - dxt_format get_format() const { return m_format; } - - bool has_color() const { return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cETC1) || (m_format == cETC2) || (m_format == cETC2A) || (m_format == cETC1S) || (m_format == cETC2AS); } - - // Will be pretty slow if the image is DXT1, as this method scans for alpha blocks/selectors. 
- bool has_alpha() const; - - enum element_type { - cUnused = 0, - - cColorDXT1, // DXT1 color block - - cAlphaDXT3, // DXT3 alpha block (only) - cAlphaDXT5, // DXT5 alpha block (only) - - cColorETC1, // ETC1 color block - cColorETC2, // ETC2 color block - - cAlphaETC2, // ETC2 alpha block (only) - }; - - element_type get_element_type(uint element_index) const { - CRNLIB_ASSERT(element_index < m_num_elements_per_block); - return m_element_type[element_index]; - } - - //Returns -1 for RGB, or [0,3] - int8 get_element_component_index(uint element_index) const { - CRNLIB_ASSERT(element_index < m_num_elements_per_block); - return m_element_component_index[element_index]; - } - - struct element { - uint8 m_bytes[8]; - - uint get_le_word(uint index) const { - CRNLIB_ASSERT(index < 4); - return m_bytes[index * 2] | (m_bytes[index * 2 + 1] << 8); - } - uint get_be_word(uint index) const { - CRNLIB_ASSERT(index < 4); - return m_bytes[index * 2 + 1] | (m_bytes[index * 2] << 8); - } - - void set_le_word(uint index, uint val) { - CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); - m_bytes[index * 2] = static_cast(val & 0xFF); - m_bytes[index * 2 + 1] = static_cast((val >> 8) & 0xFF); - } - void set_be_word(uint index, uint val) { - CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); - m_bytes[index * 2 + 1] = static_cast(val & 0xFF); - m_bytes[index * 2] = static_cast((val >> 8) & 0xFF); - } - - void clear() { - memset(this, 0, sizeof(*this)); - } - }; - - typedef crnlib::vector element_vec; - - bool init(dxt_format fmt, uint width, uint height, bool clear_elements); - bool init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy); - - struct pack_params { - pack_params() { - clear(); - } - - void clear() { - m_quality = cCRNDXTQualityUber; - m_perceptual = true; - m_dithering = false; - m_grayscale_sampling = false; - m_use_both_block_types = true; - m_endpoint_caching = true; - m_compressor = cCRNDXTCompressorCRN; - 
m_pProgress_callback = nullptr; - m_pProgress_callback_user_data_ptr = nullptr; - m_dxt1a_alpha_threshold = 128; - m_num_helper_threads = 0; - m_progress_start = 0; - m_progress_range = 100; - m_use_transparent_indices_for_black = false; - m_pTask_pool = nullptr; - } - - void init(const crn_comp_params& params) { - m_perceptual = (params.m_flags & cCRNCompFlagPerceptual) != 0; - m_num_helper_threads = params.m_num_helper_threads; - m_use_both_block_types = (params.m_flags & cCRNCompFlagUseBothBlockTypes) != 0; - m_use_transparent_indices_for_black = (params.m_flags & cCRNCompFlagUseTransparentIndicesForBlack) != 0; - m_dxt1a_alpha_threshold = params.m_dxt1a_alpha_threshold; - m_quality = params.m_dxt_quality; - m_endpoint_caching = (params.m_flags & cCRNCompFlagDisableEndpointCaching) == 0; - m_grayscale_sampling = (params.m_flags & cCRNCompFlagGrayscaleSampling) != 0; - m_compressor = params.m_dxt_compressor_type; - } - - uint m_dxt1a_alpha_threshold; - - uint m_num_helper_threads; - - crn_dxt_quality m_quality; - - crn_dxt_compressor_type m_compressor; - - bool m_perceptual; - bool m_dithering; - bool m_grayscale_sampling; - bool m_use_both_block_types; - bool m_endpoint_caching; - bool m_use_transparent_indices_for_black; - - typedef bool (*progress_callback_func)(uint percentage_complete, void* pUser_data_ptr); - progress_callback_func m_pProgress_callback; - void* m_pProgress_callback_user_data_ptr; - - uint m_progress_start; - uint m_progress_range; - - task_pool* m_pTask_pool; - }; - - bool init(dxt_format fmt, const image_u8& img, const pack_params& p = dxt_image::pack_params()); - - bool unpack(image_u8& img) const; - - void endian_swap(); - - uint get_total_elements() const { return m_elements.size(); } - - const element_vec& get_element_vec() const { return m_elements; } - element_vec& get_element_vec() { return m_elements; } - - const element& get_element(uint block_x, uint block_y, uint element_index) const; - element& get_element(uint block_x, uint 
block_y, uint element_index); - - const element* get_element_ptr() const { return m_pElements; } - element* get_element_ptr() { return m_pElements; } - - uint get_size_in_bytes() const { return m_elements.size() * sizeof(element); } - uint get_row_pitch_in_bytes() const { return m_blocks_x * m_bytes_per_block; } - - color_quad_u8 get_pixel(uint x, uint y) const; - uint get_pixel_alpha(uint x, uint y, uint element_index) const; - - void set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual = true); - - // get_block_pixels() only sets those components stored in the image! - bool get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; - - struct set_block_pixels_context { - dxt1_endpoint_optimizer m_dxt1_optimizer; - dxt5_endpoint_optimizer m_dxt5_optimizer; - pack_etc1_block_context m_etc1_optimizer; +namespace crnlib +{ + class task_pool; + + class CRN_EXPORT dxt_image + { + public: + dxt_image(); + dxt_image(const dxt_image& other); + dxt_image& operator=(const dxt_image& rhs); + + void clear(); + + inline bool is_valid() const + { + return m_blocks_x > 0; + } + + uint get_width() const + { + return m_width; + } + uint get_height() const + { + return m_height; + } + + uint get_blocks_x() const + { + return m_blocks_x; + } + uint get_blocks_y() const + { + return m_blocks_y; + } + uint get_total_blocks() const + { + return m_blocks_x * m_blocks_y; + } + + uint get_elements_per_block() const + { + return m_num_elements_per_block; + } + uint get_bytes_per_block() const + { + return m_bytes_per_block; + } + + dxt_format get_format() const + { + return m_format; + } + + bool has_color() const + { + return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cETC1) || (m_format == cETC2) || (m_format == cETC2A) || (m_format == cETC1S) || (m_format == cETC2AS); + } + + // Will be pretty slow if the image is DXT1, as this method scans for alpha blocks/selectors. 
+ bool has_alpha() const; + + enum element_type + { + cUnused = 0, + + cColorDXT1, // DXT1 color block + + cAlphaDXT3, // DXT3 alpha block (only) + cAlphaDXT5, // DXT5 alpha block (only) + + cColorETC1, // ETC1 color block + cColorETC2, // ETC2 color block + + cAlphaETC2, // ETC2 alpha block (only) + }; + + element_type get_element_type(uint element_index) const + { + CRNLIB_ASSERT(element_index < m_num_elements_per_block); + return m_element_type[element_index]; + } + + //Returns -1 for RGB, or [0,3] + int8 get_element_component_index(uint element_index) const + { + CRNLIB_ASSERT(element_index < m_num_elements_per_block); + return m_element_component_index[element_index]; + } + + struct element + { + uint8 m_bytes[8]; + + uint get_le_word(uint index) const + { + CRNLIB_ASSERT(index < 4); + return m_bytes[index * 2] | (m_bytes[index * 2 + 1] << 8); + } + uint get_be_word(uint index) const + { + CRNLIB_ASSERT(index < 4); + return m_bytes[index * 2 + 1] | (m_bytes[index * 2] << 8); + } + + void set_le_word(uint index, uint val) + { + CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); + m_bytes[index * 2] = static_cast(val & 0xFF); + m_bytes[index * 2 + 1] = static_cast((val >> 8) & 0xFF); + } + void set_be_word(uint index, uint val) + { + CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); + m_bytes[index * 2 + 1] = static_cast(val & 0xFF); + m_bytes[index * 2] = static_cast((val >> 8) & 0xFF); + } + + void clear() + { + memset(this, 0, sizeof(*this)); + } + }; + + typedef crnlib::vector element_vec; + + bool init(dxt_format fmt, uint width, uint height, bool clear_elements); + bool init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy); + + struct pack_params + { + pack_params() + { + clear(); + } + + void clear() + { + m_quality = cCRNDXTQualityUber; + m_perceptual = true; + m_dithering = false; + m_grayscale_sampling = false; + m_use_both_block_types = true; + m_endpoint_caching = true; + m_compressor = 
cCRNDXTCompressorCRN; + m_pProgress_callback = nullptr; + m_pProgress_callback_user_data_ptr = nullptr; + m_dxt1a_alpha_threshold = 128; + m_num_helper_threads = 0; + m_progress_start = 0; + m_progress_range = 100; + m_use_transparent_indices_for_black = false; + m_pTask_pool = nullptr; + } + + void init(const crn_comp_params& params) + { + m_perceptual = (params.m_flags & cCRNCompFlagPerceptual) != 0; + m_num_helper_threads = params.m_num_helper_threads; + m_use_both_block_types = (params.m_flags & cCRNCompFlagUseBothBlockTypes) != 0; + m_use_transparent_indices_for_black = (params.m_flags & cCRNCompFlagUseTransparentIndicesForBlack) != 0; + m_dxt1a_alpha_threshold = params.m_dxt1a_alpha_threshold; + m_quality = params.m_dxt_quality; + m_endpoint_caching = (params.m_flags & cCRNCompFlagDisableEndpointCaching) == 0; + m_grayscale_sampling = (params.m_flags & cCRNCompFlagGrayscaleSampling) != 0; + m_compressor = params.m_dxt_compressor_type; + } + + uint m_dxt1a_alpha_threshold; + + uint m_num_helper_threads; + + crn_dxt_quality m_quality; + + crn_dxt_compressor_type m_compressor; + + bool m_perceptual; + bool m_dithering; + bool m_grayscale_sampling; + bool m_use_both_block_types; + bool m_endpoint_caching; + bool m_use_transparent_indices_for_black; + + typedef bool (*progress_callback_func)(uint percentage_complete, void* pUser_data_ptr); + progress_callback_func m_pProgress_callback; + void* m_pProgress_callback_user_data_ptr; + + uint m_progress_start; + uint m_progress_range; + + task_pool* m_pTask_pool; + }; + + bool init(dxt_format fmt, const image_u8& img, const pack_params& p = dxt_image::pack_params()); + + bool unpack(image_u8& img) const; + + void endian_swap(); + + uint get_total_elements() const + { + return m_elements.size(); + } + + const element_vec& get_element_vec() const + { + return m_elements; + } + element_vec& get_element_vec() + { + return m_elements; + } + + const element& get_element(uint block_x, uint block_y, uint element_index) const; 
+ element& get_element(uint block_x, uint block_y, uint element_index); + + const element* get_element_ptr() const + { + return m_pElements; + } + element* get_element_ptr() + { + return m_pElements; + } + + uint get_size_in_bytes() const + { + return m_elements.size() * sizeof(element); + } + uint get_row_pitch_in_bytes() const + { + return m_blocks_x * m_bytes_per_block; + } + + color_quad_u8 get_pixel(uint x, uint y) const; + uint get_pixel_alpha(uint x, uint y, uint element_index) const; + + void set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual = true); + + // get_block_pixels() only sets those components stored in the image! + bool get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; + + struct set_block_pixels_context + { + dxt1_endpoint_optimizer m_dxt1_optimizer; + dxt5_endpoint_optimizer m_dxt5_optimizer; + pack_etc1_block_context m_etc1_optimizer; #if CRNLIB_SUPPORT_ETC_A1 - etc_a1::pack_etc1_block_context m_etc1_a1_optimizer; + etc_a1::pack_etc1_block_context m_etc1_a1_optimizer; #endif - }; + }; - void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, set_block_pixels_context& context); - void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p); + void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, set_block_pixels_context& context); + void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p); - void get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const; + void get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const; - // Returns a value representing the component(s) that where actually set, where -1 = RGB. - // This method does not always set every component! 
- int get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled = true) const; + // Returns a value representing the component(s) that where actually set, where -1 = RGB. + // This method does not always set every component! + int get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled = true) const; - // pColors should point to a 16 entry array, to handle DXT3. - // Returns the number of block colors: 3, 4, 6, 8, or 16. - uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index = 0); + // pColors should point to a 16 entry array, to handle DXT3. + // Returns the number of block colors: 3, 4, 6, 8, or 16. + uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index = 0); - uint get_subblock_index(uint x, uint y, uint element_index) const; - uint get_total_subblocks(uint element_index) const; + uint get_subblock_index(uint x, uint y, uint element_index) const; + uint get_total_subblocks(uint element_index) const; - uint get_selector(uint x, uint y, uint element_index) const; + uint get_selector(uint x, uint y, uint element_index) const; - void change_dxt1_to_dxt1a(); + void change_dxt1_to_dxt1a(); - bool can_flip(uint axis_index); + bool can_flip(uint axis_index); - // Returns true if the texture can actually be flipped. - bool flip_x(); - bool flip_y(); + // Returns true if the texture can actually be flipped. 
+ bool flip_x(); + bool flip_y(); - private: - element_vec m_elements; - element* m_pElements; + private: + element_vec m_elements; + element* m_pElements; - uint m_width; - uint m_height; + uint m_width; + uint m_height; - uint m_blocks_x; - uint m_blocks_y; - uint m_total_blocks; - uint m_total_elements; + uint m_blocks_x; + uint m_blocks_y; + uint m_total_blocks; + uint m_total_elements; - uint m_num_elements_per_block; // 1 or 2 - uint m_bytes_per_block; // 8 or 16 + uint m_num_elements_per_block; // 1 or 2 + uint m_bytes_per_block; // 8 or 16 - int8 m_element_component_index[2]; - element_type m_element_type[2]; + int8 m_element_component_index[2]; + element_type m_element_type[2]; - dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or 5A + dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or 5A - bool init_internal(dxt_format fmt, uint width, uint height); - void init_task(uint64 data, void* pData_ptr); + bool init_internal(dxt_format fmt, uint width, uint height); + void init_task(uint64 data, void* pData_ptr); #if CRNLIB_SUPPORT_ATI_COMPRESS - bool init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p); + bool init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p); #endif - void flip_col(uint x); - void flip_row(uint y); -}; - -} // namespace crnlib + void flip_col(uint x); + void flip_row(uint y); + }; +} // namespace crnlib diff --git a/crnlib/crn_dynamic_stream.h b/crnlib/crn_dynamic_stream.h index c1e19f5..fe460ae 100644 --- a/crnlib/crn_dynamic_stream.h +++ b/crnlib/crn_dynamic_stream.h @@ -1,5 +1,25 @@ -// File: crn_dynamic_stream.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -11,19 +31,22 @@ namespace crnlib class CRN_EXPORT dynamic_stream : public data_stream { public: - dynamic_stream(uint initial_size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable): data_stream(pName, attribs), + dynamic_stream(uint initial_size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : + data_stream(pName, attribs), m_ofs(0) { open(initial_size, pName, attribs); } - dynamic_stream(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : data_stream(pName, attribs), + dynamic_stream(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : + data_stream(pName, attribs), m_ofs(0) { open(pBuf, size, pName, attribs); } - dynamic_stream() : data_stream(), + dynamic_stream() : + data_stream(), m_ofs(0) { open(); @@ -232,4 +255,4 @@ namespace crnlib crnlib::vector m_buf; uint m_ofs; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dynamic_string.cpp b/crnlib/crn_dynamic_string.cpp index ec0e1dd..37b30ef 100644 --- a/crnlib/crn_dynamic_string.cpp 
+++ b/crnlib/crn_dynamic_string.cpp @@ -1,5 +1,25 @@ -// File: crn_dynamic_string.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_strutils.h" @@ -8,7 +28,7 @@ namespace crnlib { dynamic_string g_empty_dynamic_string; - dynamic_string::dynamic_string(eVarArg, const char* p, ...): + dynamic_string::dynamic_string(eVarArg, const char* p, ...) 
: m_buf_size(0), m_len(0), m_pStr(nullptr) @@ -21,7 +41,7 @@ namespace crnlib va_end(args); } - dynamic_string::dynamic_string(const char* p): + dynamic_string::dynamic_string(const char* p) : m_buf_size(0), m_len(0), m_pStr(nullptr) @@ -30,7 +50,7 @@ namespace crnlib set(p); } - dynamic_string::dynamic_string(const char* p, uint len): + dynamic_string::dynamic_string(const char* p, uint len) : m_buf_size(0), m_len(0), m_pStr(nullptr) @@ -39,7 +59,7 @@ namespace crnlib set_from_buf(p, len); } - dynamic_string::dynamic_string(const dynamic_string& other): + dynamic_string::dynamic_string(const dynamic_string& other) : m_buf_size(0), m_len(0), m_pStr(nullptr) @@ -553,7 +573,8 @@ namespace crnlib return -1; } - dynamic_string& dynamic_string::trim() { + dynamic_string& dynamic_string::trim() + { int s, e; for (s = 0; s < (int)m_len; s++) { @@ -574,7 +595,8 @@ namespace crnlib return crop(s, e - s + 1); } - dynamic_string& dynamic_string::trim_crlf() { + dynamic_string& dynamic_string::trim_crlf() + { int s = 0, e; for (e = m_len - 1; e > s; e--) @@ -588,7 +610,8 @@ namespace crnlib return crop(s, e - s + 1); } - dynamic_string& dynamic_string::remap(int from_char, int to_char) { + dynamic_string& dynamic_string::remap(int from_char, int to_char) + { for (uint i = 0; i < m_len; i++) { if (m_pStr[i] == from_char) @@ -628,7 +651,9 @@ namespace crnlib if (buf_size_needed <= cUINT16_MAX) { if (buf_size_needed > m_buf_size) + { expand_buf(buf_size_needed, preserve_contents); + } } return m_buf_size >= buf_size_needed; @@ -749,4 +774,4 @@ namespace crnlib swap(tmp); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_dynamic_string.h b/crnlib/crn_dynamic_string.h index 42fe1d1..02b0167 100644 --- a/crnlib/crn_dynamic_string.h +++ b/crnlib/crn_dynamic_string.h @@ -1,5 +1,25 @@ -// File: crn_dynamic_string.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -7,12 +27,15 @@ namespace crnlib { - enum { cMaxDynamicStringLen = cUINT16_MAX - 1 }; + enum + { + cMaxDynamicStringLen = cUINT16_MAX - 1 + }; class CRN_EXPORT dynamic_string { public: - inline dynamic_string(): + inline dynamic_string() : m_buf_size(0), m_len(0), m_pStr(nullptr) @@ -109,7 +132,8 @@ namespace crnlib { return compare(rhs) != 0; } - inline bool operator!=(const char* p) const { + inline bool operator!=(const char* p) const + { return compare(p) != 0; } @@ -235,7 +259,10 @@ namespace crnlib void translate_lf_to_crlf(); static inline char* create_raw_buffer(uint& buf_size_in_chars); - static inline void free_raw_buffer(char* p) { crnlib_delete_array(p); } + static inline void free_raw_buffer(char* p) + { + crnlib_delete_array(p); + } dynamic_string& set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars); private: @@ -246,7 +273,9 @@ namespace crnlib #ifdef CRNLIB_BUILD_DEBUG void check() const; #else - inline void check() const {} + inline void check() const + { + } #endif 
bool expand_buf(uint new_buf_size, bool preserve_contents); @@ -284,4 +313,4 @@ namespace crnlib buf_size_in_chars = math::minimum(cUINT16_MAX, math::next_pow2(buf_size_in_chars)); return crnlib_new_array(buf_size_in_chars); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_etc.cpp b/crnlib/crn_etc.cpp index 9db9635..347764e 100644 --- a/crnlib/crn_etc.cpp +++ b/crnlib/crn_etc.cpp @@ -1,1626 +1,1931 @@ -// File: crn_etc.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_etc.h" #include "crn_radix_sort.h" #include "crn_ryg_dxt.hpp" -namespace crnlib { -const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = +namespace crnlib +{ + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { + { -8, -2, 2, 8 }, + { -17, -5, 5, 17 }, + { -29, -9, 9, 29 }, + { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, + { -80, -24, 24, 80 }, + { -106, -33, 33, 106 }, + { -183, -47, 47, 183 } + }; + + const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc1_coord2 g_etc1_pixel_coords[2][2][8] = { + { { { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } }, + { { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } } }, + { + { { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } }, + { { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } }, + } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16 g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. 
+ // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16 g_color8_to_etc_block_config_0_255[2][33] = { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = { + { 0x021C, 0x0D0D, 0xFFFF }, + { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, + { 0x0113, 0x0217, 0xFFFF }, + { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, + { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, + { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, + { 0x0303, 0x0215, 0x0607, 0xFFFF }, + { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, + { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, + { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, + { 0x0213, 0x0317, 0xFFFF }, + { 0x0112, + 0x0505, 0xFFFF }, + { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, + { 0x0211, 0x0909, 0xFFFF }, + { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, + { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, + { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, + { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, + { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, + { 0x0605, + 0x0417, 0xFFFF }, + { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, + { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF }, + { 0x0519, 0x190F, 0xFFFF }, + { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, + { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, + { 0x0200, 0x0124, 0x0406, 0x0321, 
0x0129, 0x100D, 0xFFFF }, + { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, + { 0x0413, 0x0705, 0x0517, 0xFFFF }, + { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, + { 0x0126, 0x080C, 0x0B09, 0xFFFF }, + { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, + { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, + { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, + { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, + { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, + { 0x0220, 0x0513, 0x0617, 0xFFFF }, + { 0x0135, 0x0805, + 0x0327, 0xFFFF }, + { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, + { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, + { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, + { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, + { 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, + { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, + { 0x041A, 0x0613, 0x0717, 0xFFFF }, + { 0x0235, 0x0905, 0xFFFF }, + { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, + { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, + { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, + { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, + { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, + { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, + { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, + { 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, + { 0x0623, 0x0527, 0xFFFF }, + { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, + { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, + { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, + { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, + { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, + { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, + { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, + { 0x0723, 0x0435, 0x0627, 0xFFFF }, + { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF 
}, + { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, + { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, + { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, + { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, + { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, + { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, + { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, + { 0x0823, 0x032F, 0xFFFF }, + { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, + { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, + { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, + { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, + { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, + { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, + { 0x061A, 0x0635, 0x0D05, 0xFFFF }, + { 0x0923, 0x0827, 0xFFFF }, + { 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, + { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, + { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, + { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, + { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, + { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, + { 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, + { 0x0520, 0x0A23, 0x0927, 0xFFFF }, + { 0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, + { 0x0616, 0x081E, 0x0D19, 0xFFFF }, + { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, + { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, + { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, + { 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, + { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, + { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, + { 0x1309, 0x023B, 0x062F, 0xFFFF }, + { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, + { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 
0xFFFF }, + { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, + { 0x0610, 0x0A29, 0x190D, 0xFFFF }, + { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 0xFFFF }, + { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, + { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, + { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, + { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, + { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, + { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, + { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, + { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, + { 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, + { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, + { 0x081A, 0x0D23, 0x0C27, 0xFFFF }, + { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, + { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 0xFFFF }, + { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, + { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, + { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, + { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, + { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, + { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, + { 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, + { 0x1119, 0x023D, 0xFFFF }, + { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, + { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, + { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, + { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, + { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, + { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, + { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, + { 0x1011, 0x1219, 0x033D, 0xFFFF }, + { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, + { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 
0xFFFF }, + { 0x0E29, 0x1D0D, 0xFFFF }, + { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, + { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF }, + { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, + { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, + { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, + { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, + { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, + { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, + { 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, + { 0x072A, 0x1213, 0x1317, 0xFFFF }, + { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, + { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, + { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, + { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, + { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, + { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, + { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, + { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, + { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, + { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, + { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, + { 0x0920, 0x1519, 0x063D, + 0xFFFF }, + { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, + { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, + { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, + { 0x0C39, 0x1D0B, 0x191F, 0xFFFF }, + { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, + { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, + { 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, + { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, + { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, + { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, + { 0x0926, 0x072E, 0x1229, 0xFFFF }, + { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, + { 0x0A10, 
0x1513, + 0x1617, 0xFFFF }, + { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, + { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF }, + { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, + { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, + { 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, + { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, + { 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, + { 0x1613, 0x1717, 0xFFFF }, + { 0x092A, 0x1235, 0x1905, + 0xFFFF }, + { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, + { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, + { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, + { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, + { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, + { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, + { 0x1713, 0x1C1F, 0xFFFF }, + { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, + { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, + { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, + { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, + { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, + { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, + { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, + { 0x0B22, 0x0D04, 0x1039, 0x1D1F, + 0xFFFF }, + { 0x1813, 0x1B05, 0x1917, 0xFFFF }, + { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, + { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, + { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, + { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, + { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, + { 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, + { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, + { 0x1913, + 0x1A17, 0xFFFF }, + { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, + { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 
0xFFFF }, + { 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, + { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, + { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, + { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, + { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, + { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, + { 0x1635, 0x1D05, 0xFFFF }, + { 0x0B2A, 0x1923, 0x1827, 0xFFFF }, + { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, + { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, + { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, + { 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, + { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, + { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, + { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, + { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, + { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, + { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, + { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, + { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, + { 0x1B21, 0x1929, + 0x053F, 0xFFFF }, + { 0x0E16, 0x073C, 0x1439, 0xFFFF }, + { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, + { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, + { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, + { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF }, + { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, + { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, + { 0x1A29, 0x063F, 0xFFFF }, + { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, + { 0x0D26, 0x033E, + 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, + { 0x1935, 0x1B27, 0xFFFF }, + { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, + { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, + { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, + { 0x0D28, 0x1C31, 0x1E01, + 0x1B33, 0x192D, 0xFFFF }, + { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, + { 0x1D21, 0x1639, 0xFFFF }, + 
{ 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, + { 0x0E22, 0x1A35, 0xFFFF }, + { 0x1D23, 0x1C27, 0xFFFF }, + { 0x0D2A, 0x1E11, 0x143B, + 0x182F, 0xFFFF }, + { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, + { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, + { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, + { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, + { 0x0F12, 0x0D34, + 0x0A3A, 0x1F13, 0xFFFF }, + { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, + { 0x1E23, 0x1D27, 0xFFFF }, + { 0x0F10, 0x1F11, + 0x153B, 0x192F, 0xFFFF }, + { 0x0D2C, 0x123D, 0xFFFF }, + }; + + uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) { - {-8, -2, 2, 8}, - {-17, -5, 5, 17}, - {-29, -9, 9, 29}, - {-42, -13, 13, 42}, - {-60, -18, 18, 60}, - {-80, -24, 24, 80}, - {-106, -33, 33, 106}, - {-183, -47, 47, 183}}; - -const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = {2, 3, 1, 0}; -const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = {3, 2, 0, 1}; - -// [flip][subblock][pixel_index] -const etc1_coord2 g_etc1_pixel_coords[2][2][8] = + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) { - {{{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3}}, - {{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}}}, + if (scaled) { - {{0, 0}, {1, 0}, {2, 0}, {3, 0}, {0, 1}, {1, 1}, {2, 1}, {3, 1}}, - {{0, 2}, {1, 2}, {2, 2}, {3, 2}, {0, 3}, {1, 3}, {2, 3}, {3, 3}}, - }}; + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } -// Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. 
-static uint16 g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] + r = math::minimum(r, 31U); + g = math::minimum(g, 31U); + b = math::minimum(b, 31U); -// g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. -// To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) -static const uint16 g_color8_to_etc_block_config_0_255[2][33] = - { - {0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, - 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF}, - {0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, - 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF}, -}; - -// Really only [254][11]. 
-static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) { - {0x021C, 0x0D0D, 0xFFFF}, - {0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF}, - {0x0113, 0x0217, 0xFFFF}, - {0x0116, 0x031E, - 0x0B0E, 0x0405, 0xFFFF}, - {0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF}, - {0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF}, - {0x0303, 0x0215, 0x0607, 0xFFFF}, - {0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF}, - {0x0100, 0x0024, 0x0306, - 0x0025, 0x041B, 0x0E0D, 0xFFFF}, - {0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF}, - {0x0213, 0x0317, 0xFFFF}, - {0x0112, - 0x0505, 0xFFFF}, - {0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF}, - {0x0211, 0x0909, 0xFFFF}, - {0x0110, 0x0315, 0x0707, - 0x0419, 0x180F, 0xFFFF}, - {0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF}, - {0x0032, 0x0202, 0x0033, 0x0125, 0x051B, - 0x0F0D, 0xFFFF}, - {0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF}, - {0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF}, - {0x0605, - 0x0417, 0xFFFF}, - {0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF}, - {0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF}, - {0x0519, 0x190F, 0xFFFF}, - {0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF}, - {0x0130, 0x0214, - 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF}, - {0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF}, - {0x031A, - 0x0D0B, 0x091F, 0xFFFF}, - {0x0413, 0x0705, 0x0517, 0xFFFF}, - {0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF}, - {0x0126, 0x080C, 0x0B09, 0xFFFF}, - {0x0411, 0x0619, 0x1A0F, 0xFFFF}, - {0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, - 0xFFFF}, - {0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF}, - {0x0132, 0x0302, 0x0229, 0x110D, - 0xFFFF}, - {0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF}, - {0x0220, 0x0513, 0x0617, 0xFFFF}, - {0x0135, 0x0805, - 0x0327, 0xFFFF}, - {0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF}, - {0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 
0x0C09, 0x1B0F, - 0xFFFF}, - {0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF}, - {0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF}, - {0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF}, - {0x0300, 0x0224, 0x0506, 0x0521, - 0x0F0B, 0x0B1F, 0xFFFF}, - {0x041A, 0x0613, 0x0717, 0xFFFF}, - {0x0235, 0x0905, 0xFFFF}, - {0x0312, 0x0134, 0x0523, - 0x0427, 0xFFFF}, - {0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF}, - {0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, - 0xFFFF}, - {0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF}, - {0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, - 0x130D, 0xFFFF}, - {0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF}, - {0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF}, - {0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF}, - {0x0623, 0x0527, 0xFFFF}, - {0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, - 0xFFFF}, - {0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF}, - {0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, - 0xFFFF}, - {0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF}, - {0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, - 0x140D, 0xFFFF}, - {0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF}, - {0x051A, 0x0813, 0x0B05, 0x0917, - 0xFFFF}, - {0x0723, 0x0435, 0x0627, 0xFFFF}, - {0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF}, - {0x0326, 0x0A0C, 0x012E, - 0x0811, 0x0A19, 0x1E0F, 0xFFFF}, - {0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF}, - {0x0410, 0x0901, 0x0633, 0x0725, - 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF}, - {0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF}, - {0x0332, 0x0502, 0x0821, 0x0139, - 0x120B, 0x0E1F, 0xFFFF}, - {0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF}, - {0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF}, - {0x0823, 0x032F, 0xFFFF}, - {0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF}, - {0x0422, 0x0604, 0x090A, - 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF}, - {0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF}, - {0x032A, 0x0825, 0x0437, - 0x0729, 0x0C1B, 0x160D, 0xFFFF}, - {0x0430, 0x0514, 0x0236, 
0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF}, - {0x0500, - 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF}, - {0x061A, 0x0635, 0x0D05, 0xFFFF}, - {0x0923, 0x0827, 0xFFFF}, - {0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF}, - {0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, - 0x072B, 0xFFFF}, - {0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF}, - {0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, - 0xFFFF}, - {0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF}, - {0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF}, - {0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF}, - {0x0520, 0x0A23, 0x0927, 0xFFFF}, - {0x0B11, 0x1209, 0x013B, 0x052F, - 0xFFFF}, - {0x0616, 0x081E, 0x0D19, 0xFFFF}, - {0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, - 0x0F1D, 0xFFFF}, - {0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF}, - {0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF}, - {0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF}, - {0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, - 0x0D17, 0xFFFF}, - {0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF}, - {0x1309, 0x023B, 0x062F, 0xFFFF}, - {0x0612, 0x0434, - 0x013A, 0x0C11, 0x0E19, 0xFFFF}, - {0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF}, - {0x0D01, - 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF}, - {0x0610, 0x0A29, 0x190D, 0xFFFF}, - {0x0718, 0x042C, 0x0C21, - 0x0539, 0x160B, 0x121F, 0xFFFF}, - {0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF}, - {0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, - 0xFFFF}, - {0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF}, - {0x0D11, 0x0F19, 0x1409, 0xFFFF}, - {0x0716, 0x003C, 0x091E, - 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF}, - {0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, - 0xFFFF}, - {0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF}, - {0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF}, - {0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF}, - {0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF}, - {0x081A, 0x0D23, 0x0C27, 
0xFFFF}, - {0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF}, - {0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, - 0x1019, 0x0B2B, 0x013D, 0xFFFF}, - {0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF}, - {0x0C33, - 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF}, - {0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF}, - {0x0818, 0x052C, 0x0F13, 0x180B, - 0x141F, 0xFFFF}, - {0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF}, - {0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF}, - {0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF}, - {0x1119, 0x023D, 0xFFFF}, - {0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, - 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF}, - {0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, - 0xFFFF}, - {0x0F21, 0x0D29, 0x1C0D, 0xFFFF}, - {0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF}, - {0x0730, 0x0814, 0x0536, - 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF}, - {0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF}, - {0x091A, - 0x1709, 0x063B, 0x0A2F, 0xFFFF}, - {0x1011, 0x1219, 0x033D, 0xFFFF}, - {0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, - 0x1507, 0x0D2B, 0xFFFF}, - {0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF}, - {0x0E29, 0x1D0D, 0xFFFF}, - {0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF}, - {0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF}, - {0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF}, - {0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF}, - {0x0820, - 0x1111, 0x1319, 0x1809, 0xFFFF}, - {0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF}, - {0x0916, 0x023C, 0x0B1E, 0x1031, - 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF}, - {0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF}, - {0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF}, - {0x072A, 0x1213, 0x1317, 0xFFFF}, - {0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, - 0x1505, 0xFFFF}, - {0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF}, - {0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, - 0xFFFF}, - {0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF}, - {0x0912, 0x0734, 0x043A, 0x1131, 
0x1301, 0x1403, 0x0E2D, - 0x161D, 0xFFFF}, - {0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF}, - {0x1221, 0x0B39, 0x1029, - 0xFFFF}, - {0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF}, - {0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF}, - {0x0832, - 0x0A02, 0x1223, 0x1127, 0xFFFF}, - {0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF}, - {0x0920, 0x1519, 0x063D, - 0xFFFF}, - {0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF}, - {0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, - 0x1225, 0x0E37, 0x161B, 0xFFFF}, - {0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF}, - {0x0C39, 0x1D0B, 0x191F, 0xFFFF}, - {0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF}, - {0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF}, - {0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF}, - {0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF}, - {0x1331, - 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF}, - {0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, - 0x181D, 0xFFFF}, - {0x0926, 0x072E, 0x1229, 0xFFFF}, - {0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF}, - {0x0A10, 0x1513, - 0x1617, 0xFFFF}, - {0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF}, - {0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF}, - {0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF}, - {0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF}, - {0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF}, - {0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF}, - {0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF}, - {0x1613, 0x1717, 0xFFFF}, - {0x092A, 0x1235, 0x1905, - 0xFFFF}, - {0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF}, - {0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, - 0x0C3B, 0x102F, 0xFFFF}, - {0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF}, - {0x1531, 0x1701, 0x1803, 0x122D, - 0x1A1D, 0xFFFF}, - {0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF}, - {0x0A26, 0x003E, 0x082E, 0x1621, - 0x0F39, 0x1429, 0x003F, 0xFFFF}, - {0x1713, 0x1C1F, 
0xFFFF}, - {0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF}, - {0x0C18, - 0x092C, 0x1623, 0x1527, 0xFFFF}, - {0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF}, - {0x0A28, 0x0D1C, 0x1919, - 0x0A3D, 0xFFFF}, - {0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF}, - {0x1801, 0x1533, 0x1625, - 0x1237, 0x1A1B, 0xFFFF}, - {0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF}, - {0x0B22, 0x0D04, 0x1039, 0x1D1F, - 0xFFFF}, - {0x1813, 0x1B05, 0x1917, 0xFFFF}, - {0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF}, - {0x0B30, 0x0C14, 0x0936, - 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF}, - {0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF}, - {0x0D1A, - 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF}, - {0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF}, - {0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF}, - {0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF}, - {0x1913, - 0x1A17, 0xFFFF}, - {0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF}, - {0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF}, - {0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF}, - {0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF}, - {0x0C20, - 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF}, - {0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF}, - {0x0D16, 0x063C, - 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF}, - {0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF}, - {0x1635, 0x1D05, 0xFFFF}, - {0x0B2A, 0x1923, 0x1827, 0xFFFF}, - {0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF}, - {0x0D00, 0x0C24, 0x0F06, - 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF}, - {0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF}, - {0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF}, - {0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF}, - {0x0C26, - 0x023E, 0x0A2E, 0x1B13, 0xFFFF}, - {0x1735, 0x1E05, 0x1C17, 0xFFFF}, - {0x0D10, 0x1A23, 0x1927, 0xFFFF}, - {0x0E18, - 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF}, - {0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF}, - {0x0C28, 0x0F1C, 0x1A31, 0x1D03, - 
0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF}, - {0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF}, - {0x1B21, 0x1929, - 0x053F, 0xFFFF}, - {0x0E16, 0x073C, 0x1439, 0xFFFF}, - {0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF}, - {0x1B23, - 0x1835, 0x1A27, 0xFFFF}, - {0x0C2A, 0x123B, 0x162F, 0xFFFF}, - {0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF}, - {0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF}, - {0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, - 0x182D, 0xFFFF}, - {0x1A29, 0x063F, 0xFFFF}, - {0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF}, - {0x0D26, 0x033E, - 0x0B2E, 0x1D13, 0x1E17, 0xFFFF}, - {0x1935, 0x1B27, 0xFFFF}, - {0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF}, - {0x0F18, - 0x0C2C, 0x1D11, 0x1F19, 0xFFFF}, - {0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF}, - {0x0D28, 0x1C31, 0x1E01, - 0x1B33, 0x192D, 0xFFFF}, - {0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF}, - {0x1D21, 0x1639, 0xFFFF}, - {0x0F16, - 0x083C, 0x1E13, 0x1F17, 0xFFFF}, - {0x0E22, 0x1A35, 0xFFFF}, - {0x1D23, 0x1C27, 0xFFFF}, - {0x0D2A, 0x1E11, 0x143B, - 0x182F, 0xFFFF}, - {0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF}, - {0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, - 0x1A2D, 0xFFFF}, - {0x1C33, 0x1D25, 0x1937, 0xFFFF}, - {0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF}, - {0x0F12, 0x0D34, - 0x0A3A, 0x1F13, 0xFFFF}, - {0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF}, - {0x1E23, 0x1D27, 0xFFFF}, - {0x0F10, 0x1F11, - 0x153B, 0x192F, 0xFFFF}, - {0x0D2C, 0x123D, 0xFFFF}, -}; - -uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) { - return pack_color5(color.r, color.g, color.b, scaled, bias); -} - -uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) { - if (scaled) { - r = (r * 31U + bias) / 255U; - g = (g * 31U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - - r = math::minimum(r, 31U); - g = math::minimum(g, 31U); - b = math::minimum(b, 31U); - - return static_cast(b | (g << 5U) | (r << 10U)); -} - 
-color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) { - uint b = packed_color5 & 31U; - uint g = (packed_color5 >> 5U) & 31U; - uint r = (packed_color5 >> 10U) & 31U; - - if (scaled) { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); -} - -void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) { - color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; -} - -bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { - color_quad_i16 dc(unpack_delta3(packed_delta3)); - - int b = (packed_color5 & 31U) + dc.b; - int g = ((packed_color5 >> 5U) & 31U) + dc.g; - int r = ((packed_color5 >> 10U) & 31U) + dc.r; - - bool success = true; - if (static_cast(r | g | b) > 31U) { - success = false; - r = math::clamp(r, 0, 31); - g = math::clamp(g, 0, 31); - b = math::clamp(b, 0, 31); - } - - if (scaled) { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - result.set_noclamp_rgba(r, g, b, math::minimum(alpha, 255U)); - return success; -} - -bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { - color_quad_u8 result; - const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); - r = result.r; - g = result.g; - b = result.b; - return success; -} - -uint16 etc1_block::pack_delta3(const color_quad_i16& color) { - return pack_delta3(color.r, color.g, color.b); -} - -uint16 etc1_block::pack_delta3(int r, int g, int b) { - CRNLIB_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); - CRNLIB_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); - CRNLIB_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); - if (r < 0) - r += 
8; - if (g < 0) - g += 8; - if (b < 0) - b += 8; - return static_cast(b | (g << 3) | (r << 6)); -} - -color_quad_i16 etc1_block::unpack_delta3(uint16 packed_delta3) { - int r = (packed_delta3 >> 6) & 7; - int g = (packed_delta3 >> 3) & 7; - int b = packed_delta3 & 7; - if (r >= 4) - r -= 8; - if (g >= 4) - g -= 8; - if (b >= 4) - b -= 8; - return color_quad_i16(r, g, b, 0); -} - -void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) { - r = (packed_delta3 >> 6) & 7; - g = (packed_delta3 >> 3) & 7; - b = packed_delta3 & 7; - if (r >= 4) - r -= 8; - if (g >= 4) - g -= 8; - if (b >= 4) - b -= 8; -} - -uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) { - return pack_color4(color.r, color.g, color.b, scaled, bias); -} - -uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) { - if (scaled) { - r = (r * 15U + bias) / 255U; - g = (g * 15U + bias) / 255U; - b = (b * 15U + bias) / 255U; - } - - r = math::minimum(r, 15U); - g = math::minimum(g, 15U); - b = math::minimum(b, 15U); - - return static_cast(b | (g << 4U) | (r << 8U)); -} - -color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) { - uint b = packed_color4 & 15U; - uint g = (packed_color4 >> 4U) & 15U; - uint r = (packed_color4 >> 8U) & 15U; - - if (scaled) { - b = (b << 4U) | b; - g = (g << 4U) | g; - r = (r << 4U) | r; - } - - return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); -} - -void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) { - color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; -} - -void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) { - CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); - const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - unpack_color5(r, g, b, packed_color5, true); - - const int ir = 
static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); -} - -bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) { - CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); - const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); - - const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); - - return success; -} - -void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) { - CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); - const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - unpack_color4(r, g, b, packed_color4, true); - - const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); -} - -bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool 
preserve_alpha) { - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint table_index0 = block.get_inten_table(0); - const uint table_index1 = block.get_inten_table(1); - - color_quad_u8 subblock_colors0[4]; - color_quad_u8 subblock_colors1[4]; - bool success = true; - - if (diff_flag) { - const uint16 base_color5 = block.get_base5_color(); - const uint16 delta_color3 = block.get_delta3_color(); - etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - - if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) - success = false; - } else { - const uint16 base_color4_0 = block.get_base4_color(0); - etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - - const uint16 base_color4_1 = block.get_base4_color(1); - etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - if (preserve_alpha) { - if (flip_flag) { - for (uint y = 0; y < 2; y++) { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); - pDst += 4; - } - - for (uint y = 2; y < 4; y++) { - pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } else { - for (uint y = 0; y < 4; y++) { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } + uint b = packed_color5 & 31U; + uint g = (packed_color5 >> 5U) & 31U; 
+ uint r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); } - } else { - if (flip_flag) { - // 0000 - // 0000 - // 1111 - // 1111 - for (uint y = 0; y < 2; y++) { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors0[block.get_selector(2, y)]; - pDst[3] = subblock_colors0[block.get_selector(3, y)]; - pDst += 4; - } - - for (uint y = 2; y < 4; y++) { - pDst[0] = subblock_colors1[block.get_selector(0, y)]; - pDst[1] = subblock_colors1[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } else { - // 0011 - // 0011 - // 0011 - // 0011 - for (uint y = 0; y < 4; y++) { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } + + void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) + { + color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; } - } - - return success; -} - -bool etc1_optimizer::compute() { - const uint n = m_pParams->m_num_src_pixels; - const int scan_delta_size = m_pParams->m_scan_delta_size; - - // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. - // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. 
- for (int zdi = 0; zdi < scan_delta_size; zdi++) { - const int zd = m_pParams->m_pScan_deltas[zdi]; - const int mbb = m_bb + zd; - if (mbb < 0) - continue; - else if (mbb > m_limit) - break; - - for (int ydi = 0; ydi < scan_delta_size; ydi++) { - const int yd = m_pParams->m_pScan_deltas[ydi]; - const int mbg = m_bg + yd; - if (mbg < 0) - continue; - else if (mbg > m_limit) - break; - - for (int xdi = 0; xdi < scan_delta_size; xdi++) { - const int xd = m_pParams->m_pScan_deltas[xdi]; - const int mbr = m_br + xd; - if (mbr < 0) - continue; - else if (mbr > m_limit) - break; - - etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cCRNETCQualitySlow) { - if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) - continue; - } else { - if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) - continue; + + bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = math::clamp(r, 0, 31); + g = math::clamp(g, 0, 31); + b = math::clamp(b, 0, 31); } - // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. - // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: - // The goal is: - // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 - // Rearranging this: - // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 - // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 - // So what this means: - // optimal_block_color = avg_input - avg_inten_delta - // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. - // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. - // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. - - const uint max_refinement_trials = (m_pParams->m_quality == cCRNETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); - for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) { - const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); - const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); - for (uint r = 0; r < n; r++) { - const uint s = *pSelectors++; - const int yd = pInten_table[s]; - // Compute actual delta being applied to each pixel, taking into account clamping. - delta_sum_r += math::clamp(base_color.r + yd, 0, 255) - base_color.r; - delta_sum_g += math::clamp(base_color.g + yd, 0, 255) - base_color.g; - delta_sum_b += math::clamp(base_color.b + yd, 0, 255) - base_color.b; - } - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - break; - const float avg_delta_r_f = static_cast(delta_sum_r) / n; - const float avg_delta_g_f = static_cast(delta_sum_g) / n; - const float avg_delta_b_f = static_cast(delta_sum_b) / n; - const int br1 = math::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = math::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = math::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - - bool skip = false; - - if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) - skip = true; - else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) - skip = true; - else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) - skip = true; - - if (skip) - break; - - etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cCRNETCQualitySlow) { - if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) - break; - } 
else { - if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) - break; - } - - } // refinement_trial - - } // xdi - } // ydi - } // zdi - - if (!m_best_solution.m_valid) { - m_pResult->m_error = cUINT32_MAX; - return false; - } - - const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } -#ifdef CRNLIB_BUILD_DEBUG - { - color_quad_u8 block_colors[4]; - m_best_solution.m_coords.get_block_colors(block_colors); + result.set_noclamp_rgba(r, g, b, math::minimum(alpha, 255U)); + return success; + } - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - uint64 actual_error = 0; - for (uint i = 0; i < n; i++) - actual_error += color::elucidian_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); + bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_u8 result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } - CRNLIB_ASSERT(actual_error == m_best_solution.m_error); - } -#endif + uint16 etc1_block::pack_delta3(const color_quad_i16& color) + { + return pack_delta3(color.r, color.g, color.b); + } - m_pResult->m_error = m_best_solution.m_error; + uint16 etc1_block::pack_delta3(int r, int g, int b) + { + CRNLIB_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + CRNLIB_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + CRNLIB_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) + { + r += 8; + } + if (g < 0) + { + g += 8; + } + if (b < 0) + { + b += 8; + } + return static_cast(b | (g << 3) | (r << 6)); + } - m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; - m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + color_quad_i16 
etc1_block::unpack_delta3(uint16 packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) + { + r -= 8; + } + if (g >= 4) + { + g -= 8; + } + if (b >= 4) + { + b -= 8; + } + return color_quad_i16(r, g, b, 0); + } - m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; - memcpy(m_pResult->m_pSelectors, pSelectors, n); - m_pResult->m_n = n; + void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) + { + r -= 8; + } + if (g >= 4) + { + g -= 8; + } + if (b >= 4) + { + b -= 8; + } + } - return true; -} + uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } -void etc1_optimizer::init(const params& params, results& result) { - m_pParams = ¶ms; - m_pResult = &result; + uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } - const uint n = m_pParams->m_num_src_pixels; + r = math::minimum(r, 15U); + g = math::minimum(g, 15U); + b = math::minimum(b, 15U); - m_selectors.resize(n); - m_best_selectors.resize(n); - m_temp_selectors.resize(n); - m_trial_solution.m_selectors.resize(n); - m_best_solution.m_selectors.resize(n); + return static_cast(b | (g << 4U) | (r << 8U)); + } - m_limit = m_pParams->m_use_color4 ? 
15 : 31; + color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) + { + uint b = packed_color4 & 15U; + uint g = (packed_color4 >> 4U) & 15U; + uint r = (packed_color4 >> 8U) & 15U; - vec3F avg_color(0.0f); + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } - m_luma.resize(n); - m_sorted_luma[0].resize(n); - m_sorted_luma[1].resize(n); + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); + } - for (uint i = 0; i < n; i++) { - const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; - const vec3F fc(c.r, c.g, c.b); + void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) + { + color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } - avg_color += fc; + void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - m_luma[i] = static_cast(c.r + c.g + c.b); - m_sorted_luma[0][i] = i; - } - avg_color /= static_cast(n); - m_avg_color = avg_color; + uint r, g, b; + unpack_color5(r, g, b, packed_color5, true); - m_br = math::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); - m_bg = math::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); - m_bb = math::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - if (m_pParams->m_quality <= cCRNETCQualityMedium) { - m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0].get_ptr(), m_sorted_luma[1].get_ptr(), m_luma.get_ptr(), 0, sizeof(m_luma[0]), false); - m_pSorted_luma = m_sorted_luma[0].get_ptr(); - if (m_pSorted_luma_indices == m_sorted_luma[0].get_ptr()) - m_pSorted_luma = m_sorted_luma[1].get_ptr(); + const int y0 = pInten_modifer_table[0]; + 
pDst[0].set(ir + y0, ig + y0, ib + y0); - for (uint i = 0; i < n; i++) - m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; - } + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); - m_best_solution.m_coords.clear(); - m_best_solution.m_valid = false; - m_best_solution.m_error = cUINT64_MAX; -} + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); -bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { - trial_solution.m_valid = false; + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } - if (m_pParams->m_constrain_against_base_color5) { - const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; - const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; - const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) - return false; - } + uint r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); - const color_quad_u8 base_color(coords.get_scaled_color()); + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - const uint n = m_pParams->m_num_src_pixels; - CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); - trial_solution.m_error = cUINT64_MAX; + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); - for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) { - const int* pInten_table 
= g_etc1_inten_tables[inten_table]; + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); - color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) { - const int yd = pInten_table[s]; - block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); - } + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); - uint64 total_error = 0; + return success; + } - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - for (uint c = 0; c < n; c++) { - const color_quad_u8& src_pixel = *pSrc_pixels++; + void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - uint best_selector_index = 0; - uint best_error = math::square(src_pixel.r - block_colors[0].r) + math::square(src_pixel.g - block_colors[0].g) + math::square(src_pixel.b - block_colors[0].b); + uint r, g, b; + unpack_color4(r, g, b, packed_color4, true); - uint trial_error = math::square(src_pixel.r - block_colors[1].r) + math::square(src_pixel.g - block_colors[1].g) + math::square(src_pixel.b - block_colors[1].b); - if (trial_error < best_error) { - best_error = trial_error; - best_selector_index = 1; - } + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); - trial_error = math::square(src_pixel.r - block_colors[2].r) + math::square(src_pixel.g - block_colors[2].g) + math::square(src_pixel.b - block_colors[2].b); - if (trial_error < best_error) { - best_error = trial_error; - best_selector_index = 2; - } + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); - trial_error = math::square(src_pixel.r - block_colors[3].r) + math::square(src_pixel.g - block_colors[3].g) + math::square(src_pixel.b - block_colors[3].b); - if (trial_error < best_error) { - best_error = trial_error; - best_selector_index = 3; - } + 
const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); - m_temp_selectors[c] = static_cast(best_selector_index); + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); - total_error += best_error; - if (total_error >= trial_solution.m_error) - break; + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); } - if (total_error < trial_solution.m_error) { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - - bool success = false; - if (pBest_solution) { - if (trial_solution.m_error < pBest_solution->m_error) { - *pBest_solution = trial_solution; - success = true; - } - } + bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); - return success; -} + color_quad_u8 subblock_colors0[4]; + color_quad_u8 subblock_colors1[4]; + bool success = true; -bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { - if (m_pParams->m_constrain_against_base_color5) { - const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; - const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; - const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - if 
((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) { - trial_solution.m_valid = false; - return false; + if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + { + success = false; + } + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0] = 
subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return success; } - } - const color_quad_u8 base_color(coords.get_scaled_color()); + bool etc1_optimizer::compute() + { + const uint n = m_pParams->m_num_src_pixels; + const int scan_delta_size = m_pParams->m_scan_delta_size; - const uint n = m_pParams->m_num_src_pixels; - CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. 
+ for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = m_bb + zd; + if (mbb < 0) + { + continue; + } + else if (mbb > m_limit) + { + break; + } - trial_solution.m_error = cUINT64_MAX; + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = m_bg + yd; + if (mbg < 0) + { + continue; + } + else if (mbg > m_limit) + { + break; + } + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = m_br + xd; + if (mbr < 0) + { + continue; + } + else if (mbr > m_limit) + { + break; + } + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cCRNETCQualitySlow) + { + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + { + continue; + } + } + else + { + if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) + { + continue; + } + } + + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. + + const uint max_refinement_trials = (m_pParams->m_quality == cCRNETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); + for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint r = 0; r < n; r++) + { + const uint s = *pSelectors++; + const int yd = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += math::clamp(base_color.r + yd, 0, 255) - base_color.r; + delta_sum_g += math::clamp(base_color.g + yd, 0, 255) - base_color.g; + delta_sum_b += math::clamp(base_color.b + yd, 0, 255) - base_color.b; + } + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + { + break; + } + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = math::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = math::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = math::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + + bool skip = false; + + if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) + { + skip = true; + } + else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) + { + skip = true; + } + else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) + { + skip = true; + } + + if (skip) + { + break; + } + + etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cCRNETCQualitySlow) + { + if (!evaluate_solution(coords1, 
m_trial_solution, &m_best_solution)) + { + break; + } + } + else + { + if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) + { + break; + } + } + + } // refinement_trial + + } // xdi + } // ydi + } // zdi + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = cUINT32_MAX; + return false; + } - for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { - const int* pInten_table = g_etc1_inten_tables[inten_table]; + const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); - uint block_inten[4]; - color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) { - const int yd = pInten_table[s]; - color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); - block_colors[s] = block_color; - block_inten[s] = block_color.r + block_color.g + block_color.b; - } +#ifdef CRNLIB_BUILD_DEBUG + { + color_quad_u8 block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); - // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. - // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
- // 0 1 2 3 - // 01 12 23 - const uint block_inten_midpoints[3] = {block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3]}; - - uint64 total_error = 0; - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) { - if (block_inten[0] > m_pSorted_luma[n - 1]) { - const uint min_error = block_inten[0] - m_pSorted_luma[n - 1]; - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 0, n); - - for (uint c = 0; c < n; c++) - total_error += color::elucidian_distance(block_colors[0], pSrc_pixels[c], false); - } else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) { - if (m_pSorted_luma[0] > block_inten[3]) { - const uint min_error = m_pSorted_luma[0] - block_inten[3]; - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 3, n); - - for (uint c = 0; c < n; c++) - total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[c], false); - } else { - uint cur_selector = 0, c; - for (c = 0; c < n; c++) { - const uint y = m_pSorted_luma[c]; - while ((y * 2) >= block_inten_midpoints[cur_selector]) - if (++cur_selector > 2) - goto done; - const uint sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); - total_error += color::elucidian_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); - } - done: - while (c < n) { - const uint sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = 3; - total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); - ++c; - } - } + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64 actual_error = 0; + for (uint i = 0; i < n; i++) + actual_error += color::elucidian_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); - if (total_error < trial_solution.m_error) { - 
trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - if (!total_error) - break; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - - bool success = false; - if (pBest_solution) { - if (trial_solution.m_error < pBest_solution->m_error) { - *pBest_solution = trial_solution; - success = true; + CRNLIB_ASSERT(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; } - } - - return success; -} - -// Dither function from RYG's public domain real-time DXT1 compressor, modified for 555. 
-static void DitherBlock(color_quad_u8* dest, const color_quad_u8* block) { - int err[8], *ep1 = err, *ep2 = err + 4; - uint8* quant = ryg_dxt::QuantRBTab + 8; - - // process channels seperately - for (int ch = 0; ch < 3; ch++) { - uint8* bp = (uint8*)block; - uint8* dp = (uint8*)dest; - - bp += ch; - dp += ch; - memset(err, 0, sizeof(err)); - - for (int y = 0; y < 4; y++) { - // pixel 0 - dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; - ep1[0] = bp[0] - dp[0]; - - // pixel 1 - dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; - ep1[1] = bp[4] - dp[4]; - - // pixel 2 - dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)]; - ep1[2] = bp[8] - dp[8]; - - // pixel 3 - dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)]; - ep1[3] = bp[12] - dp[12]; - - // advance to next line - std::swap(ep1, ep2); - bp += 16; - dp += 16; + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = ¶ms; + m_pResult = &result; + + const uint n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 
15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma[0].resize(n); + m_sorted_luma[1].resize(n); + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma[0][i] = i; + } + avg_color /= static_cast(n); + m_avg_color = avg_color; + + m_br = math::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = math::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = math::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + + if (m_pParams->m_quality <= cCRNETCQualityMedium) + { + m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0].get_ptr(), m_sorted_luma[1].get_ptr(), m_luma.get_ptr(), 0, sizeof(m_luma[0]), false); + m_pSorted_luma = m_sorted_luma[0].get_ptr(); + if (m_pSorted_luma_indices == m_sorted_luma[0].get_ptr()) + { + m_pSorted_luma = m_sorted_luma[1].get_ptr(); + } + + for (uint i = 0; i < n; i++) + { + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = cUINT64_MAX; } - } -} - -static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) { - CRNLIB_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < (diff ? 32 : 16))); - int c; - if (diff) - c = (packed_c >> 2) | (packed_c << 3); - else - c = packed_c | (packed_c << 4); - c += g_etc1_inten_tables[inten][selector]; - c = math::clamp(c, 0, 255); - return c; -} - -void pack_etc1_block_init() { - for (uint diff = 0; diff < 2; diff++) { - const uint limit = diff ? 
32 : 16; - - for (uint inten = 0; inten < 8; inten++) { - for (uint selector = 0; selector < 4; selector++) { - const uint inverse_table_index = diff + (inten << 1) + (selector << 4); - for (int color = 0; color < 256; color++) { - uint best_error = cUINT32_MAX, best_packed_c = 0; - for (uint packed_c = 0; packed_c < limit; packed_c++) { - int v = etc1_decode_value(diff, inten, selector, packed_c); - uint err = labs(v - color); - if (err < best_error) { - best_error = err; - best_packed_c = packed_c; - if (!best_error) - break; + + bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + return false; } - } - CRNLIB_ASSERT(best_error <= 255); - g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); } - } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = m_pParams->m_num_src_pixels; + CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = cUINT64_MAX; + + for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + } + + uint64 total_error = 0; + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + for (uint c = 0; c < n; c++) + { + const color_quad_u8& src_pixel = 
*pSrc_pixels++; + + uint best_selector_index = 0; + uint best_error = math::square(src_pixel.r - block_colors[0].r) + math::square(src_pixel.g - block_colors[0].g) + math::square(src_pixel.b - block_colors[0].b); + + uint trial_error = math::square(src_pixel.r - block_colors[1].r) + math::square(src_pixel.g - block_colors[1].g) + math::square(src_pixel.b - block_colors[1].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = math::square(src_pixel.r - block_colors[2].r) + math::square(src_pixel.g - block_colors[2].g) + math::square(src_pixel.b - block_colors[2].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = math::square(src_pixel.r - block_colors[3].r) + math::square(src_pixel.g - block_colors[3].g) + math::square(src_pixel.b - block_colors[3].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + { + break; + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; } - } -} -// Packs solid color blocks efficiently using a set of small precomputed tables. -// For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. 
-static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) { - CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + return false; + } + } - context, pack_params; - static uint s_next_comp[4] = {1, 2, 0, 1}; + const color_quad_u8 base_color(coords.get_scaled_color()); - uint best_error = cUINT32_MAX, best_i = 0; - int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + const uint n = m_pParams->m_num_src_pixels; + CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); - // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
- for (uint i = 0; i < 3; i++) { - const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + trial_solution.m_error = cUINT64_MAX; - const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) { - const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint block_inten[4]; + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } - const uint16* pTable; - if (!c_plus_delta) - pTable = g_color8_to_etc_block_config_0_255[0]; - else if (c_plus_delta == 255) - pTable = g_color8_to_etc_block_config_0_255[1]; - else - pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
+ // 0 1 2 3 + // 01 12 23 + const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64 total_error = 0; + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint min_error = block_inten[0] - m_pSorted_luma[n - 1]; + if (min_error >= trial_solution.m_error) + { + continue; + } + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint c = 0; c < n; c++) + { + total_error += color::elucidian_distance(block_colors[0], pSrc_pixels[c], false); + } + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint min_error = m_pSorted_luma[0] - block_inten[3]; + if (min_error >= trial_solution.m_error) + { + continue; + } + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint c = 0; c < n; c++) + { + total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[c], false); + } + } + else + { + uint cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + { + if (++cur_selector > 2) + { + goto done; + } + } + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color::elucidian_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done: + while (c < n) + { + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } - do { - const uint x = *pTable++; + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + 
trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + { + break; + } + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; -#ifdef CRNLIB_BUILD_DEBUG - const uint diff = x & 1; - const uint inten = (x >> 1) & 7; - const uint selector = (x >> 4) & 3; - const uint p0 = (x >> 8) & 255; - CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); -#endif + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + // Dither function from RYG's public domain real-time DXT1 compressor, modified for 555. + static void DitherBlock(color_quad_u8* dest, const color_quad_u8* block) + { + int err[8], *ep1 = err, *ep2 = err + 4; + uint8* quant = ryg_dxt::QuantRBTab + 8; - const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; - uint16 p1 = pInverse_table[c1]; - uint16 p2 = pInverse_table[c2]; - const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); - if (trial_error < best_error) { - best_error = trial_error; - best_x = x; - best_packed_c1 = p1 & 0xFF; - best_packed_c2 = p2 & 0xFF; - best_i = i; - if (!best_error) - goto found_perfect_match; + // process channels seperately + for (int ch = 0; ch < 3; ch++) + { + uint8* bp = (uint8*)block; + uint8* dp = (uint8*)dest; + + bp += ch; + dp += ch; + memset(err, 0, sizeof(err)); + + for (int y = 0; y < 4; y++) + { + // pixel 0 + dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; + ep1[0] = bp[0] - dp[0]; + + // pixel 1 + dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[4] - dp[4]; + + // pixel 2 + dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[8] - dp[8]; + + // pixel 3 + dp[12] 
= quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + + // advance to next line + std::swap(ep1, ep2); + bp += 16; + dp += 16; + } } - } while (*pTable != 0xFFFF); } - } -found_perfect_match: - - const uint diff = best_x & 1; - const uint inten = (best_x >> 1) & 7; - - block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); - - const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; - *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; - *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; - - const uint best_packed_c0 = (best_x >> 8) & 255; - if (diff) { - block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); - block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); - block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); - } else { - block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); - block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); - block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); - } - - return best_error; -} - -static uint pack_etc1_block_solid_color_constrained( - etc1_optimizer::results& results, - uint num_colors, const uint8* pColor, - crn_etc1_pack_params& pack_params, - pack_etc1_block_context& context, - bool use_diff, - const color_quad_u8* pBase_color5_unscaled) { - CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); - - context, pack_params; - static uint s_next_comp[4] = {1, 2, 0, 1}; - - uint best_error = cUINT32_MAX, best_i = 0; - int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; - - // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
- for (uint i = 0; i < 3; i++) { - const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; - - const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) { - const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); - - const uint16* pTable; - if (!c_plus_delta) - pTable = g_color8_to_etc_block_config_0_255[0]; - else if (c_plus_delta == 255) - pTable = g_color8_to_etc_block_config_0_255[1]; - else - pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; - - do { - const uint x = *pTable++; - const uint diff = x & 1; - if (static_cast(use_diff) != diff) { - if (*pTable == 0xFFFF) - break; - continue; + + static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) + { + CRNLIB_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < (diff ? 32 : 16))); + int c; + if (diff) + { + c = (packed_c >> 2) | (packed_c << 3); } + else + { + c = packed_c | (packed_c << 4); + } + c += g_etc1_inten_tables[inten][selector]; + c = math::clamp(c, 0, 255); + return c; + } - if ((diff) && (pBase_color5_unscaled)) { - const int p0 = (x >> 8) & 255; - int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); - if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) { - if (*pTable == 0xFFFF) - break; - continue; - } + void pack_etc1_block_init() + { + for (uint diff = 0; diff < 2; diff++) + { + const uint limit = diff ? 
32 : 16; + + for (uint inten = 0; inten < 8; inten++) + { + for (uint selector = 0; selector < 4; selector++) + { + const uint inverse_table_index = diff + (inten << 1) + (selector << 4); + for (int color = 0; color < 256; color++) + { + uint best_error = cUINT32_MAX, best_packed_c = 0; + for (uint packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint err = labs(v - color); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + { + break; + } + } + } + CRNLIB_ASSERT(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) + { + CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); + + context, pack_params; + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
+ for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + { + pTable = g_color8_to_etc_block_config_0_255[0]; + } + else if (c_plus_delta == 255) + { + pTable = g_color8_to_etc_block_config_0_255[1]; + } + else + { + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + } + + do + { + const uint x = *pTable++; #ifdef CRNLIB_BUILD_DEBUG + const uint diff = x & 1; + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + { + goto found_perfect_match; + } + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + const uint diff = best_x & 1; + const uint inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 
0xFFFF : 0; + + const uint best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); + } + else { - const uint inten = (x >> 1) & 7; - const uint selector = (x >> 4) & 3; - const uint p0 = (x >> 8) & 255; - CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); } + + return best_error; + } + + static uint pack_etc1_block_solid_color_constrained( + etc1_optimizer::results& results, + uint num_colors, const uint8* pColor, + crn_etc1_pack_params& pack_params, + pack_etc1_block_context& context, + bool use_diff, + const color_quad_u8* pBase_color5_unscaled) + { + CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); + + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
+ for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + { + pTable = g_color8_to_etc_block_config_0_255[0]; + } + else if (c_plus_delta == 255) + { + pTable = g_color8_to_etc_block_config_0_255[1]; + } + else + { + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + } + + do + { + const uint x = *pTable++; + const uint diff = x & 1; + if (static_cast(use_diff) != diff) + { + if (*pTable == 0xFFFF) + { + break; + } + continue; + } + + if ((diff) && (pBase_color5_unscaled)) + { + const int p0 = (x >> 8) & 255; + int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); + if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + { + break; + } + continue; + } + } + +#ifdef CRNLIB_BUILD_DEBUG + { + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); + } #endif - const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; - uint16 p1 = pInverse_table[c1]; - uint16 p2 = pInverse_table[c2]; - - if ((diff) && (pBase_color5_unscaled)) { - int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); - int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); - if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) { - if (*pTable == 0xFFFF) - break; - continue; - } + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + + if ((diff) && (pBase_color5_unscaled)) + { + int delta1 = (p1 & 0xFF) - 
static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); + int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + { + break; + } + continue; + } + } + + const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + { + goto found_perfect_match; + } + } + } while (*pTable != 0xFFFF); + } } + found_perfect_match: - const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); - if (trial_error < best_error) { - best_error = trial_error; - best_x = x; - best_packed_c1 = p1 & 0xFF; - best_packed_c2 = p2 & 0xFF; - best_i = i; - if (!best_error) - goto found_perfect_match; + if (best_error == cUINT32_MAX) + { + return best_error; } - } while (*pTable != 0xFFFF); + + best_error *= num_colors; + + results.m_n = num_colors; + results.m_block_color4 = !(best_x & 1); + results.m_block_inten_table = (best_x >> 1) & 7; + memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); + + const uint best_packed_c0 = (best_x >> 8) & 255; + results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); + results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); + results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); + results.m_error = best_error; + + return best_error; } - } -found_perfect_match: - - if (best_error == cUINT32_MAX) - return best_error; - - best_error *= num_colors; - - results.m_n = num_colors; - results.m_block_color4 = !(best_x & 1); - results.m_block_inten_table = (best_x >> 1) & 7; - memset(results.m_pSelectors, (best_x >> 4) & 3, 
num_colors); - - const uint best_packed_c0 = (best_x >> 8) & 255; - results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); - results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); - results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); - results.m_error = best_error; - - return best_error; -} - -uint64 pack_etc1_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) { - color_quad_u8 src_pixel0(pSrc_pixels[0]); - - int r; - for (r = 15; r >= 1; --r) - if ((pSrc_pixels[r].r != src_pixel0.r) || (pSrc_pixels[r].g != src_pixel0.g) || (pSrc_pixels[r].b != src_pixel0.b)) - break; - if (!r) - return 16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params, context); - - color_quad_u8 dithered_pixels[16]; - if (pack_params.m_dithering) { - DitherBlock(dithered_pixels, pSrc_pixels); - pSrc_pixels = dithered_pixels; - } - - uint64 best_error = cUINT64_MAX; - uint best_flip = false, best_use_color4 = false; - - uint8 best_selectors[2][8]; - etc1_optimizer::results best_results[2]; - for (uint i = 0; i < 2; i++) { - best_results[i].m_n = 8; - best_results[i].m_pSelectors = best_selectors[i]; - } - - uint8 selectors[3][8]; - etc1_optimizer::results results[3]; - - for (uint i = 0; i < 3; i++) { - results[i].m_n = 8; - results[i].m_pSelectors = selectors[i]; - } - - color_quad_u8 subblock_pixels[8]; - - etc1_optimizer::params params(pack_params); - params.m_num_src_pixels = 8; - params.m_pSrc_pixels = subblock_pixels; - - for (uint flip = 0; flip < 2; flip++) { - for (uint use_color4 = 0; use_color4 < 2; use_color4++) { - uint64 trial_error = 0; - - uint subblock; - for (subblock = 0; subblock < 2; subblock++) { - if (flip) - memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); - else { - const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; - subblock_pixels[0] = pSrc_col[0]; - 
subblock_pixels[1] = pSrc_col[4]; - subblock_pixels[2] = pSrc_col[8]; - subblock_pixels[3] = pSrc_col[12]; - subblock_pixels[4] = pSrc_col[1]; - subblock_pixels[5] = pSrc_col[5]; - subblock_pixels[6] = pSrc_col[9]; - subblock_pixels[7] = pSrc_col[13]; + + uint64 pack_etc1_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) + { + color_quad_u8 src_pixel0(pSrc_pixels[0]); + + int r; + for (r = 15; r >= 1; --r) + { + if ((pSrc_pixels[r].r != src_pixel0.r) || (pSrc_pixels[r].g != src_pixel0.g) || (pSrc_pixels[r].b != src_pixel0.b)) + { + break; + } + } + if (!r) + { + return 16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params, context); } - results[2].m_error = cUINT64_MAX; - if ((params.m_quality >= cCRNETCQualityMedium) && ((subblock) || (use_color4))) { - color_quad_u8 subblock_pixel0(subblock_pixels[0]); - for (r = 7; r >= 1; --r) - if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b)) - break; - if (!r) { - pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? 
&results[0].m_block_color_unscaled : nullptr); - } + color_quad_u8 dithered_pixels[16]; + if (pack_params.m_dithering) + { + DitherBlock(dithered_pixels, pSrc_pixels); + pSrc_pixels = dithered_pixels; } - params.m_use_color4 = (use_color4 != 0); - params.m_constrain_against_base_color5 = false; + uint64 best_error = cUINT64_MAX; + uint best_flip = false, best_use_color4 = false; - if ((!use_color4) && (subblock)) { - params.m_constrain_against_base_color5 = true; - params.m_base_color5 = results[0].m_block_color_unscaled; + uint8 best_selectors[2][8]; + etc1_optimizer::results best_results[2]; + for (uint i = 0; i < 2; i++) + { + best_results[i].m_n = 8; + best_results[i].m_pSelectors = best_selectors[i]; } - if (params.m_quality == cCRNETCQualitySlow) { - static const int s_scan_delta_0_to_4[] = {-4, -3, -2, -1, 0, 1, 2, 3, 4}; - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_4); - params.m_pScan_deltas = s_scan_delta_0_to_4; - } else if (params.m_quality == cCRNETCQualityMedium) { - static const int s_scan_delta_0_to_1[] = {-1, 0, 1}; - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_1); - params.m_pScan_deltas = s_scan_delta_0_to_1; - } else { - static const int s_scan_delta_0[] = {0}; - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0); - params.m_pScan_deltas = s_scan_delta_0; + uint8 selectors[3][8]; + etc1_optimizer::results results[3]; + + for (uint i = 0; i < 3; i++) + { + results[i].m_n = 8; + results[i].m_pSelectors = selectors[i]; } - context.m_optimizer.init(params, results[subblock]); - - if (!context.m_optimizer.compute()) - break; - - // Fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
- const uint refinement_error_thresh0 = 3000; - const uint refinement_error_thresh1 = 6000; - if ((params.m_quality >= cCRNETCQualityMedium) && (results[subblock].m_error > refinement_error_thresh0)) { - if (params.m_quality == cCRNETCQualityMedium) { - static const int s_scan_delta_2_to_3[] = {-3, -2, 2, 3}; - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_2_to_3); - params.m_pScan_deltas = s_scan_delta_2_to_3; - } else { - static const int s_scan_delta_5_to_5[] = {-5, 5}; - static const int s_scan_delta_5_to_8[] = {-8, -7, -6, -5, 5, 6, 7, 8}; - if (results[subblock].m_error > refinement_error_thresh1) { - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_8); - params.m_pScan_deltas = s_scan_delta_5_to_8; - } else { - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_5); - params.m_pScan_deltas = s_scan_delta_5_to_5; + color_quad_u8 subblock_pixels[8]; + + etc1_optimizer::params params(pack_params); + params.m_num_src_pixels = 8; + params.m_pSrc_pixels = subblock_pixels; + + for (uint flip = 0; flip < 2; flip++) + { + for (uint use_color4 = 0; use_color4 < 2; use_color4++) + { + uint64 trial_error = 0; + + uint subblock; + for (subblock = 0; subblock < 2; subblock++) + { + if (flip) + { + memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); + } + else + { + const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; + subblock_pixels[0] = pSrc_col[0]; + subblock_pixels[1] = pSrc_col[4]; + subblock_pixels[2] = pSrc_col[8]; + subblock_pixels[3] = pSrc_col[12]; + subblock_pixels[4] = pSrc_col[1]; + subblock_pixels[5] = pSrc_col[5]; + subblock_pixels[6] = pSrc_col[9]; + subblock_pixels[7] = pSrc_col[13]; + } + + results[2].m_error = cUINT64_MAX; + if ((params.m_quality >= cCRNETCQualityMedium) && ((subblock) || (use_color4))) + { + color_quad_u8 subblock_pixel0(subblock_pixels[0]); + for (r = 7; r >= 1; --r) + { + if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != 
subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b)) + { + break; + } + } + if (!r) + { + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : nullptr); + } + } + + params.m_use_color4 = (use_color4 != 0); + params.m_constrain_against_base_color5 = false; + + if ((!use_color4) && (subblock)) + { + params.m_constrain_against_base_color5 = true; + params.m_base_color5 = results[0].m_block_color_unscaled; + } + + if (params.m_quality == cCRNETCQualitySlow) + { + static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_4); + params.m_pScan_deltas = s_scan_delta_0_to_4; + } + else if (params.m_quality == cCRNETCQualityMedium) + { + static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_1); + params.m_pScan_deltas = s_scan_delta_0_to_1; + } + else + { + static const int s_scan_delta_0[] = { 0 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0); + params.m_pScan_deltas = s_scan_delta_0; + } + + context.m_optimizer.init(params, results[subblock]); + + if (!context.m_optimizer.compute()) + { + break; + } + + // Fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
+ const uint refinement_error_thresh0 = 3000; + const uint refinement_error_thresh1 = 6000; + if ((params.m_quality >= cCRNETCQualityMedium) && (results[subblock].m_error > refinement_error_thresh0)) + { + if (params.m_quality == cCRNETCQualityMedium) + { + static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_2_to_3); + params.m_pScan_deltas = s_scan_delta_2_to_3; + } + else + { + static const int s_scan_delta_5_to_5[] = { -5, 5 }; + static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + if (results[subblock].m_error > refinement_error_thresh1) + { + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_8); + params.m_pScan_deltas = s_scan_delta_5_to_8; + } + else + { + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_5); + params.m_pScan_deltas = s_scan_delta_5_to_5; + } + } + + if (!context.m_optimizer.compute()) + { + break; + } + } + + if (results[2].m_error < results[subblock].m_error) + { + results[subblock] = results[2]; + } + + trial_error += results[subblock].m_error; + if (trial_error >= best_error) + { + break; + } + } + + if (subblock < 2) + { + continue; + } + + best_error = trial_error; + best_results[0] = results[0]; + best_results[1] = results[1]; + best_flip = flip; + best_use_color4 = use_color4; + + } // use_color4 + + } // flip + + int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; + int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; + int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; + if (!best_use_color4) + { + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + // Shouldn't ever happen + CRNLIB_VERIFY(0); } - } + } - if (!context.m_optimizer.compute()) - break; + if (best_use_color4) + { + dst_block.m_bytes[0] = 
static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); + dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); + dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); + } + else + { + if (dr < 0) + { + dr += 8; + } + if (dg < 0) + { + dg += 8; + } + if (db < 0) + { + db += 8; + } + dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); + dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); + dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); } - if (results[2].m_error < results[subblock].m_error) - results[subblock] = results[2]; + dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip); - trial_error += results[subblock].m_error; - if (trial_error >= best_error) - break; - } + uint selector0 = 0, selector1 = 0; + if (best_flip) + { + // flipped: + // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + // + // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + const uint8* pSelectors0 = best_results[0].m_pSelectors; + const uint8* pSelectors1 = best_results[1].m_pSelectors; + for (int x = 3; x >= 0; --x) + { + uint b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = 
(selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + } + } + else + { + // non-flipped: + // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + // + // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + for (int subblock = 1; subblock >= 0; --subblock) + { + const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; + for (uint i = 0; i < 2; i++) + { + uint b; + b = g_selector_index_to_etc1[pSelectors[3]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[2]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[1]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[0]]; + selector0 = (selector0 << 1) | (b & 1); + selector1 = (selector1 << 1) | (b >> 1); + + pSelectors -= 4; + } + } + } - if (subblock < 2) - continue; + dst_block.m_bytes[4] = static_cast(selector1 >> 8); + dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); + dst_block.m_bytes[6] = static_cast(selector0 >> 8); + dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); - best_error = trial_error; - best_results[0] = results[0]; - best_results[1] = results[1]; - best_flip = flip; - best_use_color4 = use_color4; + return best_error; + } - } // use_color4 + uint64 pack_etc1s_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params) + { + uint8 selectors[16]; + etc1_optimizer optimizer; + etc1_optimizer::params params; + params.m_pSrc_pixels = pSrc_pixels; + params.m_num_src_pixels = 16; + params.m_use_color4 = false; + params.m_constrain_against_base_color5 = false; + etc1_optimizer::results results; + results.m_pSelectors = selectors; + results.m_n = 16; + optimizer.init(params, results); + + const int scan[] = { -4, -3, -2, -1, 0, 1, 2, 
3, 4 }; + params.m_scan_delta_size = pack_params.m_quality == cCRNETCQualitySlow ? CRNLIB_ARRAY_SIZE(scan) : pack_params.m_quality == cCRNETCQualityMedium ? 3 + : 1; + params.m_pScan_deltas = scan + ((CRNLIB_ARRAY_SIZE(scan) - params.m_scan_delta_size) >> 1); + optimizer.compute(); + + if (params.m_quality >= cCRNETCQualityMedium && results.m_error > 6000) + { + const int refine_medium[] = { -3, -2, 2, 3 }; + const int refine_high[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + if (params.m_quality == cCRNETCQualityMedium) + { + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(refine_medium); + params.m_pScan_deltas = refine_medium; + } + else + { + params.m_scan_delta_size = results.m_error > 12000 ? CRNLIB_ARRAY_SIZE(refine_high) : 2; + params.m_pScan_deltas = refine_high + ((CRNLIB_ARRAY_SIZE(refine_high) - params.m_scan_delta_size) >> 1); + } + optimizer.compute(); + } - } // flip + uint32 selector = 0; + for (uint32 i = 0, t = 8, h = 0; h < 4; h++, t -= 15) + { + for (uint32 w = 0; w < 4; w++, t += 4, i++) + { + uint32 s = g_selector_index_to_etc1[selectors[i]]; + selector |= (s >> 1 | (s & 1) << 16) << (t & 15); + } + } - int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; - int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; - int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; - if (!best_use_color4) { - if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) { - // Shouldn't ever happen - CRNLIB_VERIFY(0); - } - } - - if (best_use_color4) { - dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); - dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); - dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | 
(best_results[0].m_block_color_unscaled.b << 4)); - } else { - if (dr < 0) - dr += 8; - if (dg < 0) - dg += 8; - if (db < 0) - db += 8; - dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); - dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); - dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); - } - - dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip); - - uint selector0 = 0, selector1 = 0; - if (best_flip) { - // flipped: - // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, - // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } - // - // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, - // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } - const uint8* pSelectors0 = best_results[0].m_pSelectors; - const uint8* pSelectors1 = best_results[1].m_pSelectors; - for (int x = 3; x >= 0; --x) { - uint b; - b = g_selector_index_to_etc1[pSelectors1[4 + x]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors1[x]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors0[4 + x]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors0[x]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - } - } else { - // non-flipped: - // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, - // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } - // - // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, - // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } - for (int subblock = 1; subblock >= 0; --subblock) { - const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; - for (uint i = 0; i < 2; i++) { - uint b; - b = g_selector_index_to_etc1[pSelectors[3]]; - selector0 = 
(selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[2]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[1]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[0]]; - selector0 = (selector0 << 1) | (b & 1); - selector1 = (selector1 << 1) | (b >> 1); - - pSelectors -= 4; - } + dst_block.m_uint64 = (uint64)selector << 32 | results.m_block_inten_table << 29 | results.m_block_inten_table << 26 | 1 << 25 | (results.m_block_color_unscaled.m_u32 & 0xFFFFFF) << 3; + return results.m_error; } - } - - dst_block.m_bytes[4] = static_cast(selector1 >> 8); - dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); - dst_block.m_bytes[6] = static_cast(selector0 >> 8); - dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); - - return best_error; -} - -uint64 pack_etc1s_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params) { - uint8 selectors[16]; - etc1_optimizer optimizer; - etc1_optimizer::params params; - params.m_pSrc_pixels = pSrc_pixels; - params.m_num_src_pixels = 16; - params.m_use_color4 = false; - params.m_constrain_against_base_color5 = false; - etc1_optimizer::results results; - results.m_pSelectors = selectors; - results.m_n = 16; - optimizer.init(params, results); - - const int scan[] = {-4, -3, -2, -1, 0, 1, 2, 3, 4}; - params.m_scan_delta_size = pack_params.m_quality == cCRNETCQualitySlow ? CRNLIB_ARRAY_SIZE(scan) : pack_params.m_quality == cCRNETCQualityMedium ? 
3 : 1; - params.m_pScan_deltas = scan + ((CRNLIB_ARRAY_SIZE(scan) - params.m_scan_delta_size) >> 1); - optimizer.compute(); - - if (params.m_quality >= cCRNETCQualityMedium && results.m_error > 6000) { - const int refine_medium[] = {-3, -2, 2, 3}; - const int refine_high[] = {-8, -7, -6, -5, 5, 6, 7, 8}; - if (params.m_quality == cCRNETCQualityMedium) { - params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(refine_medium); - params.m_pScan_deltas = refine_medium; - } else { - params.m_scan_delta_size = results.m_error > 12000 ? CRNLIB_ARRAY_SIZE(refine_high) : 2; - params.m_pScan_deltas = refine_high + ((CRNLIB_ARRAY_SIZE(refine_high) - params.m_scan_delta_size) >> 1); - } - optimizer.compute(); - } - - uint32 selector = 0; - for (uint32 i = 0, t = 8, h = 0; h < 4; h++, t -= 15) { - for (uint32 w = 0; w < 4; w++, t += 4, i++) { - uint32 s = g_selector_index_to_etc1[selectors[i]]; - selector |= (s >> 1 | (s & 1) << 16) << (t & 15); - } - } - - dst_block.m_uint64 = (uint64)selector << 32 | results.m_block_inten_table << 29 | results.m_block_inten_table << 26 | 1 << 25 | (results.m_block_color_unscaled.m_u32 & 0xFFFFFF) << 3; - return results.m_error; -} - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_etc.h b/crnlib/crn_etc.h index 88d23c2..1b65de1 100644 --- a/crnlib/crn_etc.h +++ b/crnlib/crn_etc.h @@ -1,544 +1,643 @@ -// File: crn_etc.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crnlib.h" #include "crn_dxt.h" #include "crn_export.h" -namespace crnlib { -enum etc_constants { - cETC1BytesPerBlock = 8U, - - cETC1SelectorBits = 2U, - cETC1SelectorValues = 1U << cETC1SelectorBits, - cETC1SelectorMask = cETC1SelectorValues - 1U, - - cETC1BlockShift = 2U, - cETC1BlockSize = 1U << cETC1BlockShift, - - cETC1LSBSelectorIndicesBitOffset = 0, - cETC1MSBSelectorIndicesBitOffset = 16, - - cETC1FlipBitOffset = 32, - cETC1DiffBitOffset = 33, - - cETC1IntenModifierNumBits = 3, - cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, - cETC1RightIntenModifierTableBitOffset = 34, - cETC1LeftIntenModifierTableBitOffset = 37, - - // Base+Delta encoding (5 bit bases, 3 bit delta) - cETC1BaseColorCompNumBits = 5, - cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, - - cETC1DeltaColorCompNumBits = 3, - cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, - cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, - - cETC1BaseColor5RBitOffset = 59, - cETC1BaseColor5GBitOffset = 51, - cETC1BaseColor5BBitOffset = 43, - - cETC1DeltaColor3RBitOffset = 56, - cETC1DeltaColor3GBitOffset = 48, - cETC1DeltaColor3BBitOffset = 40, - - // Absolute (non-delta) encoding (two 4-bit per component bases) - cETC1AbsColorCompNumBits = 4, - cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, - - cETC1AbsColor4R1BitOffset = 60, - cETC1AbsColor4G1BitOffset = 52, - cETC1AbsColor4B1BitOffset = 44, - - cETC1AbsColor4R2BitOffset = 56, - cETC1AbsColor4G2BitOffset = 48, - cETC1AbsColor4B2BitOffset = 
40, - - cETC1ColorDeltaMin = -4, - cETC1ColorDeltaMax = 3, - - // Delta3: - // 0 1 2 3 4 5 6 7 - // 000 001 010 011 100 101 110 111 - // 0 1 2 3 -4 -3 -2 -1 -}; - -CRN_EXPORT extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; -CRN_EXPORT extern const uint8 g_etc1_to_selector_index[cETC1SelectorValues]; -CRN_EXPORT extern const uint8 g_selector_index_to_etc1[cETC1SelectorValues]; - -struct etc1_coord2 { - uint8 m_x, m_y; -}; -CRN_EXPORT extern const etc1_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] - -struct CRN_EXPORT etc1_block { - // big endian uint64: - // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 - union { - uint64 m_uint64; - uint8 m_bytes[8]; - }; - - uint8 m_low_color[2]; - uint8 m_high_color[2]; - - enum { cNumSelectorBytes = 4 }; - uint8 m_selectors[cNumSelectorBytes]; - - inline void clear() { - utils::zero_this(this); - } - - inline uint get_general_bits(uint ofs, uint num) const { - CRNLIB_ASSERT((ofs + num) <= 64U); - CRNLIB_ASSERT(num && (num < 32U)); - return (utils::read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); - } - - inline void set_general_bits(uint ofs, uint num, uint bits) { - CRNLIB_ASSERT((ofs + num) <= 64U); - CRNLIB_ASSERT(num && (num < 32U)); - - uint64 x = utils::read_be64(&m_uint64); - uint64 msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); - x &= ~msk; - x |= (static_cast(bits) << static_cast(ofs)); - utils::write_be64(&m_uint64, x); - } - - inline uint get_byte_bits(uint ofs, uint num) const { - CRNLIB_ASSERT((ofs + num) <= 64U); - CRNLIB_ASSERT(num && (num <= 8U)); - CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); - const uint byte_ofs = 7 - (ofs >> 3); - const uint byte_bit_ofs = ofs & 7; - return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); - } - - inline void set_byte_bits(uint ofs, uint num, uint bits) { - CRNLIB_ASSERT((ofs + num) <= 64U); - CRNLIB_ASSERT(num && (num < 32U)); - 
CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); - CRNLIB_ASSERT(bits < (1U << num)); - const uint byte_ofs = 7 - (ofs >> 3); - const uint byte_bit_ofs = ofs & 7; - const uint mask = (1 << num) - 1; - m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); - m_bytes[byte_ofs] |= (bits << byte_bit_ofs); - } - - // false = left/right subblocks - // true = upper/lower subblocks - inline bool get_flip_bit() const { - return (m_bytes[3] & 1) != 0; - } - - inline void set_flip_bit(bool flip) { - m_bytes[3] &= ~1; - m_bytes[3] |= static_cast(flip); - } - - inline bool get_diff_bit() const { - return (m_bytes[3] & 2) != 0; - } - - inline void set_diff_bit(bool diff) { - m_bytes[3] &= ~2; - m_bytes[3] |= (static_cast(diff) << 1); - } - - // Returns intensity modifier table (0-7) used by subblock subblock_id. - // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) - inline uint get_inten_table(uint subblock_id) const { - CRNLIB_ASSERT(subblock_id < 2); - const uint ofs = subblock_id ? 2 : 5; - return (m_bytes[3] >> ofs) & 7; - } - - // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) - inline void set_inten_table(uint subblock_id, uint t) { - CRNLIB_ASSERT(subblock_id < 2); - CRNLIB_ASSERT(t < 8); - const uint ofs = subblock_id ? 2 : 5; - m_bytes[3] &= ~(7 << ofs); - m_bytes[3] |= (t << ofs); - } - - // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. - inline uint get_selector(uint x, uint y) const { - CRNLIB_ASSERT((x | y) < 4); - - const uint bit_index = x * 4 + y; - const uint byte_bit_ofs = bit_index & 7; - const uint8* p = &m_bytes[7 - (bit_index >> 3)]; - const uint lsb = (p[0] >> byte_bit_ofs) & 1; - const uint msb = (p[-2] >> byte_bit_ofs) & 1; - const uint val = lsb | (msb << 1); - - return g_etc1_to_selector_index[val]; - } - - // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
- inline void set_selector(uint x, uint y, uint val) { - CRNLIB_ASSERT((x | y | val) < 4); - const uint bit_index = x * 4 + y; - - uint8* p = &m_bytes[7 - (bit_index >> 3)]; - - const uint byte_bit_ofs = bit_index & 7; - const uint mask = 1 << byte_bit_ofs; - - const uint etc1_val = g_selector_index_to_etc1[val]; - - const uint lsb = etc1_val & 1; - const uint msb = etc1_val >> 1; - - p[0] &= ~mask; - p[0] |= (lsb << byte_bit_ofs); - - p[-2] &= ~mask; - p[-2] |= (msb << byte_bit_ofs); - } - - inline void set_base4_color(uint idx, uint16 c) { - if (idx) { - set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); - set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); - set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); - } else { - set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); - set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); - set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); - } - } - - inline uint16 get_base4_color(uint idx) const { - uint r, g, b; - if (idx) { - r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); - g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); - b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); - } else { - r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); - g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); - b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); - } - return static_cast(b | (g << 4U) | (r << 8U)); - } - - inline void set_base5_color(uint16 c) { - set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); - set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); - set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); - } - - inline uint16 get_base5_color() const { - const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); - const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); - const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); - return static_cast(b | (g << 5U) | (r << 10U)); - } - - void set_delta3_color(uint16 c) { - set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) 
& 7); - set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); - set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); - } - - inline uint16 get_delta3_color() const { - const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); - const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); - const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); - return static_cast(b | (g << 3U) | (r << 6U)); - } - - // Base color 5 - static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); - static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); - - static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); - static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); - - static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); - static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); - - // Delta color 3 - // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) - static uint16 pack_delta3(const color_quad_i16& color); - static uint16 pack_delta3(int r, int g, int b); - - // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) - static color_quad_i16 unpack_delta3(uint16 packed_delta3); - static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); - - // Abs color 4 - static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); - static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); - - static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); - static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); - - // subblock colors - static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); - static bool 
get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); - static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); - - static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) { - if (color4) { - dst.r = src.r | (src.r << 4); - dst.g = src.g | (src.g << 4); - dst.b = src.b | (src.b << 4); - } else { - dst.r = (src.r >> 2) | (src.r << 3); - dst.g = (src.g >> 2) | (src.g << 3); - dst.b = (src.b >> 2) | (src.b << 3); - } - dst.a = src.a; - } -}; - -CRNLIB_DEFINE_BITWISE_COPYABLE(etc1_block); - -// Returns false if the block is invalid (it will still be unpacked with clamping). -CRN_EXPORT bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool preserve_alpha = false); - -enum crn_etc_quality { - cCRNETCQualityFast, - cCRNETCQualityMedium, - cCRNETCQualitySlow, - - cCRNETCQualityTotal, - - cCRNETCQualityForceDWORD = 0xFFFFFFFF -}; - -struct CRN_EXPORT crn_etc1_pack_params { - crn_etc_quality m_quality; - bool m_perceptual; - bool m_dithering; - - inline crn_etc1_pack_params() { - clear(); - } - - void clear() { - m_quality = cCRNETCQualitySlow; - m_perceptual = true; - m_dithering = false; - } -}; - -struct CRN_EXPORT etc1_solution_coordinates { - inline etc1_solution_coordinates() - : m_unscaled_color(0, 0, 0, 0), - m_inten_table(0), - m_color4(false) { - } - - inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) - : m_unscaled_color(r, g, b, 255), - m_inten_table(inten_table), - m_color4(color4) { - } - - inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) - : m_unscaled_color(c), - m_inten_table(inten_table), - m_color4(color4) { - } - - inline etc1_solution_coordinates(const etc1_solution_coordinates& other) { - *this = other; - } - - inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) { - m_unscaled_color = 
rhs.m_unscaled_color; - m_inten_table = rhs.m_inten_table; - m_color4 = rhs.m_color4; - return *this; - } - - inline void clear() { - m_unscaled_color.clear(); - m_inten_table = 0; - m_color4 = false; - } - - inline color_quad_u8 get_scaled_color() const { - int br, bg, bb; - if (m_color4) { - br = m_unscaled_color.r | (m_unscaled_color.r << 4); - bg = m_unscaled_color.g | (m_unscaled_color.g << 4); - bb = m_unscaled_color.b | (m_unscaled_color.b << 4); - } else { - br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); - bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); - bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); - } - return color_quad_u8(br, bg, bb); - } - - inline void get_block_colors(color_quad_u8* pBlock_colors) { - int br, bg, bb; - if (m_color4) { - br = m_unscaled_color.r | (m_unscaled_color.r << 4); - bg = m_unscaled_color.g | (m_unscaled_color.g << 4); - bb = m_unscaled_color.b | (m_unscaled_color.b << 4); - } else { - br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); - bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); - bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); - } - const int* pInten_table = g_etc1_inten_tables[m_inten_table]; - pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); - pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); - pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); - pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); - } - - color_quad_u8 m_unscaled_color; - uint m_inten_table; - bool m_color4; -}; - -class CRN_EXPORT etc1_optimizer { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(etc1_optimizer); - - public: - etc1_optimizer() { - clear(); - } - - void clear() { - m_pParams = nullptr; - m_pResult = nullptr; - m_pSorted_luma = nullptr; - m_pSorted_luma_indices = nullptr; - } - - struct params : crn_etc1_pack_params { - params() { - 
clear(); - } - - params(const crn_etc1_pack_params& base_params) - : crn_etc1_pack_params(base_params) { - clear_optimizer_params(); - } - - void clear() { - crn_etc1_pack_params::clear(); - clear_optimizer_params(); - } - - void clear_optimizer_params() { - m_num_src_pixels = 0; - m_pSrc_pixels = 0; - - m_use_color4 = false; - static const int s_default_scan_delta[] = {0}; - m_pScan_deltas = s_default_scan_delta; - m_scan_delta_size = 1; - - m_base_color5.clear(); - m_constrain_against_base_color5 = false; - } - - uint m_num_src_pixels; - const color_quad_u8* m_pSrc_pixels; - - bool m_use_color4; - const int* m_pScan_deltas; - uint m_scan_delta_size; - - color_quad_u8 m_base_color5; - bool m_constrain_against_base_color5; - }; - - struct results { - uint64 m_error; - color_quad_u8 m_block_color_unscaled; - uint m_block_inten_table; - uint m_n; - uint8* m_pSelectors; - bool m_block_color4; - - inline results& operator=(const results& rhs) { - m_block_color_unscaled = rhs.m_block_color_unscaled; - m_block_color4 = rhs.m_block_color4; - m_block_inten_table = rhs.m_block_inten_table; - m_error = rhs.m_error; - CRNLIB_ASSERT(m_n == rhs.m_n); - memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); - return *this; - } - }; - - void init(const params& params, results& result); - bool compute(); - - private: - struct potential_solution { - potential_solution() - : m_coords(), m_error(cUINT64_MAX), m_valid(false) { - } - - etc1_solution_coordinates m_coords; - crnlib::vector m_selectors; - uint64 m_error; - bool m_valid; - - void clear() { - m_coords.clear(); - m_selectors.resize(0); - m_error = cUINT64_MAX; - m_valid = false; - } - - bool are_selectors_all_equal() const { - if (m_selectors.empty()) - return false; - const uint s = m_selectors[0]; - for (uint i = 1; i < m_selectors.size(); i++) - if (m_selectors[i] != s) - return false; - return true; - } - }; - - const params* m_pParams; - results* m_pResult; - - int m_limit; - - vec3F m_avg_color; - int m_br, m_bg, m_bb; - 
crnlib::vector m_luma; - crnlib::vector m_sorted_luma[2]; - const uint32* m_pSorted_luma_indices; - uint32* m_pSorted_luma; - - crnlib::vector m_selectors; - crnlib::vector m_best_selectors; - - potential_solution m_best_solution; - potential_solution m_trial_solution; - crnlib::vector m_temp_selectors; - - bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); - bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); -}; - -struct CRN_EXPORT pack_etc1_block_context { - etc1_optimizer m_optimizer; -}; - -CRN_EXPORT void pack_etc1_block_init(); - -CRN_EXPORT uint64 pack_etc1_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context); -CRN_EXPORT uint64 pack_etc1s_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params); - -} // namespace crnlib +namespace crnlib +{ + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + 
cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + CRN_EXPORT extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; + CRN_EXPORT extern const uint8 g_etc1_to_selector_index[cETC1SelectorValues]; + CRN_EXPORT extern const uint8 g_selector_index_to_etc1[cETC1SelectorValues]; + + struct etc1_coord2 + { + uint8 m_x, m_y; + }; + CRN_EXPORT extern const etc1_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] + + struct CRN_EXPORT etc1_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64 m_uint64; + uint8 m_bytes[8]; + }; + + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum + { + cNumSelectorBytes = 4 + }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint get_general_bits(uint ofs, uint num) const + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + return (utils::read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + } + + inline void set_general_bits(uint ofs, uint num, uint bits) + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + + uint64 x = utils::read_be64(&m_uint64); + uint64 msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); + x &= ~msk; + x |= (static_cast(bits) << static_cast(ofs)); + 
utils::write_be64(&m_uint64, x); + } + + inline uint get_byte_bits(uint ofs, uint num) const + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num <= 8U)); + CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline void set_byte_bits(uint ofs, uint num, uint bits) + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + CRNLIB_ASSERT(bits < (1U << num)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + const uint mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint get_inten_table(uint subblock_id) const + { + CRNLIB_ASSERT(subblock_id < 2); + const uint ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint subblock_id, uint t) + { + CRNLIB_ASSERT(subblock_id < 2); + CRNLIB_ASSERT(t < 8); + const uint ofs = subblock_id ? 2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
+ inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x | y) < 4); + + const uint bit_index = x * 4 + y; + const uint byte_bit_ofs = bit_index & 7; + const uint8* p = &m_bytes[7 - (bit_index >> 3)]; + const uint lsb = (p[0] >> byte_bit_ofs) & 1; + const uint msb = (p[-2] >> byte_bit_ofs) & 1; + const uint val = lsb | (msb << 1); + + return g_etc1_to_selector_index[val]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x | y | val) < 4); + const uint bit_index = x * 4 + y; + + uint8* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint byte_bit_ofs = bit_index & 7; + const uint mask = 1 << byte_bit_ofs; + + const uint etc1_val = g_selector_index_to_etc1[val]; + + const uint lsb = etc1_val & 1; + const uint msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline void set_base4_color(uint idx, uint16 c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16 get_base4_color(uint idx) const + { + uint r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline void set_base5_color(uint16 c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); 
+ set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16 get_base5_color() const + { + const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16 c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16 get_delta3_color() const + { + const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + // Base color 5 + static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); + static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); + + static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16 pack_delta3(const color_quad_i16& color); + static uint16 pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static color_quad_i16 unpack_delta3(uint16 packed_delta3); + static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); + + // Abs color 4 + 
static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); + static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); + static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); + static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); + + static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = src.a; + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(etc1_block); + + // Returns false if the block is invalid (it will still be unpacked with clamping). 
+ CRN_EXPORT bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool preserve_alpha = false); + + enum crn_etc_quality + { + cCRNETCQualityFast, + cCRNETCQualityMedium, + cCRNETCQualitySlow, + + cCRNETCQualityTotal, + + cCRNETCQualityForceDWORD = 0xFFFFFFFF + }; + + struct CRN_EXPORT crn_etc1_pack_params + { + crn_etc_quality m_quality; + bool m_perceptual; + bool m_dithering; + + inline crn_etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cCRNETCQualitySlow; + m_perceptual = true; + m_dithering = false; + } + }; + + struct CRN_EXPORT etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : + m_unscaled_color(r, g, b, 255), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline color_quad_u8 get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_quad_u8(br, bg, 
bb); + } + + inline void get_block_colors(color_quad_u8* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); + } + + color_quad_u8 m_unscaled_color; + uint m_inten_table; + bool m_color4; + }; + + class CRN_EXPORT etc1_optimizer + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(etc1_optimizer); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = nullptr; + m_pResult = nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; + } + + struct params : crn_etc1_pack_params + { + params() + { + clear(); + } + + params(const crn_etc1_pack_params& base_params) : + crn_etc1_pack_params(base_params) + { + clear_optimizer_params(); + } + + void clear() + { + crn_etc1_pack_params::clear(); + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + } + + uint m_num_src_pixels; + const color_quad_u8* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint m_scan_delta_size; + 
+ color_quad_u8 m_base_color5; + bool m_constrain_against_base_color5; + }; + + struct results + { + uint64 m_error; + color_quad_u8 m_block_color_unscaled; + uint m_block_inten_table; + uint m_n; + uint8* m_pSelectors; + bool m_block_color4; + + inline results& operator=(const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + CRNLIB_ASSERT(m_n == rhs.m_n); + memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + private: + struct potential_solution + { + potential_solution() : + m_coords(), m_error(cUINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + crnlib::vector m_selectors; + uint64 m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = cUINT64_MAX; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (m_selectors.empty()) + { + return false; + } + const uint s = m_selectors[0]; + for (uint i = 1; i < m_selectors.size(); i++) + { + if (m_selectors[i] != s) + { + return false; + } + } + return true; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + crnlib::vector m_luma; + crnlib::vector m_sorted_luma[2]; + const uint32* m_pSorted_luma_indices; + uint32* m_pSorted_luma; + + crnlib::vector m_selectors; + crnlib::vector m_best_selectors; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + crnlib::vector m_temp_selectors; + + bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + }; + + struct CRN_EXPORT 
pack_etc1_block_context + { + etc1_optimizer m_optimizer; + }; + + CRN_EXPORT void pack_etc1_block_init(); + + CRN_EXPORT uint64 pack_etc1_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context); + CRN_EXPORT uint64 pack_etc1s_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params); + +} // namespace crnlib diff --git a/crnlib/crn_file_utils.cpp b/crnlib/crn_file_utils.cpp index 708760e..569cda0 100644 --- a/crnlib/crn_file_utils.cpp +++ b/crnlib/crn_file_utils.cpp @@ -1,5 +1,25 @@ -// File: crn_file_utils.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_file_utils.h" @@ -76,11 +96,13 @@ namespace crnlib bool file_utils::does_file_exist(const char* pFilename) { const DWORD fullAttributes = GetFileAttributesA(pFilename); - if (fullAttributes == INVALID_FILE_ATTRIBUTES) + if (fullAttributes == INVALID_FILE_ATTRIBUTES) { return false; +} - if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) + if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) { return false; +} return true; } diff --git a/crnlib/crn_file_utils.h b/crnlib/crn_file_utils.h index ca02b04..4a57c8a 100644 --- a/crnlib/crn_file_utils.h +++ b/crnlib/crn_file_utils.h @@ -1,5 +1,26 @@ -// File: crn_file_utils.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_export.h" @@ -40,5 +61,5 @@ namespace crnlib static bool write_buf_to_file(const char* pPath, const void* pData, size_t data_size); - }; // struct file_utils -} // namespace crnlib + }; // struct file_utils +} // namespace crnlib diff --git a/crnlib/crn_find_files.cpp b/crnlib/crn_find_files.cpp index 7331297..676c3a1 100644 --- a/crnlib/crn_find_files.cpp +++ b/crnlib/crn_find_files.cpp @@ -1,5 +1,26 @@ -// File: crn_win32_find_files.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_find_files.h" #include "crn_file_utils.h" @@ -104,8 +125,7 @@ namespace crnlib } } - } - while (FindNextFileA(handle, &find_data) != 0); + } while (FindNextFileA(handle, &find_data) != 0); HRESULT hres = GetLastError(); @@ -185,8 +205,7 @@ namespace crnlib } } - } - while (FindNextFileA(handle, &find_data) != 0); + } while (FindNextFileA(handle, &find_data) != 0); HRESULT hres = GetLastError(); @@ -344,4 +363,4 @@ namespace crnlib #error Unimplemented #endif -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_find_files.h b/crnlib/crn_find_files.h index 4cab3c6..9efe637 100644 --- a/crnlib/crn_find_files.h +++ b/crnlib/crn_find_files.h @@ -1,5 +1,25 @@ -// File: crn_win32_find_files.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -12,7 +32,7 @@ namespace crnlib public: struct file_desc { - inline file_desc(): + inline file_desc() : m_is_dir(false) { } @@ -42,7 +62,7 @@ namespace crnlib inline find_files() { - m_last_error = 0; // S_OK; + m_last_error = 0; // S_OK; } enum flags @@ -76,6 +96,5 @@ namespace crnlib bool find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level); - }; // class find_files - -} // namespace crnlib + }; // class find_files +} // namespace crnlib diff --git a/crnlib/crn_freeimage_image_utils.h b/crnlib/crn_freeimage_image_utils.h index 853c7e3..6e9de9d 100644 --- a/crnlib/crn_freeimage_image_utils.h +++ b/crnlib/crn_freeimage_image_utils.h @@ -1,5 +1,26 @@ -// File: crn_freeimage_image_utils.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // Note: This header file requires FreeImage/FreeImagePlus. 
#include "crn_image_utils.h" diff --git a/crnlib/crn_hash.cpp b/crnlib/crn_hash.cpp index 64db4b3..f7b2a76 100644 --- a/crnlib/crn_hash.cpp +++ b/crnlib/crn_hash.cpp @@ -1,4 +1,26 @@ -// File: crn_hash.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // See Paul Hsieh's page at: http://www.azillionmonkeys.com/qed/hash.html // Also see http://www.concentric.net/~Ttwang/tech/inthash.htm, // http://burtleburtle.net/bob/hash/integer.html @@ -71,4 +93,4 @@ namespace crnlib return hash; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_hash.h b/crnlib/crn_hash.h index 0fc7b40..a23940c 100644 --- a/crnlib/crn_hash.h +++ b/crnlib/crn_hash.h @@ -1,5 +1,25 @@ -// File: crn_hash.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -33,4 +53,4 @@ namespace crnlib a ^= (a >> 15); return a; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_hash_map.cpp b/crnlib/crn_hash_map.cpp index 00fe25a..38fd2e8 100644 --- a/crnlib/crn_hash_map.cpp +++ b/crnlib/crn_hash_map.cpp @@ -1,10 +1,32 @@ -// File: crn_hash_map.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_hash_map.h" #include "crn_rand.h" -namespace crnlib { +namespace crnlib +{ #if 0 class counted_obj { @@ -151,4 +173,4 @@ namespace crnlib { } #endif -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_hash_map.h b/crnlib/crn_hash_map.h index 5bb1b16..086391c 100644 --- a/crnlib/crn_hash_map.h +++ b/crnlib/crn_hash_map.h @@ -1,765 +1,1063 @@ -// File: crn_hash_map.h -// See Copyright Notice and license at the end of inc/crnlib.h -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // Notes: // stl-like hash map/hash set, with predictable performance across platforms/compilers/C run times/etc. // Hash function ref: http://www.brpreiss.com/books/opus4/html/page215.html // Compared for performance against VC9's std::hash_map. 
// Linear probing, auto resizes on ~50% load factor. // Uses Knuth's multiplicative method (Fibonacci hashing). + #pragma once + #include "crn_sparse_array.h" #include "crn_sparse_bit_array.h" #include "crn_hash.h" -namespace crnlib { -template -struct hasher { - inline size_t operator()(const T& key) const { return static_cast(key); } -}; - -template -struct bit_hasher { - inline size_t operator()(const T& key) const { return static_cast(fast_hash(&key, sizeof(key))); } -}; - -template -struct equal_to { - inline bool operator()(const T& a, const T& b) const { return a == b; } -}; - -// Important: The Hasher and Equals objects must be bitwise movable! -template , typename Equals = equal_to > -class hash_map { - friend class iterator; - friend class const_iterator; - - enum state { - cStateInvalid = 0, - cStateValid = 1 - }; - - enum { - cMinHashSize = 4U - }; - - public: - typedef hash_map hash_map_type; - typedef std::pair value_type; - typedef Key key_type; - typedef Value referent_type; - typedef Hasher hasher_type; - typedef Equals equals_type; - - hash_map() - : m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) { - } - - hash_map(const hash_map& other) - : m_values(other.m_values), - m_hash_shift(other.m_hash_shift), - m_hasher(other.m_hasher), - m_equals(other.m_equals), - m_num_valid(other.m_num_valid), - m_grow_threshold(other.m_grow_threshold) { - } - - hash_map& operator=(const hash_map& other) { - if (this == &other) - return *this; - - clear(); - - m_values = other.m_values; - m_hash_shift = other.m_hash_shift; - m_num_valid = other.m_num_valid; - m_grow_threshold = other.m_grow_threshold; - m_hasher = other.m_hasher; - m_equals = other.m_equals; - - return *this; - } - - inline ~hash_map() { - clear(); - } - - const Equals& get_equals() const { return m_equals; } - Equals& get_equals() { return m_equals; } - - void set_equals(const Equals& equals) { m_equals = equals; } - - const Hasher& get_hasher() const { return m_hasher; } - Hasher& 
get_hasher() { return m_hasher; } - - void set_hasher(const Hasher& hasher) { m_hasher = hasher; } - - inline void clear() { - if (!m_values.empty()) { - if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) { - node* p = &get_node(0); - node* p_end = p + m_values.size(); - - uint num_remaining = m_num_valid; - while (p != p_end) { - if (p->state) { - destruct_value_type(p); - num_remaining--; - if (!num_remaining) - break; - } - - p++; - } - } - - m_values.clear_no_destruction(); - - m_hash_shift = 32; - m_num_valid = 0; - m_grow_threshold = 0; - } - } - - inline void reset() { - if (!m_num_valid) - return; - - if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) { - node* p = &get_node(0); - node* p_end = p + m_values.size(); - - uint num_remaining = m_num_valid; - while (p != p_end) { - if (p->state) { - destruct_value_type(p); - p->state = cStateInvalid; - - num_remaining--; - if (!num_remaining) - break; +namespace crnlib +{ + template + struct hasher + { + inline size_t operator()(const T& key) const + { + return static_cast(key); } - - p++; - } - } else if (sizeof(node) <= 32) { - memset(&m_values[0], 0, m_values.size_in_bytes()); - } else { - node* p = &get_node(0); - node* p_end = p + m_values.size(); - - uint num_remaining = m_num_valid; - while (p != p_end) { - if (p->state) { - p->state = cStateInvalid; - - num_remaining--; - if (!num_remaining) - break; + }; + + template + struct bit_hasher + { + inline size_t operator()(const T& key) const + { + return static_cast(fast_hash(&key, sizeof(key))); + } + }; + + template + struct equal_to + { + inline bool operator()(const T& a, const T& b) const + { + return a == b; + } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! 
+ template, typename Equals = equal_to> + class hash_map + { + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map hash_map_type; + typedef std::pair value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) + { } - p++; - } - } - - m_num_valid = 0; - } - - inline uint size() { - return m_num_valid; - } - - inline uint get_table_size() { - return m_values.size(); - } - - inline bool empty() { - return !m_num_valid; - } - - inline void reserve(uint new_capacity) { - uint new_hash_size = math::maximum(1U, new_capacity); - - new_hash_size = new_hash_size * 2U; - - if (!math::is_power_of_2(new_hash_size)) - new_hash_size = math::next_pow2(new_hash_size); - - new_hash_size = math::maximum(cMinHashSize, new_hash_size); - - if (new_hash_size > m_values.size()) - rehash(new_hash_size); - } - - class const_iterator; - - class iterator { - friend class hash_map; - friend class hash_map::const_iterator; - - public: - inline iterator() - : m_pTable(nullptr), m_index(0) {} - inline iterator(hash_map_type& table, uint index) - : m_pTable(&table), m_index(index) {} - inline iterator(const iterator& other) - : m_pTable(other.m_pTable), m_index(other.m_index) {} - - inline iterator& operator=(const iterator& other) { - m_pTable = other.m_pTable; - m_index = other.m_index; - return *this; - } - - // post-increment - inline iterator operator++(int) { - iterator result(*this); - ++*this; - return result; - } - - // pre-increment - inline iterator& operator++() { - probe(); - return *this; - } - - inline value_type& operator*() const { return *get_cur(); } - inline value_type* operator->() const { return get_cur(); } - - inline bool operator==(const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index 
== b.m_index); } - inline bool operator!=(const iterator& b) const { return !(*this == b); } - inline bool operator==(const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } - inline bool operator!=(const const_iterator& b) const { return !(*this == b); } - - private: - hash_map_type* m_pTable; - uint m_index; - - inline value_type* get_cur() const { - CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); - CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); - - return &m_pTable->get_node(m_index); - } - - inline void probe() { - CRNLIB_ASSERT(m_pTable); - m_index = m_pTable->find_next(m_index); - } - }; - - class const_iterator { - friend class hash_map; - friend class hash_map::iterator; - - public: - inline const_iterator() - : m_pTable(nullptr), m_index(0) {} - inline const_iterator(const hash_map_type& table, uint index) - : m_pTable(&table), m_index(index) {} - inline const_iterator(const iterator& other) - : m_pTable(other.m_pTable), m_index(other.m_index) {} - inline const_iterator(const const_iterator& other) - : m_pTable(other.m_pTable), m_index(other.m_index) {} - - inline const_iterator& operator=(const const_iterator& other) { - m_pTable = other.m_pTable; - m_index = other.m_index; - return *this; - } - - inline const_iterator& operator=(const iterator& other) { - m_pTable = other.m_pTable; - m_index = other.m_index; - return *this; - } - - // post-increment - inline const_iterator operator++(int) { - const_iterator result(*this); - ++*this; - return result; - } - - // pre-increment - inline const_iterator& operator++() { - probe(); - return *this; - } - - inline const value_type& operator*() const { return *get_cur(); } - inline const value_type* operator->() const { return get_cur(); } - - inline bool operator==(const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } - inline bool operator!=(const const_iterator& b) const { return !(*this == b); } - 
inline bool operator==(const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } - inline bool operator!=(const iterator& b) const { return !(*this == b); } - - private: - const hash_map_type* m_pTable; - uint m_index; - - inline const value_type* get_cur() const { - CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); - CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); - - return &m_pTable->get_node(m_index); - } - - inline void probe() { - CRNLIB_ASSERT(m_pTable); - m_index = m_pTable->find_next(m_index); - } - }; - - inline const_iterator begin() const { - if (!m_num_valid) - return end(); + hash_map(const hash_map& other) : + m_values(other.m_values), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold) + { + } - return const_iterator(*this, find_next(-1)); - } + hash_map& operator=(const hash_map& other) + { + if (this == &other) + { + return *this; + } - inline const_iterator end() const { - return const_iterator(*this, m_values.size()); - } + clear(); - inline iterator begin() { - if (!m_num_valid) - return end(); + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; - return iterator(*this, find_next(-1)); - } + return *this; + } - inline iterator end() { - return iterator(*this, m_values.size()); - } + inline ~hash_map() + { + clear(); + } - // insert_result.first will always point to inserted key/value (or the already existing key/value). - // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). 
- typedef std::pair insert_result; + const Equals& get_equals() const + { + return m_equals; + } + Equals& get_equals() + { + return m_equals; + } - inline insert_result insert(const Key& k, const Value& v = Value()) { - insert_result result; - if (!insert_no_grow(result, k, v)) { - grow(); + void set_equals(const Equals& equals) + { + m_equals = equals; + } - // This must succeed. - if (!insert_no_grow(result, k, v)) { - CRNLIB_FAIL("insert() failed"); - } - } + const Hasher& get_hasher() const + { + return m_hasher; + } + Hasher& get_hasher() + { + return m_hasher; + } - return result; - } + void set_hasher(const Hasher& hasher) + { + m_hasher = hasher; + } - inline insert_result insert(const value_type& v) { - return insert(v.first, v.second); - } + inline void clear() + { + if (!m_values.empty()) + { + if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + { + break; + } + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = 32; + m_num_valid = 0; + m_grow_threshold = 0; + } + } - inline const_iterator find(const Key& k) const { - return const_iterator(*this, find_index(k)); - } + inline void reset() + { + if (!m_num_valid) + { + return; + } + + if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + { + break; + } + } + + p++; + } + } + else if (sizeof(node) <= 32) + { + memset(&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + 
if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + { + break; + } + } + + p++; + } + } + + m_num_valid = 0; + } - inline iterator find(const Key& k) { - return iterator(*this, find_index(k)); - } + inline uint size() + { + return m_num_valid; + } - inline bool erase(const Key& k) { - int i = find_index(k); + inline uint get_table_size() + { + return m_values.size(); + } - if (i >= static_cast(m_values.size())) - return false; + inline bool empty() + { + return !m_num_valid; + } - node* pDst = &get_node(i); - destruct_value_type(pDst); - pDst->state = cStateInvalid; + inline void reserve(uint new_capacity) + { + uint new_hash_size = math::maximum(1U, new_capacity); - m_num_valid--; + new_hash_size = new_hash_size * 2U; - for (;;) { - int r, j = i; + if (!math::is_power_of_2(new_hash_size)) + { + new_hash_size = math::next_pow2(new_hash_size); + } - node* pSrc = pDst; + new_hash_size = math::maximum(cMinHashSize, new_hash_size); - do { - if (!i) { - i = m_values.size() - 1; - pSrc = &get_node(i); - } else { - i--; - pSrc--; + if (new_hash_size > m_values.size()) + { + rehash(new_hash_size); + } } - if (!pSrc->state) - return true; - - r = hash_key(pSrc->first); - - } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); - - move_node(pDst, pSrc); - - pDst = pSrc; - } - } - - inline void swap(hash_map_type& other) { - m_values.swap(other.m_values); - std::swap(m_hash_shift, other.m_hash_shift); - std::swap(m_num_valid, other.m_num_valid); - std::swap(m_grow_threshold, other.m_grow_threshold); - std::swap(m_hasher, other.m_hasher); - std::swap(m_equals, other.m_equals); - } - - private: - struct node : public value_type { - uint8 state; - }; - - static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) { - if (CRNLIB_IS_BITWISE_COPYABLE(Key)) - memcpy(&pDst->first, &k, sizeof(Key)); - else - scalar_type::construct(&pDst->first, k); - - if (CRNLIB_IS_BITWISE_COPYABLE(Value)) - 
memcpy(&pDst->second, &v, sizeof(Value)); - else - scalar_type::construct(&pDst->second, v); - } - - static inline void construct_value_type(value_type* pDst, const value_type* pSrc) { - if ((CRNLIB_IS_BITWISE_COPYABLE(Key)) && (CRNLIB_IS_BITWISE_COPYABLE(Value))) { - memcpy(pDst, pSrc, sizeof(value_type)); - } else { - if (CRNLIB_IS_BITWISE_COPYABLE(Key)) - memcpy(&pDst->first, &pSrc->first, sizeof(Key)); - else - scalar_type::construct(&pDst->first, pSrc->first); - - if (CRNLIB_IS_BITWISE_COPYABLE(Value)) - memcpy(&pDst->second, &pSrc->second, sizeof(Value)); - else - scalar_type::construct(&pDst->second, pSrc->second); - } - } - - static inline void destruct_value_type(value_type* p) { - scalar_type::destruct(&p->first); - scalar_type::destruct(&p->second); - } - - // Moves *pSrc to *pDst efficiently. - // pDst should NOT be constructed on entry. - static inline void move_node(node* pDst, node* pSrc) { - CRNLIB_ASSERT(!pDst->state); - - if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) { - memcpy(pDst, pSrc, sizeof(node)); - } else { - if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) - memcpy(&pDst->first, &pSrc->first, sizeof(Key)); - else { - scalar_type::construct(&pDst->first, pSrc->first); - scalar_type::destruct(&pSrc->first); - } - - if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) - memcpy(&pDst->second, &pSrc->second, sizeof(Value)); - else { - scalar_type::construct(&pDst->second, pSrc->second); - scalar_type::destruct(&pSrc->second); - } - - pDst->state = cStateValid; - } + class const_iterator; + + class iterator + { + friend class hash_map; + friend class hash_map::const_iterator; + + public: + inline iterator() : + m_pTable(nullptr), m_index(0) + { + } + inline iterator(hash_map_type& table, uint index) : + m_pTable(&table), m_index(index) + { + } + inline iterator(const iterator& other) : + m_pTable(other.m_pTable), m_index(other.m_index) + { + } + + inline iterator& operator=(const iterator& other) + 
{ + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const + { + return *get_cur(); + } + inline value_type* operator->() const + { + return get_cur(); + } + + inline bool operator==(const iterator& b) const + { + return (m_pTable == b.m_pTable) && (m_index == b.m_index); + } + inline bool operator!=(const iterator& b) const + { + return !(*this == b); // built on operator==; writing "*this != b" here would re-enter this overload endlessly + } + inline bool operator==(const const_iterator& b) const + { + return (m_pTable == b.m_pTable) && (m_index == b.m_index); + } + inline bool operator!=(const const_iterator& b) const + { + return !(*this == b); // built on operator==; writing "*this != b" here would re-enter this overload endlessly + } + + private: + hash_map_type* m_pTable; + uint m_index; + + inline value_type* get_cur() const + { + CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); + CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + CRNLIB_ASSERT(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map; + friend class hash_map::iterator; + + public: + inline const_iterator() : + m_pTable(nullptr), m_index(0) + { + } + inline const_iterator(const hash_map_type& table, uint index) : + m_pTable(&table), m_index(index) + { + } + inline const_iterator(const iterator& other) : + m_pTable(other.m_pTable), m_index(other.m_index) + { + } + inline const_iterator(const const_iterator& other) : + m_pTable(other.m_pTable), m_index(other.m_index) + { + } + + inline const_iterator& operator=(const const_iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator=(const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; 
+ } + + // post-increment + inline const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + return *this; + } + + inline const value_type& operator*() const + { + return *get_cur(); + } + inline const value_type* operator->() const + { + return get_cur(); + } + + inline bool operator==(const const_iterator& b) const + { + return (m_pTable == b.m_pTable) && (m_index == b.m_index); + } + inline bool operator!=(const const_iterator& b) const + { + return !(*this == b); // built on operator==; writing "*this != b" here would re-enter this overload endlessly + } + inline bool operator==(const iterator& b) const + { + return (m_pTable == b.m_pTable) && (m_index == b.m_index); + } + inline bool operator!=(const iterator& b) const + { + return !(*this == b); // built on operator==; writing "*this != b" here would re-enter this overload endlessly + } + + private: + const hash_map_type* m_pTable; + uint m_index; + + inline const value_type* get_cur() const + { + CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); + CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + CRNLIB_ASSERT(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + { + return end(); + } + + return const_iterator(*this, find_next(-1)); + } - pSrc->state = cStateInvalid; - } + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } - struct raw_node { - inline raw_node() { - node* p = reinterpret_cast(this); - p->state = cStateInvalid; - } + inline iterator begin() + { + if (!m_num_valid) + { + return end(); + } - inline ~raw_node() { - node* p = reinterpret_cast(this); - if (p->state) - hash_map_type::destruct_value_type(p); - } + return iterator(*this, find_next(-1)); + } - inline raw_node(const raw_node& other) { - node* pDst = reinterpret_cast(this); - const node* pSrc = reinterpret_cast(&other); + inline iterator end() + { + return iterator(*this, 
m_values.size()); + } - if (pSrc->state) { - hash_map_type::construct_value_type(pDst, pSrc); - pDst->state = cStateValid; - } else - pDst->state = cStateInvalid; - } + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). + typedef std::pair insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + grow(); + + // This must succeed. + if (!insert_no_grow(result, k, v)) + { + CRNLIB_FAIL("insert() failed"); + } + } + + return result; + } - inline raw_node& operator=(const raw_node& rhs) { - if (this == &rhs) - return *this; + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } - node* pDst = reinterpret_cast(this); - const node* pSrc = reinterpret_cast(&rhs); + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } - if (pSrc->state) { - if (pDst->state) { - pDst->first = pSrc->first; - pDst->second = pSrc->second; - } else { - hash_map_type::construct_value_type(pDst, pSrc); - pDst->state = cStateValid; + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); } - } else if (pDst->state) { - hash_map_type::destruct_value_type(pDst); - pDst->state = cStateInvalid; - } - return *this; - } + inline bool erase(const Key& k) + { + int i = find_index(k); - uint8 m_bits[sizeof(node)]; - }; + if (i >= static_cast(m_values.size())) + { + return false; + } - typedef crnlib::vector node_vector; + node* pDst = &get_node(i); + destruct_value_type(pDst); + pDst->state = cStateInvalid; - node_vector m_values; - uint m_hash_shift; + m_num_valid--; - Hasher m_hasher; - Equals m_equals; + for (;;) + { + int r, j = i; - uint m_num_valid; + node* pSrc = pDst; - 
uint m_grow_threshold; + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } - inline int hash_key(const Key& k) const { - CRNLIB_ASSERT((1U << (32U - m_hash_shift)) == m_values.size()); + if (!pSrc->state) + { + return true; + } - uint hash = static_cast(m_hasher(k)); + r = hash_key(pSrc->first); - // Fibonacci hashing - hash = (2654435769U * hash) >> m_hash_shift; + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); - CRNLIB_ASSERT(hash < m_values.size()); - return hash; - } + move_node(pDst, pSrc); - inline const node& get_node(uint index) const { - return *reinterpret_cast(&m_values[index]); - } + pDst = pSrc; + } + } - inline node& get_node(uint index) { - return *reinterpret_cast(&m_values[index]); - } + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); + } - inline state get_node_state(uint index) const { - return static_cast(get_node(index).state); - } + private: + struct node : public value_type + { + uint8 state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (CRNLIB_IS_BITWISE_COPYABLE(Key)) + { + memcpy(&pDst->first, &k, sizeof(Key)); + } + else + { + scalar_type::construct(&pDst->first, k); + } + + if (CRNLIB_IS_BITWISE_COPYABLE(Value)) + { + memcpy(&pDst->second, &v, sizeof(Value)); + } + else + { + scalar_type::construct(&pDst->second, v); + } + } - inline void set_node_state(uint index, bool valid) { - get_node(index).state = valid; - } + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((CRNLIB_IS_BITWISE_COPYABLE(Key)) && (CRNLIB_IS_BITWISE_COPYABLE(Value))) + { + memcpy(pDst, pSrc, sizeof(value_type)); + } + else + { + if 
(CRNLIB_IS_BITWISE_COPYABLE(Key)) + { + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + } + else + { + scalar_type::construct(&pDst->first, pSrc->first); + } + + if (CRNLIB_IS_BITWISE_COPYABLE(Value)) + { + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + } + else + { + scalar_type::construct(&pDst->second, pSrc->second); + } + } + } - inline void grow() { - rehash(math::maximum(cMinHashSize, m_values.size() * 2U)); - } + static inline void destruct_value_type(value_type* p) + { + scalar_type::destruct(&p->first); + scalar_type::destruct(&p->second); + } - inline void rehash(uint new_hash_size) { - CRNLIB_ASSERT(new_hash_size >= m_num_valid); - CRNLIB_ASSERT(math::is_power_of_2(new_hash_size)); + // Moves *pSrc to *pDst efficiently. + // pDst should NOT be constructed on entry. + static inline void move_node(node* pDst, node* pSrc) + { + CRNLIB_ASSERT(!pDst->state); + + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy(pDst, pSrc, sizeof(node)); + } + else + { + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) + { + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + } + else + { + scalar_type::construct(&pDst->first, pSrc->first); + scalar_type::destruct(&pSrc->first); + } + + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + } + else + { + scalar_type::construct(&pDst->second, pSrc->second); + scalar_type::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + pSrc->state = cStateInvalid; + } - if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) - return; + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast(this); + p->state = cStateInvalid; + } + + inline ~raw_node() + { + node* p = reinterpret_cast(this); + if (p->state) + { + hash_map_type::destruct_value_type(p); + } + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast(this); + const node* pSrc = 
reinterpret_cast(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + { + pDst->state = cStateInvalid; + } + } + + inline raw_node& operator=(const raw_node& rhs) + { + if (this == &rhs) + { + return *this; + } + + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8 m_bits[sizeof(node)]; + }; + + typedef crnlib::vector node_vector; + + node_vector m_values; + uint m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + uint m_num_valid; + + uint m_grow_threshold; + + inline int hash_key(const Key& k) const + { + CRNLIB_ASSERT((1U << (32U - m_hash_shift)) == m_values.size()); + + uint hash = static_cast(m_hasher(k)); + + // Fibonacci hashing + hash = (2654435769U * hash) >> m_hash_shift; + + CRNLIB_ASSERT(hash < m_values.size()); + return hash; + } - hash_map new_map; - new_map.m_values.resize(new_hash_size); - new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size); - CRNLIB_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift))); - new_map.m_grow_threshold = UINT_MAX; + inline const node& get_node(uint index) const + { + return *reinterpret_cast(&m_values[index]); + } - node* pNode = reinterpret_cast(m_values.begin()); - node* pNode_end = pNode + m_values.size(); + inline node& get_node(uint index) + { + return *reinterpret_cast(&m_values[index]); + } - while (pNode != pNode_end) { - if (pNode->state) { - new_map.move_into(pNode); + inline state get_node_state(uint index) const + { + return static_cast(get_node(index).state); + } - if (new_map.m_num_valid == m_num_valid) - break; - } + inline void 
set_node_state(uint index, bool valid) + { + get_node(index).state = valid; + } - pNode++; - } + inline void grow() + { + rehash(math::maximum(cMinHashSize, m_values.size() * 2U)); + } - new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; + inline void rehash(uint new_hash_size) + { + CRNLIB_ASSERT(new_hash_size >= m_num_valid); + CRNLIB_ASSERT(math::is_power_of_2(new_hash_size)); - m_values.clear_no_destruction(); - m_hash_shift = 32; + if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) + { + return; + } - swap(new_map); - } + hash_map new_map; + new_map.m_values.resize(new_hash_size); + new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size); + CRNLIB_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift))); + new_map.m_grow_threshold = UINT_MAX; - inline uint find_next(int index) const { - index++; + node* pNode = reinterpret_cast(m_values.begin()); + node* pNode_end = pNode + m_values.size(); - if (index >= static_cast(m_values.size())) - return index; + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); - const node* pNode = &get_node(index); + if (new_map.m_num_valid == m_num_valid) + { + break; + } + } - for (;;) { - if (pNode->state) - break; + pNode++; + } - if (++index >= static_cast(m_values.size())) - break; + new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; - pNode++; - } + m_values.clear_no_destruction(); + m_hash_shift = 32; - return index; - } + swap(new_map); + } - inline uint find_index(const Key& k) const { - if (m_num_valid) { - int index = hash_key(k); - const node* pNode = &get_node(index); + inline uint find_next(int index) const + { + index++; - if (pNode->state) { - if (m_equals(pNode->first, k)) - return index; + if (index >= static_cast(m_values.size())) + { + return index; + } - const int orig_index = index; + const node* pNode = &get_node(index); - for (;;) { - if (!index) { - index = m_values.size() - 1; - pNode = &get_node(index); - } else { - index--; - 
pNode--; - } + for (;;) + { + if (pNode->state) + { + break; + } - if (index == orig_index) - break; + if (++index >= static_cast(m_values.size())) + { + break; + } - if (!pNode->state) - break; + pNode++; + } - if (m_equals(pNode->first, k)) return index; } - } - } - - return m_values.size(); - } - - inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) { - if (!m_values.size()) - return false; - - int index = hash_key(k); - node* pNode = &get_node(index); - - if (pNode->state) { - if (m_equals(pNode->first, k)) { - result.first = iterator(*this, index); - result.second = false; - return true; - } - - const int orig_index = index; - - for (;;) { - if (!index) { - index = m_values.size() - 1; - pNode = &get_node(index); - } else { - index--; - pNode--; - } - - if (orig_index == index) - return false; - if (!pNode->state) - break; - - if (m_equals(pNode->first, k)) { - result.first = iterator(*this, index); - result.second = false; - return true; + inline uint find_index(const Key& k) const + { + if (m_num_valid) + { + int index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + return index; + } + + const int orig_index = index; + + for (;;) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + { + break; + } + + if (!pNode->state) + { + break; + } + + if (m_equals(pNode->first, k)) + { + return index; + } + } + } + } + + return m_values.size(); } - } - } - - if (m_num_valid >= m_grow_threshold) - return false; - - construct_value_type(pNode, k, v); - - pNode->state = cStateValid; - - m_num_valid++; - CRNLIB_ASSERT(m_num_valid <= m_values.size()); - - result.first = iterator(*this, index); - result.second = true; - - return true; - } - inline void move_into(node* pNode) { - int index = hash_key(pNode->first); - node* pDst_node = &get_node(index); - - if 
(pDst_node->state) { - const int orig_index = index; - - for (;;) { - if (!index) { - index = m_values.size() - 1; - pDst_node = &get_node(index); - } else { - index--; - pDst_node--; + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!m_values.size()) + { + return false; + } + + int index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const int orig_index = index; + + for (;;) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + { + return false; + } + + if (!pNode->state) + { + break; + } + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + { + return false; + } + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + CRNLIB_ASSERT(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; } - if (index == orig_index) { - CRNLIB_ASSERT(false); - return; + inline void move_into(node* pNode) + { + int index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const int orig_index = index; + + for (;;) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + CRNLIB_ASSERT(false); + return; + } + + if (!pDst_node->state) + { + break; + } + } + } + + move_node(pDst_node, pNode); + + m_num_valid++; } - - if (!pDst_node->state) - break; - } + }; + + template + struct bitwise_movable> + { + enum + { + cFlag = true + }; + }; + + template + inline void swap(hash_map& a, hash_map& b) + { + a.swap(b); } - 
move_node(pDst_node, pNode); - - m_num_valid++; - } -}; - -template -struct bitwise_movable > { - enum { cFlag = true }; -}; - -template -inline void swap(hash_map& a, hash_map& b) { - a.swap(b); -} - -extern void hash_map_test(); - -} // namespace crnlib + extern void hash_map_test(); +} // namespace crnlib diff --git a/crnlib/crn_helpers.h b/crnlib/crn_helpers.h index 23cbb44..5de0658 100644 --- a/crnlib/crn_helpers.h +++ b/crnlib/crn_helpers.h @@ -1,42 +1,74 @@ -// File: crn_helpers.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */
 
 #pragma once
 
 #define CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(c) \
-  c(const c&); \
-  c& operator=(const c&);
-#define CRNLIB_NO_HEAP_ALLOC() \
- private: \
-  static void* operator new(size_t); \
-  static void* operator new[](size_t);
+    c(const c&); \
+    c& operator=(const c&);
+#define CRNLIB_NO_HEAP_ALLOC() \
+private: \
+    static void* operator new(size_t); \
+    static void* operator new[](size_t);
 
 namespace crnlib
 {
     namespace helpers
     {
-        template
+        template
         struct rel_ops
         {
-            friend bool operator!=(const T& x, const T& y) { return (!(x == y)); }
-            friend bool operator>(const T& x, const T& y) { return (y < x); }
-            friend bool operator<=(const T& x, const T& y) { return (!(y < x)); }
-            friend bool operator>=(const T& x, const T& y) { return (!(x < y)); }
+            friend bool operator!=(const T& x, const T& y)
+            {
+                return (!(x == y)); // built on T's operator==; writing `x != y` here would re-enter this friend forever
+            }
+            friend bool operator>(const T& x, const T& y)
+            {
+                return (y < x);
+            }
+            friend bool operator<=(const T& x, const T& y)
+            {
+                return (!(y < x)); // built on T's operator<; `y >= x` would route through the friend operator>= instead
+            }
+            friend bool operator>=(const T& x, const T& y)
+            {
+                return (!(x < y)); // built on T's operator<; writing `x >= y` here would re-enter this friend forever
+            }
         };
 
-        template
+        template
         inline T* construct(T* p)
         {
             return new (static_cast(p)) T;
         }
 
-        template
+        template
         inline T* construct(T* p, const U& init)
         {
             return new (static_cast(p)) T(init);
         }
 
-        template
+        template
         inline void construct_array(T* p, uint n)
         {
             T* q = p + n;
@@ -46,7 +78,7 @@ namespace crnlib
             }
         }
 
-        template
+        template
         inline void construct_array(T* p, uint n, const U& init)
         {
             T* q = p + n;
@@ -56,15 +88,16 @@ namespace crnlib
             }
         }
 
-        template
+        template
         inline void destruct(T* p)
         {
             (void)p;
             p->~T();
         }
 
-        template
-        inline void destruct_array(T* p, uint n) {
+        template
+        inline void destruct_array(T* p, uint n)
+        {
             T* q = p + n;
             for (; p != q; ++p)
             {
@@ -72,5 +105,5 @@ namespace crnlib
             }
         }
 
-    } // namespace helpers
-} // namespace crnlib
+    } // namespace helpers
+} // namespace crnlib
diff --git a/crnlib/crn_huffman_codes.cpp b/crnlib/crn_huffman_codes.cpp
index cd48f8a..c86b305 100644
--- a/crnlib/crn_huffman_codes.cpp
+++ b/crnlib/crn_huffman_codes.cpp @@ -1,5 +1,25 @@ -// File: crn_huffman_codes.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_huffman_codes.h" @@ -134,7 +154,10 @@ namespace crnlib struct huffman_work_tables { - enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; + enum + { + cMaxInternalNodes = cHuffmanMaxSupportedSyms + }; sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; @@ -185,7 +208,8 @@ namespace crnlib for (next = 1; next < n - 1; next++) { /* select first item for a pairing */ - if (leaf >= n || A[root] < A[leaf]) { + if (leaf >= n || A[root] < A[leaf]) + { A[next] = A[root]; A[root++] = next; } @@ -361,8 +385,7 @@ namespace crnlib num_nodes_remaining--; - } - while (num_nodes_remaining > 1); + } while (num_nodes_remaining > 1); CRNLIB_ASSERT(next_lowest_sym == num_used_syms); CRNLIB_ASSERT((queue_end - queue_front) == 1); @@ -428,4 +451,4 @@ namespace crnlib #endif return true; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_huffman_codes.h b/crnlib/crn_huffman_codes.h index 41964d8..42ef9ce 100644 --- a/crnlib/crn_huffman_codes.h +++ b/crnlib/crn_huffman_codes.h @@ -1,5 +1,25 @@ -// File: crn_huffman_codes.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. 
+ * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -13,4 +33,4 @@ namespace crnlib CRN_EXPORT void free_generate_huffman_codes_tables(void* p); CRN_EXPORT bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_image.h b/crnlib/crn_image.h index e5ea98b..db3e78c 100644 --- a/crnlib/crn_image.h +++ b/crnlib/crn_image.h @@ -1,635 +1,883 @@ -// File: crn_image.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_color.h" #include "crn_vec.h" #include "crn_pixel_format.h" #include "crn_rect.h" -namespace crnlib { -template -class image { - public: - typedef color_type color_t; - - typedef crnlib::vector pixel_buf_t; - - image() - : m_width(0), - m_height(0), - m_pitch(0), - m_total(0), - m_comp_flags(pixel_format_helpers::cDefaultCompFlags), - m_pPixels(nullptr) { - } - - // pitch is in PIXELS, not bytes. - image(uint width, uint height, uint pitch = UINT_MAX, const color_type& background = color_type::make_black(), uint flags = pixel_format_helpers::cDefaultCompFlags) - : m_comp_flags(flags) { - CRNLIB_ASSERT((width > 0) && (height > 0)); - if (pitch == UINT_MAX) - pitch = width; - - m_pixel_buf.resize(pitch * height); - - m_width = width; - m_height = height; - m_pitch = pitch; - m_total = m_pitch * m_height; - - m_pPixels = &m_pixel_buf.front(); - - set_all(background); - } - - // pitch is in PIXELS, not bytes. - image(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { - alias(pPixels, width, height, pitch, flags); - } - - image& operator=(const image& other) { - if (this == &other) - return *this; - - if (other.m_pixel_buf.empty()) { - // This doesn't look very safe - let's make a new instance. 
- //m_pixel_buf.clear(); - //m_pPixels = other.m_pPixels; - - const uint total_pixels = other.m_pitch * other.m_height; - if ((total_pixels) && (other.m_pPixels)) { - m_pixel_buf.resize(total_pixels); - m_pixel_buf.insert(0, other.m_pPixels, m_pixel_buf.size()); - m_pPixels = &m_pixel_buf.front(); - } else { - m_pixel_buf.clear(); - m_pPixels = nullptr; - } - } else { - m_pixel_buf = other.m_pixel_buf; - m_pPixels = &m_pixel_buf.front(); - } - - m_width = other.m_width; - m_height = other.m_height; - m_pitch = other.m_pitch; - m_total = other.m_total; - m_comp_flags = other.m_comp_flags; - - return *this; - } - - image(const image& other) - : m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(nullptr) { - *this = other; - } - - // pitch is in PIXELS, not bytes. - void alias(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { - m_pixel_buf.clear(); - - m_pPixels = pPixels; - - m_width = width; - m_height = height; - m_pitch = (pitch == UINT_MAX) ? width : pitch; - m_total = m_pitch * m_height; - m_comp_flags = flags; - } - - // pitch is in PIXELS, not bytes. - bool grant_ownership(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { - if (pitch == UINT_MAX) - pitch = width; - - if ((!pPixels) || (!width) || (!height) || (pitch < width)) { - CRNLIB_ASSERT(0); - return false; - } +namespace crnlib +{ + template + class image + { + public: + typedef color_type color_t; + + typedef crnlib::vector pixel_buf_t; + + image() : + m_width(0), + m_height(0), + m_pitch(0), + m_total(0), + m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_pPixels(nullptr) + { + } - if (pPixels == get_ptr()) { - CRNLIB_ASSERT(0); - return false; - } + // pitch is in PIXELS, not bytes. 
+ image(uint width, uint height, uint pitch = UINT_MAX, const color_type& background = color_type::make_black(), uint flags = pixel_format_helpers::cDefaultCompFlags) : + m_comp_flags(flags) + { + CRNLIB_ASSERT((width > 0) && (height > 0)); + if (pitch == UINT_MAX) + { + pitch = width; + } - clear(); - - if (!m_pixel_buf.grant_ownership(pPixels, height * pitch, height * pitch)) - return false; - - m_pPixels = pPixels; - - m_width = width; - m_height = height; - m_pitch = pitch; - m_total = pitch * height; - m_comp_flags = flags; - - return true; - } - - void clear() { - m_pPixels = nullptr; - m_pixel_buf.clear(); - m_width = 0; - m_height = 0; - m_pitch = 0; - m_total = 0; - m_comp_flags = pixel_format_helpers::cDefaultCompFlags; - } - - inline bool is_valid() const { return m_total > 0; } - - inline pixel_format_helpers::component_flags get_comp_flags() const { return static_cast(m_comp_flags); } - inline void set_comp_flags(pixel_format_helpers::component_flags new_flags) { m_comp_flags = new_flags; } - inline void reset_comp_flags() { m_comp_flags = pixel_format_helpers::cDefaultCompFlags; } - - inline bool is_component_valid(uint index) const { - CRNLIB_ASSERT(index < 4U); - return utils::is_flag_set(m_comp_flags, index); - } - inline void set_component_valid(uint index, bool state) { - CRNLIB_ASSERT(index < 4U); - utils::set_flag(m_comp_flags, index, state); - } - - inline bool has_rgb() const { return is_component_valid(0) || is_component_valid(1) || is_component_valid(2); } - inline bool has_alpha() const { return is_component_valid(3); } - - inline bool is_grayscale() const { return utils::is_bit_set(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale); } - inline void set_grayscale(bool state) { utils::set_bit(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale, state); } - - void set_all(const color_type& c) { - for (uint i = 0; i < m_total; i++) - m_pPixels[i] = c; - } - - void flip_x() { - const uint half_width = m_width / 2; - for (uint y = 0; y 
< m_height; y++) { - for (uint x = 0; x < half_width; x++) { - color_type c((*this)(x, y)); - (*this)(x, y) = (*this)(m_width - 1 - x, y); - (*this)(m_width - 1 - x, y) = c; - } - } - } - - void flip_y() { - const uint half_height = m_height / 2; - for (uint y = 0; y < half_height; y++) { - for (uint x = 0; x < m_width; x++) { - color_type c((*this)(x, y)); - (*this)(x, y) = (*this)(x, m_height - 1 - y); - (*this)(x, m_height - 1 - y) = c; - } - } - } - - void convert_to_grayscale() { - for (uint y = 0; y < m_height; y++) - for (uint x = 0; x < m_width; x++) { - color_type c((*this)(x, y)); - typename color_type::component_t l = static_cast(c.get_luma()); - c.r = l; - c.g = l; - c.b = l; - (*this)(x, y) = c; - } - - set_grayscale(true); - } - - void swizzle(uint r, uint g, uint b, uint a) { - for (uint y = 0; y < m_height; y++) - for (uint x = 0; x < m_width; x++) { - const color_type& c = (*this)(x, y); - - (*this)(x, y) = color_type(c[r], c[g], c[b], c[a]); - } - } - - void set_alpha_to_luma() { - for (uint y = 0; y < m_height; y++) - for (uint x = 0; x < m_width; x++) { - color_type c((*this)(x, y)); - typename color_type::component_t l = static_cast(c.get_luma()); - c.a = l; - (*this)(x, y) = c; - } - - set_component_valid(3, true); - } - - bool extract_block(color_type* pDst, uint x, uint y, uint w, uint h, bool flip_xy = false) const { - if ((x >= m_width) || (y >= m_height)) { - CRNLIB_ASSERT(0); - return false; - } + m_pixel_buf.resize(pitch * height); - if (flip_xy) { - for (uint y_ofs = 0; y_ofs < h; y_ofs++) - for (uint x_ofs = 0; x_ofs < w; x_ofs++) - pDst[x_ofs * h + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); // 5/4/12 - this was incorrectly x_ofs * 4 - } else if (((x + w) > m_width) || ((y + h) > m_height)) { - for (uint y_ofs = 0; y_ofs < h; y_ofs++) - for (uint x_ofs = 0; x_ofs < w; x_ofs++) - *pDst++ = get_clamped(x_ofs + x, y_ofs + y); - } else { - const color_type* pSrc = get_scanline(y) + x; - - for (uint i = h; i; i--) { - memcpy(pDst, pSrc, w 
* sizeof(color_type)); - pDst += w; - - pSrc += m_pitch; - } - } + m_width = width; + m_height = height; + m_pitch = pitch; + m_total = m_pitch * m_height; - return true; - } + m_pPixels = &m_pixel_buf.front(); - // No clipping! - void unclipped_fill_box(uint x, uint y, uint w, uint h, const color_type& c) { - if (((x + w) > m_width) || ((y + h) > m_height)) { - CRNLIB_ASSERT(0); - return; - } - - color_type* p = get_scanline(y) + x; - - for (uint i = h; i; i--) { - color_type* q = p; - for (uint j = w; j; j--) - *q++ = c; - p += m_pitch; - } - } - - void draw_rect(int x, int y, uint width, uint height, const color_type& c) { - draw_line(x, y, x + width - 1, y, c); - draw_line(x, y, x, y + height - 1, c); - draw_line(x + width - 1, y, x + width - 1, y + height - 1, c); - draw_line(x, y + height - 1, x + width - 1, y + height - 1, c); - } - - // No clipping! - bool unclipped_blit(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) { - if ((!is_valid()) || (!src.is_valid())) { - CRNLIB_ASSERT(0); - return false; - } + set_all(background); + } - if (((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height())) { - CRNLIB_ASSERT(0); - return false; - } + // pitch is in PIXELS, not bytes. + image(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + alias(pPixels, width, height, pitch, flags); + } - if (((dst_x + src_w) > get_width()) || ((dst_y + src_h) > get_height())) { - CRNLIB_ASSERT(0); - return false; - } + image& operator=(const image& other) + { + if (this == &other) + { + return *this; + } + + if (other.m_pixel_buf.empty()) + { + // This doesn't look very safe - let's make a new instance. 
+ //m_pixel_buf.clear(); + //m_pPixels = other.m_pPixels; + + const uint total_pixels = other.m_pitch * other.m_height; + if ((total_pixels) && (other.m_pPixels)) + { + m_pixel_buf.resize(total_pixels); + m_pixel_buf.insert(0, other.m_pPixels, m_pixel_buf.size()); + m_pPixels = &m_pixel_buf.front(); + } + else + { + m_pixel_buf.clear(); + m_pPixels = nullptr; + } + } + else + { + m_pixel_buf = other.m_pixel_buf; + m_pPixels = &m_pixel_buf.front(); + } + + m_width = other.m_width; + m_height = other.m_height; + m_pitch = other.m_pitch; + m_total = other.m_total; + m_comp_flags = other.m_comp_flags; + + return *this; + } - const color_type* pS = &src(src_x, src_y); - color_type* pD = &(*this)(dst_x, dst_y); + image(const image& other) : + m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(nullptr) + { + *this = other; + } - const uint bytes_to_copy = src_w * sizeof(color_type); - for (uint i = src_h; i; i--) { - memcpy(pD, pS, bytes_to_copy); + // pitch is in PIXELS, not bytes. + void alias(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + m_pixel_buf.clear(); - pS += src.get_pitch(); - pD += get_pitch(); - } + m_pPixels = pPixels; - return true; - } + m_width = width; + m_height = height; + m_pitch = (pitch == UINT_MAX) ? width : pitch; + m_total = m_pitch * m_height; + m_comp_flags = flags; + } - // With clipping. - bool blit(int dst_x, int dst_y, const image& src) { - if ((!is_valid()) || (!src.is_valid())) { - CRNLIB_ASSERT(0); - return false; - } + // pitch is in PIXELS, not bytes. 
+ bool grant_ownership(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + if (pitch == UINT_MAX) + { + pitch = width; + } + + if ((!pPixels) || (!width) || (!height) || (pitch < width)) + { + CRNLIB_ASSERT(0); + return false; + } + + if (pPixels == get_ptr()) + { + CRNLIB_ASSERT(0); + return false; + } + + clear(); + + if (!m_pixel_buf.grant_ownership(pPixels, height * pitch, height * pitch)) + { + return false; + } + + m_pPixels = pPixels; + + m_width = width; + m_height = height; + m_pitch = pitch; + m_total = pitch * height; + m_comp_flags = flags; + + return true; + } - int src_x = 0; - int src_y = 0; + void clear() + { + m_pPixels = nullptr; + m_pixel_buf.clear(); + m_width = 0; + m_height = 0; + m_pitch = 0; + m_total = 0; + m_comp_flags = pixel_format_helpers::cDefaultCompFlags; + } - if (dst_x < 0) { - src_x = -dst_x; - if (src_x >= static_cast(src.get_width())) - return false; - dst_x = 0; - } + inline bool is_valid() const + { + return m_total > 0; + } - if (dst_y < 0) { - src_y = -dst_y; - if (src_y >= static_cast(src.get_height())) - return false; - dst_y = 0; - } + inline pixel_format_helpers::component_flags get_comp_flags() const + { + return static_cast(m_comp_flags); + } + inline void set_comp_flags(pixel_format_helpers::component_flags new_flags) + { + m_comp_flags = new_flags; + } + inline void reset_comp_flags() + { + m_comp_flags = pixel_format_helpers::cDefaultCompFlags; + } - if ((dst_x >= (int)m_width) || (dst_y >= (int)m_height)) - return false; + inline bool is_component_valid(uint index) const + { + CRNLIB_ASSERT(index < 4U); + return utils::is_flag_set(m_comp_flags, index); + } + inline void set_component_valid(uint index, bool state) + { + CRNLIB_ASSERT(index < 4U); + utils::set_flag(m_comp_flags, index, state); + } - uint width = math::minimum(m_width - dst_x, src.get_width() - src_x); - uint height = math::minimum(m_height - dst_y, src.get_height() - src_y); 
+ inline bool has_rgb() const + { + return is_component_valid(0) || is_component_valid(1) || is_component_valid(2); + } + inline bool has_alpha() const + { + return is_component_valid(3); + } - bool success = unclipped_blit(src_x, src_y, width, height, dst_x, dst_y, src); - (void)success; - CRNLIB_ASSERT(success); + inline bool is_grayscale() const + { + return utils::is_bit_set(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale); + } + inline void set_grayscale(bool state) + { + utils::set_bit(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale, state); + } - return true; - } + void set_all(const color_type& c) + { + for (uint i = 0; i < m_total; i++) + { + m_pPixels[i] = c; + } + } - // With clipping. - bool blit(int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, const image& src) { - if ((!is_valid()) || (!src.is_valid())) { - CRNLIB_ASSERT(0); - return false; - } + void flip_x() + { + const uint half_width = m_width / 2; + for (uint y = 0; y < m_height; y++) + { + for (uint x = 0; x < half_width; x++) + { + color_type c((*this)(x, y)); + (*this)(x, y) = (*this)(m_width - 1 - x, y); + (*this)(m_width - 1 - x, y) = c; + } + } + } - rect src_rect(src_x, src_y, src_x + src_w, src_y + src_h); - if (!src_rect.intersect(src.get_bounds())) - return false; + void flip_y() + { + const uint half_height = m_height / 2; + for (uint y = 0; y < half_height; y++) + { + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + (*this)(x, y) = (*this)(x, m_height - 1 - y); + (*this)(x, m_height - 1 - y) = c; + } + } + } - rect dst_rect(dst_x, dst_y, dst_x + src_rect.get_width(), dst_y + src_rect.get_height()); - if (!dst_rect.intersect(get_bounds())) - return false; + void convert_to_grayscale() + { + for (uint y = 0; y < m_height; y++) + { + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + typename color_type::component_t l = static_cast(c.get_luma()); + c.r = l; + c.g = l; + c.b = l; + (*this)(x, y) = c; + } + } + + 
set_grayscale(true); + } - bool success = unclipped_blit( - src_rect.get_left(), src_rect.get_top(), - math::minimum(src_rect.get_width(), dst_rect.get_width()), math::minimum(src_rect.get_height(), dst_rect.get_height()), - dst_rect.get_left(), dst_rect.get_top(), src); - (void)success; - CRNLIB_ASSERT(success); + void swizzle(uint r, uint g, uint b, uint a) + { + for (uint y = 0; y < m_height; y++) + { + for (uint x = 0; x < m_width; x++) + { + const color_type& c = (*this)(x, y); + + (*this)(x, y) = color_type(c[r], c[g], c[b], c[a]); + } + } + } - return true; - } + void set_alpha_to_luma() + { + for (uint y = 0; y < m_height; y++) + { + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + typename color_type::component_t l = static_cast(c.get_luma()); + c.a = l; + (*this)(x, y) = c; + } + } + + set_component_valid(3, true); + } - // In-place resize of image dimensions (cropping). - bool resize(uint new_width, uint new_height, uint new_pitch = UINT_MAX, const color_type background = color_type::make_black()) { - if (new_pitch == UINT_MAX) - new_pitch = new_width; + bool extract_block(color_type* pDst, uint x, uint y, uint w, uint h, bool flip_xy = false) const + { + if ((x >= m_width) || (y >= m_height)) + { + CRNLIB_ASSERT(0); + return false; + } + + if (flip_xy) + { + for (uint y_ofs = 0; y_ofs < h; y_ofs++) + { + for (uint x_ofs = 0; x_ofs < w; x_ofs++) + { + pDst[x_ofs * h + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); // 5/4/12 - this was incorrectly x_ofs * 4 + } + } + } + else if (((x + w) > m_width) || ((y + h) > m_height)) + { + for (uint y_ofs = 0; y_ofs < h; y_ofs++) + { + for (uint x_ofs = 0; x_ofs < w; x_ofs++) + { + *pDst++ = get_clamped(x_ofs + x, y_ofs + y); + } + } + } + else + { + const color_type* pSrc = get_scanline(y) + x; + + for (uint i = h; i; i--) + { + memcpy(pDst, pSrc, w * sizeof(color_type)); + pDst += w; + + pSrc += m_pitch; + } + } + + return true; + } - if ((new_width == m_width) && (new_height == m_height) && 
(new_pitch == m_pitch)) - return true; + // No clipping! + void unclipped_fill_box(uint x, uint y, uint w, uint h, const color_type& c) + { + if (((x + w) > m_width) || ((y + h) > m_height)) + { + CRNLIB_ASSERT(0); + return; + } + + color_type* p = get_scanline(y) + x; + + for (uint i = h; i; i--) + { + color_type* q = p; + for (uint j = w; j; j--) + { + *q++ = c; + } + p += m_pitch; + } + } - if ((!new_width) || (!new_height) || (!new_pitch)) { - clear(); - return false; - } + void draw_rect(int x, int y, uint width, uint height, const color_type& c) + { + draw_line(x, y, x + width - 1, y, c); + draw_line(x, y, x, y + height - 1, c); + draw_line(x + width - 1, y, x + width - 1, y + height - 1, c); + draw_line(x, y + height - 1, x + width - 1, y + height - 1, c); + } - pixel_buf_t existing_pixels; - existing_pixels.swap(m_pixel_buf); + // No clipping! + bool unclipped_blit(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) + { + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); + return false; + } + + if (((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height())) + { + CRNLIB_ASSERT(0); + return false; + } + + if (((dst_x + src_w) > get_width()) || ((dst_y + src_h) > get_height())) + { + CRNLIB_ASSERT(0); + return false; + } + + const color_type* pS = &src(src_x, src_y); + color_type* pD = &(*this)(dst_x, dst_y); + + const uint bytes_to_copy = src_w * sizeof(color_type); + for (uint i = src_h; i; i--) + { + memcpy(pD, pS, bytes_to_copy); + + pS += src.get_pitch(); + pD += get_pitch(); + } + + return true; + } - if (!m_pixel_buf.try_resize(new_height * new_pitch)) { - clear(); - return false; - } + // With clipping. 
+ bool blit(int dst_x, int dst_y, const image& src) + { + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); + return false; + } + + int src_x = 0; + int src_y = 0; + + if (dst_x < 0) + { + src_x = -dst_x; + if (src_x >= static_cast(src.get_width())) + { + return false; + } + dst_x = 0; + } + + if (dst_y < 0) + { + src_y = -dst_y; + if (src_y >= static_cast(src.get_height())) + { + return false; + } + dst_y = 0; + } + + if ((dst_x >= (int)m_width) || (dst_y >= (int)m_height)) + { + return false; + } + + uint width = math::minimum(m_width - dst_x, src.get_width() - src_x); + uint height = math::minimum(m_height - dst_y, src.get_height() - src_y); + + bool success = unclipped_blit(src_x, src_y, width, height, dst_x, dst_y, src); + (void)success; + CRNLIB_ASSERT(success); + + return true; + } - for (uint y = 0; y < new_height; y++) { - for (uint x = 0; x < new_width; x++) { - if ((x < m_width) && (y < m_height)) - m_pixel_buf[x + y * new_pitch] = existing_pixels[x + y * m_pitch]; - else - m_pixel_buf[x + y * new_pitch] = background; - } - } + // With clipping. 
+ bool blit(int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, const image& src) + { + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); + return false; + } + + rect src_rect(src_x, src_y, src_x + src_w, src_y + src_h); + if (!src_rect.intersect(src.get_bounds())) + { + return false; + } + + rect dst_rect(dst_x, dst_y, dst_x + src_rect.get_width(), dst_y + src_rect.get_height()); + if (!dst_rect.intersect(get_bounds())) + { + return false; + } + + bool success = unclipped_blit( + src_rect.get_left(), src_rect.get_top(), + math::minimum(src_rect.get_width(), dst_rect.get_width()), math::minimum(src_rect.get_height(), dst_rect.get_height()), + dst_rect.get_left(), dst_rect.get_top(), src); + (void)success; + CRNLIB_ASSERT(success); + + return true; + } - m_width = new_width; - m_height = new_height; - m_pitch = new_pitch; - m_total = new_pitch * new_height; - m_pPixels = &m_pixel_buf.front(); + // In-place resize of image dimensions (cropping). + bool resize(uint new_width, uint new_height, uint new_pitch = UINT_MAX, const color_type background = color_type::make_black()) + { + if (new_pitch == UINT_MAX) + { + new_pitch = new_width; + } + + if ((new_width == m_width) && (new_height == m_height) && (new_pitch == m_pitch)) + { + return true; + } + + if ((!new_width) || (!new_height) || (!new_pitch)) + { + clear(); + return false; + } + + pixel_buf_t existing_pixels; + existing_pixels.swap(m_pixel_buf); + + if (!m_pixel_buf.try_resize(new_height * new_pitch)) + { + clear(); + return false; + } + + for (uint y = 0; y < new_height; y++) + { + for (uint x = 0; x < new_width; x++) + { + if ((x < m_width) && (y < m_height)) + { + m_pixel_buf[x + y * new_pitch] = existing_pixels[x + y * m_pitch]; + } + else + { + m_pixel_buf[x + y * new_pitch] = background; + } + } + } + + m_width = new_width; + m_height = new_height; + m_pitch = new_pitch; + m_total = new_pitch * new_height; + m_pPixels = &m_pixel_buf.front(); + + return true; + } - return true; 
- } + inline uint get_width() const + { + return m_width; + } + inline uint get_height() const + { + return m_height; + } + inline uint get_total_pixels() const + { + return m_width * m_height; + } - inline uint get_width() const { return m_width; } - inline uint get_height() const { return m_height; } - inline uint get_total_pixels() const { return m_width * m_height; } + inline rect get_bounds() const + { + return rect(0, 0, m_width, m_height); + } - inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } + inline uint get_pitch() const + { + return m_pitch; + } + inline uint get_pitch_in_bytes() const + { + return m_pitch * sizeof(color_type); + } - inline uint get_pitch() const { return m_pitch; } - inline uint get_pitch_in_bytes() const { return m_pitch * sizeof(color_type); } + // Returns pitch * height, NOT width * height! + inline uint get_total() const + { + return m_total; + } - // Returns pitch * height, NOT width * height! - inline uint get_total() const { return m_total; } + inline uint get_block_width(uint block_size) const + { + return (m_width + block_size - 1) / block_size; + } + inline uint get_block_height(uint block_size) const + { + return (m_height + block_size - 1) / block_size; + } + inline uint get_total_blocks(uint block_size) const + { + return get_block_width(block_size) * get_block_height(block_size); + } - inline uint get_block_width(uint block_size) const { return (m_width + block_size - 1) / block_size; } - inline uint get_block_height(uint block_size) const { return (m_height + block_size - 1) / block_size; } - inline uint get_total_blocks(uint block_size) const { return get_block_width(block_size) * get_block_height(block_size); } + inline uint get_size_in_bytes() const + { + return sizeof(color_type) * m_total; + } - inline uint get_size_in_bytes() const { return sizeof(color_type) * m_total; } + inline const color_type* get_pixels() const + { + return m_pPixels; + } + inline color_type* get_pixels() + { + return 
m_pPixels; + } - inline const color_type* get_pixels() const { return m_pPixels; } - inline color_type* get_pixels() { return m_pPixels; } + inline const color_type& operator()(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } - inline const color_type& operator()(uint x, uint y) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - return m_pPixels[x + y * m_pitch]; - } + inline color_type& operator()(uint x, uint y) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } - inline color_type& operator()(uint x, uint y) { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - return m_pPixels[x + y * m_pitch]; - } + inline const color_type& get_unclamped(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } - inline const color_type& get_unclamped(uint x, uint y) const { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - return m_pPixels[x + y * m_pitch]; - } + inline const color_type& get_clamped(int x, int y) const + { + x = math::clamp(x, 0, m_width - 1); + y = math::clamp(y, 0, m_height - 1); + return m_pPixels[x + y * m_pitch]; + } - inline const color_type& get_clamped(int x, int y) const { - x = math::clamp(x, 0, m_width - 1); - y = math::clamp(y, 0, m_height - 1); - return m_pPixels[x + y * m_pitch]; - } + // Sample image with bilinear filtering. + // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. 
+ void get_filtered(float x, float y, color_type& result) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floor(x); + int iy = (int)floor(y); + float wx = x - ix; + float wy = y - iy; + + color_type a(get_clamped(ix, iy)); + color_type b(get_clamped(ix + 1, iy)); + color_type c(get_clamped(ix, iy + 1)); + color_type d(get_clamped(ix + 1, iy + 1)); + + for (uint i = 0; i < 4; i++) + { + double top = math::lerp(a[i], b[i], wx); + double bot = math::lerp(c[i], d[i], wx); + double m = math::lerp(top, bot, wy); + + if (!color_type::component_traits::cFloat) + { + m += .5f; + } + + result.set_component(i, static_cast(m)); + } + } - // Sample image with bilinear filtering. - // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. - void get_filtered(float x, float y, color_type& result) const { - x -= .5f; - y -= .5f; + void get_filtered(float x, float y, vec4F& result) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floor(x); + int iy = (int)floor(y); + float wx = x - ix; + float wy = y - iy; + + color_type a(get_clamped(ix, iy)); + color_type b(get_clamped(ix + 1, iy)); + color_type c(get_clamped(ix, iy + 1)); + color_type d(get_clamped(ix + 1, iy + 1)); + + for (uint i = 0; i < 4; i++) + { + float top = math::lerp(a[i], b[i], wx); + float bot = math::lerp(c[i], d[i], wx); + float m = math::lerp(top, bot, wy); + + result[i] = m; + } + } - int ix = (int)floor(x); - int iy = (int)floor(y); - float wx = x - ix; - float wy = y - iy; + inline void set_pixel_unclipped(uint x, uint y, const color_type& c) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + m_pPixels[x + y * m_pitch] = c; + } - color_type a(get_clamped(ix, iy)); - color_type b(get_clamped(ix + 1, iy)); - color_type c(get_clamped(ix, iy + 1)); - color_type d(get_clamped(ix + 1, iy + 1)); + inline void set_pixel_clipped(int x, int y, const color_type& c) + { + if ((static_cast(x) >= m_width) || (static_cast(y) >= m_height)) + { + return; 
+ } - for (uint i = 0; i < 4; i++) { - double top = math::lerp(a[i], b[i], wx); - double bot = math::lerp(c[i], d[i], wx); - double m = math::lerp(top, bot, wy); + m_pPixels[x + y * m_pitch] = c; + } - if (!color_type::component_traits::cFloat) - m += .5f; + inline const color_type* get_scanline(uint y) const + { + CRNLIB_ASSERT(y < m_height); + return &m_pPixels[y * m_pitch]; + } - result.set_component(i, static_cast(m)); - } - } + inline color_type* get_scanline(uint y) + { + CRNLIB_ASSERT(y < m_height); + return &m_pPixels[y * m_pitch]; + } - void get_filtered(float x, float y, vec4F& result) const { - x -= .5f; - y -= .5f; + inline const color_type* get_ptr() const + { + return m_pPixels; + } - int ix = (int)floor(x); - int iy = (int)floor(y); - float wx = x - ix; - float wy = y - iy; + inline color_type* get_ptr() + { + return m_pPixels; + } - color_type a(get_clamped(ix, iy)); - color_type b(get_clamped(ix + 1, iy)); - color_type c(get_clamped(ix, iy + 1)); - color_type d(get_clamped(ix + 1, iy + 1)); + inline void swap(image& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + std::swap(m_total, other.m_total); + std::swap(m_comp_flags, other.m_comp_flags); + std::swap(m_pPixels, other.m_pPixels); + m_pixel_buf.swap(other.m_pixel_buf); + } - for (uint i = 0; i < 4; i++) { - float top = math::lerp(a[i], b[i], wx); - float bot = math::lerp(c[i], d[i], wx); - float m = math::lerp(top, bot, wy); + void draw_line(int xs, int ys, int xe, int ye, const color_type& color) + { + if (xs > xe) + { + std::swap(xs, xe); + std::swap(ys, ye); + } + + int dx = xe - xs, dy = ye - ys; + if (!dx) + { + if (ys > ye) + { + std::swap(ys, ye); + } + for (int i = ys; i <= ye; i++) + { + set_pixel_clipped(xs, i, color); + } + } + else if (!dy) + { + for (int i = xs; i < xe; i++) + { + set_pixel_clipped(i, ys, color); + } + } + else if (dy > 0) + { + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, 
e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); + rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); + } + } + else + { + dy = -dy; + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); + rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); + } + } + } - result[i] = m; - } - } - - inline void set_pixel_unclipped(uint x, uint y, const color_type& c) { - CRNLIB_ASSERT((x < m_width) && (y < m_height)); - m_pPixels[x + y * m_pitch] = c; - } - - inline void set_pixel_clipped(int x, int y, const color_type& c) { - if ((static_cast(x) >= m_width) || (static_cast(y) >= m_height)) - return; - - m_pPixels[x + y * m_pitch] = c; - } - - inline const color_type* get_scanline(uint y) const { - CRNLIB_ASSERT(y < m_height); - return &m_pPixels[y * m_pitch]; - } - - inline color_type* get_scanline(uint y) { - CRNLIB_ASSERT(y < m_height); - return &m_pPixels[y * m_pitch]; - } - - inline const color_type* get_ptr() const { - return m_pPixels; - } - - inline color_type* get_ptr() { - return m_pPixels; - } - - inline void swap(image& other) { - std::swap(m_width, other.m_width); - std::swap(m_height, other.m_height); - std::swap(m_pitch, other.m_pitch); - std::swap(m_total, other.m_total); - std::swap(m_comp_flags, other.m_comp_flags); - std::swap(m_pPixels, other.m_pPixels); - m_pixel_buf.swap(other.m_pixel_buf); - } - - void draw_line(int xs, int ys, int xe, int ye, const color_type& color) { - if (xs > xe) { - std::swap(xs, xe); - std::swap(ys, ye); - } + const pixel_buf_t& get_pixel_buf() const + { + return m_pixel_buf; + } + pixel_buf_t& get_pixel_buf() + { + return m_pixel_buf; + } - int dx = xe - xs, dy = ye - ys; - if (!dx) { - if (ys > ye) - 
std::swap(ys, ye); - for (int i = ys; i <= ye; i++) - set_pixel_clipped(xs, i, color); - } else if (!dy) { - for (int i = xs; i < xe; i++) - set_pixel_clipped(i, ys, color); - } else if (dy > 0) { - if (dy <= dx) { - int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); - rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); - } else { - int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); - rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); - } - } else { - dy = -dy; - if (dy <= dx) { - int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); - rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); - } else { - int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); - rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); - } - } - } - - const pixel_buf_t& get_pixel_buf() const { return m_pixel_buf; } - pixel_buf_t& get_pixel_buf() { return m_pixel_buf; } - - private: - uint m_width; - uint m_height; - uint m_pitch; - uint m_total; - uint m_comp_flags; - - color_type* m_pPixels; - - pixel_buf_t m_pixel_buf; - - void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_type& color) { - int start, end, var; - - if (pred) { - start = ys; - end = ye; - var = xs; - for (int i = start; i <= end; i++) { - set_pixel_clipped(var, i, color); - if (e < 0) - e += e_no_inc; - else { - var += inc_dec; - e += e_inc; - } - } - } else { - start = xs; - end = xe; - var = ys; - for (int i = start; i <= end; i++) { - set_pixel_clipped(i, var, color); - if (e < 0) - e += e_no_inc; - else { - var += inc_dec; - e += e_inc; - } - } + private: + uint m_width; + uint m_height; + uint m_pitch; + uint m_total; + uint m_comp_flags; + + color_type* m_pPixels; + + pixel_buf_t m_pixel_buf; + + void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_type& color) + { + int start, end, var; + + if 
(pred) + { + start = ys; + end = ye; + var = xs; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(var, i, color); + if (e < 0) + { + e += e_no_inc; + } + else + { + var += inc_dec; + e += e_inc; + } + } + } + else + { + start = xs; + end = xe; + var = ys; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(i, var, color); + if (e < 0) + { + e += e_no_inc; + } + else + { + var += inc_dec; + e += e_inc; + } + } + } + } + }; + + typedef image image_u8; + typedef image image_i16; + typedef image image_u16; + typedef image image_i32; + typedef image image_u32; + typedef image image_f; + + template + inline void swap(image& a, image& b) + { + a.swap(b); } - } -}; - -typedef image image_u8; -typedef image image_i16; -typedef image image_u16; -typedef image image_i32; -typedef image image_u32; -typedef image image_f; - -template -inline void swap(image& a, image& b) { - a.swap(b); -} - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_image_utils.cpp b/crnlib/crn_image_utils.cpp index b0b0edb..8a91236 100644 --- a/crnlib/crn_image_utils.cpp +++ b/crnlib/crn_image_utils.cpp @@ -1,5 +1,25 @@ -// File: crn_image_utils.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include #include @@ -29,1197 +49,1543 @@ #include "crn_pixel_format.h" -namespace crnlib { -const float cInfinitePSNR = 999999.0f; -const uint CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION = 16384; +namespace crnlib +{ + const float cInfinitePSNR = 999999.0f; + const uint CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION = 16384; + + namespace image_utils + { + bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img) + { + uint8_vec buf; + if (!serializer.read_entire_file(buf)) + { + return false; + } + + int x = 0, y = 0, n = 0; + unsigned char* pData = stbi_load_from_memory(buf.get_ptr(), buf.size_in_bytes(), &x, &y, &n, 4); + + if (!pData) + { + return false; + } + + if ((x > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) || (y > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION)) + { + stbi_image_free(pData); + return false; + } + + const bool has_alpha = ((n == 2) || (n == 4)); + + img.resize(x, y); + + bool grayscale = true; + + for (int py = 0; py < y; py++) + { + const color_quad_u8* pSrc = reinterpret_cast(pData) + (py * x); + color_quad_u8* pDst = img.get_scanline(py); + color_quad_u8* pDst_end = pDst + x; + + while (pDst != pDst_end) + { + color_quad_u8 c(*pSrc++); + if (!has_alpha) + { + c.a = 255; + } + + if (!c.is_grayscale()) + { + grayscale = false; + } + + *pDst++ = c; + } + } + + stbi_image_free(pData); + + img.reset_comp_flags(); + img.set_grayscale(grayscale); + img.set_component_valid(3, has_alpha); + + return true; + } -namespace image_utils { -bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img) { - uint8_vec buf; - if (!serializer.read_entire_file(buf)) - return false; + bool 
read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img) + { + uint8_vec buf; + if (!serializer.read_entire_file(buf)) + { + return false; + } + + int width = 0, height = 0, actual_comps = 0; + unsigned char* pSrc_img = jpgd::decompress_jpeg_image_from_memory(buf.get_ptr(), buf.size_in_bytes(), &width, &height, &actual_comps, 4); + if (!pSrc_img) + { + return false; + } + + if (math::maximum(width, height) > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) + { + crnlib_free(pSrc_img); + return false; + } + + if (!img.grant_ownership(reinterpret_cast(pSrc_img), width, height)) + { + crnlib_free(pSrc_img); + return false; + } + + img.reset_comp_flags(); + img.set_grayscale(actual_comps == 1); + img.set_component_valid(3, false); + + return true; + } - int x = 0, y = 0, n = 0; - unsigned char* pData = stbi_load_from_memory(buf.get_ptr(), buf.size_in_bytes(), &x, &y, &n, 4); + bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags) + { + if (read_flags > cReadFlagsAllFlags) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!serializer.get_stream()) + { + CRNLIB_ASSERT(0); + return false; + } + + dynamic_string ext(serializer.get_name()); + file_utils::get_extension(ext); + + if ((ext == "jpg") || (ext == "jpeg")) + { + // Use my jpeg decoder by default because it supports progressive jpeg's. 
+ if ((read_flags & cReadFlagForceSTB) == 0) + { + return image_utils::read_from_stream_jpgd(serializer, dest); + } + } + + return image_utils::read_from_stream_stb(serializer, dest); + } - if (!pData) - return false; + bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags) + { + if (read_flags > cReadFlagsAllFlags) + { + CRNLIB_ASSERT(0); + return false; + } + + cfile_stream file_stream; + if (!file_stream.open(pFilename)) + { + return false; + } + + data_stream_serializer serializer(file_stream); + return read_from_stream(dest, serializer, read_flags); + } - if ((x > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) || (y > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION)) { - stbi_image_free(pData); - return false; - } + bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags, int grayscale_comp_index) + { + if ((grayscale_comp_index < -1) || (grayscale_comp_index > 3)) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!img.get_width()) + { + CRNLIB_ASSERT(0); + return false; + } + + dynamic_string ext(pFilename); + bool is_jpeg = false; + if (file_utils::get_extension(ext)) + { + is_jpeg = ((ext == "jpg") || (ext == "jpeg")); + + if ((ext != "png") && (ext != "bmp") && (ext != "tga") && (!is_jpeg)) + { + console::error("crnlib::image_utils::write_to_file: Can only write .BMP, .TGA, .PNG, or .JPG files!\n"); + return false; + } + } + + crnlib::vector temp; + uint num_src_chans = 0; + const void* pSrc_img = nullptr; + + if (is_jpeg) + { + write_flags |= cWriteFlagIgnoreAlpha; + } + + if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (write_flags & image_utils::cWriteFlagGrayscale)) + { + CRNLIB_ASSERT(grayscale_comp_index < 4); + if (grayscale_comp_index > 3) + { + grayscale_comp_index = 3; + } + + temp.resize(img.get_total_pixels()); + + for (uint y = 0; y < img.get_height(); y++) + { + const color_quad_u8* pSrc = img.get_scanline(y); + const color_quad_u8* pSrc_end = pSrc + img.get_width(); + 
uint8* pDst = &temp[y * img.get_width()]; + + if (img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) + { + while (pSrc != pSrc_end) + { + *pDst++ = (*pSrc++)[1]; + } + } + else if (grayscale_comp_index < 0) + { + while (pSrc != pSrc_end) + { + *pDst++ = static_cast((*pSrc++).get_luma()); + } + } + else + { + while (pSrc != pSrc_end) + { + *pDst++ = (*pSrc++)[grayscale_comp_index]; + } + } + } + + pSrc_img = &temp[0]; + num_src_chans = 1; + } + else if ((!img.is_component_valid(3)) || (write_flags & cWriteFlagIgnoreAlpha)) + { + temp.resize(img.get_total_pixels() * 3); + + for (uint y = 0; y < img.get_height(); y++) + { + const color_quad_u8* pSrc = img.get_scanline(y); + const color_quad_u8* pSrc_end = pSrc + img.get_width(); + uint8* pDst = &temp[y * img.get_width() * 3]; + + while (pSrc != pSrc_end) + { + const color_quad_u8 c(*pSrc++); + + pDst[0] = c.r; + pDst[1] = c.g; + pDst[2] = c.b; + + pDst += 3; + } + } + + num_src_chans = 3; + pSrc_img = &temp[0]; + } + else + { + num_src_chans = 4; + pSrc_img = img.get_ptr(); + } + + bool success = false; + if (ext == "png") + { + size_t png_image_size = 0; + void* pPNG_image_data = tdefl_write_image_to_png_file_in_memory(pSrc_img, img.get_width(), img.get_height(), num_src_chans, &png_image_size); + if (!pPNG_image_data) + { + return false; + } + success = file_utils::write_buf_to_file(pFilename, pPNG_image_data, png_image_size); + mz_free(pPNG_image_data); + } + else if (is_jpeg) + { + jpge::params params; + if (write_flags & cWriteFlagJPEGQualityLevelMask) + { + params.m_quality = math::clamp((write_flags & cWriteFlagJPEGQualityLevelMask) >> cWriteFlagJPEGQualityLevelShift, 1U, 100U); + } + params.m_two_pass_flag = (write_flags & cWriteFlagJPEGTwoPass) != 0; + params.m_no_chroma_discrim_flag = (write_flags & cWriteFlagJPEGNoChromaDiscrim) != 0; + + if (write_flags & cWriteFlagJPEGH1V1) + { + params.m_subsampling = jpge::H1V1; + } + else if (write_flags & cWriteFlagJPEGH2V1) + { + params.m_subsampling = 
jpge::H2V1; + } + else if (write_flags & cWriteFlagJPEGH2V2) + { + params.m_subsampling = jpge::H2V2; + } + + success = jpge::compress_image_to_jpeg_file(pFilename, img.get_width(), img.get_height(), num_src_chans, (const jpge::uint8*)pSrc_img, params); + } + else + { + success = ((ext == "bmp" ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), num_src_chans, pSrc_img) == CRNLIB_TRUE); + } + return success; + } - const bool has_alpha = ((n == 2) || (n == 4)); - - img.resize(x, y); + bool has_alpha(const image_u8& img) + { + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + if (img(x, y).a < 255) + { + return true; + } + } + } + + return false; + } - bool grayscale = true; - - for (int py = 0; py < y; py++) { - const color_quad_u8* pSrc = reinterpret_cast(pData) + (py * x); - color_quad_u8* pDst = img.get_scanline(py); - color_quad_u8* pDst_end = pDst + x; - - while (pDst != pDst_end) { - color_quad_u8 c(*pSrc++); - if (!has_alpha) - c.a = 255; - - if (!c.is_grayscale()) - grayscale = false; - - *pDst++ = c; - } - } - - stbi_image_free(pData); - - img.reset_comp_flags(); - img.set_grayscale(grayscale); - img.set_component_valid(3, has_alpha); - - return true; -} - -bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img) { - uint8_vec buf; - if (!serializer.read_entire_file(buf)) - return false; - - int width = 0, height = 0, actual_comps = 0; - unsigned char* pSrc_img = jpgd::decompress_jpeg_image_from_memory(buf.get_ptr(), buf.size_in_bytes(), &width, &height, &actual_comps, 4); - if (!pSrc_img) - return false; - - if (math::maximum(width, height) > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) { - crnlib_free(pSrc_img); - return false; - } - - if (!img.grant_ownership(reinterpret_cast(pSrc_img), width, height)) { - crnlib_free(pSrc_img); - return false; - } - - img.reset_comp_flags(); - img.set_grayscale(actual_comps == 1); - img.set_component_valid(3, false); - - 
return true; -} - -bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags) { - if (read_flags > cReadFlagsAllFlags) { - CRNLIB_ASSERT(0); - return false; - } - - if (!serializer.get_stream()) { - CRNLIB_ASSERT(0); - return false; - } - - dynamic_string ext(serializer.get_name()); - file_utils::get_extension(ext); - - if ((ext == "jpg") || (ext == "jpeg")) { - // Use my jpeg decoder by default because it supports progressive jpeg's. - if ((read_flags & cReadFlagForceSTB) == 0) { - return image_utils::read_from_stream_jpgd(serializer, dest); - } - } - - return image_utils::read_from_stream_stb(serializer, dest); -} - -bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags) { - if (read_flags > cReadFlagsAllFlags) { - CRNLIB_ASSERT(0); - return false; - } - - cfile_stream file_stream; - if (!file_stream.open(pFilename)) - return false; - - data_stream_serializer serializer(file_stream); - return read_from_stream(dest, serializer, read_flags); -} - -bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags, int grayscale_comp_index) { - if ((grayscale_comp_index < -1) || (grayscale_comp_index > 3)) { - CRNLIB_ASSERT(0); - return false; - } - - if (!img.get_width()) { - CRNLIB_ASSERT(0); - return false; - } - - dynamic_string ext(pFilename); - bool is_jpeg = false; - if (file_utils::get_extension(ext)) { - is_jpeg = ((ext == "jpg") || (ext == "jpeg")); - - if ((ext != "png") && (ext != "bmp") && (ext != "tga") && (!is_jpeg)) { - console::error("crnlib::image_utils::write_to_file: Can only write .BMP, .TGA, .PNG, or .JPG files!\n"); - return false; - } - } - - crnlib::vector temp; - uint num_src_chans = 0; - const void* pSrc_img = nullptr; - - if (is_jpeg) { - write_flags |= cWriteFlagIgnoreAlpha; - } - - if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (write_flags & image_utils::cWriteFlagGrayscale)) { - CRNLIB_ASSERT(grayscale_comp_index < 4); - if 
(grayscale_comp_index > 3) - grayscale_comp_index = 3; - - temp.resize(img.get_total_pixels()); - - for (uint y = 0; y < img.get_height(); y++) { - const color_quad_u8* pSrc = img.get_scanline(y); - const color_quad_u8* pSrc_end = pSrc + img.get_width(); - uint8* pDst = &temp[y * img.get_width()]; - - if (img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) { - while (pSrc != pSrc_end) - *pDst++ = (*pSrc++)[1]; - } else if (grayscale_comp_index < 0) { - while (pSrc != pSrc_end) - *pDst++ = static_cast((*pSrc++).get_luma()); - } else { - while (pSrc != pSrc_end) - *pDst++ = (*pSrc++)[grayscale_comp_index]; - } - } - - pSrc_img = &temp[0]; - num_src_chans = 1; - } else if ((!img.is_component_valid(3)) || (write_flags & cWriteFlagIgnoreAlpha)) { - temp.resize(img.get_total_pixels() * 3); - - for (uint y = 0; y < img.get_height(); y++) { - const color_quad_u8* pSrc = img.get_scanline(y); - const color_quad_u8* pSrc_end = pSrc + img.get_width(); - uint8* pDst = &temp[y * img.get_width() * 3]; - - while (pSrc != pSrc_end) { - const color_quad_u8 c(*pSrc++); - - pDst[0] = c.r; - pDst[1] = c.g; - pDst[2] = c.b; - - pDst += 3; - } - } - - num_src_chans = 3; - pSrc_img = &temp[0]; - } else { - num_src_chans = 4; - pSrc_img = img.get_ptr(); - } - - bool success = false; - if (ext == "png") { - size_t png_image_size = 0; - void* pPNG_image_data = tdefl_write_image_to_png_file_in_memory(pSrc_img, img.get_width(), img.get_height(), num_src_chans, &png_image_size); - if (!pPNG_image_data) - return false; - success = file_utils::write_buf_to_file(pFilename, pPNG_image_data, png_image_size); - mz_free(pPNG_image_data); - } else if (is_jpeg) { - jpge::params params; - if (write_flags & cWriteFlagJPEGQualityLevelMask) - params.m_quality = math::clamp((write_flags & cWriteFlagJPEGQualityLevelMask) >> cWriteFlagJPEGQualityLevelShift, 1U, 100U); - params.m_two_pass_flag = (write_flags & cWriteFlagJPEGTwoPass) != 0; - params.m_no_chroma_discrim_flag = (write_flags & 
cWriteFlagJPEGNoChromaDiscrim) != 0; - - if (write_flags & cWriteFlagJPEGH1V1) - params.m_subsampling = jpge::H1V1; - else if (write_flags & cWriteFlagJPEGH2V1) - params.m_subsampling = jpge::H2V1; - else if (write_flags & cWriteFlagJPEGH2V2) - params.m_subsampling = jpge::H2V2; - - success = jpge::compress_image_to_jpeg_file(pFilename, img.get_width(), img.get_height(), num_src_chans, (const jpge::uint8*)pSrc_img, params); - } else { - success = ((ext == "bmp" ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), num_src_chans, pSrc_img) == CRNLIB_TRUE); - } - return success; -} - -bool has_alpha(const image_u8& img) { - for (uint y = 0; y < img.get_height(); y++) - for (uint x = 0; x < img.get_width(); x++) - if (img(x, y).a < 255) - return true; - - return false; -} - -void renorm_normal_map(image_u8& img) { - for (uint y = 0; y < img.get_height(); y++) { - for (uint x = 0; x < img.get_width(); x++) { - color_quad_u8& c = img(x, y); - if ((c.r == 128) && (c.g == 128) && (c.b == 128)) - continue; - - vec3F v(c.r, c.g, c.b); - v *= 1.0f / 255.0f; - v *= 2.0f; - v -= vec3F(1.0f); - v.clamp(-1.0f, 1.0f); - - float length = v.length(); - if (length < .077f) - c.set(128, 128, 128, c.a); - else if (fabs(length - 1.0f) > .077f) { - if (length) - v /= length; - - for (uint i = 0; i < 3; i++) - c[i] = static_cast(math::clamp(floor((v[i] + 1.0f) * .5f * 255.0f + .5f), 0.0f, 255.0f)); - - if ((c.r == 128) && (c.g == 128)) { - if (c.b < 128) - c.b = 0; - else - c.b = 255; + void renorm_normal_map(image_u8& img) + { + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8& c = img(x, y); + if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + { + continue; + } + + vec3F v(c.r, c.g, c.b); + v *= 1.0f / 255.0f; + v *= 2.0f; + v -= vec3F(1.0f); + v.clamp(-1.0f, 1.0f); + + float length = v.length(); + if (length < .077f) + { + c.set(128, 128, 128, c.a); + } + else if (fabs(length - 1.0f) > .077f) + 
{ + if (length) + { + v /= length; + } + + for (uint i = 0; i < 3; i++) + { + c[i] = static_cast(math::clamp(floor((v[i] + 1.0f) * .5f * 255.0f + .5f), 0.0f, 255.0f)); + } + + if ((c.r == 128) && (c.g == 128)) + { + if (c.b < 128) + { + c.b = 0; + } + else + { + c.b = 255; + } + } + } + } + } } - } - } - } -} - -bool is_normal_map(const image_u8& img, const char* pFilename) { - float score = 0.0f; - - uint num_invalid_pixels = 0; - - // TODO: Derive better score from pixel mean, eigenvecs/vals - //crnlib::vector pixels; - - for (uint y = 0; y < img.get_height(); y++) { - for (uint x = 0; x < img.get_width(); x++) { - const color_quad_u8& c = img(x, y); - - if (c.b < 123) { - num_invalid_pixels++; - continue; - } else if ((c.r != 128) || (c.g != 128) || (c.b != 128)) { - vec3F v(c.r, c.g, c.b); - v -= vec3F(128.0f); - v /= vec3F(127.0f); - //pixels.push_back(v); - v.clamp(-1.0f, 1.0f); - - float norm = v.norm(); - if ((norm < 0.83f) || (norm > 1.29f)) - num_invalid_pixels++; - } - } - } - - score -= math::clamp(float(num_invalid_pixels) / (img.get_width() * img.get_height()) - .026f, 0.0f, 1.0f) * 5.0f; - - if (pFilename) { - dynamic_string str(pFilename); - str.tolower(); - - if (str.contains("normal") || str.contains("local") || str.contains("nmap")) - score += 1.0f; - - if (str.contains("diffuse") || str.contains("spec") || str.contains("gloss")) - score -= 1.0f; - } - - return score >= 0.0f; -} - -bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params) { - const uint src_width = src.get_width(); - const uint src_height = src.get_height(); - - if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) { - printf("Image is too large!\n"); - return EXIT_FAILURE; - } - - const int cMaxComponents = 4; - if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) - return false; - - const uint dst_width = params.m_dst_width; - const uint dst_height = params.m_dst_height; - - if 
((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) { - printf("Image is too large!\n"); - return EXIT_FAILURE; - } - - if ((src_width == dst_width) && (src_height == dst_height)) { - dst = src; - return true; - } - - dst.clear(); - dst.resize(params.m_dst_width, params.m_dst_height); - - // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. - const float source_gamma = params.m_source_gamma; //1.75f; - - float srgb_to_linear[256]; - if (params.m_srgb) { - for (int i = 0; i < 256; ++i) - srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); - } - - const int linear_to_srgb_table_size = 8192; - unsigned char linear_to_srgb[linear_to_srgb_table_size]; - - const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; - const float inv_source_gamma = 1.0f / source_gamma; - - if (params.m_srgb) { - for (int i = 0; i < linear_to_srgb_table_size; ++i) { - int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); - if (k < 0) - k = 0; - else if (k > 255) - k = 255; - linear_to_srgb[i] = (unsigned char)k; - } - } - - Resampler* resamplers[cMaxComponents]; - crnlib::vector samples[cMaxComponents]; - - resamplers[0] = crnlib_new(src_width, src_height, dst_width, dst_height, - params.m_wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, - params.m_pFilter, (Resampler::Contrib_List*)nullptr, (Resampler::Contrib_List*)nullptr, params.m_filter_scale, params.m_filter_scale); - samples[0].resize(src_width); - - for (uint i = 1; i < params.m_num_comps; i++) { - resamplers[i] = crnlib_new(src_width, src_height, dst_width, dst_height, - params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, - params.m_pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), params.m_filter_scale, params.m_filter_scale); - samples[i].resize(src_width); - } - - uint dst_y = 0; - - for (uint src_y = 0; src_y < src_height; src_y++) { - const color_quad_u8* pSrc = src.get_scanline(src_y); - - for (uint x = 0; x < src_width; x++) { - for (uint c = 0; c < params.m_num_comps; c++) { - const uint comp_index = params.m_first_comp + c; - const uint8 v = (*pSrc)[comp_index]; - - if (!params.m_srgb || (comp_index == 3)) - samples[c][x] = v * (1.0f / 255.0f); - else - samples[c][x] = srgb_to_linear[v]; - } - - pSrc++; - } - - for (uint c = 0; c < params.m_num_comps; c++) { - if (!resamplers[c]->put_line(&samples[c][0])) { - for (uint i = 0; i < params.m_num_comps; i++) - crnlib_delete(resamplers[i]); - return false; - } - } - - for (;;) { - uint c; - for (c = 0; c < params.m_num_comps; c++) { - const uint comp_index = params.m_first_comp + c; - - const float* pOutput_samples = resamplers[c]->get_line(); - if (!pOutput_samples) - break; - - const bool linear = !params.m_srgb || (comp_index == 3); - CRNLIB_ASSERT(dst_y < dst_height); - color_quad_u8* pDst = dst.get_scanline(dst_y); - - for (uint x = 0; x < dst_width; x++) { - if (linear) { - int c = (int)(255.0f * pOutput_samples[x] + .5f); - if (c < 0) - c = 0; - else if (c > 255) - c = 255; - (*pDst)[comp_index] = (unsigned char)c; - } else { - int j = (int)(linear_to_srgb_table_size * pOutput_samples[x] + .5f); - if (j < 0) - j = 0; - else if (j >= linear_to_srgb_table_size) - j = linear_to_srgb_table_size - 1; - (*pDst)[comp_index] = linear_to_srgb[j]; - } - - pDst++; + + bool is_normal_map(const image_u8& img, const char* pFilename) + { + float score = 0.0f; + + uint num_invalid_pixels = 0; + + // TODO: Derive better score from pixel mean, eigenvecs/vals + //crnlib::vector pixels; + + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x 
< img.get_width(); x++) + { + const color_quad_u8& c = img(x, y); + + if (c.b < 123) + { + num_invalid_pixels++; + continue; + } + else if ((c.r != 128) || (c.g != 128) || (c.b != 128)) + { + vec3F v(c.r, c.g, c.b); + v -= vec3F(128.0f); + v /= vec3F(127.0f); + //pixels.push_back(v); + v.clamp(-1.0f, 1.0f); + + float norm = v.norm(); + if ((norm < 0.83f) || (norm > 1.29f)) + { + num_invalid_pixels++; + } + } + } + } + + score -= math::clamp(float(num_invalid_pixels) / (img.get_width() * img.get_height()) - .026f, 0.0f, 1.0f) * 5.0f; + + if (pFilename) + { + dynamic_string str(pFilename); + str.tolower(); + + if (str.contains("normal") || str.contains("local") || str.contains("nmap")) + { + score += 1.0f; + } + + if (str.contains("diffuse") || str.contains("spec") || str.contains("gloss")) + { + score -= 1.0f; + } + } + + return score >= 0.0f; } - } - if (c < params.m_num_comps) - break; - - dst_y++; - } - } - - for (uint i = 0; i < params.m_num_comps; i++) - crnlib_delete(resamplers[i]); - - return true; -} - -bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params) { - const uint src_width = src.get_width(); - const uint src_height = src.get_height(); - - if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) { - printf("Image is too large!\n"); - return EXIT_FAILURE; - } - - const int cMaxComponents = 4; - if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) - return false; - - const uint dst_width = params.m_dst_width; - const uint dst_height = params.m_dst_height; - - if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) { - printf("Image is too large!\n"); - return EXIT_FAILURE; - } - - if ((src_width == dst_width) && (src_height == dst_height)) { - dst = src; - return true; - } - - dst.clear(); - - // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. 
- const float source_gamma = params.m_source_gamma; //1.75f; - - float srgb_to_linear[256]; - if (params.m_srgb) { - for (int i = 0; i < 256; ++i) - srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); - } - - const int linear_to_srgb_table_size = 8192; - unsigned char linear_to_srgb[linear_to_srgb_table_size]; - - const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; - const float inv_source_gamma = 1.0f / source_gamma; - - if (params.m_srgb) { - for (int i = 0; i < linear_to_srgb_table_size; ++i) { - int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); - if (k < 0) - k = 0; - else if (k > 255) - k = 255; - linear_to_srgb[i] = (unsigned char)k; - } - } - - task_pool tp; - tp.init(g_number_of_processors - 1); - - threaded_resampler resampler(tp); - threaded_resampler::params p; - p.m_src_width = src_width; - p.m_src_height = src_height; - p.m_dst_width = dst_width; - p.m_dst_height = dst_height; - p.m_sample_low = 0.0f; - p.m_sample_high = 1.0f; - p.m_boundary_op = params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP; - p.m_Pfilter_name = params.m_pFilter; - p.m_filter_x_scale = params.m_filter_scale; - p.m_filter_y_scale = params.m_filter_scale; - - uint resampler_comps = 4; - if (params.m_num_comps == 1) { - p.m_fmt = threaded_resampler::cPF_Y_F32; - resampler_comps = 1; - } else if (params.m_num_comps <= 3) - p.m_fmt = threaded_resampler::cPF_RGBX_F32; - else - p.m_fmt = threaded_resampler::cPF_RGBA_F32; - - crnlib::vector src_samples; - crnlib::vector dst_samples; - - if (!src_samples.try_resize(src_width * src_height * resampler_comps)) - return false; - - if (!dst_samples.try_resize(dst_width * dst_height * resampler_comps)) - return false; - - p.m_pSrc_pixels = src_samples.get_ptr(); - p.m_src_pitch = src_width * resampler_comps * sizeof(float); - p.m_pDst_pixels = dst_samples.get_ptr(); - p.m_dst_pitch = dst_width * resampler_comps * sizeof(float); - - for (uint src_y = 0; src_y < src_height; src_y++) { - const color_quad_u8* pSrc = src.get_scanline(src_y); - float* pDst = src_samples.get_ptr() + src_width * resampler_comps * src_y; - - for (uint x = 0; x < src_width; x++) { - for (uint c = 0; c < params.m_num_comps; c++) { - const uint comp_index = params.m_first_comp + c; - const uint8 v = (*pSrc)[comp_index]; - - if (!params.m_srgb || (comp_index == 3)) - pDst[c] = v * (1.0f / 255.0f); - else - pDst[c] = srgb_to_linear[v]; - } - - pSrc++; - pDst += resampler_comps; - } - } - - if (!resampler.resample(p)) - return false; - - src_samples.clear(); - - if (!dst.resize(params.m_dst_width, params.m_dst_height)) - return false; - - for (uint dst_y = 0; dst_y < dst_height; dst_y++) { - const float* pSrc = dst_samples.get_ptr() + dst_width * resampler_comps * dst_y; - color_quad_u8* pDst = dst.get_scanline(dst_y); - - for (uint x = 0; x < dst_width; x++) { - color_quad_u8 dst(0, 0, 0, 255); - - for (uint c = 0; c < params.m_num_comps; c++) { - const uint comp_index = params.m_first_comp + c; - const float v = pSrc[c]; - - 
if ((!params.m_srgb) || (comp_index == 3)) { - int c = static_cast(255.0f * v + .5f); - if (c < 0) - c = 0; - else if (c > 255) - c = 255; - dst[comp_index] = (unsigned char)c; - } else { - int j = static_cast(linear_to_srgb_table_size * v + .5f); - if (j < 0) - j = 0; - else if (j >= linear_to_srgb_table_size) - j = linear_to_srgb_table_size - 1; - dst[comp_index] = linear_to_srgb[j]; + + bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params) + { + const uint src_width = src.get_width(); + const uint src_height = src.get_height(); + + if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + const int cMaxComponents = 4; + if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) + { + return false; + } + + const uint dst_width = params.m_dst_width; + const uint dst_height = params.m_dst_height; + + if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + if ((src_width == dst_width) && (src_height == dst_height)) + { + dst = src; + return true; + } + + dst.clear(); + dst.resize(params.m_dst_width, params.m_dst_height); + + // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. 
+ const float source_gamma = params.m_source_gamma; //1.75f; + + float srgb_to_linear[256]; + if (params.m_srgb) + { + for (int i = 0; i < 256; ++i) + { + srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); + } + } + + const int linear_to_srgb_table_size = 8192; + unsigned char linear_to_srgb[linear_to_srgb_table_size]; + + const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; + const float inv_source_gamma = 1.0f / source_gamma; + + if (params.m_srgb) + { + for (int i = 0; i < linear_to_srgb_table_size; ++i) + { + int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); + if (k < 0) + { + k = 0; + } + else if (k > 255) + { + k = 255; + } + linear_to_srgb[i] = (unsigned char)k; + } + } + + Resampler* resamplers[cMaxComponents]; + crnlib::vector samples[cMaxComponents]; + + resamplers[0] = crnlib_new(src_width, src_height, dst_width, dst_height, + params.m_wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + params.m_pFilter, (Resampler::Contrib_List*)nullptr, (Resampler::Contrib_List*)nullptr, params.m_filter_scale, params.m_filter_scale); + samples[0].resize(src_width); + + for (uint i = 1; i < params.m_num_comps; i++) + { + resamplers[i] = crnlib_new(src_width, src_height, dst_width, dst_height, + params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + params.m_pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), params.m_filter_scale, params.m_filter_scale); + samples[i].resize(src_width); + } + + uint dst_y = 0; + + for (uint src_y = 0; src_y < src_height; src_y++) + { + const color_quad_u8* pSrc = src.get_scanline(src_y); + + for (uint x = 0; x < src_width; x++) + { + for (uint c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + const uint8 v = (*pSrc)[comp_index]; + + if (!params.m_srgb || (comp_index == 3)) + { + samples[c][x] = v * (1.0f / 255.0f); + } + else + { + samples[c][x] = srgb_to_linear[v]; + } + } + + pSrc++; + } + + for (uint c = 0; c < params.m_num_comps; c++) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint i = 0; i < params.m_num_comps; i++) + { + crnlib_delete(resamplers[i]); + } + return false; + } + } + + for (;;) + { + uint c; + for (c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + + const float* pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + { + break; + } + + const bool linear = !params.m_srgb || (comp_index == 3); + CRNLIB_ASSERT(dst_y < dst_height); + color_quad_u8* pDst = dst.get_scanline(dst_y); + + for (uint x = 0; x < dst_width; x++) + { + if (linear) + { + int c = (int)(255.0f * pOutput_samples[x] + .5f); + if (c < 0) + { + c = 0; + } + else if (c > 255) + { + c = 255; + } + (*pDst)[comp_index] = (unsigned char)c; + } + else + { + int j = (int)(linear_to_srgb_table_size * pOutput_samples[x] + .5f); + if (j < 0) + { + j = 0; + } + else if (j >= linear_to_srgb_table_size) + { + j = linear_to_srgb_table_size - 1; + } + (*pDst)[comp_index] = linear_to_srgb[j]; + } + + pDst++; + } + } + if (c < params.m_num_comps) + { + break; + } + + dst_y++; + } + } + + for (uint i = 0; i < params.m_num_comps; i++) + { + crnlib_delete(resamplers[i]); + } + + return true; } - } - - *pDst++ = 
dst; - - pSrc += resampler_comps; - } - } - - return true; -} - -bool resample(const image_u8& src, image_u8& dst, const resample_params& params) { - if ((params.m_multithreaded) && (g_number_of_processors > 1)) - return resample_multithreaded(src, dst, params); - else - return resample_single_thread(src, dst, params); -} - -bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale) { - if ((a.get_width() != b.get_width()) || (a.get_height() != b.get_height())) - return false; - - dest.resize(a.get_width(), b.get_height()); - - for (uint y = 0; y < a.get_height(); y++) { - for (uint x = 0; x < a.get_width(); x++) { - const color_quad_u8& ca = a(x, y); - const color_quad_u8& cb = b(x, y); - - color_quad_u8 cd; - for (uint c = 0; c < 4; c++) { - int d = (ca[c] - cb[c]) * scale + 128; - d = math::clamp(d, 0, 255); - cd[c] = static_cast(d); - } - - dest(x, y) = cd; - } - } - - return true; -} - -// FIXME: Totally hack-ass computation. -// Perhaps port http://www.lomont.org/Software/Misc/SSIM/SSIM.html? 
-double compute_block_ssim(uint t, const uint8* pX, const uint8* pY) { - double ave_x = 0.0f; - double ave_y = 0.0f; - for (uint i = 0; i < t; i++) { - ave_x += pX[i]; - ave_y += pY[i]; - } - - ave_x /= t; - ave_y /= t; - - double var_x = 0.0f; - double var_y = 0.0f; - for (uint i = 0; i < t; i++) { - var_x += math::square(pX[i] - ave_x); - var_y += math::square(pY[i] - ave_y); - } - - var_x = sqrt(var_x / (t - 1)); - var_y = sqrt(var_y / (t - 1)); - - double covar_xy = 0.0f; - for (uint i = 0; i < t; i++) - covar_xy += (pX[i] - ave_x) * (pY[i] - ave_y); - - covar_xy /= (t - 1); - - const double c1 = 6.5025; //(255*.01)^2 - const double c2 = 58.5225; //(255*.03)^2 - - double n = (2.0f * ave_x * ave_y + c1) * (2.0f * covar_xy + c2); - double d = (ave_x * ave_x + ave_y * ave_y + c1) * (var_x * var_x + var_y * var_y + c2); - - return n / d; -} - -double compute_ssim(const image_u8& a, const image_u8& b, int channel_index) { - const uint N = 6; - uint8 sx[N * N], sy[N * N]; - - double total_ssim = 0.0f; - uint total_blocks = 0; - - //image_u8 yimg((a.get_width() + N - 1) / N, (a.get_height() + N - 1) / N); - - for (uint y = 0; y < a.get_height(); y += N) { - for (uint x = 0; x < a.get_width(); x += N) { - for (uint iy = 0; iy < N; iy++) { - for (uint ix = 0; ix < N; ix++) { - if (channel_index < 0) - sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy).get_luma(); - else - sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy)[channel_index]; - - if (channel_index < 0) - sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy).get_luma(); - else - sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy)[channel_index]; + + bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params) + { + const uint src_width = src.get_width(); + const uint src_height = src.get_height(); + + if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + const int 
cMaxComponents = 4; + if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) + { + return false; + } + + const uint dst_width = params.m_dst_width; + const uint dst_height = params.m_dst_height; + + if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + if ((src_width == dst_width) && (src_height == dst_height)) + { + dst = src; + return true; + } + + dst.clear(); + + // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. + const float source_gamma = params.m_source_gamma; //1.75f; + + float srgb_to_linear[256]; + if (params.m_srgb) + { + for (int i = 0; i < 256; ++i) + { + srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); + } + } + + const int linear_to_srgb_table_size = 8192; + unsigned char linear_to_srgb[linear_to_srgb_table_size]; + + const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; + const float inv_source_gamma = 1.0f / source_gamma; + + if (params.m_srgb) + { + for (int i = 0; i < linear_to_srgb_table_size; ++i) + { + int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); + if (k < 0) + { + k = 0; + } + else if (k > 255) + { + k = 255; + } + linear_to_srgb[i] = (unsigned char)k; + } + } + + task_pool tp; + tp.init(g_number_of_processors - 1); + + threaded_resampler resampler(tp); + threaded_resampler::params p; + p.m_src_width = src_width; + p.m_src_height = src_height; + p.m_dst_width = dst_width; + p.m_dst_height = dst_height; + p.m_sample_low = 0.0f; + p.m_sample_high = 1.0f; + p.m_boundary_op = params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP; + p.m_Pfilter_name = params.m_pFilter; + p.m_filter_x_scale = params.m_filter_scale; + p.m_filter_y_scale = params.m_filter_scale; + + uint resampler_comps = 4; + if (params.m_num_comps == 1) + { + p.m_fmt = threaded_resampler::cPF_Y_F32; + resampler_comps = 1; + } + else if (params.m_num_comps <= 3) + { + p.m_fmt = threaded_resampler::cPF_RGBX_F32; + } + else + { + p.m_fmt = threaded_resampler::cPF_RGBA_F32; + } + + crnlib::vector src_samples; + crnlib::vector dst_samples; + + if (!src_samples.try_resize(src_width * src_height * resampler_comps)) + { + return false; + } + + if (!dst_samples.try_resize(dst_width * dst_height * resampler_comps)) + { + return false; + } + + p.m_pSrc_pixels = src_samples.get_ptr(); + p.m_src_pitch = src_width * resampler_comps * sizeof(float); + p.m_pDst_pixels = dst_samples.get_ptr(); + p.m_dst_pitch = dst_width * resampler_comps * sizeof(float); + + for (uint src_y = 0; src_y < src_height; src_y++) + { + const color_quad_u8* pSrc = src.get_scanline(src_y); + float* pDst = src_samples.get_ptr() + src_width * resampler_comps * src_y; + + for (uint x = 0; x < src_width; x++) + { + for (uint c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + const uint8 v = (*pSrc)[comp_index]; + + if (!params.m_srgb || (comp_index == 3)) + { + pDst[c] = v * (1.0f / 255.0f); + } + else + { + pDst[c] = srgb_to_linear[v]; + } + } + + pSrc++; + pDst += resampler_comps; + } + } + + if (!resampler.resample(p)) + { + return false; + } + + src_samples.clear(); + + if (!dst.resize(params.m_dst_width, params.m_dst_height)) + { + return false; + } + + for (uint dst_y = 0; dst_y < dst_height; dst_y++) + { + const float* pSrc = dst_samples.get_ptr() + dst_width * resampler_comps * dst_y; + color_quad_u8* pDst = dst.get_scanline(dst_y); + + for (uint x = 0; x < dst_width; x++) + { + color_quad_u8 dst(0, 0, 0, 255); + + for (uint c = 0; c < params.m_num_comps; c++) + { + 
const uint comp_index = params.m_first_comp + c; + const float v = pSrc[c]; + + if ((!params.m_srgb) || (comp_index == 3)) + { + int c = static_cast(255.0f * v + .5f); + if (c < 0) + { + c = 0; + } + else if (c > 255) + { + c = 255; + } + dst[comp_index] = (unsigned char)c; + } + else + { + int j = static_cast(linear_to_srgb_table_size * v + .5f); + if (j < 0) + { + j = 0; + } + else if (j >= linear_to_srgb_table_size) + { + j = linear_to_srgb_table_size - 1; + } + dst[comp_index] = linear_to_srgb[j]; + } + } + + *pDst++ = dst; + + pSrc += resampler_comps; + } + } + + return true; } - } - double ssim = compute_block_ssim(N * N, sx, sy); - total_ssim += ssim; - total_blocks++; + bool resample(const image_u8& src, image_u8& dst, const resample_params& params) + { + if ((params.m_multithreaded) && (g_number_of_processors > 1)) + { + return resample_multithreaded(src, dst, params); + } + else + { + return resample_single_thread(src, dst, params); + } + } - //uint ssim_c = (uint)math::clamp(ssim * 127.0f + 128.0f, 0, 255); - //yimg(x / N, y / N).set(ssim_c, ssim_c, ssim_c, 255); - } - } + bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale) + { + if ((a.get_width() != b.get_width()) || (a.get_height() != b.get_height())) + { + return false; + } + + dest.resize(a.get_width(), b.get_height()); + + for (uint y = 0; y < a.get_height(); y++) + { + for (uint x = 0; x < a.get_width(); x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + color_quad_u8 cd; + for (uint c = 0; c < 4; c++) + { + int d = (ca[c] - cb[c]) * scale + 128; + d = math::clamp(d, 0, 255); + cd[c] = static_cast(d); + } + + dest(x, y) = cd; + } + } + + return true; + } - if (!total_blocks) - return 0.0f; + // FIXME: Totally hack-ass computation. + // Perhaps port http://www.lomont.org/Software/Misc/SSIM/SSIM.html? 
+ double compute_block_ssim(uint t, const uint8* pX, const uint8* pY) + { + double ave_x = 0.0f; + double ave_y = 0.0f; + for (uint i = 0; i < t; i++) + { + ave_x += pX[i]; + ave_y += pY[i]; + } + + ave_x /= t; + ave_y /= t; + + double var_x = 0.0f; + double var_y = 0.0f; + for (uint i = 0; i < t; i++) + { + var_x += math::square(pX[i] - ave_x); + var_y += math::square(pY[i] - ave_y); + } + + var_x = sqrt(var_x / (t - 1)); + var_y = sqrt(var_y / (t - 1)); + + double covar_xy = 0.0f; + for (uint i = 0; i < t; i++) + { + covar_xy += (pX[i] - ave_x) * (pY[i] - ave_y); + } + + covar_xy /= (t - 1); + + const double c1 = 6.5025; //(255*.01)^2 + const double c2 = 58.5225; //(255*.03)^2 + + double n = (2.0f * ave_x * ave_y + c1) * (2.0f * covar_xy + c2); + double d = (ave_x * ave_x + ave_y * ave_y + c1) * (var_x * var_x + var_y * var_y + c2); + + return n / d; + } - //save_to_file_stb_or_miniz("ssim.tga", yimg, cWriteFlagGrayscale); + double compute_ssim(const image_u8& a, const image_u8& b, int channel_index) + { + const uint N = 6; + uint8 sx[N * N], sy[N * N]; + + double total_ssim = 0.0f; + uint total_blocks = 0; + + //image_u8 yimg((a.get_width() + N - 1) / N, (a.get_height() + N - 1) / N); + + for (uint y = 0; y < a.get_height(); y += N) + { + for (uint x = 0; x < a.get_width(); x += N) + { + for (uint iy = 0; iy < N; iy++) + { + for (uint ix = 0; ix < N; ix++) + { + if (channel_index < 0) + { + sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy).get_luma(); + } + else + { + sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy)[channel_index]; + } + + if (channel_index < 0) + { + sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy).get_luma(); + } + else + { + sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy)[channel_index]; + } + } + } + + double ssim = compute_block_ssim(N * N, sx, sy); + total_ssim += ssim; + total_blocks++; + + //uint ssim_c = (uint)math::clamp(ssim * 127.0f + 128.0f, 0, 255); + //yimg(x / N, y / N).set(ssim_c, ssim_c, ssim_c, 255); + 
} + } + + if (!total_blocks) + { + return 0.0f; + } + + //save_to_file_stb_or_miniz("ssim.tga", yimg, cWriteFlagGrayscale); + + return total_ssim / total_blocks; + } - return total_ssim / total_blocks; -} + void print_ssim(const image_u8& src_img, const image_u8& dst_img) + { + (void)src_img; + (void)dst_img; + //double y_ssim = compute_ssim(src_img, dst_img, -1); + //console::printf("Luma MSSIM: %f, Scaled: %f", y_ssim, (y_ssim - .8f) / .2f); -void print_ssim(const image_u8& src_img, const image_u8& dst_img) { - (void)src_img; - (void)dst_img; - //double y_ssim = compute_ssim(src_img, dst_img, -1); - //console::printf("Luma MSSIM: %f, Scaled: %f", y_ssim, (y_ssim - .8f) / .2f); + //double r_ssim = compute_ssim(src_img, dst_img, 0); + //console::printf(" R MSSIM: %f", r_ssim); - //double r_ssim = compute_ssim(src_img, dst_img, 0); - //console::printf(" R MSSIM: %f", r_ssim); + //double g_ssim = compute_ssim(src_img, dst_img, 1); + //console::printf(" G MSSIM: %f", g_ssim); - //double g_ssim = compute_ssim(src_img, dst_img, 1); - //console::printf(" G MSSIM: %f", g_ssim); + //double b_ssim = compute_ssim(src_img, dst_img, 2); + //console::printf(" B MSSIM: %f", b_ssim); + } - //double b_ssim = compute_ssim(src_img, dst_img, 2); - //console::printf(" B MSSIM: %f", b_ssim); -} + void error_metrics::print(const char* pName) const + { + if (mPeakSNR >= cInfinitePSNR) + { + console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: Infinite", pName, mMax, mMean, mMeanSquared, mRootMeanSquared); + } + else + { + console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mMeanSquared, mRootMeanSquared, mPeakSNR); + } + } -void error_metrics::print(const char* pName) const { - if (mPeakSNR >= cInfinitePSNR) - console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: Infinite", pName, mMax, mMean, mMeanSquared, mRootMeanSquared); - else - console::printf("%s Error: Max: %3u, Mean: 
%3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mMeanSquared, mRootMeanSquared, mPeakSNR); -} + bool error_metrics::compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error) + { + //if ( (!a.get_width()) || (!b.get_height()) || (a.get_width() != b.get_width()) || (a.get_height() != b.get_height()) ) + // return false; + + const uint width = math::minimum(a.get_width(), b.get_width()); + const uint height = math::minimum(a.get_height(), b.get_height()); + + CRNLIB_ASSERT((first_channel < 4U) && (first_channel + num_channels <= 4U)); + + // Histogram approach due to Charles Bloom. + double hist[256]; + utils::zero_object(hist); + + for (uint y = 0; y < height; y++) + { + for (uint x = 0; x < width; x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + if (!num_channels) + { + hist[labs(ca.get_luma() - cb.get_luma())]++; + } + else + { + for (uint c = 0; c < num_channels; c++) + { + hist[labs(ca[first_channel + c] - cb[first_channel + c])]++; + } + } + } + } + + mMax = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint i = 0; i < 256; i++) + { + if (!hist[i]) + { + continue; + } + + mMax = math::maximum(mMax, i); + + double x = i * hist[i]; + + sum += x; + sum2 += i * x; + } + + // See http://bmrc.berkeley.edu/courseware/cs294/fall97/assignment/psnr.html + double total_values = width * height; + + if (average_component_error) + { + total_values *= math::clamp(num_channels, 1, 4); + } + + mMean = math::clamp(sum / total_values, 0.0f, 255.0f); + mMeanSquared = math::clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); + + mRootMeanSquared = sqrt(mMeanSquared); + + if (!mRootMeanSquared) + { + mPeakSNR = cInfinitePSNR; + } + else + { + mPeakSNR = math::clamp(log10(255.0f / mRootMeanSquared) * 20.0f, 0.0f, 500.0f); + } + + return true; + } -bool error_metrics::compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool 
average_component_error) { - //if ( (!a.get_width()) || (!b.get_height()) || (a.get_width() != b.get_width()) || (a.get_height() != b.get_height()) ) - // return false; + void print_image_metrics(const image_u8& src_img, const image_u8& dst_img) + { + if ((!src_img.get_width()) || (!dst_img.get_height()) || (src_img.get_width() != dst_img.get_width()) || (src_img.get_height() != dst_img.get_height())) + { + console::printf("print_image_metrics: Image resolutions don't match exactly (%ux%u) vs. (%ux%u)", src_img.get_width(), src_img.get_height(), dst_img.get_width(), dst_img.get_height()); + } - const uint width = math::minimum(a.get_width(), b.get_width()); - const uint height = math::minimum(a.get_height(), b.get_height()); + image_utils::error_metrics error_metrics; - CRNLIB_ASSERT((first_channel < 4U) && (first_channel + num_channels <= 4U)); + if (src_img.has_rgb() || dst_img.has_rgb()) + { + error_metrics.compute(src_img, dst_img, 0, 3, false); + error_metrics.print("RGB Total "); - // Histogram approach due to Charles Bloom. 
- double hist[256]; - utils::zero_object(hist); + error_metrics.compute(src_img, dst_img, 0, 3, true); + error_metrics.print("RGB Average"); - for (uint y = 0; y < height; y++) { - for (uint x = 0; x < width; x++) { - const color_quad_u8& ca = a(x, y); - const color_quad_u8& cb = b(x, y); + error_metrics.compute(src_img, dst_img, 0, 0); + error_metrics.print("Luma "); - if (!num_channels) - hist[labs(ca.get_luma() - cb.get_luma())]++; - else { - for (uint c = 0; c < num_channels; c++) - hist[labs(ca[first_channel + c] - cb[first_channel + c])]++; - } - } - } - - mMax = 0; - double sum = 0.0f, sum2 = 0.0f; - for (uint i = 0; i < 256; i++) { - if (!hist[i]) - continue; - - mMax = math::maximum(mMax, i); - - double x = i * hist[i]; - - sum += x; - sum2 += i * x; - } - - // See http://bmrc.berkeley.edu/courseware/cs294/fall97/assignment/psnr.html - double total_values = width * height; - - if (average_component_error) - total_values *= math::clamp(num_channels, 1, 4); - - mMean = math::clamp(sum / total_values, 0.0f, 255.0f); - mMeanSquared = math::clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); - - mRootMeanSquared = sqrt(mMeanSquared); - - if (!mRootMeanSquared) - mPeakSNR = cInfinitePSNR; - else - mPeakSNR = math::clamp(log10(255.0f / mRootMeanSquared) * 20.0f, 0.0f, 500.0f); - - return true; -} - -void print_image_metrics(const image_u8& src_img, const image_u8& dst_img) { - if ((!src_img.get_width()) || (!dst_img.get_height()) || (src_img.get_width() != dst_img.get_width()) || (src_img.get_height() != dst_img.get_height())) - console::printf("print_image_metrics: Image resolutions don't match exactly (%ux%u) vs. 
(%ux%u)", src_img.get_width(), src_img.get_height(), dst_img.get_width(), dst_img.get_height()); - - image_utils::error_metrics error_metrics; - - if (src_img.has_rgb() || dst_img.has_rgb()) { - error_metrics.compute(src_img, dst_img, 0, 3, false); - error_metrics.print("RGB Total "); - - error_metrics.compute(src_img, dst_img, 0, 3, true); - error_metrics.print("RGB Average"); - - error_metrics.compute(src_img, dst_img, 0, 0); - error_metrics.print("Luma "); - - error_metrics.compute(src_img, dst_img, 0, 1); - error_metrics.print("Red "); - - error_metrics.compute(src_img, dst_img, 1, 1); - error_metrics.print("Green "); - - error_metrics.compute(src_img, dst_img, 2, 1); - error_metrics.print("Blue "); - } - - if (src_img.has_alpha() || dst_img.has_alpha()) { - error_metrics.compute(src_img, dst_img, 3, 1); - error_metrics.print("Alpha "); - } -} - -static uint8 regen_z(uint x, uint y) { - float vx = math::clamp((x - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); - float vy = math::clamp((y - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); - float vz = sqrt(math::clamp(1.0f - vx * vx - vy * vy, 0.0f, 1.0f)); - - vz = vz * 127.0f + 128.0f; - - if (vz < 128.0f) - vz -= .5f; - else - vz += .5f; - - int ib = math::float_to_int(vz); - - return static_cast(math::clamp(ib, 0, 255)); -} - -void convert_image(image_u8& img, image_utils::conversion_type conv_type) { - switch (conv_type) { - case image_utils::cConversion_To_CCxY: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagLumaChroma)); - break; - } - case image_utils::cConversion_From_CCxY: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid)); - break; - } - case image_utils::cConversion_To_xGxR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | 
pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_From_xGxR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_To_xGBR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_To_AGBR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_From_xGBR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_From_AGBR: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case image_utils::cConversion_XY_to_XYZ: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); - break; - } - case cConversion_Y_To_A: { - img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagAValid)); - break; - } - case cConversion_A_To_RGBA: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | 
pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid)); - break; - } - case cConversion_Y_To_RGB: { - img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagGrayscale | (img.has_alpha() ? pixel_format_helpers::cCompFlagAValid : 0))); - break; - } - case cConversion_To_Y: { - img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagGrayscale)); - break; - } - default: { - CRNLIB_ASSERT(false); - return; - } - } - - for (uint y = 0; y < img.get_height(); y++) { - for (uint x = 0; x < img.get_width(); x++) { - color_quad_u8 src(img(x, y)); - color_quad_u8 dst; - - switch (conv_type) { - case image_utils::cConversion_To_CCxY: { - color::RGB_to_YCC(dst, src); - break; - } - case image_utils::cConversion_From_CCxY: { - color::YCC_to_RGB(dst, src); - break; - } - case image_utils::cConversion_To_xGxR: { - dst.r = 0; - dst.g = src.g; - dst.b = 0; - dst.a = src.r; - break; - } - case image_utils::cConversion_From_xGxR: { - dst.r = src.a; - dst.g = src.g; - // This is kinda iffy, we're assuming the image is a normal map here. 
- dst.b = regen_z(src.a, src.g); - dst.a = 255; - break; - } - case image_utils::cConversion_To_xGBR: { - dst.r = 0; - dst.g = src.g; - dst.b = src.b; - dst.a = src.r; - break; - } - case image_utils::cConversion_To_AGBR: { - dst.r = src.a; - dst.g = src.g; - dst.b = src.b; - dst.a = src.r; - break; - } - case image_utils::cConversion_From_xGBR: { - dst.r = src.a; - dst.g = src.g; - dst.b = src.b; - dst.a = 255; - break; - } - case image_utils::cConversion_From_AGBR: { - dst.r = src.a; - dst.g = src.g; - dst.b = src.b; - dst.a = src.r; - break; + error_metrics.compute(src_img, dst_img, 0, 1); + error_metrics.print("Red "); + + error_metrics.compute(src_img, dst_img, 1, 1); + error_metrics.print("Green "); + + error_metrics.compute(src_img, dst_img, 2, 1); + error_metrics.print("Blue "); + } + + if (src_img.has_alpha() || dst_img.has_alpha()) + { + error_metrics.compute(src_img, dst_img, 3, 1); + error_metrics.print("Alpha "); + } } - case image_utils::cConversion_XY_to_XYZ: { - dst.r = src.r; - dst.g = src.g; - // This is kinda iffy, we're assuming the image is a normal map here. 
- dst.b = regen_z(src.r, src.g); - dst.a = 255; - break; + + static uint8 regen_z(uint x, uint y) + { + float vx = math::clamp((x - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); + float vy = math::clamp((y - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); + float vz = sqrt(math::clamp(1.0f - vx * vx - vy * vy, 0.0f, 1.0f)); + + vz = vz * 127.0f + 128.0f; + + if (vz < 128.0f) + { + vz -= .5f; + } + else + { + vz += .5f; + } + + int ib = math::float_to_int(vz); + + return static_cast(math::clamp(ib, 0, 255)); } - case image_utils::cConversion_Y_To_A: { - dst.r = src.r; - dst.g = src.g; - dst.b = src.b; - dst.a = static_cast(src.get_luma()); - break; + + void convert_image(image_u8& img, image_utils::conversion_type conv_type) + { + switch (conv_type) + { + case image_utils::cConversion_To_CCxY: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagLumaChroma)); + break; + } + case image_utils::cConversion_From_CCxY: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid)); + break; + } + case image_utils::cConversion_To_xGxR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_xGxR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_To_xGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_To_AGBR: + { + 
img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_xGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_AGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_XY_to_XYZ: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case cConversion_Y_To_A: + { + img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagAValid)); + break; + } + case cConversion_A_To_RGBA: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid)); + break; + } + case cConversion_Y_To_RGB: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagGrayscale | (img.has_alpha() ? 
pixel_format_helpers::cCompFlagAValid : 0))); + break; + } + case cConversion_To_Y: + { + img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagGrayscale)); + break; + } + default: + { + CRNLIB_ASSERT(false); + return; + } + } + + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8 src(img(x, y)); + color_quad_u8 dst; + + switch (conv_type) + { + case image_utils::cConversion_To_CCxY: + { + color::RGB_to_YCC(dst, src); + break; + } + case image_utils::cConversion_From_CCxY: + { + color::YCC_to_RGB(dst, src); + break; + } + case image_utils::cConversion_To_xGxR: + { + dst.r = 0; + dst.g = src.g; + dst.b = 0; + dst.a = src.r; + break; + } + case image_utils::cConversion_From_xGxR: + { + dst.r = src.a; + dst.g = src.g; + // This is kinda iffy, we're assuming the image is a normal map here. + dst.b = regen_z(src.a, src.g); + dst.a = 255; + break; + } + case image_utils::cConversion_To_xGBR: + { + dst.r = 0; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_To_AGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_From_xGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = 255; + break; + } + case image_utils::cConversion_From_AGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_XY_to_XYZ: + { + dst.r = src.r; + dst.g = src.g; + // This is kinda iffy, we're assuming the image is a normal map here. 
+ dst.b = regen_z(src.r, src.g); + dst.a = 255; + break; + } + case image_utils::cConversion_Y_To_A: + { + dst.r = src.r; + dst.g = src.g; + dst.b = src.b; + dst.a = static_cast(src.get_luma()); + break; + } + case image_utils::cConversion_Y_To_RGB: + { + uint8 y = static_cast(src.get_luma()); + dst.r = y; + dst.g = y; + dst.b = y; + dst.a = src.a; + break; + } + case image_utils::cConversion_A_To_RGBA: + { + dst.r = src.a; + dst.g = src.a; + dst.b = src.a; + dst.a = src.a; + break; + } + case image_utils::cConversion_To_Y: + { + uint8 y = static_cast(src.get_luma()); + dst.r = y; + dst.g = y; + dst.b = y; + dst.a = src.a; + break; + } + default: + { + CRNLIB_ASSERT(false); + dst = src; + break; + } + } + + img(x, y) = dst; + } + } } - case image_utils::cConversion_Y_To_RGB: { - uint8 y = static_cast(src.get_luma()); - dst.r = y; - dst.g = y; - dst.b = y; - dst.a = src.a; - break; + + image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt) + { + image_utils::conversion_type conv_type = image_utils::cConversion_Invalid; + + if (cooking) + { + switch (fmt) + { + case PIXEL_FMT_DXT5_CCxY: + { + conv_type = image_utils::cConversion_To_CCxY; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + conv_type = image_utils::cConversion_To_xGxR; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + conv_type = image_utils::cConversion_To_xGBR; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + conv_type = image_utils::cConversion_To_AGBR; + break; + } + default: + break; + } + } + else + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + { + conv_type = image_utils::cConversion_XY_to_XYZ; + break; + } + case PIXEL_FMT_DXT5_CCxY: + { + conv_type = image_utils::cConversion_From_CCxY; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + conv_type = image_utils::cConversion_From_xGxR; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + conv_type = image_utils::cConversion_From_xGBR; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + conv_type = 
image_utils::cConversion_From_AGBR; + break; + } + default: + break; + } + } + + return conv_type; } - case image_utils::cConversion_A_To_RGBA: { - dst.r = src.a; - dst.g = src.a; - dst.b = src.a; - dst.a = src.a; - break; + + image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT5_CCxY: + return image_utils::cConversion_To_CCxY; + case cCRNFmtDXT5_xGxR: + return image_utils::cConversion_To_xGxR; + case cCRNFmtDXT5_xGBR: + return image_utils::cConversion_To_xGBR; + case cCRNFmtDXT5_AGBR: + return image_utils::cConversion_To_AGBR; + default: + break; + } + return image_utils::cConversion_Invalid; } - case image_utils::cConversion_To_Y: { - uint8 y = static_cast(src.get_luma()); - dst.r = y; - dst.g = y; - dst.b = y; - dst.a = src.a; - break; + + double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels) + { + if (!n) + { + return 0.0f; + } + + double sum = 0.0f; + double sum2 = 0.0f; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& cp = pPixels[i]; + + if (!num_channels) + { + uint l = cp.get_luma(); + sum += l; + sum2 += l * l; + } + else + { + for (uint c = 0; c < num_channels; c++) + { + uint l = cp[first_channel + c]; + sum += l; + sum2 += l * l; + } + } + } + + double w = math::maximum(1U, num_channels) * n; + sum /= w; + sum2 /= w; + + double var = sum2 - sum * sum; + var = math::maximum(var, 0.0f); + + return sqrt(var); } - default: { - CRNLIB_ASSERT(false); - dst = src; - break; + + uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename) + { + *pWidth = 0; + *pHeight = 0; + *pActualComps = 0; + + if ((req_comps < 1) || (req_comps > 4)) + { + return nullptr; + } + + mipmapped_texture tex; + + buffer_stream buf_stream(pImage, nSize); + buf_stream.set_name(pFilename); + data_stream_serializer serializer(buf_stream); + + if 
(!tex.read_from_stream(serializer)) + { + return nullptr; + } + + if (tex.is_packed()) + { + if (!tex.unpack_from_dxt(true)) + { + return nullptr; + } + } + + image_u8 img; + image_u8* pImg = tex.get_level_image(0, 0, img); + if (!pImg) + { + return nullptr; + } + + *pWidth = tex.get_width(); + *pHeight = tex.get_height(); + + if (pImg->has_alpha()) + { + *pActualComps = 4; + } + else if (pImg->is_grayscale()) + { + *pActualComps = 1; + } + else + { + *pActualComps = 3; + } + + uint8* pDst = nullptr; + if (req_comps == 4) + { + pDst = (uint8*)malloc(tex.get_total_pixels() * sizeof(uint32)); + uint8* pSrc = (uint8*)pImg->get_ptr(); + memcpy(pDst, pSrc, tex.get_total_pixels() * sizeof(uint32)); + } + else + { + image_u8 luma_img; + if (req_comps == 1) + { + luma_img = *pImg; + luma_img.convert_to_grayscale(); + pImg = &luma_img; + } + + pixel_packer packer(req_comps, 8); + uint32 n; + pDst = image_utils::pack_image(*pImg, packer, n); + } + + return pDst; } - } - - img(x, y) = dst; - } - } -} - -image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt) { - image_utils::conversion_type conv_type = image_utils::cConversion_Invalid; - - if (cooking) { - switch (fmt) { - case PIXEL_FMT_DXT5_CCxY: { - conv_type = image_utils::cConversion_To_CCxY; - break; - } - case PIXEL_FMT_DXT5_xGxR: { - conv_type = image_utils::cConversion_To_xGxR; - break; - } - case PIXEL_FMT_DXT5_xGBR: { - conv_type = image_utils::cConversion_To_xGBR; - break; - } - case PIXEL_FMT_DXT5_AGBR: { - conv_type = image_utils::cConversion_To_AGBR; - break; - } - default: - break; - } - } else { - switch (fmt) { - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXN: { - conv_type = image_utils::cConversion_XY_to_XYZ; - break; - } - case PIXEL_FMT_DXT5_CCxY: { - conv_type = image_utils::cConversion_From_CCxY; - break; - } - case PIXEL_FMT_DXT5_xGxR: { - conv_type = image_utils::cConversion_From_xGxR; - break; - } - case PIXEL_FMT_DXT5_xGBR: { - conv_type = image_utils::cConversion_From_xGBR; - 
break; - } - case PIXEL_FMT_DXT5_AGBR: { - conv_type = image_utils::cConversion_From_AGBR; - break; - } - default: - break; - } - } - - return conv_type; -} - -image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT5_CCxY: - return image_utils::cConversion_To_CCxY; - case cCRNFmtDXT5_xGxR: - return image_utils::cConversion_To_xGxR; - case cCRNFmtDXT5_xGBR: - return image_utils::cConversion_To_xGBR; - case cCRNFmtDXT5_AGBR: - return image_utils::cConversion_To_AGBR; - default: - break; - } - return image_utils::cConversion_Invalid; -} - -double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels) { - if (!n) - return 0.0f; - - double sum = 0.0f; - double sum2 = 0.0f; - - for (uint i = 0; i < n; i++) { - const color_quad_u8& cp = pPixels[i]; - - if (!num_channels) { - uint l = cp.get_luma(); - sum += l; - sum2 += l * l; - } else { - for (uint c = 0; c < num_channels; c++) { - uint l = cp[first_channel + c]; - sum += l; - sum2 += l * l; - } - } - } - - double w = math::maximum(1U, num_channels) * n; - sum /= w; - sum2 /= w; - - double var = sum2 - sum * sum; - var = math::maximum(var, 0.0f); - - return sqrt(var); -} - -uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename) { - *pWidth = 0; - *pHeight = 0; - *pActualComps = 0; - - if ((req_comps < 1) || (req_comps > 4)) - return nullptr; - - mipmapped_texture tex; - - buffer_stream buf_stream(pImage, nSize); - buf_stream.set_name(pFilename); - data_stream_serializer serializer(buf_stream); - - if (!tex.read_from_stream(serializer)) - return nullptr; - - if (tex.is_packed()) { - if (!tex.unpack_from_dxt(true)) - return nullptr; - } - - image_u8 img; - image_u8* pImg = tex.get_level_image(0, 0, img); - if (!pImg) - return nullptr; - - *pWidth = tex.get_width(); - *pHeight = tex.get_height(); - - if (pImg->has_alpha()) - 
*pActualComps = 4; - else if (pImg->is_grayscale()) - *pActualComps = 1; - else - *pActualComps = 3; - - uint8* pDst = nullptr; - if (req_comps == 4) { - pDst = (uint8*)malloc(tex.get_total_pixels() * sizeof(uint32)); - uint8* pSrc = (uint8*)pImg->get_ptr(); - memcpy(pDst, pSrc, tex.get_total_pixels() * sizeof(uint32)); - } else { - image_u8 luma_img; - if (req_comps == 1) { - luma_img = *pImg; - luma_img.convert_to_grayscale(); - pImg = &luma_img; - } - - pixel_packer packer(req_comps, 8); - uint32 n; - pDst = image_utils::pack_image(*pImg, packer, n); - } - - return pDst; -} - -} // namespace image_utils - -} // namespace crnlib + } // namespace image_utils +} // namespace crnlib diff --git a/crnlib/crn_image_utils.h b/crnlib/crn_image_utils.h index 5463e58..44c36ad 100644 --- a/crnlib/crn_image_utils.h +++ b/crnlib/crn_image_utils.h @@ -1,12 +1,32 @@ -// File: crn_image_utils.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_image.h" #include "crn_data_stream_serializer.h" #include "crn_export.h" - namespace crnlib { enum pixel_format; @@ -29,7 +49,8 @@ namespace crnlib // *pActual_comps is set to 1, 3, or 4. req_comps must range from 1-4. CRN_EXPORT uint8* read_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); - enum { + enum + { cWriteFlagIgnoreAlpha = 0x00000001, cWriteFlagGrayscale = 0x00000002, @@ -58,7 +79,7 @@ namespace crnlib struct resample_params { - resample_params(): + resample_params() : m_dst_width(0), m_dst_height(0), m_pFilter("lanczos4"), @@ -67,7 +88,7 @@ namespace crnlib m_wrapping(false), m_first_comp(0), m_num_comps(4), - m_source_gamma(2.2f), // 1.75f + m_source_gamma(2.2f), // 1.75f m_multithreaded(true) { } @@ -162,7 +183,7 @@ namespace crnlib CRN_EXPORT void convert_image(image_u8& img, conversion_type conv_type); - template + template inline uint8* pack_image(const image_type& img, const pixel_packer& packer, uint& n) { n = 0; @@ -200,5 +221,5 @@ namespace crnlib CRN_EXPORT double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); CRN_EXPORT uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); - } // namespace image_utils -} // namespace crnlib + } // namespace image_utils +} // namespace crnlib diff --git a/crnlib/crn_intersect.h b/crnlib/crn_intersect.h index 74c3617..6121482 100644 --- a/crnlib/crn_intersect.h +++ b/crnlib/crn_intersect.h @@ -1,5 +1,25 @@ -// File: crn_intersect.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -9,7 +29,8 @@ namespace crnlib { namespace intersection { - enum result { + enum result + { cBackfacing = -1, cFailure = 0, cSuccess, @@ -19,10 +40,11 @@ namespace crnlib // Returns cInside, cSuccess, or cFailure. // Algorithm: Graphics Gems 1 - template + template result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { - enum { + enum + { cNumDim = vector_type::num_elements, cRight = 0, cLeft = 1, @@ -109,7 +131,7 @@ namespace crnlib return cSuccess; } - template + template result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { if (!box.contains(ray.get_origin())) diff --git a/crnlib/crn_ktx_texture.cpp b/crnlib/crn_ktx_texture.cpp index 7b07552..b0b2629 100644 --- a/crnlib/crn_ktx_texture.cpp +++ b/crnlib/crn_ktx_texture.cpp @@ -1,4 +1,26 @@ -// File: crn_ktx_texture.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_ktx_texture.h" #include "crn_console.h" @@ -6,349 +28,415 @@ // Set #if CRNLIB_KTX_PVRTEX_WORKAROUNDS to 1 to enable various workarounds for oddball KTX files written by PVRTexTool. #define CRNLIB_KTX_PVRTEX_WORKAROUNDS 1 -namespace crnlib { -const uint8 s_ktx_file_id[12] = {0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A}; - -bool is_packed_pixel_ogl_type(uint32 ogl_type) { - switch (ogl_type) { - case KTX_UNSIGNED_BYTE_3_3_2: - case KTX_UNSIGNED_BYTE_2_3_3_REV: - case KTX_UNSIGNED_SHORT_5_6_5: - case KTX_UNSIGNED_SHORT_5_6_5_REV: - case KTX_UNSIGNED_SHORT_4_4_4_4: - case KTX_UNSIGNED_SHORT_4_4_4_4_REV: - case KTX_UNSIGNED_SHORT_5_5_5_1: - case KTX_UNSIGNED_SHORT_1_5_5_5_REV: - case KTX_UNSIGNED_INT_8_8_8_8: - case KTX_UNSIGNED_INT_8_8_8_8_REV: - case KTX_UNSIGNED_INT_10_10_10_2: - case KTX_UNSIGNED_INT_2_10_10_10_REV: - case KTX_UNSIGNED_INT_24_8: - case KTX_UNSIGNED_INT_10F_11F_11F_REV: - case KTX_UNSIGNED_INT_5_9_9_9_REV: - return true; - } - return false; -} - -uint get_ogl_type_size(uint32 ogl_type) { - switch (ogl_type) { - case KTX_UNSIGNED_BYTE: - case KTX_BYTE: - return 1; - case KTX_HALF_FLOAT: - case KTX_UNSIGNED_SHORT: - case 
KTX_SHORT: - return 2; - case KTX_FLOAT: - case KTX_UNSIGNED_INT: - case KTX_INT: - return 4; - case KTX_UNSIGNED_BYTE_3_3_2: - case KTX_UNSIGNED_BYTE_2_3_3_REV: - return 1; - case KTX_UNSIGNED_SHORT_5_6_5: - case KTX_UNSIGNED_SHORT_5_6_5_REV: - case KTX_UNSIGNED_SHORT_4_4_4_4: - case KTX_UNSIGNED_SHORT_4_4_4_4_REV: - case KTX_UNSIGNED_SHORT_5_5_5_1: - case KTX_UNSIGNED_SHORT_1_5_5_5_REV: - return 2; - case KTX_UNSIGNED_INT_8_8_8_8: - case KTX_UNSIGNED_INT_8_8_8_8_REV: - case KTX_UNSIGNED_INT_10_10_10_2: - case KTX_UNSIGNED_INT_2_10_10_10_REV: - case KTX_UNSIGNED_INT_24_8: - case KTX_UNSIGNED_INT_10F_11F_11F_REV: - case KTX_UNSIGNED_INT_5_9_9_9_REV: - return 4; - } - return 0; -} - -uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt) { - switch (ogl_fmt) { - case KTX_ETC1_RGB8_OES: - case KTX_COMPRESSED_RGB8_ETC2: - case KTX_RGB_S3TC: - case KTX_RGB4_S3TC: - case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: - case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: - return KTX_RGB; - case KTX_COMPRESSED_RGBA8_ETC2_EAC: - case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case KTX_RGBA_S3TC: - case KTX_RGBA4_S3TC: - case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - case KTX_RGBA_DXT5_S3TC: - case KTX_RGBA4_DXT5_S3TC: - return KTX_RGBA; - case 1: - case KTX_RED: - case KTX_RED_INTEGER: - case KTX_GREEN: - case KTX_GREEN_INTEGER: - case KTX_BLUE: - case KTX_BLUE_INTEGER: - case KTX_R8: - case KTX_R8UI: - case KTX_LUMINANCE8: - case KTX_ALPHA: - case KTX_LUMINANCE: - case KTX_COMPRESSED_RED_RGTC1_EXT: - case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: - case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: - case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: - return KTX_RED; - case 2: - case KTX_RG: - case KTX_RG8: - case KTX_RG_INTEGER: - case KTX_LUMINANCE_ALPHA: - case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: - case 
KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: - case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: - case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: - return KTX_RG; - case 3: - case KTX_SRGB: - case KTX_RGB: - case KTX_RGB_INTEGER: - case KTX_BGR: - case KTX_BGR_INTEGER: - case KTX_RGB8: - case KTX_SRGB8: - return KTX_RGB; - case 4: - case KTX_RGBA: - case KTX_BGRA: - case KTX_RGBA_INTEGER: - case KTX_BGRA_INTEGER: - case KTX_SRGB_ALPHA: - case KTX_SRGB8_ALPHA8: - case KTX_RGBA8: - return KTX_RGBA; - } - return 0; -} - -bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block) { - uint ogl_type_size = get_ogl_type_size(ogl_type); - - block_dim = 1; - bytes_per_block = 0; - - switch (ogl_fmt) { - case KTX_COMPRESSED_RED_RGTC1_EXT: - case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: - case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: - case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: - case KTX_ETC1_RGB8_OES: - case KTX_COMPRESSED_RGB8_ETC2: - case KTX_RGB_S3TC: - case KTX_RGB4_S3TC: - case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: - case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: { - block_dim = 4; - bytes_per_block = 8; - break; - } - case KTX_COMPRESSED_RGBA8_ETC2_EAC: - case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: - case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: - case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: - case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: - case KTX_RGBA_S3TC: - case KTX_RGBA4_S3TC: - case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - case KTX_RGBA_DXT5_S3TC: - case KTX_RGBA4_DXT5_S3TC: { - block_dim = 4; - bytes_per_block = 16; - break; - } - case 1: - case KTX_ALPHA: - case KTX_RED: - case KTX_GREEN: - case KTX_BLUE: - case KTX_RED_INTEGER: - case KTX_GREEN_INTEGER: - case KTX_BLUE_INTEGER: - case KTX_LUMINANCE: { - 
bytes_per_block = ogl_type_size; - break; - } - case KTX_R8: - case KTX_R8UI: - case KTX_ALPHA8: - case KTX_LUMINANCE8: { - bytes_per_block = 1; - break; - } - case 2: - case KTX_RG: - case KTX_RG_INTEGER: - case KTX_LUMINANCE_ALPHA: { - bytes_per_block = 2 * ogl_type_size; - break; - } - case KTX_RG8: - case KTX_LUMINANCE8_ALPHA8: { - bytes_per_block = 2; - break; - } - case 3: - case KTX_SRGB: - case KTX_RGB: - case KTX_BGR: - case KTX_RGB_INTEGER: - case KTX_BGR_INTEGER: { - bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (3 * ogl_type_size); - break; +namespace crnlib +{ + const uint8 s_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + bool is_packed_pixel_ogl_type(uint32 ogl_type) + { + switch (ogl_type) + { + case KTX_UNSIGNED_BYTE_3_3_2: + case KTX_UNSIGNED_BYTE_2_3_3_REV: + case KTX_UNSIGNED_SHORT_5_6_5: + case KTX_UNSIGNED_SHORT_5_6_5_REV: + case KTX_UNSIGNED_SHORT_4_4_4_4: + case KTX_UNSIGNED_SHORT_4_4_4_4_REV: + case KTX_UNSIGNED_SHORT_5_5_5_1: + case KTX_UNSIGNED_SHORT_1_5_5_5_REV: + case KTX_UNSIGNED_INT_8_8_8_8: + case KTX_UNSIGNED_INT_8_8_8_8_REV: + case KTX_UNSIGNED_INT_10_10_10_2: + case KTX_UNSIGNED_INT_2_10_10_10_REV: + case KTX_UNSIGNED_INT_24_8: + case KTX_UNSIGNED_INT_10F_11F_11F_REV: + case KTX_UNSIGNED_INT_5_9_9_9_REV: + return true; + } + return false; } - case KTX_RGB8: - case KTX_SRGB8: { - bytes_per_block = 3; - break; + + uint get_ogl_type_size(uint32 ogl_type) + { + switch (ogl_type) + { + case KTX_UNSIGNED_BYTE: + case KTX_BYTE: + return 1; + case KTX_HALF_FLOAT: + case KTX_UNSIGNED_SHORT: + case KTX_SHORT: + return 2; + case KTX_FLOAT: + case KTX_UNSIGNED_INT: + case KTX_INT: + return 4; + case KTX_UNSIGNED_BYTE_3_3_2: + case KTX_UNSIGNED_BYTE_2_3_3_REV: + return 1; + case KTX_UNSIGNED_SHORT_5_6_5: + case KTX_UNSIGNED_SHORT_5_6_5_REV: + case KTX_UNSIGNED_SHORT_4_4_4_4: + case KTX_UNSIGNED_SHORT_4_4_4_4_REV: + case KTX_UNSIGNED_SHORT_5_5_5_1: + case 
KTX_UNSIGNED_SHORT_1_5_5_5_REV: + return 2; + case KTX_UNSIGNED_INT_8_8_8_8: + case KTX_UNSIGNED_INT_8_8_8_8_REV: + case KTX_UNSIGNED_INT_10_10_10_2: + case KTX_UNSIGNED_INT_2_10_10_10_REV: + case KTX_UNSIGNED_INT_24_8: + case KTX_UNSIGNED_INT_10F_11F_11F_REV: + case KTX_UNSIGNED_INT_5_9_9_9_REV: + return 4; + } + return 0; } - case 4: - case KTX_RGBA: - case KTX_BGRA: - case KTX_RGBA_INTEGER: - case KTX_BGRA_INTEGER: - case KTX_SRGB_ALPHA: { - bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (4 * ogl_type_size); - break; + + uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt) + { + switch (ogl_fmt) + { + case KTX_ETC1_RGB8_OES: + case KTX_COMPRESSED_RGB8_ETC2: + case KTX_RGB_S3TC: + case KTX_RGB4_S3TC: + case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return KTX_RGB; + case KTX_COMPRESSED_RGBA8_ETC2_EAC: + case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case KTX_RGBA_S3TC: + case KTX_RGBA4_S3TC: + case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case KTX_RGBA_DXT5_S3TC: + case KTX_RGBA4_DXT5_S3TC: + return KTX_RGBA; + case 1: + case KTX_RED: + case KTX_RED_INTEGER: + case KTX_GREEN: + case KTX_GREEN_INTEGER: + case KTX_BLUE: + case KTX_BLUE_INTEGER: + case KTX_R8: + case KTX_R8UI: + case KTX_LUMINANCE8: + case KTX_ALPHA: + case KTX_LUMINANCE: + case KTX_COMPRESSED_RED_RGTC1_EXT: + case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: + case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return KTX_RED; + case 2: + case KTX_RG: + case KTX_RG8: + case KTX_RG_INTEGER: + case KTX_LUMINANCE_ALPHA: + case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + return KTX_RG; + case 3: 
+ case KTX_SRGB: + case KTX_RGB: + case KTX_RGB_INTEGER: + case KTX_BGR: + case KTX_BGR_INTEGER: + case KTX_RGB8: + case KTX_SRGB8: + return KTX_RGB; + case 4: + case KTX_RGBA: + case KTX_BGRA: + case KTX_RGBA_INTEGER: + case KTX_BGRA_INTEGER: + case KTX_SRGB_ALPHA: + case KTX_SRGB8_ALPHA8: + case KTX_RGBA8: + return KTX_RGBA; + } + return 0; } - case KTX_SRGB8_ALPHA8: - case KTX_RGBA8: { - bytes_per_block = 4; - break; + + bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block) + { + uint ogl_type_size = get_ogl_type_size(ogl_type); + + block_dim = 1; + bytes_per_block = 0; + + switch (ogl_fmt) + { + case KTX_COMPRESSED_RED_RGTC1_EXT: + case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: + case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + case KTX_ETC1_RGB8_OES: + case KTX_COMPRESSED_RGB8_ETC2: + case KTX_RGB_S3TC: + case KTX_RGB4_S3TC: + case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + { + block_dim = 4; + bytes_per_block = 8; + break; + } + case KTX_COMPRESSED_RGBA8_ETC2_EAC: + case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: + case KTX_RGBA_S3TC: + case KTX_RGBA4_S3TC: + case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case KTX_RGBA_DXT5_S3TC: + case KTX_RGBA4_DXT5_S3TC: + { + block_dim = 4; + bytes_per_block = 16; + break; + } + case 1: + case KTX_ALPHA: + case KTX_RED: + case KTX_GREEN: + case KTX_BLUE: + case KTX_RED_INTEGER: + case KTX_GREEN_INTEGER: + case KTX_BLUE_INTEGER: + case KTX_LUMINANCE: + { + bytes_per_block = ogl_type_size; + break; + } + case KTX_R8: + case KTX_R8UI: + case 
KTX_ALPHA8: + case KTX_LUMINANCE8: + { + bytes_per_block = 1; + break; + } + case 2: + case KTX_RG: + case KTX_RG_INTEGER: + case KTX_LUMINANCE_ALPHA: + { + bytes_per_block = 2 * ogl_type_size; + break; + } + case KTX_RG8: + case KTX_LUMINANCE8_ALPHA8: + { + bytes_per_block = 2; + break; + } + case 3: + case KTX_SRGB: + case KTX_RGB: + case KTX_BGR: + case KTX_RGB_INTEGER: + case KTX_BGR_INTEGER: + { + bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (3 * ogl_type_size); + break; + } + case KTX_RGB8: + case KTX_SRGB8: + { + bytes_per_block = 3; + break; + } + case 4: + case KTX_RGBA: + case KTX_BGRA: + case KTX_RGBA_INTEGER: + case KTX_BGRA_INTEGER: + case KTX_SRGB_ALPHA: + { + bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (4 * ogl_type_size); + break; + } + case KTX_SRGB8_ALPHA8: + case KTX_RGBA8: + { + bytes_per_block = 4; + break; + } + default: + return false; + } + return true; } - default: - return false; - } - return true; -} - -bool ktx_texture::compute_pixel_info() { - if ((!m_header.m_glType) || (!m_header.m_glFormat)) { - if ((m_header.m_glType) || (m_header.m_glFormat)) - return false; - - // Must be a compressed format. - if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) { + + bool ktx_texture::compute_pixel_info() + { + if ((!m_header.m_glType) || (!m_header.m_glFormat)) + { + if ((m_header.m_glType) || (m_header.m_glFormat)) + { + return false; + } + + // Must be a compressed format. + if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) + { #if CRNLIB_KTX_PVRTEX_WORKAROUNDS - if ((!m_header.m_glInternalFormat) && (!m_header.m_glType) && (!m_header.m_glTypeSize) && (!m_header.m_glBaseInternalFormat)) { - // PVRTexTool writes bogus headers when outputting ETC1. 
- console::warning("ktx_texture::compute_pixel_info: Header doesn't specify any format, assuming ETC1 and hoping for the best"); - m_header.m_glBaseInternalFormat = KTX_RGB; - m_header.m_glInternalFormat = KTX_ETC1_RGB8_OES; - m_header.m_glTypeSize = 1; - m_block_dim = 4; - m_bytes_per_block = 8; - return true; - } + if ((!m_header.m_glInternalFormat) && (!m_header.m_glType) && (!m_header.m_glTypeSize) && (!m_header.m_glBaseInternalFormat)) + { + // PVRTexTool writes bogus headers when outputting ETC1. + console::warning("ktx_texture::compute_pixel_info: Header doesn't specify any format, assuming ETC1 and hoping for the best"); + m_header.m_glBaseInternalFormat = KTX_RGB; + m_header.m_glInternalFormat = KTX_ETC1_RGB8_OES; + m_header.m_glTypeSize = 1; + m_block_dim = 4; + m_bytes_per_block = 8; + return true; + } #endif - return false; - } + return false; + } - if (m_block_dim == 1) - return false; - } else { - // Must be an uncompressed format. - if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) - return false; + if (m_block_dim == 1) + { + return false; + } + } + else + { + // Must be an uncompressed format. 
+ if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) + { + return false; + } - if (m_block_dim > 1) - return false; - } - return true; -} + if (m_block_dim > 1) + { + return false; + } + } + return true; + } -bool ktx_texture::read_from_stream(data_stream_serializer& serializer) { - clear(); + bool ktx_texture::read_from_stream(data_stream_serializer& serializer) + { + clear(); - // Read header - if (serializer.read(&m_header, 1, sizeof(m_header)) != sizeof(ktx_header)) - return false; + // Read header + if (serializer.read(&m_header, 1, sizeof(m_header)) != sizeof(ktx_header)) + { + return false; + } - // Check header - if (memcmp(s_ktx_file_id, m_header.m_identifier, sizeof(m_header.m_identifier))) - return false; + // Check header + if (memcmp(s_ktx_file_id, m_header.m_identifier, sizeof(m_header.m_identifier))) + { + return false; + } - if ((m_header.m_endianness != KTX_OPPOSITE_ENDIAN) && (m_header.m_endianness != KTX_ENDIAN)) - return false; + if ((m_header.m_endianness != KTX_OPPOSITE_ENDIAN) && (m_header.m_endianness != KTX_ENDIAN)) + { + return false; + } - m_opposite_endianness = (m_header.m_endianness == KTX_OPPOSITE_ENDIAN); - if (m_opposite_endianness) { - m_header.endian_swap(); + m_opposite_endianness = (m_header.m_endianness == KTX_OPPOSITE_ENDIAN); + if (m_opposite_endianness) + { + m_header.endian_swap(); - if ((m_header.m_glTypeSize != sizeof(uint8)) && (m_header.m_glTypeSize != sizeof(uint16)) && (m_header.m_glTypeSize != sizeof(uint32))) - return false; - } + if ((m_header.m_glTypeSize != sizeof(uint8)) && (m_header.m_glTypeSize != sizeof(uint16)) && (m_header.m_glTypeSize != sizeof(uint32))) + { + return false; + } + } - if (!check_header()) - return false; + if (!check_header()) + { + return false; + } - if (!compute_pixel_info()) - return false; + if (!compute_pixel_info()) + { + return false; + } - uint8 pad_bytes[3]; + uint8 pad_bytes[3]; - // Read the key value entries - uint 
num_key_value_bytes_remaining = m_header.m_bytesOfKeyValueData; - while (num_key_value_bytes_remaining) { - if (num_key_value_bytes_remaining < sizeof(uint32)) - return false; + // Read the key value entries + uint num_key_value_bytes_remaining = m_header.m_bytesOfKeyValueData; + while (num_key_value_bytes_remaining) + { + if (num_key_value_bytes_remaining < sizeof(uint32)) + { + return false; + } - uint32 key_value_byte_size; - if (serializer.read(&key_value_byte_size, 1, sizeof(uint32)) != sizeof(uint32)) - return false; + uint32 key_value_byte_size; + if (serializer.read(&key_value_byte_size, 1, sizeof(uint32)) != sizeof(uint32)) + { + return false; + } - num_key_value_bytes_remaining -= sizeof(uint32); + num_key_value_bytes_remaining -= sizeof(uint32); - if (m_opposite_endianness) - key_value_byte_size = utils::swap32(key_value_byte_size); + if (m_opposite_endianness) + { + key_value_byte_size = utils::swap32(key_value_byte_size); + } - if (key_value_byte_size > num_key_value_bytes_remaining) - return false; + if (key_value_byte_size > num_key_value_bytes_remaining) + { + return false; + } - uint8_vec key_value_data; - if (key_value_byte_size) { - key_value_data.resize(key_value_byte_size); - if (serializer.read(&key_value_data[0], 1, key_value_byte_size) != key_value_byte_size) - return false; - } + uint8_vec key_value_data; + if (key_value_byte_size) + { + key_value_data.resize(key_value_byte_size); + if (serializer.read(&key_value_data[0], 1, key_value_byte_size) != key_value_byte_size) + { + return false; + } + } - m_key_values.push_back(key_value_data); + m_key_values.push_back(key_value_data); - uint padding = 3 - ((key_value_byte_size + 3) % 4); - if (padding) { - if (serializer.read(pad_bytes, 1, padding) != padding) - return false; - } + uint padding = 3 - ((key_value_byte_size + 3) % 4); + if (padding) + { + if (serializer.read(pad_bytes, 1, padding) != padding) + { + return false; + } + } - num_key_value_bytes_remaining -= key_value_byte_size; - if 
(num_key_value_bytes_remaining < padding) - return false; - num_key_value_bytes_remaining -= padding; - } + num_key_value_bytes_remaining -= key_value_byte_size; + if (num_key_value_bytes_remaining < padding) + { + return false; + } + num_key_value_bytes_remaining -= padding; + } - // Now read the mip levels - uint total_faces = get_num_mips() * get_array_size() * get_num_faces() * get_depth(); - if ((!total_faces) || (total_faces > 65535)) - return false; + // Now read the mip levels + uint total_faces = get_num_mips() * get_array_size() * get_num_faces() * get_depth(); + if ((!total_faces) || (total_faces > 65535)) + { + return false; + } // See Section 2.8 of KTX file format: No rounding to block sizes should be applied for block compressed textures. // OK, I'm going to break that rule otherwise KTX can only store a subset of textures that DDS can handle for no good reason. @@ -356,479 +444,654 @@ bool ktx_texture::read_from_stream(data_stream_serializer& serializer) { const uint mip0_row_blocks = m_header.m_pixelWidth / m_block_dim; const uint mip0_col_blocks = CRNLIB_MAX(1, m_header.m_pixelHeight) / m_block_dim; #else - const uint mip0_row_blocks = (m_header.m_pixelWidth + m_block_dim - 1) / m_block_dim; - const uint mip0_col_blocks = (CRNLIB_MAX(1, m_header.m_pixelHeight) + m_block_dim - 1) / m_block_dim; + const uint mip0_row_blocks = (m_header.m_pixelWidth + m_block_dim - 1) / m_block_dim; + const uint mip0_col_blocks = (CRNLIB_MAX(1, m_header.m_pixelHeight) + m_block_dim - 1) / m_block_dim; #endif - if ((!mip0_row_blocks) || (!mip0_col_blocks)) - return false; + if ((!mip0_row_blocks) || (!mip0_col_blocks)) + { + return false; + } - bool has_valid_image_size_fields = true; - bool disable_mip_and_cubemap_padding = false; + bool has_valid_image_size_fields = true; + bool disable_mip_and_cubemap_padding = false; #if CRNLIB_KTX_PVRTEX_WORKAROUNDS - { - // PVRTexTool has a bogus KTX writer that doesn't write any imageSize fields. Nice. 
- size_t expected_bytes_remaining = 0; - for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { - uint mip_width, mip_height, mip_depth; - get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - - const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; - const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; - if ((!mip_row_blocks) || (!mip_col_blocks)) - return false; - - expected_bytes_remaining += sizeof(uint32); - - if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { - for (uint face = 0; face < get_num_faces(); face++) { - uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; - expected_bytes_remaining += slice_size; + { + // PVRTexTool has a bogus KTX writer that doesn't write any imageSize fields. Nice. + size_t expected_bytes_remaining = 0; + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + { + return false; + } + + expected_bytes_remaining += sizeof(uint32); + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + for (uint face = 0; face < get_num_faces(); face++) + { + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + expected_bytes_remaining += slice_size; + + uint num_cube_pad_bytes = 3 - ((slice_size + 3) % 4); + expected_bytes_remaining += num_cube_pad_bytes; + } + } + else + { + uint total_mip_size = 0; + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + total_mip_size += slice_size; 
+ } + } + } + expected_bytes_remaining += total_mip_size; + + uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); + expected_bytes_remaining += num_mip_pad_bytes; + } + } - uint num_cube_pad_bytes = 3 - ((slice_size + 3) % 4); - expected_bytes_remaining += num_cube_pad_bytes; - } - } else { - uint total_mip_size = 0; - for (uint array_element = 0; array_element < get_array_size(); array_element++) { - for (uint face = 0; face < get_num_faces(); face++) { - for (uint zslice = 0; zslice < mip_depth; zslice++) { - uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; - total_mip_size += slice_size; + if (serializer.get_stream()->get_remaining() < expected_bytes_remaining) + { + has_valid_image_size_fields = false; + disable_mip_and_cubemap_padding = true; + console::warning("ktx_texture::read_from_stream: KTX file size is smaller than expected - trying to read anyway without imageSize fields"); } - } } - expected_bytes_remaining += total_mip_size; +#endif - uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); - expected_bytes_remaining += num_mip_pad_bytes; - } - } + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - if (serializer.get_stream()->get_remaining() < expected_bytes_remaining) { - has_valid_image_size_fields = false; - disable_mip_and_cubemap_padding = true; - console::warning("ktx_texture::read_from_stream: KTX file size is smaller than expected - trying to read anyway without imageSize fields"); - } - } -#endif + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + { + return false; + } - for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { - uint mip_width, mip_height, mip_depth; - get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + uint32 
image_size = 0; + if (!has_valid_image_size_fields) + { + image_size = mip_depth * mip_row_blocks * mip_col_blocks * m_bytes_per_block * get_array_size() * get_num_faces(); + } + else + { + if (serializer.read(&image_size, 1, sizeof(image_size)) != sizeof(image_size)) + { + return false; + } + + if (m_opposite_endianness) + { + image_size = utils::swap32(image_size); + } + } - const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; - const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; - if ((!mip_row_blocks) || (!mip_col_blocks)) - return false; + if (!image_size) + { + return false; + } - uint32 image_size = 0; - if (!has_valid_image_size_fields) - image_size = mip_depth * mip_row_blocks * mip_col_blocks * m_bytes_per_block * get_array_size() * get_num_faces(); - else { - if (serializer.read(&image_size, 1, sizeof(image_size)) != sizeof(image_size)) - return false; + uint total_mip_size = 0; + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + // plain non-array cubemap + for (uint face = 0; face < get_num_faces(); face++) + { + CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, 0, face, 0)); + + m_image_data.push_back(uint8_vec()); + uint8_vec& image_data = m_image_data.back(); + + image_data.resize(image_size); + if (serializer.read(&image_data[0], 1, image_size) != image_size) + { + return false; + } + + if (m_opposite_endianness) + { + utils::endian_swap_mem(&image_data[0], image_size, m_header.m_glTypeSize); + } + + uint num_cube_pad_bytes = disable_mip_and_cubemap_padding ? 
0 : (3 - ((image_size + 3) % 4)); + if (serializer.read(pad_bytes, 1, num_cube_pad_bytes) != num_cube_pad_bytes) + { + return false; + } + + total_mip_size += image_size + num_cube_pad_bytes; + } + } + else + { + // 1D, 2D, 3D (normal or array texture), or array cubemap + uint num_image_bytes_remaining = image_size; + + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, array_element, face, zslice)); + + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if ((!slice_size) || (slice_size > num_image_bytes_remaining)) + { + return false; + } + + m_image_data.push_back(uint8_vec()); + uint8_vec& image_data = m_image_data.back(); + + image_data.resize(slice_size); + if (serializer.read(&image_data[0], 1, slice_size) != slice_size) + { + return false; + } + + if (m_opposite_endianness) + { + utils::endian_swap_mem(&image_data[0], slice_size, m_header.m_glTypeSize); + } + + num_image_bytes_remaining -= slice_size; + + total_mip_size += slice_size; + } + } + } + + if (num_image_bytes_remaining) + { + return false; + } + } - if (m_opposite_endianness) - image_size = utils::swap32(image_size); + uint num_mip_pad_bytes = disable_mip_and_cubemap_padding ? 0 : (3 - ((total_mip_size + 3) % 4)); + if (serializer.read(pad_bytes, 1, num_mip_pad_bytes) != num_mip_pad_bytes) + { + return false; + } + } + return true; } - if (!image_size) - return false; + bool ktx_texture::write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data) + { + if (!consistency_check()) + { + CRNLIB_ASSERT(0); + return false; + } - uint total_mip_size = 0; + memcpy(m_header.m_identifier, s_ktx_file_id, sizeof(m_header.m_identifier)); + m_header.m_endianness = m_opposite_endianness ? 
KTX_OPPOSITE_ENDIAN : KTX_ENDIAN; - if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { - // plain non-array cubemap - for (uint face = 0; face < get_num_faces(); face++) { - CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, 0, face, 0)); + if (m_block_dim == 1) + { + m_header.m_glTypeSize = get_ogl_type_size(m_header.m_glType); + m_header.m_glBaseInternalFormat = m_header.m_glFormat; + } + else + { + m_header.m_glBaseInternalFormat = get_ogl_base_internal_fmt(m_header.m_glInternalFormat); + } - m_image_data.push_back(uint8_vec()); - uint8_vec& image_data = m_image_data.back(); + m_header.m_bytesOfKeyValueData = 0; + if (!no_keyvalue_data) + { + for (uint i = 0; i < m_key_values.size(); i++) + { + m_header.m_bytesOfKeyValueData += sizeof(uint32) + ((m_key_values[i].size() + 3) & ~3); + } + } + + if (m_opposite_endianness) + { + m_header.endian_swap(); + } - image_data.resize(image_size); - if (serializer.read(&image_data[0], 1, image_size) != image_size) - return false; + bool success = (serializer.write(&m_header, sizeof(m_header), 1) == 1); if (m_opposite_endianness) - utils::endian_swap_mem(&image_data[0], image_size, m_header.m_glTypeSize); + { + m_header.endian_swap(); + } - uint num_cube_pad_bytes = disable_mip_and_cubemap_padding ? 
0 : (3 - ((image_size + 3) % 4)); - if (serializer.read(pad_bytes, 1, num_cube_pad_bytes) != num_cube_pad_bytes) - return false; + if (!success) + { + return success; + } + + uint total_key_value_bytes = 0; + const uint8 padding[3] = { 0, 0, 0 }; + + if (!no_keyvalue_data) + { + for (uint i = 0; i < m_key_values.size(); i++) + { + uint32 key_value_size = m_key_values[i].size(); + + if (m_opposite_endianness) + { + key_value_size = utils::swap32(key_value_size); + } + + success = (serializer.write(&key_value_size, sizeof(key_value_size), 1) == 1); + total_key_value_bytes += sizeof(key_value_size); + + if (m_opposite_endianness) + { + key_value_size = utils::swap32(key_value_size); + } + + if (!success) + { + return false; + } + + if (key_value_size) + { + if (serializer.write(&m_key_values[i][0], key_value_size, 1) != 1) + { + return false; + } + total_key_value_bytes += key_value_size; + + uint num_padding = 3 - ((key_value_size + 3) % 4); + if ((num_padding) && (serializer.write(padding, num_padding, 1) != 1)) + { + return false; + } + total_key_value_bytes += num_padding; + } + } + (void)total_key_value_bytes; + } - total_mip_size += image_size + num_cube_pad_bytes; - } - } else { - // 1D, 2D, 3D (normal or array texture), or array cubemap - uint num_image_bytes_remaining = image_size; + CRNLIB_ASSERT(total_key_value_bytes == m_header.m_bytesOfKeyValueData); - for (uint array_element = 0; array_element < get_array_size(); array_element++) { - for (uint face = 0; face < get_num_faces(); face++) { - for (uint zslice = 0; zslice < mip_depth; zslice++) { - CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, array_element, face, zslice)); + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; - if ((!slice_size) || (slice_size > num_image_bytes_remaining)) - return 
false; + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + { + return false; + } - m_image_data.push_back(uint8_vec()); - uint8_vec& image_data = m_image_data.back(); + uint32 image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if ((m_header.m_numberOfArrayElements) || (get_num_faces() == 1)) + { + image_size *= (get_array_size() * get_num_faces() * get_depth()); + } - image_data.resize(slice_size); - if (serializer.read(&image_data[0], 1, slice_size) != slice_size) - return false; + if (!image_size) + { + return false; + } if (m_opposite_endianness) - utils::endian_swap_mem(&image_data[0], slice_size, m_header.m_glTypeSize); + { + image_size = utils::swap32(image_size); + } - num_image_bytes_remaining -= slice_size; + success = (serializer.write(&image_size, sizeof(image_size), 1) == 1); - total_mip_size += slice_size; - } + if (m_opposite_endianness) + { + image_size = utils::swap32(image_size); + } + + if (!success) + { + return false; + } + + uint total_mip_size = 0; + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + // plain non-array cubemap + for (uint face = 0; face < get_num_faces(); face++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, 0, face, 0)); + if ((!image_data.size()) || (image_data.size() != image_size)) + { + return false; + } + + if (m_opposite_endianness) + { + uint8_vec tmp_image_data(image_data); + utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); + if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) + { + return false; + } + } + else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) + { + return false; + } + + uint num_cube_pad_bytes = 3 - ((image_data.size() + 3) % 4); + if ((num_cube_pad_bytes) && (serializer.write(padding, num_cube_pad_bytes, 1) 
!= 1)) + { + return false; + } + + total_mip_size += image_size + num_cube_pad_bytes; + } + } + else + { + // 1D, 2D, 3D (normal or array texture), or array cubemap + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); + if (!image_data.size()) + { + return false; + } + + if (m_opposite_endianness) + { + uint8_vec tmp_image_data(image_data); + utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); + if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) + { + return false; + } + } + else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) + { + return false; + } + + total_mip_size += image_data.size(); + } + } + } + + uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); + if ((num_mip_pad_bytes) && (serializer.write(padding, num_mip_pad_bytes, 1) != 1)) + { + return false; + } + total_mip_size += num_mip_pad_bytes; + } + CRNLIB_ASSERT((total_mip_size & 3) == 0); } - } - if (num_image_bytes_remaining) - return false; + return true; } - uint num_mip_pad_bytes = disable_mip_and_cubemap_padding ? 
0 : (3 - ((total_mip_size + 3) % 4)); - if (serializer.read(pad_bytes, 1, num_mip_pad_bytes) != num_mip_pad_bytes) - return false; - } - return true; -} + bool ktx_texture::init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); -bool ktx_texture::write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data) { - if (!consistency_check()) { - CRNLIB_ASSERT(0); - return false; - } + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; - memcpy(m_header.m_identifier, s_ktx_file_id, sizeof(m_header.m_identifier)); - m_header.m_endianness = m_opposite_endianness ? KTX_OPPOSITE_ENDIAN : KTX_ENDIAN; + if (!compute_pixel_info()) + { + return false; + } - if (m_block_dim == 1) { - m_header.m_glTypeSize = get_ogl_type_size(m_header.m_glType); - m_header.m_glBaseInternalFormat = m_header.m_glFormat; - } else { - m_header.m_glBaseInternalFormat = get_ogl_base_internal_fmt(m_header.m_glInternalFormat); - } + return true; + } - m_header.m_bytesOfKeyValueData = 0; - if (!no_keyvalue_data) { - for (uint i = 0; i < m_key_values.size(); i++) - m_header.m_bytesOfKeyValueData += sizeof(uint32) + ((m_key_values[i].size() + 3) & ~3); - } + bool ktx_texture::init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_numberOfArrayElements = array_size; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; + + if (!compute_pixel_info()) + { + return false; + } - if (m_opposite_endianness) - 
m_header.endian_swap(); + return true; + } - bool success = (serializer.write(&m_header, sizeof(m_header), 1) == 1); + bool ktx_texture::init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_pixelDepth = depth; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; + + if (!compute_pixel_info()) + { + return false; + } - if (m_opposite_endianness) - m_header.endian_swap(); + return true; + } - if (!success) - return success; + bool ktx_texture::init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); - uint total_key_value_bytes = 0; - const uint8 padding[3] = {0, 0, 0}; + m_header.m_pixelWidth = dim; + m_header.m_pixelHeight = dim; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 6; - if (!no_keyvalue_data) { - for (uint i = 0; i < m_key_values.size(); i++) { - uint32 key_value_size = m_key_values[i].size(); + if (!compute_pixel_info()) + { + return false; + } - if (m_opposite_endianness) - key_value_size = utils::swap32(key_value_size); + return true; + } - success = (serializer.write(&key_value_size, sizeof(key_value_size), 1) == 1); - total_key_value_bytes += sizeof(key_value_size); + bool ktx_texture::check_header() const + { + if (((get_num_faces() != 1) && (get_num_faces() != 6)) || (!m_header.m_pixelWidth)) + { + return false; + } - if (m_opposite_endianness) - key_value_size = utils::swap32(key_value_size); + if ((!m_header.m_pixelHeight) && (m_header.m_pixelDepth)) + { + return false; + } - if (!success) - return false; + if ((get_num_faces() == 6) && 
((m_header.m_pixelDepth) || (!m_header.m_pixelHeight))) + { + return false; + } - if (key_value_size) { - if (serializer.write(&m_key_values[i][0], key_value_size, 1) != 1) - return false; - total_key_value_bytes += key_value_size; + if (m_header.m_numberOfMipmapLevels) + { + const uint max_mipmap_dimension = 1U << (m_header.m_numberOfMipmapLevels - 1U); + if (max_mipmap_dimension > (CRNLIB_MAX(CRNLIB_MAX(m_header.m_pixelWidth, m_header.m_pixelHeight), m_header.m_pixelDepth))) + { + return false; + } + } - uint num_padding = 3 - ((key_value_size + 3) % 4); - if ((num_padding) && (serializer.write(padding, num_padding, 1) != 1)) - return false; - total_key_value_bytes += num_padding; - } + return true; } - (void)total_key_value_bytes; - } - - CRNLIB_ASSERT(total_key_value_bytes == m_header.m_bytesOfKeyValueData); - - for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { - uint mip_width, mip_height, mip_depth; - get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; - const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; - if ((!mip_row_blocks) || (!mip_col_blocks)) - return false; + bool ktx_texture::consistency_check() const + { + if (!check_header()) + { + return false; + } - uint32 image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; - if ((m_header.m_numberOfArrayElements) || (get_num_faces() == 1)) - image_size *= (get_array_size() * get_num_faces() * get_depth()); + uint block_dim = 0, bytes_per_block = 0; + if ((!m_header.m_glType) || (!m_header.m_glFormat)) + { + if ((m_header.m_glType) || (m_header.m_glFormat)) + { + return false; + } + if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, block_dim, bytes_per_block)) + { + return false; + } + if (block_dim == 1) + { + return false; + } + //if ((get_width() % block_dim) || (get_height() % block_dim)) + // return false; + } + else + { + if 
(!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, block_dim, bytes_per_block)) + { + return false; + } + if (block_dim > 1) + { + return false; + } + } + if ((m_block_dim != block_dim) || (m_bytes_per_block != bytes_per_block)) + { + return false; + } - if (!image_size) - return false; + if (m_image_data.size() != get_total_images()) + { + return false; + } - if (m_opposite_endianness) - image_size = utils::swap32(image_size); + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - success = (serializer.write(&image_size, sizeof(image_size), 1) == 1); + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + { + return false; + } - if (m_opposite_endianness) - image_size = utils::swap32(image_size); + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); + + uint expected_image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if (image_data.size() != expected_image_size) + { + return false; + } + } + } + } + } - if (!success) - return false; + return true; + } - uint total_mip_size = 0; + const uint8_vec* ktx_texture::find_key(const char* pKey) const + { + const size_t n = strlen(pKey) + 1; + for (uint i = 0; i < m_key_values.size(); i++) + { + const uint8_vec& v = m_key_values[i]; + if ((v.size() >= n) && (!memcmp(&v[0], pKey, n))) + { + return &v; + } + } - if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { - // plain non-array cubemap - for (uint face = 0; face < get_num_faces(); face++) { - const uint8_vec& image_data = 
get_image_data(get_image_index(mip_level, 0, face, 0)); - if ((!image_data.size()) || (image_data.size() != image_size)) - return false; + return nullptr; + } - if (m_opposite_endianness) { - uint8_vec tmp_image_data(image_data); - utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); - if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) + bool ktx_texture::get_key_value_as_string(const char* pKey, dynamic_string& str) const + { + const uint8_vec* p = find_key(pKey); + if (!p) + { + str.clear(); return false; - } else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) - return false; - - uint num_cube_pad_bytes = 3 - ((image_data.size() + 3) % 4); - if ((num_cube_pad_bytes) && (serializer.write(padding, num_cube_pad_bytes, 1) != 1)) - return false; - - total_mip_size += image_size + num_cube_pad_bytes; - } - } else { - // 1D, 2D, 3D (normal or array texture), or array cubemap - for (uint array_element = 0; array_element < get_array_size(); array_element++) { - for (uint face = 0; face < get_num_faces(); face++) { - for (uint zslice = 0; zslice < mip_depth; zslice++) { - const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); - if (!image_data.size()) - return false; - - if (m_opposite_endianness) { - uint8_vec tmp_image_data(image_data); - utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); - if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) - return false; - } else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) - return false; + } + + const uint ofs = (static_cast(strlen(pKey)) + 1); + const uint8* pValue = p->get_ptr() + ofs; + const uint n = p->size() - ofs; - total_mip_size += image_data.size(); - } + uint i; + for (i = 0; i < n; i++) + { + if (!pValue[i]) + { + break; + } } - } - uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); - if ((num_mip_pad_bytes) && 
(serializer.write(padding, num_mip_pad_bytes, 1) != 1)) - return false; - total_mip_size += num_mip_pad_bytes; + str.set_from_buf(pValue, i); + return true; } - CRNLIB_ASSERT((total_mip_size & 3) == 0); - } - - return true; -} - -bool ktx_texture::init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { - clear(); - - m_header.m_pixelWidth = width; - m_header.m_pixelHeight = height; - m_header.m_numberOfMipmapLevels = num_mips; - m_header.m_glInternalFormat = ogl_internal_fmt; - m_header.m_glFormat = ogl_fmt; - m_header.m_glType = ogl_type; - m_header.m_numberOfFaces = 1; - - if (!compute_pixel_info()) - return false; - - return true; -} - -bool ktx_texture::init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { - clear(); - - m_header.m_pixelWidth = width; - m_header.m_pixelHeight = height; - m_header.m_numberOfMipmapLevels = num_mips; - m_header.m_numberOfArrayElements = array_size; - m_header.m_glInternalFormat = ogl_internal_fmt; - m_header.m_glFormat = ogl_fmt; - m_header.m_glType = ogl_type; - m_header.m_numberOfFaces = 1; - - if (!compute_pixel_info()) - return false; - - return true; -} - -bool ktx_texture::init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { - clear(); - - m_header.m_pixelWidth = width; - m_header.m_pixelHeight = height; - m_header.m_pixelDepth = depth; - m_header.m_numberOfMipmapLevels = num_mips; - m_header.m_glInternalFormat = ogl_internal_fmt; - m_header.m_glFormat = ogl_fmt; - m_header.m_glType = ogl_type; - m_header.m_numberOfFaces = 1; - - if (!compute_pixel_info()) - return false; - - return true; -} - -bool ktx_texture::init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { - clear(); - - m_header.m_pixelWidth = dim; - m_header.m_pixelHeight = dim; - m_header.m_numberOfMipmapLevels = num_mips; - 
m_header.m_glInternalFormat = ogl_internal_fmt; - m_header.m_glFormat = ogl_fmt; - m_header.m_glType = ogl_type; - m_header.m_numberOfFaces = 6; - - if (!compute_pixel_info()) - return false; - - return true; -} - -bool ktx_texture::check_header() const { - if (((get_num_faces() != 1) && (get_num_faces() != 6)) || (!m_header.m_pixelWidth)) - return false; - - if ((!m_header.m_pixelHeight) && (m_header.m_pixelDepth)) - return false; - - if ((get_num_faces() == 6) && ((m_header.m_pixelDepth) || (!m_header.m_pixelHeight))) - return false; - - if (m_header.m_numberOfMipmapLevels) { - const uint max_mipmap_dimension = 1U << (m_header.m_numberOfMipmapLevels - 1U); - if (max_mipmap_dimension > (CRNLIB_MAX(CRNLIB_MAX(m_header.m_pixelWidth, m_header.m_pixelHeight), m_header.m_pixelDepth))) - return false; - } - - return true; -} - -bool ktx_texture::consistency_check() const { - if (!check_header()) - return false; - - uint block_dim = 0, bytes_per_block = 0; - if ((!m_header.m_glType) || (!m_header.m_glFormat)) { - if ((m_header.m_glType) || (m_header.m_glFormat)) - return false; - if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, block_dim, bytes_per_block)) - return false; - if (block_dim == 1) - return false; - //if ((get_width() % block_dim) || (get_height() % block_dim)) - // return false; - } else { - if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, block_dim, bytes_per_block)) - return false; - if (block_dim > 1) - return false; - } - if ((m_block_dim != block_dim) || (m_bytes_per_block != bytes_per_block)) - return false; - - if (m_image_data.size() != get_total_images()) - return false; - - for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { - uint mip_width, mip_height, mip_depth; - get_mip_dim(mip_level, mip_width, mip_height, mip_depth); - - const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; - const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; - if ((!mip_row_blocks) 
|| (!mip_col_blocks)) - return false; - - for (uint array_element = 0; array_element < get_array_size(); array_element++) { - for (uint face = 0; face < get_num_faces(); face++) { - for (uint zslice = 0; zslice < mip_depth; zslice++) { - const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); - - uint expected_image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; - if (image_data.size() != expected_image_size) - return false; - } - } + + uint ktx_texture::add_key_value(const char* pKey, const void* pVal, uint val_size) + { + const uint idx = m_key_values.size(); + m_key_values.resize(idx + 1); + uint8_vec& v = m_key_values.back(); + v.append(reinterpret_cast(pKey), static_cast(strlen(pKey)) + 1); + v.append(static_cast(pVal), val_size); + return idx; } - } - - return true; -} - -const uint8_vec* ktx_texture::find_key(const char* pKey) const { - const size_t n = strlen(pKey) + 1; - for (uint i = 0; i < m_key_values.size(); i++) { - const uint8_vec& v = m_key_values[i]; - if ((v.size() >= n) && (!memcmp(&v[0], pKey, n))) - return &v; - } - - return nullptr; -} - -bool ktx_texture::get_key_value_as_string(const char* pKey, dynamic_string& str) const { - const uint8_vec* p = find_key(pKey); - if (!p) { - str.clear(); - return false; - } - - const uint ofs = (static_cast(strlen(pKey)) + 1); - const uint8* pValue = p->get_ptr() + ofs; - const uint n = p->size() - ofs; - - uint i; - for (i = 0; i < n; i++) - if (!pValue[i]) - break; - - str.set_from_buf(pValue, i); - return true; -} - -uint ktx_texture::add_key_value(const char* pKey, const void* pVal, uint val_size) { - const uint idx = m_key_values.size(); - m_key_values.resize(idx + 1); - uint8_vec& v = m_key_values.back(); - v.append(reinterpret_cast(pKey), static_cast(strlen(pKey)) + 1); - v.append(static_cast(pVal), val_size); - return idx; -} - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_ktx_texture.h b/crnlib/crn_ktx_texture.h 
index 42e556d..30571c8 100644 --- a/crnlib/crn_ktx_texture.h +++ b/crnlib/crn_ktx_texture.h @@ -1,4 +1,26 @@ -// File: crn_ktx_texture.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #ifndef _KTX_TEXTURE_H_ #define _KTX_TEXTURE_H_ #ifdef _MSC_VER @@ -10,282 +32,389 @@ #define KTX_ENDIAN 0x04030201 #define KTX_OPPOSITE_ENDIAN 0x01020304 -namespace crnlib { -extern const uint8 s_ktx_file_id[12]; - -struct ktx_header { - uint8 m_identifier[12]; - uint32 m_endianness; - uint32 m_glType; - uint32 m_glTypeSize; - uint32 m_glFormat; - uint32 m_glInternalFormat; - uint32 m_glBaseInternalFormat; - uint32 m_pixelWidth; - uint32 m_pixelHeight; - uint32 m_pixelDepth; - uint32 m_numberOfArrayElements; - uint32 m_numberOfFaces; - uint32 m_numberOfMipmapLevels; - uint32 m_bytesOfKeyValueData; - - void clear() { - memset(this, 0, sizeof(*this)); - } - - void endian_swap() { - utils::endian_swap_mem32(&m_endianness, (sizeof(*this) - sizeof(m_identifier)) / sizeof(uint32)); - } -}; - -typedef crnlib::vector ktx_key_value_vec; -typedef crnlib::vector ktx_image_data_vec; - -// Compressed pixel data formats: ETC1, DXT1, DXT3, DXT5 -enum { - KTX_ETC1_RGB8_OES = 0x8D64, - KTX_COMPRESSED_RGB8_ETC2 = 0x9274, - KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, - KTX_RGB_S3TC = 0x83A0, - KTX_RGB4_S3TC = 0x83A1, - KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, - KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1, - KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT = 0x8C4C, - KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT = 0x8C4D, - KTX_RGBA_S3TC = 0x83A2, - KTX_RGBA4_S3TC = 0x83A3, - KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT = 0x83F2, - KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT = 0x8C4E, - KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, - KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT = 0x8C4F, - KTX_RGBA_DXT5_S3TC = 0x83A4, - KTX_RGBA4_DXT5_S3TC = 0x83A5, - KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, - KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT = 0x8DBC, - KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, - KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT = 0x8DBE, - KTX_COMPRESSED_LUMINANCE_LATC1_EXT = 0x8C70, - KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT = 0x8C71, - KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C72, - 
KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C73 -}; - -// Pixel formats (various internal, base, and base internal formats) -enum { - KTX_R8 = 0x8229, - KTX_R8UI = 0x8232, - KTX_RGB8 = 0x8051, - KTX_SRGB8 = 0x8C41, - KTX_SRGB = 0x8C40, - KTX_SRGB_ALPHA = 0x8C42, - KTX_SRGB8_ALPHA8 = 0x8C43, - KTX_RGBA8 = 0x8058, - KTX_STENCIL_INDEX = 0x1901, - KTX_DEPTH_COMPONENT = 0x1902, - KTX_DEPTH_STENCIL = 0x84F9, - KTX_RED = 0x1903, - KTX_GREEN = 0x1904, - KTX_BLUE = 0x1905, - KTX_ALPHA = 0x1906, - KTX_RG = 0x8227, - KTX_RGB = 0x1907, - KTX_RGBA = 0x1908, - KTX_BGR = 0x80E0, - KTX_BGRA = 0x80E1, - KTX_RED_INTEGER = 0x8D94, - KTX_GREEN_INTEGER = 0x8D95, - KTX_BLUE_INTEGER = 0x8D96, - KTX_ALPHA_INTEGER = 0x8D97, - KTX_RGB_INTEGER = 0x8D98, - KTX_RGBA_INTEGER = 0x8D99, - KTX_BGR_INTEGER = 0x8D9A, - KTX_BGRA_INTEGER = 0x8D9B, - KTX_LUMINANCE = 0x1909, - KTX_LUMINANCE_ALPHA = 0x190A, - KTX_RG_INTEGER = 0x8228, - KTX_RG8 = 0x822B, - KTX_ALPHA8 = 0x803C, - KTX_LUMINANCE8 = 0x8040, - KTX_LUMINANCE8_ALPHA8 = 0x8045 -}; - -// Pixel data types -enum { - KTX_UNSIGNED_BYTE = 0x1401, - KTX_BYTE = 0x1400, - KTX_UNSIGNED_SHORT = 0x1403, - KTX_SHORT = 0x1402, - KTX_UNSIGNED_INT = 0x1405, - KTX_INT = 0x1404, - KTX_HALF_FLOAT = 0x140B, - KTX_FLOAT = 0x1406, - KTX_UNSIGNED_BYTE_3_3_2 = 0x8032, - KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362, - KTX_UNSIGNED_SHORT_5_6_5 = 0x8363, - KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364, - KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033, - KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365, - KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034, - KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366, - KTX_UNSIGNED_INT_8_8_8_8 = 0x8035, - KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367, - KTX_UNSIGNED_INT_10_10_10_2 = 0x8036, - KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368, - KTX_UNSIGNED_INT_24_8 = 0x84FA, - KTX_UNSIGNED_INT_10F_11F_11F_REV = 0x8C3B, - KTX_UNSIGNED_INT_5_9_9_9_REV = 0x8C3E, - KTX_FLOAT_32_UNSIGNED_INT_24_8_REV = 0x8DAD -}; - -bool is_packed_pixel_ogl_type(uint32 ogl_type); -uint get_ogl_type_size(uint32 ogl_type); 
-bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block); -uint get_ogl_type_size(uint32 ogl_type); -uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt); - -class ktx_texture { - public: - ktx_texture() { - clear(); - } - - ktx_texture(const ktx_texture& other) { - *this = other; - } - - ktx_texture& operator=(const ktx_texture& rhs) { - if (this == &rhs) - return *this; - - clear(); - - m_header = rhs.m_header; - m_key_values = rhs.m_key_values; - m_image_data = rhs.m_image_data; - m_block_dim = rhs.m_block_dim; - m_bytes_per_block = rhs.m_bytes_per_block; - m_opposite_endianness = rhs.m_opposite_endianness; - - return *this; - } - - void clear() { - m_header.clear(); - m_key_values.clear(); - m_image_data.clear(); - - m_block_dim = 0; - m_bytes_per_block = 0; - - m_opposite_endianness = false; - } - - // High level methods - bool read_from_stream(data_stream_serializer& serializer); - bool write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data = false); - - bool init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); - bool init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); - bool init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); - bool init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); - - bool check_header() const; - bool consistency_check() const; - - // General info - - bool is_valid() const { return (m_header.m_pixelWidth > 0) && (m_image_data.size() > 0); } - - uint get_width() const { return m_header.m_pixelWidth; } - uint get_height() const { return CRNLIB_MAX(m_header.m_pixelHeight, 1); } - uint get_depth() const { return CRNLIB_MAX(m_header.m_pixelDepth, 1); } - uint get_num_mips() const { return CRNLIB_MAX(m_header.m_numberOfMipmapLevels, 1); } - uint 
get_array_size() const { return CRNLIB_MAX(m_header.m_numberOfArrayElements, 1); } - uint get_num_faces() const { return m_header.m_numberOfFaces; } - - uint32 get_ogl_type() const { return m_header.m_glType; } - uint32 get_ogl_fmt() const { return m_header.m_glFormat; } - uint32 get_ogl_base_fmt() const { return m_header.m_glBaseInternalFormat; } - uint32 get_ogl_internal_fmt() const { return m_header.m_glInternalFormat; } - - uint get_total_images() const { return get_num_mips() * (get_depth() * get_num_faces() * get_array_size()); } - - bool is_compressed() const { return m_block_dim > 1; } - bool is_uncompressed() const { return !is_compressed(); } - - bool get_opposite_endianness() const { return m_opposite_endianness; } - void set_opposite_endianness(bool flag) { m_opposite_endianness = flag; } - - uint32 get_block_dim() const { return m_block_dim; } - uint32 get_bytes_per_block() const { return m_bytes_per_block; } - - const ktx_header& get_header() const { return m_header; } - - // Key values - const ktx_key_value_vec& get_key_value_vec() const { return m_key_values; } - ktx_key_value_vec& get_key_value_vec() { return m_key_values; } - - const uint8_vec* find_key(const char* pKey) const; - bool get_key_value_as_string(const char* pKey, dynamic_string& str) const; - - uint add_key_value(const char* pKey, const void* pVal, uint val_size); - uint add_key_value(const char* pKey, const char* pVal) { return add_key_value(pKey, pVal, static_cast(strlen(pVal)) + 1); } - - // Image data - uint get_num_images() const { return m_image_data.size(); } - - const uint8_vec& get_image_data(uint image_index) const { return m_image_data[image_index]; } - uint8_vec& get_image_data(uint image_index) { return m_image_data[image_index]; } - - const uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) const { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } - uint8_vec& get_image_data(uint 
mip_index, uint array_index, uint face_index, uint zslice_index) { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } - - const ktx_image_data_vec& get_image_data_vec() const { return m_image_data; } - ktx_image_data_vec& get_image_data_vec() { return m_image_data; } - - void add_image(uint face_index, uint mip_index, const void* pImage, uint image_size) { - const uint image_index = get_image_index(mip_index, 0, face_index, 0); - if (image_index >= m_image_data.size()) - m_image_data.resize(image_index + 1); - if (image_size) { - uint8_vec& v = m_image_data[image_index]; - v.resize(image_size); - memcpy(&v[0], pImage, image_size); - } - } - - uint get_image_index(uint mip_index, uint array_index, uint face_index, uint zslice_index) const { - CRNLIB_ASSERT((mip_index < get_num_mips()) && (array_index < get_array_size()) && (face_index < get_num_faces()) && (zslice_index < get_depth())); - return zslice_index + (face_index * get_depth()) + (array_index * (get_depth() * get_num_faces())) + (mip_index * (get_depth() * get_num_faces() * get_array_size())); - } - - void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height) const { - CRNLIB_ASSERT(mip_index < get_num_mips()); - mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); - mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); - } - - void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height, uint& mip_depth) const { - CRNLIB_ASSERT(mip_index < get_num_mips()); - mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); - mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); - mip_depth = CRNLIB_MAX(get_depth() >> mip_index, 1); - } - - private: - ktx_header m_header; - - ktx_key_value_vec m_key_values; - ktx_image_data_vec m_image_data; - - uint32 m_block_dim; - uint32 m_bytes_per_block; - - bool m_opposite_endianness; - - bool compute_pixel_info(); -}; - -} // namespace crnlib - -#endif // #ifndef _KTX_TEXTURE_H_ +namespace crnlib +{ + extern const uint8 
s_ktx_file_id[12]; + + struct ktx_header + { + uint8 m_identifier[12]; + uint32 m_endianness; + uint32 m_glType; + uint32 m_glTypeSize; + uint32 m_glFormat; + uint32 m_glInternalFormat; + uint32 m_glBaseInternalFormat; + uint32 m_pixelWidth; + uint32 m_pixelHeight; + uint32 m_pixelDepth; + uint32 m_numberOfArrayElements; + uint32 m_numberOfFaces; + uint32 m_numberOfMipmapLevels; + uint32 m_bytesOfKeyValueData; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void endian_swap() + { + utils::endian_swap_mem32(&m_endianness, (sizeof(*this) - sizeof(m_identifier)) / sizeof(uint32)); + } + }; + + typedef crnlib::vector ktx_key_value_vec; + typedef crnlib::vector ktx_image_data_vec; + + // Compressed pixel data formats: ETC1, DXT1, DXT3, DXT5 + enum + { + KTX_ETC1_RGB8_OES = 0x8D64, + KTX_COMPRESSED_RGB8_ETC2 = 0x9274, + KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, + KTX_RGB_S3TC = 0x83A0, + KTX_RGB4_S3TC = 0x83A1, + KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, + KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1, + KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT = 0x8C4C, + KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT = 0x8C4D, + KTX_RGBA_S3TC = 0x83A2, + KTX_RGBA4_S3TC = 0x83A3, + KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT = 0x83F2, + KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT = 0x8C4E, + KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, + KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT = 0x8C4F, + KTX_RGBA_DXT5_S3TC = 0x83A4, + KTX_RGBA4_DXT5_S3TC = 0x83A5, + KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, + KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT = 0x8DBC, + KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, + KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT = 0x8DBE, + KTX_COMPRESSED_LUMINANCE_LATC1_EXT = 0x8C70, + KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT = 0x8C71, + KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C72, + KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C73 + }; + + // Pixel formats (various internal, base, and base internal formats) + enum + { + KTX_R8 = 0x8229, + KTX_R8UI = 0x8232, + KTX_RGB8 = 0x8051, + KTX_SRGB8 = 
0x8C41, + KTX_SRGB = 0x8C40, + KTX_SRGB_ALPHA = 0x8C42, + KTX_SRGB8_ALPHA8 = 0x8C43, + KTX_RGBA8 = 0x8058, + KTX_STENCIL_INDEX = 0x1901, + KTX_DEPTH_COMPONENT = 0x1902, + KTX_DEPTH_STENCIL = 0x84F9, + KTX_RED = 0x1903, + KTX_GREEN = 0x1904, + KTX_BLUE = 0x1905, + KTX_ALPHA = 0x1906, + KTX_RG = 0x8227, + KTX_RGB = 0x1907, + KTX_RGBA = 0x1908, + KTX_BGR = 0x80E0, + KTX_BGRA = 0x80E1, + KTX_RED_INTEGER = 0x8D94, + KTX_GREEN_INTEGER = 0x8D95, + KTX_BLUE_INTEGER = 0x8D96, + KTX_ALPHA_INTEGER = 0x8D97, + KTX_RGB_INTEGER = 0x8D98, + KTX_RGBA_INTEGER = 0x8D99, + KTX_BGR_INTEGER = 0x8D9A, + KTX_BGRA_INTEGER = 0x8D9B, + KTX_LUMINANCE = 0x1909, + KTX_LUMINANCE_ALPHA = 0x190A, + KTX_RG_INTEGER = 0x8228, + KTX_RG8 = 0x822B, + KTX_ALPHA8 = 0x803C, + KTX_LUMINANCE8 = 0x8040, + KTX_LUMINANCE8_ALPHA8 = 0x8045 + }; + + // Pixel data types + enum + { + KTX_UNSIGNED_BYTE = 0x1401, + KTX_BYTE = 0x1400, + KTX_UNSIGNED_SHORT = 0x1403, + KTX_SHORT = 0x1402, + KTX_UNSIGNED_INT = 0x1405, + KTX_INT = 0x1404, + KTX_HALF_FLOAT = 0x140B, + KTX_FLOAT = 0x1406, + KTX_UNSIGNED_BYTE_3_3_2 = 0x8032, + KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362, + KTX_UNSIGNED_SHORT_5_6_5 = 0x8363, + KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364, + KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033, + KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365, + KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034, + KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366, + KTX_UNSIGNED_INT_8_8_8_8 = 0x8035, + KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367, + KTX_UNSIGNED_INT_10_10_10_2 = 0x8036, + KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368, + KTX_UNSIGNED_INT_24_8 = 0x84FA, + KTX_UNSIGNED_INT_10F_11F_11F_REV = 0x8C3B, + KTX_UNSIGNED_INT_5_9_9_9_REV = 0x8C3E, + KTX_FLOAT_32_UNSIGNED_INT_24_8_REV = 0x8DAD + }; + + bool is_packed_pixel_ogl_type(uint32 ogl_type); + uint get_ogl_type_size(uint32 ogl_type); + bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block); + uint get_ogl_type_size(uint32 ogl_type); + uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt); + + class 
ktx_texture + { + public: + ktx_texture() + { + clear(); + } + + ktx_texture(const ktx_texture& other) + { + *this = other; + } + + ktx_texture& operator=(const ktx_texture& rhs) + { + if (this == &rhs) + { + return *this; + } + + clear(); + + m_header = rhs.m_header; + m_key_values = rhs.m_key_values; + m_image_data = rhs.m_image_data; + m_block_dim = rhs.m_block_dim; + m_bytes_per_block = rhs.m_bytes_per_block; + m_opposite_endianness = rhs.m_opposite_endianness; + + return *this; + } + + void clear() + { + m_header.clear(); + m_key_values.clear(); + m_image_data.clear(); + + m_block_dim = 0; + m_bytes_per_block = 0; + + m_opposite_endianness = false; + } + + // High level methods + bool read_from_stream(data_stream_serializer& serializer); + bool write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data = false); + + bool init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + + bool check_header() const; + bool consistency_check() const; + + // General info + + bool is_valid() const + { + return (m_header.m_pixelWidth > 0) && (m_image_data.size() > 0); + } + + uint get_width() const + { + return m_header.m_pixelWidth; + } + uint get_height() const + { + return CRNLIB_MAX(m_header.m_pixelHeight, 1); + } + uint get_depth() const + { + return CRNLIB_MAX(m_header.m_pixelDepth, 1); + } + uint get_num_mips() const + { + return CRNLIB_MAX(m_header.m_numberOfMipmapLevels, 1); + } + uint get_array_size() const + { + return CRNLIB_MAX(m_header.m_numberOfArrayElements, 1); + } + uint get_num_faces() const + { + return m_header.m_numberOfFaces; 
+ } + + uint32 get_ogl_type() const + { + return m_header.m_glType; + } + uint32 get_ogl_fmt() const + { + return m_header.m_glFormat; + } + uint32 get_ogl_base_fmt() const + { + return m_header.m_glBaseInternalFormat; + } + uint32 get_ogl_internal_fmt() const + { + return m_header.m_glInternalFormat; + } + + uint get_total_images() const + { + return get_num_mips() * (get_depth() * get_num_faces() * get_array_size()); + } + + bool is_compressed() const + { + return m_block_dim > 1; + } + bool is_uncompressed() const + { + return !is_compressed(); + } + + bool get_opposite_endianness() const + { + return m_opposite_endianness; + } + void set_opposite_endianness(bool flag) + { + m_opposite_endianness = flag; + } + + uint32 get_block_dim() const + { + return m_block_dim; + } + uint32 get_bytes_per_block() const + { + return m_bytes_per_block; + } + + const ktx_header& get_header() const + { + return m_header; + } + + // Key values + const ktx_key_value_vec& get_key_value_vec() const + { + return m_key_values; + } + ktx_key_value_vec& get_key_value_vec() + { + return m_key_values; + } + + const uint8_vec* find_key(const char* pKey) const; + bool get_key_value_as_string(const char* pKey, dynamic_string& str) const; + + uint add_key_value(const char* pKey, const void* pVal, uint val_size); + uint add_key_value(const char* pKey, const char* pVal) + { + return add_key_value(pKey, pVal, static_cast(strlen(pVal)) + 1); + } + + // Image data + uint get_num_images() const + { + return m_image_data.size(); + } + + const uint8_vec& get_image_data(uint image_index) const + { + return m_image_data[image_index]; + } + uint8_vec& get_image_data(uint image_index) + { + return m_image_data[image_index]; + } + + const uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) const + { + return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); + } + uint8_vec& get_image_data(uint mip_index, uint array_index, uint 
face_index, uint zslice_index) + { + return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); + } + + const ktx_image_data_vec& get_image_data_vec() const + { + return m_image_data; + } + ktx_image_data_vec& get_image_data_vec() + { + return m_image_data; + } + + void add_image(uint face_index, uint mip_index, const void* pImage, uint image_size) + { + const uint image_index = get_image_index(mip_index, 0, face_index, 0); + if (image_index >= m_image_data.size()) + { + m_image_data.resize(image_index + 1); + } + if (image_size) + { + uint8_vec& v = m_image_data[image_index]; + v.resize(image_size); + memcpy(&v[0], pImage, image_size); + } + } + + uint get_image_index(uint mip_index, uint array_index, uint face_index, uint zslice_index) const + { + CRNLIB_ASSERT((mip_index < get_num_mips()) && (array_index < get_array_size()) && (face_index < get_num_faces()) && (zslice_index < get_depth())); + return zslice_index + (face_index * get_depth()) + (array_index * (get_depth() * get_num_faces())) + (mip_index * (get_depth() * get_num_faces() * get_array_size())); + } + + void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height) const + { + CRNLIB_ASSERT(mip_index < get_num_mips()); + mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); + mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); + } + + void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height, uint& mip_depth) const + { + CRNLIB_ASSERT(mip_index < get_num_mips()); + mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); + mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); + mip_depth = CRNLIB_MAX(get_depth() >> mip_index, 1); + } + + private: + ktx_header m_header; + + ktx_key_value_vec m_key_values; + ktx_image_data_vec m_image_data; + + uint32 m_block_dim; + uint32 m_bytes_per_block; + + bool m_opposite_endianness; + + bool compute_pixel_info(); + }; +} // namespace crnlib + +#endif // #ifndef _KTX_TEXTURE_H_ diff --git a/crnlib/crn_lzma_codec.cpp 
b/crnlib/crn_lzma_codec.cpp index e075a4b..4481799 100644 --- a/crnlib/crn_lzma_codec.cpp +++ b/crnlib/crn_lzma_codec.cpp @@ -1,5 +1,25 @@ -// File: crn_lzma_codec.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include @@ -11,7 +31,7 @@ namespace crnlib { - lzma_codec::lzma_codec(): + lzma_codec::lzma_codec() : m_pCompress(LzmaCompress), m_pUncompress(LzmaUncompress) { @@ -54,7 +74,7 @@ namespace crnlib status = (*m_pCompress)(pComp_data, &destLen, reinterpret_cast(p), n, pHDR->m_lzma_props, &outPropsSize, -1, /* 0 <= level <= 9, default = 5 */ - 0, /* default = (1 << 24) */ + 0, /* default = (1 << 24) */ -1, /* 0 <= lc <= 8, default = 3 */ -1, /* 0 <= lp <= 4, default = 0 */ -1, /* 0 <= pb <= 4, default = 2 */ @@ -64,7 +84,7 @@ namespace crnlib #else 1 #endif - ); + ); if (status != SZ_ERROR_OUTPUT_EOF) { @@ -160,4 +180,4 @@ namespace crnlib return true; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_lzma_codec.h b/crnlib/crn_lzma_codec.h index bfba7f0..9692ce9 100644 --- a/crnlib/crn_lzma_codec.h +++ b/crnlib/crn_lzma_codec.h @@ -1,5 +1,26 @@ -// File: crn_lzma_codec.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_packed_uint.h" @@ -26,14 +47,14 @@ namespace crnlib private: typedef int(CRNLIB_STDCALL* LzmaCompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* default = (1 << 24) */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ); + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); typedef int(CRNLIB_STDCALL* LzmaUncompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, const unsigned char* props, size_t propsSize); @@ -41,13 +62,17 @@ namespace crnlib LzmaCompressFuncPtr m_pCompress; LzmaUncompressFuncPtr m_pUncompress; - enum { cLZMAPropsSize = 5 }; + enum + { + cLZMAPropsSize = 5 + }; #pragma pack(push) #pragma pack(1) struct header { - enum { + enum + { cSig = 'L' | ('0' << 8), cChecksumSkipBytes = 3 }; @@ -64,4 +89,4 @@ namespace crnlib #pragma pack(pop) }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_math.cpp b/crnlib/crn_math.cpp index 36d2603..4250473 100644 --- a/crnlib/crn_math.cpp +++ b/crnlib/crn_math.cpp @@ -1,5 +1,25 @@ -// File: crn_math.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" @@ -7,8 +27,7 @@ namespace crnlib { namespace math { - uint g_bitmasks[32] = - { + uint g_bitmasks[32] = { 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, @@ -52,7 +71,8 @@ namespace crnlib const int tex_height = height; width = 1; - for (;;) { + for (;;) + { if ((width * 2) > tex_width) { break; @@ -83,5 +103,5 @@ namespace crnlib height = math::next_pow2((uint32)height); } } - } // namespace math -} // namespace crnlib + } // namespace math +} // namespace crnlib diff --git a/crnlib/crn_math.h b/crnlib/crn_math.h index 9f3faec..16021da 100644 --- a/crnlib/crn_math.h +++ b/crnlib/crn_math.h @@ -1,5 +1,26 @@ -// File: crn_math.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" @@ -21,13 +42,13 @@ namespace crnlib extern uint g_bitmasks[32]; - template + template inline bool within_closed_range(T a, T b, T c) { return (a >= b) && (a <= c); } - template + template inline bool within_open_range(T a, T b, T c) { return (a >= b) && (a < c); @@ -36,43 +57,43 @@ namespace crnlib // Yes I know these should probably be pass by ref, not val: // http://www.stepanovpapers.com/notes.pdf // Just don't use them on non-simple (non built-in) types! - template + template inline T minimum(T a, T b) { return (a < b) ? a : b; } - template + template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } - template + template inline T maximum(T a, T b) { return (a > b) ? a : b; } - template + template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } - template + template inline T lerp(T a, T b, U c) { return a + (b - a) * c; } - template + template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } - template + template inline T saturate(T value) { return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value); @@ -108,13 +129,13 @@ namespace crnlib return static_cast((f < 0.0f) ? 
0.0f : floor(f + .5f)); } - template + template inline int sign(T value) { return (value < 0) ? -1 : ((value > 0) ? 1 : 0); } - template + template inline T square(T value) { return value * value; @@ -129,7 +150,7 @@ namespace crnlib return x && ((x & (x - 1U)) == 0U); } - template + template inline T align_up_value(T x, uint alignment) { CRNLIB_ASSERT(is_power_of_2(alignment)); @@ -138,7 +159,7 @@ namespace crnlib return static_cast(q); } - template + template inline T align_down_value(T x, uint alignment) { CRNLIB_ASSERT(is_power_of_2(alignment)); @@ -147,7 +168,7 @@ namespace crnlib return static_cast(q); } - template + template inline T get_align_up_value_delta(T x, uint alignment) { return align_up_value(x, alignment) - x; @@ -241,12 +262,12 @@ namespace crnlib // http://www-graphics.stanford.edu/~seander/bithacks.html inline uint count_trailing_zero_bits(uint v) { - uint c = 32; // c will be the number of zero bits on the right + uint c = 32; // c will be the number of zero bits on the right static const unsigned int B[] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF }; - static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers + static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers - for (int i = 4; i >= 0; --i) // unroll for more speed + for (int i = 4; i >= 0; --i) // unroll for more speed { if (v & B[i]) { @@ -255,7 +276,8 @@ namespace crnlib } } - if (v) { + if (v) + { c--; } @@ -331,4 +353,4 @@ namespace crnlib } } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_matrix.h b/crnlib/crn_matrix.h index ae2f51b..782ba3a 100644 --- a/crnlib/crn_matrix.h +++ b/crnlib/crn_matrix.h @@ -1,12 +1,33 @@ -// File: crn_matrix.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_vec.h" namespace crnlib { - template + template Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_rows); @@ -28,7 +49,7 @@ namespace crnlib return result; } - template + template Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_cols); @@ -49,7 +70,7 @@ namespace crnlib return result; } - template + template Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_rows); @@ -70,12 +91,13 @@ namespace crnlib return result; } - template + template class matrix { public: typedef T scalar_type; - enum { + enum + { num_rows = R, num_cols = C }; @@ -86,11 +108,19 @@ namespace crnlib typedef vec row_vec; typedef vec<(C > 1) ? 
(C - 1) : 0, T> subrow_vec; - inline matrix() {} + inline matrix() + { + } - inline matrix(eClear) { clear(); } + inline matrix(eClear) + { + clear(); + } - inline matrix(const T* p) { set(p); } + inline matrix(const T* p) + { + set(p); + } inline matrix(const matrix& other) { @@ -544,7 +574,8 @@ namespace crnlib return result; } - static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) { + static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) + { subcol_vec result(0); for (int r = 0; r < R; r++) { @@ -664,5 +695,4 @@ namespace crnlib typedef matrix<4, 4, double> matrix44D; typedef matrix<8, 8, float> matrix88F; - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_mem.cpp b/crnlib/crn_mem.cpp index 0400960..c5867f1 100644 --- a/crnlib/crn_mem.cpp +++ b/crnlib/crn_mem.cpp @@ -1,5 +1,25 @@ -// File: crn_mem.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_console.h" @@ -22,244 +42,307 @@ typedef __darwin_uuid_t uuid_t; #endif #endif -namespace crnlib { +namespace crnlib +{ #if CRNLIB_MEM_STATS #if CRNLIB_64BIT_POINTERS -typedef LONGLONG mem_stat_t; + typedef LONGLONG mem_stat_t; #define CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange64 #else -typedef LONG mem_stat_t; + typedef LONG mem_stat_t; #define CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange #endif -static volatile mem_stat_t g_total_blocks; -static volatile mem_stat_t g_total_allocated; -static volatile mem_stat_t g_max_allocated; - -static mem_stat_t update_total_allocated(int block_delta, mem_stat_t byte_delta) { - mem_stat_t cur_total_blocks; - for (;;) { - cur_total_blocks = (mem_stat_t)g_total_blocks; - mem_stat_t new_total_blocks = static_cast(cur_total_blocks + block_delta); - CRNLIB_ASSERT(new_total_blocks >= 0); - if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_blocks, new_total_blocks, cur_total_blocks) == cur_total_blocks) - break; - } - - mem_stat_t cur_total_allocated, new_total_allocated; - for (;;) { - cur_total_allocated = g_total_allocated; - new_total_allocated = static_cast(cur_total_allocated + byte_delta); - CRNLIB_ASSERT(new_total_allocated >= 0); - if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_allocated, new_total_allocated, cur_total_allocated) == cur_total_allocated) - break; - } - for (;;) { - mem_stat_t cur_max_allocated = g_max_allocated; - mem_stat_t new_max_allocated = CRNLIB_MAX(new_total_allocated, cur_max_allocated); - if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_max_allocated, new_max_allocated, cur_max_allocated) == cur_max_allocated) - break; - } - return new_total_allocated; -} -#endif // CRNLIB_MEM_STATS - -static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) { - void* p_new; - - if (!p) { - p_new = ::malloc(size); - CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); - - if (!p_new) { - printf("WARNING: 
::malloc() of size %u failed!\n", (uint)size); + static volatile mem_stat_t g_total_blocks; + static volatile mem_stat_t g_total_allocated; + static volatile mem_stat_t g_max_allocated; + + static mem_stat_t update_total_allocated(int block_delta, mem_stat_t byte_delta) + { + mem_stat_t cur_total_blocks; + for (;;) + { + cur_total_blocks = (mem_stat_t)g_total_blocks; + mem_stat_t new_total_blocks = static_cast(cur_total_blocks + block_delta); + CRNLIB_ASSERT(new_total_blocks >= 0); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_blocks, new_total_blocks, cur_total_blocks) == cur_total_blocks) + break; + } + + mem_stat_t cur_total_allocated, new_total_allocated; + for (;;) + { + cur_total_allocated = g_total_allocated; + new_total_allocated = static_cast(cur_total_allocated + byte_delta); + CRNLIB_ASSERT(new_total_allocated >= 0); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_allocated, new_total_allocated, cur_total_allocated) == cur_total_allocated) + break; + } + for (;;) + { + mem_stat_t cur_max_allocated = g_max_allocated; + mem_stat_t new_max_allocated = CRNLIB_MAX(new_total_allocated, cur_max_allocated); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_max_allocated, new_max_allocated, cur_max_allocated) == cur_max_allocated) + break; + } + return new_total_allocated; } - - if (pActual_size) - *pActual_size = p_new ? ::_msize(p_new) : 0; - } else if (!size) { - ::free(p); - p_new = nullptr; - - if (pActual_size) - *pActual_size = 0; - } else { - void* p_final_block = p; +#endif // CRNLIB_MEM_STATS + + static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) + { + void* p_new; + + if (!p) + { + p_new = ::malloc(size); + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + if (!p_new) + { + printf("WARNING: ::malloc() of size %u failed!\n", (uint)size); + } + + if (pActual_size) + { + *pActual_size = p_new ? 
::_msize(p_new) : 0; + } + } + else if (!size) + { + ::free(p); + p_new = nullptr; + + if (pActual_size) + { + *pActual_size = 0; + } + } + else + { + void* p_final_block = p; #ifdef WIN32 - p_new = ::_expand(p, size); + p_new = ::_expand(p, size); #else - p_new = nullptr; + p_new = nullptr; #endif - if (p_new) { - CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); - p_final_block = p_new; - } else if (movable) { - p_new = ::realloc(p, size); - - if (p_new) { - CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); - p_final_block = p_new; - } else { - printf("WARNING: ::realloc() of size %u failed!\n", (uint)size); - } + if (p_new) + { + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + p_final_block = p_new; + } + else if (movable) + { + p_new = ::realloc(p, size); + + if (p_new) + { + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + p_final_block = p_new; + } + else + { + printf("WARNING: ::realloc() of size %u failed!\n", (uint)size); + } + } + + if (pActual_size) + { + *pActual_size = ::_msize(p_final_block); + } + } + + return p_new; } - if (pActual_size) - *pActual_size = ::_msize(p_final_block); - } - - return p_new; -} - -static size_t crnlib_default_msize(void* p, void*) { - return p ? 
::_msize(p) : 0; -} - -static crn_realloc_func g_pRealloc = crnlib_default_realloc; -static crn_msize_func g_pMSize = crnlib_default_msize; -static void* g_pUser_data; - -void crnlib_mem_error(const char* p_msg) { - crnlib_assert(p_msg, __FILE__, __LINE__); -} -void* crnlib_malloc(size_t size) { - return crnlib_malloc(size, nullptr); -} - -void* crnlib_malloc(size_t size, size_t* pActual_size) { - size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); - if (!size) - size = sizeof(uint32); - - if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { - crnlib_mem_error("crnlib_malloc: size too big"); - return nullptr; - } + static size_t crnlib_default_msize(void* p, void*) + { + return p ? ::_msize(p) : 0; + } - size_t actual_size = size; - uint8* p_new = static_cast((*g_pRealloc)(nullptr, size, &actual_size, true, g_pUser_data)); + static crn_realloc_func g_pRealloc = crnlib_default_realloc; + static crn_msize_func g_pMSize = crnlib_default_msize; + static void* g_pUser_data; - if (pActual_size) - *pActual_size = actual_size; + void crnlib_mem_error(const char* p_msg) + { + crnlib_assert(p_msg, __FILE__, __LINE__); + } + void* crnlib_malloc(size_t size) + { + return crnlib_malloc(size, nullptr); + } - if ((!p_new) || (actual_size < size)) { - crnlib_mem_error("crnlib_malloc: out of memory"); - return nullptr; - } + void* crnlib_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + { + size = sizeof(uint32); + } + + if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_malloc: size too big"); + return nullptr; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(nullptr, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + { + *pActual_size = actual_size; + } + + if ((!p_new) || (actual_size < size)) + { + crnlib_mem_error("crnlib_malloc: out of memory"); + return nullptr; + } - CRNLIB_ASSERT((reinterpret_cast(p_new) & 
(CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); #if CRNLIB_MEM_STATS - CRNLIB_ASSERT((*g_pMSize)(p_new, g_pUser_data) == actual_size); - update_total_allocated(1, static_cast(actual_size)); + CRNLIB_ASSERT((*g_pMSize)(p_new, g_pUser_data) == actual_size); + update_total_allocated(1, static_cast(actual_size)); #endif - return p_new; -} + return p_new; + } -void* crnlib_realloc(void* p, size_t size, size_t* pActual_size, bool movable) { - if ((ptr_bits_t)p & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { - crnlib_mem_error("crnlib_realloc: bad ptr"); - return nullptr; - } + void* crnlib_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((ptr_bits_t)p & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) + { + crnlib_mem_error("crnlib_realloc: bad ptr"); + return nullptr; + } - if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { - crnlib_mem_error("crnlib_malloc: size too big"); - return nullptr; - } + if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_malloc: size too big"); + return nullptr; + } #if CRNLIB_MEM_STATS - size_t cur_size = p ? (*g_pMSize)(p, g_pUser_data) : 0; - CRNLIB_ASSERT(!p || (cur_size >= sizeof(uint32))); + size_t cur_size = p ? 
(*g_pMSize)(p, g_pUser_data) : 0; + CRNLIB_ASSERT(!p || (cur_size >= sizeof(uint32))); #endif - if ((size) && (size < sizeof(uint32))) - size = sizeof(uint32); + if ((size) && (size < sizeof(uint32))) + { + size = sizeof(uint32); + } - size_t actual_size = size; - void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); - if (pActual_size) - *pActual_size = actual_size; + if (pActual_size) + { + *pActual_size = actual_size; + } - CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); #if CRNLIB_MEM_STATS - CRNLIB_ASSERT(!p_new || ((*g_pMSize)(p_new, g_pUser_data) == actual_size)); - - int num_new_blocks = 0; - if (p) { - if (!p_new) - num_new_blocks = -1; - } else if (p_new) { - num_new_blocks = 1; - } - update_total_allocated(num_new_blocks, static_cast(actual_size) - static_cast(cur_size)); + CRNLIB_ASSERT(!p_new || ((*g_pMSize)(p_new, g_pUser_data) == actual_size)); + + int num_new_blocks = 0; + if (p) + { + if (!p_new) + num_new_blocks = -1; + } + else if (p_new) + { + num_new_blocks = 1; + } + update_total_allocated(num_new_blocks, static_cast(actual_size) - static_cast(cur_size)); #endif - return p_new; -} + return p_new; + } -void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size) { - size_t total = count * size; - void* p = crnlib_malloc(total, pActual_size); - if (p) - memset(p, 0, total); - return p; -} + void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size) + { + size_t total = count * size; + void* p = crnlib_malloc(total, pActual_size); + if (p) + { + memset(p, 0, total); + } + return p; + } -void crnlib_free(void* p) { - if (!p) - return; + void crnlib_free(void* p) + { + if (!p) + { + return; + } - if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { - crnlib_mem_error("crnlib_free: 
bad ptr"); - return; - } + if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) + { + crnlib_mem_error("crnlib_free: bad ptr"); + return; + } #if CRNLIB_MEM_STATS - size_t cur_size = (*g_pMSize)(p, g_pUser_data); - CRNLIB_ASSERT(cur_size >= sizeof(uint32)); - update_total_allocated(-1, -static_cast(cur_size)); + size_t cur_size = (*g_pMSize)(p, g_pUser_data); + CRNLIB_ASSERT(cur_size >= sizeof(uint32)); + update_total_allocated(-1, -static_cast(cur_size)); #endif - (*g_pRealloc)(p, 0, nullptr, true, g_pUser_data); -} + (*g_pRealloc)(p, 0, nullptr, true, g_pUser_data); + } -size_t crnlib_msize(void* p) { - if (!p) - return 0; + size_t crnlib_msize(void* p) + { + if (!p) + { + return 0; + } - if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { - crnlib_mem_error("crnlib_msize: bad ptr"); - return 0; - } + if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) + { + crnlib_mem_error("crnlib_msize: bad ptr"); + return 0; + } - return (*g_pMSize)(p, g_pUser_data); -} + return (*g_pMSize)(p, g_pUser_data); + } -void crnlib_print_mem_stats() { + void crnlib_print_mem_stats() + { #if CRNLIB_MEM_STATS - if (console::is_initialized()) { - console::debug("crnlib_print_mem_stats:"); - console::debug("Current blocks: %u, allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER, g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); - } else { - printf("crnlib_print_mem_stats:\n"); - printf("Current blocks: %u, allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER "\n", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); - } + if (console::is_initialized()) + { + console::debug("crnlib_print_mem_stats:"); + console::debug("Current blocks: %u, allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER, g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); + } + else + { + 
printf("crnlib_print_mem_stats:\n"); + printf("Current blocks: %u, allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER "\n", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); + } #endif -} + } + +} // namespace crnlib -} // namespace crnlib - -void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data) { - if ((!pRealloc) || (!pMSize)) { - crnlib::g_pRealloc = crnlib::crnlib_default_realloc; - crnlib::g_pMSize = crnlib::crnlib_default_msize; - crnlib::g_pUser_data = nullptr; - } else { - crnlib::g_pRealloc = pRealloc; - crnlib::g_pMSize = pMSize; - crnlib::g_pUser_data = pUser_data; - } +void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data) +{ + if ((!pRealloc) || (!pMSize)) + { + crnlib::g_pRealloc = crnlib::crnlib_default_realloc; + crnlib::g_pMSize = crnlib::crnlib_default_msize; + crnlib::g_pUser_data = nullptr; + } + else + { + crnlib::g_pRealloc = pRealloc; + crnlib::g_pMSize = pMSize; + crnlib::g_pUser_data = pUser_data; + } } diff --git a/crnlib/crn_mem.h b/crnlib/crn_mem.h index 396e5cb..d6d086a 100644 --- a/crnlib/crn_mem.h +++ b/crnlib/crn_mem.h @@ -1,5 +1,26 @@ -// File: crn_mem.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" @@ -27,7 +48,7 @@ namespace crnlib // omfg - there must be a better way - template + template inline T* crnlib_new() { T* p = static_cast(crnlib_malloc(sizeof(T))); @@ -38,98 +59,98 @@ namespace crnlib return helpers::construct(p); } - template + template inline T* crnlib_new(const A& init0) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0); } - template + template inline T* crnlib_new(A& init0) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0); } - template + template inline T* crnlib_new(const A& init0, const B& init1) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5); } - template + 
template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10); } - template + template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10, const L& init11) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, 
init5, init6, init7, init8, init9, init10, init11); } - template + template inline T* crnlib_new_array(uint32 num) { if (!num) @@ -157,7 +178,7 @@ namespace crnlib return p; } - template + template inline void crnlib_delete(T* p) { if (p) @@ -170,7 +191,7 @@ namespace crnlib } } - template + template inline void crnlib_delete_array(T* p) { if (p) @@ -190,32 +211,32 @@ namespace crnlib } } -} // namespace crnlib - -#define CRNLIB_DEFINE_NEW_DELETE \ - void* operator new(size_t size) \ - { \ - void* p = crnlib::crnlib_malloc(size); \ - if (!p) \ - { \ - crnlib_fail("new: Out of memory!", __FILE__, __LINE__); \ - } \ - return p; \ - } \ - void* operator new[](size_t size) \ - { \ - void* p = crnlib::crnlib_malloc(size); \ - if (!p) \ - { \ +} // namespace crnlib + +#define CRNLIB_DEFINE_NEW_DELETE \ + void* operator new(size_t size) \ + { \ + void* p = crnlib::crnlib_malloc(size); \ + if (!p) \ + { \ + crnlib_fail("new: Out of memory!", __FILE__, __LINE__); \ + } \ + return p; \ + } \ + void* operator new[](size_t size) \ + { \ + void* p = crnlib::crnlib_malloc(size); \ + if (!p) \ + { \ crnlib_fail("new[]: Out of memory!", __FILE__, __LINE__); \ - } \ - return p; \ - } \ - void operator delete(void* p_block) \ - { \ - crnlib::crnlib_free(p_block); \ - } \ - void operator delete[](void* p_block) \ - { \ - crnlib::crnlib_free(p_block); \ + } \ + return p; \ + } \ + void operator delete(void* p_block) \ + { \ + crnlib::crnlib_free(p_block); \ + } \ + void operator delete[](void* p_block) \ + { \ + crnlib::crnlib_free(p_block); \ } diff --git a/crnlib/crn_mipmapped_texture.cpp b/crnlib/crn_mipmapped_texture.cpp index b7f8d72..f89ecba 100644 --- a/crnlib/crn_mipmapped_texture.cpp +++ b/crnlib/crn_mipmapped_texture.cpp @@ -1,5 +1,26 @@ -// File: crn_dds_texture.cpp - Actually supports both .DDS and .KTX. Probably will rename this eventually. -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_mipmapped_texture.h" #include "crn_cfile_stream.h" diff --git a/crnlib/crn_mipmapped_texture.h b/crnlib/crn_mipmapped_texture.h index aeb08c5..2a3cdd1 100644 --- a/crnlib/crn_mipmapped_texture.h +++ b/crnlib/crn_mipmapped_texture.h @@ -1,6 +1,28 @@ -// File: crn_mipmapped_texture.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_dxt_image.h" #include "../inc/dds_defs.h" #include "crn_pixel_format.h" diff --git a/crnlib/crn_packed_uint.h b/crnlib/crn_packed_uint.h index 8a055bf..7a7a4ad 100644 --- a/crnlib/crn_packed_uint.h +++ b/crnlib/crn_packed_uint.h @@ -1,11 +1,31 @@ -// File: crn_packed_uint -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once namespace crnlib { - template + template struct packed_uint { inline packed_uint() @@ -77,7 +97,7 @@ namespace crnlib unsigned char m_buf[N]; }; - template + template class packed_value { public: @@ -113,4 +133,4 @@ namespace crnlib private: uint8 m_bytes[sizeof(T)]; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_pixel_format.cpp b/crnlib/crn_pixel_format.cpp index e719b67..35a18c1 100644 --- a/crnlib/crn_pixel_format.cpp +++ b/crnlib/crn_pixel_format.cpp @@ -1,5 +1,26 @@ -// File: crn_pixel_format.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_pixel_format.h" #include "crn_image.h" diff --git a/crnlib/crn_pixel_format.h b/crnlib/crn_pixel_format.h index 35ff12e..1e4f547 100644 --- a/crnlib/crn_pixel_format.h +++ b/crnlib/crn_pixel_format.h @@ -1,6 +1,28 @@ -// File: crn_pixel_format.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_dxt.h" #include "crnlib.h" #include "dds_defs.h" diff --git a/crnlib/crn_platform.cpp b/crnlib/crn_platform.cpp index a804dd1..16f5360 100644 --- a/crnlib/crn_platform.cpp +++ b/crnlib/crn_platform.cpp @@ -1,5 +1,25 @@ -// File: crn_platform.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" diff --git a/crnlib/crn_platform.h b/crnlib/crn_platform.h index 6cda08f..119d406 100644 --- a/crnlib/crn_platform.h +++ b/crnlib/crn_platform.h @@ -1,5 +1,26 @@ -// File: crn_platform.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" diff --git a/crnlib/crn_prefix_coding.cpp b/crnlib/crn_prefix_coding.cpp index c2fc184..ae418b9 100644 --- a/crnlib/crn_prefix_coding.cpp +++ b/crnlib/crn_prefix_coding.cpp @@ -1,5 +1,25 @@ -// File: crn_prefix_coding.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_prefix_coding.h" diff --git a/crnlib/crn_prefix_coding.h b/crnlib/crn_prefix_coding.h index 7f97cd9..db181e2 100644 --- a/crnlib/crn_prefix_coding.h +++ b/crnlib/crn_prefix_coding.h @@ -1,5 +1,25 @@ -// File: crn_prefix_coding.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once diff --git a/crnlib/crn_qdxt1.cpp b/crnlib/crn_qdxt1.cpp index 115cfb1..ae9e3fa 100644 --- a/crnlib/crn_qdxt1.cpp +++ b/crnlib/crn_qdxt1.cpp @@ -1,5 +1,26 @@ -// File: crn_qdxt.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_qdxt1.h" #include "crn_dxt1.h" @@ -9,146 +30,170 @@ #define GENERATE_DEBUG_IMAGES 0 -namespace crnlib { -qdxt1::qdxt1(task_pool& task_pool) - : m_pTask_pool(&task_pool), - m_main_thread_id(0), - m_canceled(false), - m_progress_start(0), - m_progress_range(100), - m_num_blocks(0), - m_pBlocks(nullptr), - m_pDst_elements(nullptr), - m_elements_per_block(0), - m_max_selector_clusters(0), - m_prev_percentage_complete(-1), - m_selector_clusterizer(task_pool) { -} - -qdxt1::~qdxt1() { -} - -void qdxt1::clear() { - m_main_thread_id = 0; - m_num_blocks = 0; - m_pBlocks = 0; - m_pDst_elements = nullptr; - m_elements_per_block = 0; - m_params.clear(); - m_endpoint_clusterizer.clear(); - m_endpoint_cluster_indices.clear(); - m_max_selector_clusters = 0; - m_canceled = false; - m_progress_start = 0; - m_progress_range = 100; - m_selector_clusterizer.clear(); - - for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++) - m_cached_selector_cluster_indices[i].clear(); - - m_cluster_hash.clear(); - - m_prev_percentage_complete = -1; -} - -bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) { - clear(); - - CRNLIB_ASSERT(n && pBlocks); - - m_main_thread_id = crn_get_current_thread_id(); - - m_num_blocks = n; - m_pBlocks = pBlocks; - m_params = params; - - m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); - - m_progress_start = 0; - m_progress_range = 75; - - const bool debugging = false; - image_u8 debug_img; - - if ((m_params.m_hierarchical) && (m_params.m_num_mips)) { - vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); - training_vecs.resize(m_num_blocks); - - uint encoding_hist[cNumChunkEncodings]; - utils::zero_object(encoding_hist); - - uint total_processed_blocks = 0; - uint next_progress_threshold = 512; - - for (uint level = 0; level < m_params.m_num_mips; level++) { - 
const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level]; +namespace crnlib +{ + qdxt1::qdxt1(task_pool& task_pool) : + m_pTask_pool(&task_pool), + m_main_thread_id(0), + m_canceled(false), + m_progress_start(0), + m_progress_range(100), + m_num_blocks(0), + m_pBlocks(nullptr), + m_pDst_elements(nullptr), + m_elements_per_block(0), + m_max_selector_clusters(0), + m_prev_percentage_complete(-1), + m_selector_clusterizer(task_pool) + { + } - const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; - const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; + qdxt1::~qdxt1() + { + } - const uint level_width = level_desc.m_block_width * 4; - const uint level_height = level_desc.m_block_height * 4; + void qdxt1::clear() + { + m_main_thread_id = 0; + m_num_blocks = 0; + m_pBlocks = 0; + m_pDst_elements = nullptr; + m_elements_per_block = 0; + m_params.clear(); + m_endpoint_clusterizer.clear(); + m_endpoint_cluster_indices.clear(); + m_max_selector_clusters = 0; + m_canceled = false; + m_progress_start = 0; + m_progress_range = 100; + m_selector_clusterizer.clear(); + + for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++) + { + m_cached_selector_cluster_indices[i].clear(); + } - if (debugging) - debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); + m_cluster_hash.clear(); - float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f - if ((level) && (adaptive_tile_color_psnr_derating > .25f)) { - adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast(level))); // was 3.0f - } - for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) { - for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) { - color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; + m_prev_percentage_complete = -1; + } - for (uint y = 0; y < cChunkPixelHeight; y++) { - const uint pix_y = 
math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); + bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) + { + clear(); - const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); + CRNLIB_ASSERT(n && pBlocks); - for (uint x = 0; x < cChunkPixelWidth; x++) { - const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + m_main_thread_id = crn_get_current_thread_id(); - const uint block_index = outer_block_index + (pix_x >> 2); + m_num_blocks = n; + m_pBlocks = pBlocks; + m_params = params; - const dxt_pixel_block& block = m_pBlocks[block_index]; + m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); - const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + m_progress_start = 0; + m_progress_range = 75; - chunk_pixels[x + y * 8] = p; - } - } + const bool debugging = false; + image_u8 debug_img; - struct layout_results { - uint m_low_color; - uint m_high_color; - uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; - uint64 m_error; - //float m_penalty; - }; - layout_results layouts[cNumChunkTileLayouts]; + if ((m_params.m_hierarchical) && (m_params.m_num_mips)) + { + vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); + training_vecs.resize(m_num_blocks); - for (uint l = 0; l < cNumChunkTileLayouts; l++) { - const uint width = g_chunk_tile_layouts[l].m_width; - const uint height = g_chunk_tile_layouts[l].m_height; - const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; - const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; + uint encoding_hist[cNumChunkEncodings]; + utils::zero_object(encoding_hist); - color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; - for (uint y = 0; y < height; y++) - for (uint x = 0; x < width; x++) - layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + uint total_processed_blocks = 0; + uint 
next_progress_threshold = 512; - const uint n = width * height; - dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors); + for (uint level = 0; level < m_params.m_num_mips; level++) + { + const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level]; - color_quad_u8 c[4]; - dxt1_block::get_block_colors(c, static_cast(layouts[l].m_low_color), static_cast(layouts[l].m_high_color)); + const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; + const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; - uint64 error = 0; - for (uint i = 0; i < n; i++) - error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false); + const uint level_width = level_desc.m_block_width * 4; + const uint level_height = level_desc.m_block_height * 4; - layouts[l].m_error = error; + if (debugging) + { + debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); + } + + float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f + if ((level) && (adaptive_tile_color_psnr_derating > .25f)) + { + adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast(level))); // was 3.0f + } + for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) + { + for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) + { + color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < cChunkPixelHeight; y++) + { + const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); + + const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); + + for (uint x = 0; x < cChunkPixelWidth; x++) + { + const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + + const uint block_index = outer_block_index + (pix_x >> 2); + + const dxt_pixel_block& 
block = m_pBlocks[block_index]; + + const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + + chunk_pixels[x + y * 8] = p; + } + } + + struct layout_results + { + uint m_low_color; + uint m_high_color; + uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; + uint64 m_error; + //float m_penalty; + }; + layout_results layouts[cNumChunkTileLayouts]; + + for (uint l = 0; l < cNumChunkTileLayouts; l++) + { + const uint width = g_chunk_tile_layouts[l].m_width; + const uint height = g_chunk_tile_layouts[l].m_height; + const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; + const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; + + color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; + for (uint y = 0; y < height; y++) + { + for (uint x = 0; x < width; x++) + { + layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + } + } + + const uint n = width * height; + dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors); + + color_quad_u8 c[4]; + dxt1_block::get_block_colors(c, static_cast(layouts[l].m_low_color), static_cast(layouts[l].m_high_color)); + + uint64 error = 0; + for (uint i = 0; i < n; i++) + { + error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false); + } + + layouts[l].m_error = error; #if 0 if ((width > 4) || (height > 4)) @@ -166,132 +211,154 @@ bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& par layouts[l].m_penalty = 0.0f; } #endif - } - - double best_peak_snr = -1.0f; - uint best_encoding = 0; - - for (uint e = 0; e < cNumChunkEncodings; e++) { - const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; - - double total_error = 0; - - for (uint t = 0; t < encoding_desc.m_num_tiles; t++) - total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; - - //double mean_squared = total_error * (1.0f / (16.0f * 3.0f)); - double mean_squared = 
total_error * (1.0f / (64.0f * 3.0f)); - double root_mean_squared = sqrt(mean_squared); - - double peak_snr = 999999.0f; - if (mean_squared) - peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); - - //if (level) - // adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); + } - float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); - peak_snr = peak_snr - color_derating; + double best_peak_snr = -1.0f; + uint best_encoding = 0; - //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) - // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; + for (uint e = 0; e < cNumChunkEncodings; e++) + { + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; + + double total_error = 0; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; + } + + //double mean_squared = total_error * (1.0f / (16.0f * 3.0f)); + double mean_squared = total_error * (1.0f / (64.0f * 3.0f)); + double root_mean_squared = sqrt(mean_squared); - if (peak_snr > best_peak_snr) { - best_peak_snr = peak_snr; - best_encoding = e; - } - } + double peak_snr = 999999.0f; + if (mean_squared) + { + peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); + } - encoding_hist[best_encoding]++; + //if (level) + // adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); - const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; + float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); + peak_snr = peak_snr - color_derating; + + //for (uint t = 0; t < encoding_desc.m_num_tiles; 
t++) + // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; - for (uint t = 0; t < encoding_desc.m_num_tiles; t++) { - const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; + if (peak_snr > best_peak_snr) + { + best_peak_snr = peak_snr; + best_encoding = e; + } + } - uint layout_index = tile_desc.m_layout_index; - const layout_results& layout = layouts[layout_index]; - color_quad_u8 c[4]; - if (debugging) - dxt1_block::get_block_colors(c, static_cast(layout.m_low_color), static_cast(layout.m_high_color)); + encoding_hist[best_encoding]++; - color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; - for (uint y = 0; y < tile_desc.m_height; y++) { - const uint pix_y = y + tile_desc.m_y_ofs; + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; - for (uint x = 0; x < tile_desc.m_width; x++) { - const uint pix_x = x + tile_desc.m_x_ofs; + uint layout_index = tile_desc.m_layout_index; + const layout_results& layout = layouts[layout_index]; + color_quad_u8 c[4]; + if (debugging) + { + dxt1_block::get_block_colors(c, static_cast(layout.m_low_color), static_cast(layout.m_high_color)); + } - tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth]; + color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; - if (debugging) - debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; - } - } + for (uint y = 0; y < tile_desc.m_height; y++) + { + const uint pix_y = y + tile_desc.m_y_ofs; - color_quad_u8 l, h; - dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); + for (uint x = 0; x < tile_desc.m_width; x++) + { + const uint pix_x = x + tile_desc.m_x_ofs; - //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); - const uint dist = 
color::elucidian_distance(l, h, false); + tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth]; - const uint cColorDistToWeight = 5000; - const uint cMaxWeight = 8; - uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + if (debugging) + { + debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; + } + } + } - vec6F ev; + color_quad_u8 l, h; + dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); - ev[0] = l[0]; - ev[1] = l[1]; - ev[2] = l[2]; - ev[3] = h[0]; - ev[4] = h[1]; - ev[5] = h[2]; + //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); + const uint dist = color::elucidian_distance(l, h, false); - for (uint y = 0; y < (tile_desc.m_height >> 2); y++) { - uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); - if (block_y >= level_desc.m_block_height) - continue; + const uint cColorDistToWeight = 5000; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); - for (uint x = 0; x < (tile_desc.m_width >> 2); x++) { - uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); - if (block_x >= level_desc.m_block_width) - break; + vec6F ev; - uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; + ev[0] = l[0]; + ev[1] = l[1]; + ev[2] = l[2]; + ev[3] = h[0]; + ev[4] = h[1]; + ev[5] = h[2]; - training_vecs[block_index].first = ev; - training_vecs[block_index].second = weight; + for (uint y = 0; y < (tile_desc.m_height >> 2); y++) + { + uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); + if (block_y >= level_desc.m_block_height) + { + continue; + } - total_processed_blocks++; + for (uint x = 0; x < (tile_desc.m_width >> 2); x++) + { + uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); + if (block_x >= level_desc.m_block_width) + { + 
break; + } + + uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; + + training_vecs[block_index].first = ev; + training_vecs[block_index].second = weight; + + total_processed_blocks++; + + //if (debugging) + //{ + // debug_img(block_x, block_y) = l; + // debug_img(block_x + level_desc.m_block_width, block_y) = h; + //} - //if (debugging) - //{ - // debug_img(block_x, block_y) = l; - // debug_img(block_x + level_desc.m_block_width, block_y) = h; - //} + } // x + } // y + } //t - } // x - } // y - } //t - - if (total_processed_blocks >= next_progress_threshold) { - next_progress_threshold += 512; - - if (!update_progress(total_processed_blocks, m_num_blocks - 1)) - return false; - } - - } // chunk_x - } // chunk_y + if (total_processed_blocks >= next_progress_threshold) + { + next_progress_threshold += 512; + + if (!update_progress(total_processed_blocks, m_num_blocks - 1)) + { + return false; + } + } + + } // chunk_x + } // chunk_y #if GENERATE_DEBUG_IMAGES - if (debugging) - image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); + if (debugging) + image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif - } // level + } // level #if 0 trace("chunk encoding hist: "); @@ -299,494 +366,598 @@ bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& par trace("%u ", encoding_hist[i]); trace("\n"); #endif - } else { - for (uint block_index = 0; block_index < m_num_blocks; block_index++) { - if ((block_index & 511) == 0) { - if (!update_progress(block_index, m_num_blocks - 1)) - return false; - } + } + else + { + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + { + return false; + } + } + + color_quad_u8 l, h; + 
dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h); + + //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); + const uint dist = color::elucidian_distance(l, h, false); + + const uint cColorDistToWeight = 5000; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + vec6F ev; + + ev[0] = l[0]; + ev[1] = l[1]; + ev[2] = l[2]; + ev[3] = h[0]; + ev[4] = h[1]; + ev[5] = h[2]; + + m_endpoint_clusterizer.add_training_vec(ev, weight); + } + } + + const uint cMaxEndpointClusters = 65535U; - color_quad_u8 l, h; - dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h); + m_progress_start = 75; + m_progress_range = 20; - //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); - const uint dist = color::elucidian_distance(l, h, false); + if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) + { + return false; + } - const uint cColorDistToWeight = 5000; - const uint cMaxWeight = 8; - uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + crnlib::hash_map selector_hash; - vec6F ev; + m_progress_start = 95; + m_progress_range = 5; - ev[0] = l[0]; - ev[1] = l[1]; - ev[2] = l[2]; - ev[3] = h[0]; - ev[4] = h[1]; - ev[5] = h[2]; + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + { + return false; + } + } - m_endpoint_clusterizer.add_training_vec(ev, weight); - } - } + dxt1_block dxt_blk; + dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]); - const uint cMaxEndpointClusters = 65535U; + uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24); - m_progress_start = 75; - 
m_progress_range = 20; + selector_hash.insert(selectors); + } - if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) - return false; + m_max_selector_clusters = selector_hash.size() + 128; - crnlib::hash_map selector_hash; + // trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size()); + // trace("max selector clusters: %u\n", m_max_selector_clusters); - m_progress_start = 95; - m_progress_range = 5; + update_progress(1, 1); - for (uint block_index = 0; block_index < m_num_blocks; block_index++) { - if ((block_index & 511) == 0) { - if (!update_progress(block_index, m_num_blocks - 1)) - return false; + return true; } - dxt1_block dxt_blk; - dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]); + bool qdxt1::update_progress(uint value, uint max_value) + { + if (!m_params.m_pProgress_func) + { + return true; + } - uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24); + uint percentage = max_value ? 
(m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; + if ((int)percentage == m_prev_percentage_complete) + { + return true; + } + m_prev_percentage_complete = percentage; - selector_hash.insert(selectors); - } + if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) + { + m_canceled = true; + return false; + } - m_max_selector_clusters = selector_hash.size() + 128; + return true; + } - // trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size()); - // trace("max selector clusters: %u\n", m_max_selector_clusters); + void qdxt1::pack_endpoints_task(uint64 data, void*) + { + const uint thread_index = static_cast(data); - update_progress(1, 1); + crnlib::vector cluster_pixels; + cluster_pixels.reserve(1024); - return true; -} + crnlib::vector selectors; + selectors.reserve(1024); -bool qdxt1::update_progress(uint value, uint max_value) { - if (!m_params.m_pProgress_func) - return true; + dxt1_endpoint_optimizer optimizer; + dxt1_endpoint_optimizer::params p; + dxt1_endpoint_optimizer::results r; - uint percentage = max_value ? 
(m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; - if ((int)percentage == m_prev_percentage_complete) - return true; - m_prev_percentage_complete = percentage; + p.m_quality = m_params.m_dxt_quality; + p.m_use_alpha_blocks = m_params.m_use_alpha_blocks; + p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold; + p.m_perceptual = m_params.m_perceptual; - if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) { - m_canceled = true; - return false; - } + uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); + cluster_index_progress_mask /= 2; + cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); + cluster_index_progress_mask -= 1; - return true; -} + cluster_id cid; + const crnlib::vector& indices = cid.m_cells; -void qdxt1::pack_endpoints_task(uint64 data, void*) { - const uint thread_index = static_cast(data); + for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + { + return; + } - crnlib::vector cluster_pixels; - cluster_pixels.reserve(1024); + if ((cluster_index & cluster_index_progress_mask) == 0) + { + if (crn_get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) + { + return; + } + } + } - crnlib::vector selectors; - selectors.reserve(1024); + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } - dxt1_endpoint_optimizer optimizer; - dxt1_endpoint_optimizer::params p; - dxt1_endpoint_optimizer::results r; + const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; - p.m_quality = m_params.m_dxt_quality; - p.m_use_alpha_blocks = m_params.m_use_alpha_blocks; - p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold; - 
p.m_perceptual = m_params.m_perceptual; + selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); - uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); - cluster_index_progress_mask /= 2; - cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); - cluster_index_progress_mask -= 1; + bool found = false; + uint32 found_endpoints = 0; - cluster_id cid; - const crnlib::vector& indices = cid.m_cells; + cid.set(cluster_indices); - for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) { - if (m_canceled) - return; + { + scoped_spinlock lock(m_cluster_hash_lock); - if ((cluster_index & cluster_index_progress_mask) == 0) { - if (crn_get_current_thread_id() == m_main_thread_id) { - if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) - return; - } - } + cluster_hash::const_iterator it(m_cluster_hash.find(cid)); + if (it != m_cluster_hash.end()) + { + CRNLIB_ASSERT(cid == it->first); - if (m_pTask_pool->get_num_threads()) { - if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; - } + found = true; + found_endpoints = it->second; + } + } - const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; + if (found) + { + const uint16 low_color = static_cast(found_endpoints); + const uint16 high_color = static_cast((found_endpoints >> 16U)); + + color_quad_u8 block_colors[4]; + dxt1_block::get_block_colors(block_colors, low_color, high_color); + + const bool is_alpha_block = (low_color <= high_color); + + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; + + const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + dxt1_block& dxt_block = get_block(block_index); + + dxt_block.set_low_color(static_cast(low_color)); + 
dxt_block.set_high_color(static_cast(high_color)); + + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + + const color_quad_u8& c = pSrc_pixels[i]; + + uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false); + uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false); + uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false); + + uint selector = 0, best_dist = dist0; + + if (dist1 < best_dist) + { + selector = 1; + best_dist = dist1; + } + if (dist2 < best_dist) + { + selector = 2; + best_dist = dist2; + } + + if (!is_alpha_block) + { + uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false); + if (dist3 < best_dist) + { + selector = 3; + } + } + else + { + if (c.a < m_params.m_dxt1a_alpha_threshold) + { + selector = 3; + } + } + + mask |= selector; + } + + dxt_block.m_selectors[0] = static_cast(mask & 0xFF); + dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); + dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); + dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); + } + } + } + else + { + cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize); - selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + color_quad_u8* pDst = &cluster_pixels[0]; - bool found = false; - uint32 found_endpoints = 0; + bool has_alpha_pixels = false; - cid.set(cluster_indices); + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; - { - scoped_spinlock lock(m_cluster_hash_lock); + //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; - cluster_hash::const_iterator it(m_cluster_hash.find(cid)); - if (it != m_cluster_hash.end()) { - CRNLIB_ASSERT(cid == it->first); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; 
i++) + { + const color_quad_u8& src = pSrc_pixels[i]; - found = true; - found_endpoints = it->second; - } - } + if (src.a < m_params.m_dxt1a_alpha_threshold) + { + has_alpha_pixels = true; + } - if (found) { - const uint16 low_color = static_cast(found_endpoints); - const uint16 high_color = static_cast((found_endpoints >> 16U)); + *pDst++ = src; + } + } - color_quad_u8 block_colors[4]; - dxt1_block::get_block_colors(block_colors, low_color, high_color); + p.m_block_index = cluster_index; + p.m_num_pixels = cluster_pixels.size(); + p.m_pPixels = cluster_pixels.begin(); - const bool is_alpha_block = (low_color <= high_color); + r.m_pSelectors = selectors.begin(); - for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { - const uint block_index = indices[block_iter]; + uint low_color, high_color; + if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) + { + p.m_pixels_have_alpha = has_alpha_pixels; - const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + optimizer.compute(p, r); + low_color = r.m_low_color; + high_color = r.m_high_color; + } + else + { + dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true); + } - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - dxt1_block& dxt_block = get_block(block_index); + const uint8* pSrc_selectors = selectors.begin(); - dxt_block.set_low_color(static_cast(low_color)); - dxt_block.set_high_color(static_cast(high_color)); + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; - uint mask = 0; - for (int i = 15; i >= 0; i--) { - mask <<= 2; + dxt1_block& dxt_block = get_block(block_index); - const color_quad_u8& c = pSrc_pixels[i]; + dxt_block.set_low_color(static_cast(low_color)); + dxt_block.set_high_color(static_cast(high_color)); - uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], 
false); - uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false); - uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false); + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + mask |= pSrc_selectors[i]; + } + pSrc_selectors += (cDXTBlockSize * cDXTBlockSize); - uint selector = 0, best_dist = dist0; + dxt_block.m_selectors[0] = static_cast(mask & 0xFF); + dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); + dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); + dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); + } - if (dist1 < best_dist) { - selector = 1; - best_dist = dist1; - } - if (dist2 < best_dist) { - selector = 2; - best_dist = dist2; - } + { + scoped_spinlock lock(m_cluster_hash_lock); - if (!is_alpha_block) { - uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false); - if (dist3 < best_dist) { - selector = 3; - } - } else { - if (c.a < m_params.m_dxt1a_alpha_threshold) - selector = 3; + m_cluster_hash.insert(cid, low_color | (high_color << 16)); + } } - - mask |= selector; - } - - dxt_block.m_selectors[0] = static_cast(mask & 0xFF); - dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); - dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); - dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); } - } - } else { - cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize); - - color_quad_u8* pDst = &cluster_pixels[0]; - - bool has_alpha_pixels = false; - - for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { - const uint block_index = indices[block_iter]; - - //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; - const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; - - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const color_quad_u8& src = pSrc_pixels[i]; + } - if (src.a < 
m_params.m_dxt1a_alpha_threshold) - has_alpha_pixels = true; + struct optimize_selectors_params + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); - *pDst++ = src; + optimize_selectors_params( + crnlib::vector>& selector_cluster_indices) : + m_selector_cluster_indices(selector_cluster_indices) + { } - } - p.m_block_index = cluster_index; - p.m_num_pixels = cluster_pixels.size(); - p.m_pPixels = cluster_pixels.begin(); + crnlib::vector>& m_selector_cluster_indices; + }; - r.m_pSelectors = selectors.begin(); - - uint low_color, high_color; - if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) { - p.m_pixels_have_alpha = has_alpha_pixels; - - optimizer.compute(p, r); - low_color = r.m_low_color; - high_color = r.m_high_color; - } else { - dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true); - } + void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); - const uint8* pSrc_selectors = selectors.begin(); + optimize_selectors_params& task_params = *static_cast(pData_ptr); - for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { - const uint block_index = indices[block_iter]; + crnlib::vector block_categories[2]; + block_categories[0].reserve(2048); + block_categories[1].reserve(2048); - dxt1_block& dxt_block = get_block(block_index); + for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + { + return; + } - dxt_block.set_low_color(static_cast(low_color)); - dxt_block.set_high_color(static_cast(high_color)); + if ((cluster_index & 255) == 0) + { + if (crn_get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) + { + return; + } + } + } - uint mask = 0; - for (int i = 15; i >= 0; i--) { - mask <<= 2; - mask |= 
pSrc_selectors[i]; - } - pSrc_selectors += (cDXTBlockSize * cDXTBlockSize); + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } - dxt_block.m_selectors[0] = static_cast(mask & 0xFF); - dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); - dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); - dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); - } + const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; - { - scoped_spinlock lock(m_cluster_hash_lock); + if (selector_indices.size() <= 1) + { + continue; + } - m_cluster_hash.insert(cid, low_color | (high_color << 16)); - } - } - } -} + block_categories[0].resize(0); + block_categories[1].resize(0); + + for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) + { + const uint block_index = selector_indices[block_iter]; + + const dxt1_block& src_block = get_block(block_index); + + if (!src_block.is_alpha_block()) + { + block_categories[0].push_back(block_index); + } + else + { + bool has_alpha_pixels = false; + + if (m_params.m_dxt1a_alpha_threshold > 0) + { + const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const color_quad_u8& src = pSrc_pixels[i]; + if (src.a < m_params.m_dxt1a_alpha_threshold) + { + has_alpha_pixels = true; + break; + } + } + } + + if (has_alpha_pixels) + { + continue; + } + + block_categories[1].push_back(block_index); + } + } -struct optimize_selectors_params { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); + dxt1_block blk; + utils::zero_object(blk); - optimize_selectors_params( - crnlib::vector >& selector_cluster_indices) - : m_selector_cluster_indices(selector_cluster_indices) { - } + for (uint block_type = 0; block_type <= 1; block_type++) + { + const crnlib::vector& block_indices = 
block_categories[block_type]; + if (block_indices.size() <= 1) + { + continue; + } - crnlib::vector >& m_selector_cluster_indices; -}; + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + uint best_s = 0; + uint64 best_error = 0xFFFFFFFFFFULL; -void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) { - const uint thread_index = static_cast(data); + uint max_s = 4; + if (block_type == 1) + { + max_s = 3; + } - optimize_selectors_params& task_params = *static_cast(pData_ptr); + for (uint s = 0; s < max_s; s++) + { + uint64 total_error = 0; - crnlib::vector block_categories[2]; - block_categories[0].reserve(2048); - block_categories[1].reserve(2048); + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; - for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) { - if (m_canceled) - return; + const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; - if ((cluster_index & 255) == 0) { - if (crn_get_current_thread_id() == m_main_thread_id) { - if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) - return; - } - } + const dxt1_block& dst_block = get_block(block_index); - if (m_pTask_pool->get_num_threads()) { - if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; - } + color_quad_u8 colors[4]; + dxt1_block::get_block_colors(colors, static_cast(dst_block.get_low_color()), static_cast(dst_block.get_high_color())); - const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; + uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false); - if (selector_indices.size() <= 1) - continue; + total_error += error; + } - block_categories[0].resize(0); - block_categories[1].resize(0); + if (total_error < best_error) + { + best_error = total_error; + best_s = s; + } + } - 
for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) { - const uint block_index = selector_indices[block_iter]; + blk.set_selector(x, y, best_s); - const dxt1_block& src_block = get_block(block_index); + } // x + } // y - if (!src_block.is_alpha_block()) - block_categories[0].push_back(block_index); - else { - bool has_alpha_pixels = false; + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; - if (m_params.m_dxt1a_alpha_threshold > 0) { - const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; + dxt1_block& dst_block = get_block(block_index); - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const color_quad_u8& src = pSrc_pixels[i]; - if (src.a < m_params.m_dxt1a_alpha_threshold) { - has_alpha_pixels = true; - break; + memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); + } } - } - } - - if (has_alpha_pixels) - continue; - block_categories[1].push_back(block_index); - } + } // cluster_index } - dxt1_block blk; - utils::zero_object(blk); - - for (uint block_type = 0; block_type <= 1; block_type++) { - const crnlib::vector& block_indices = block_categories[block_type]; - if (block_indices.size() <= 1) - continue; - - for (uint y = 0; y < 4; y++) { - for (uint x = 0; x < 4; x++) { - uint best_s = 0; - uint64 best_error = 0xFFFFFFFFFFULL; - - uint max_s = 4; - if (block_type == 1) - max_s = 3; - - for (uint s = 0; s < max_s; s++) { - uint64 total_error = 0; - - for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { - const uint block_index = block_indices[block_iter]; - - const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; + bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + return static_cast(pData)->update_progress(percentage_completed, 100U); + } - const dxt1_block& dst_block = get_block(block_index); 
+ bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector>& selector_cluster_indices) + { + m_progress_start = m_progress_range; + m_progress_range = 33; - color_quad_u8 colors[4]; - dxt1_block::get_block_colors(colors, static_cast(dst_block.get_low_color()), static_cast(dst_block.get_high_color())); + weighted_selector_vec_array selector_vecs(m_num_blocks); - uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false); + for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) + { + dxt1_block& dxt1_block = get_block(block_iter); - total_error += error; - } + vec16F sv; + float* pDst = &sv[0]; - if (total_error < best_error) { - best_error = total_error; - best_s = s; + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + *pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)]; + } } - } - blk.set_selector(x, y, best_s); + const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true)); + const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true)); + const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); - } // x - } // y - - for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { - const uint block_index = block_indices[block_iter]; + const uint cColorDistToWeight = 2000; + const uint cMaxWeight = 2048; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); - dxt1_block& dst_block = get_block(block_index); + selector_vecs[block_iter].m_vec = sv; + selector_vecs[block_iter].m_weight = weight; + } - memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); - } + return m_selector_clusterizer.create_clusters( + selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this); } - } // cluster_index -} - -bool qdxt1::generate_codebook_progress_callback(uint 
percentage_completed, void* pData) { - return static_cast(pData)->update_progress(percentage_completed, 100U); -} - -bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices) { - m_progress_start = m_progress_range; - m_progress_range = 33; - - weighted_selector_vec_array selector_vecs(m_num_blocks); - - for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) { - dxt1_block& dxt1_block = get_block(block_iter); - - vec16F sv; - float* pDst = &sv[0]; - - for (uint y = 0; y < 4; y++) - for (uint x = 0; x < 4; x++) - *pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)]; - - const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true)); - const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true)); - const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); - - const uint cColorDistToWeight = 2000; - const uint cMaxWeight = 2048; - uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); - - selector_vecs[block_iter].m_vec = sv; - selector_vecs[block_iter].m_weight = weight; - } - - return m_selector_clusterizer.create_clusters( - selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this); -} - -bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) { - CRNLIB_ASSERT(m_num_blocks); - - m_main_thread_id = crn_get_current_thread_id(); - m_canceled = false; + bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) + { + CRNLIB_ASSERT(m_num_blocks); - m_pDst_elements = pDst_elements; - m_elements_per_block = elements_per_block; - m_params = params; - if (!m_params.m_use_alpha_blocks) - m_params.m_dxt1a_alpha_threshold = 0; + m_main_thread_id = crn_get_current_thread_id(); + m_canceled = false; - 
m_prev_percentage_complete = -1; + m_pDst_elements = pDst_elements; + m_elements_per_block = elements_per_block; + m_params = params; + if (!m_params.m_use_alpha_blocks) + { + m_params.m_dxt1a_alpha_threshold = 0; + } - CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality); - const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality; - const float endpoint_quality = powf(quality, 1.8f * quality_power_mul); - const float selector_quality = powf(quality, 1.65f * quality_power_mul); + m_prev_percentage_complete = -1; + + CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality); + const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality; + const float endpoint_quality = powf(quality, 1.8f * quality_power_mul); + const float selector_quality = powf(quality, 1.65f * quality_power_mul); + + //const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size()); + //const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters); + const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size()); + const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters); + + if (quality >= 1.0f) + { + m_endpoint_cluster_indices.resize(m_num_blocks); + for (uint i = 0; i < m_num_blocks; i++) + { + m_endpoint_cluster_indices[i].resize(1); + m_endpoint_cluster_indices[i][0] = i; + } + } + else + { + m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); + } - //const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, 
m_endpoint_clusterizer.get_codebook_size()); - //const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters); - const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size()); - const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters); + // trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size()); - if (quality >= 1.0f) { - m_endpoint_cluster_indices.resize(m_num_blocks); - for (uint i = 0; i < m_num_blocks; i++) { - m_endpoint_cluster_indices[i].resize(1); - m_endpoint_cluster_indices[i][0] = i; - } - } else - m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); - - // trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size()); - - uint total_blocks = 0; - uint max_blocks = 0; - for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) { - uint num = m_endpoint_cluster_indices[i].size(); - total_blocks += num; - max_blocks = math::maximum(max_blocks, num); - } + uint total_blocks = 0; + uint max_blocks = 0; + for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) + { + uint num = m_endpoint_cluster_indices[i].size(); + total_blocks += num; + max_blocks = math::maximum(max_blocks, num); + } #if 0 trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n", m_endpoint_cluster_indices.size(), @@ -794,47 +965,63 @@ bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1 max_blocks); #endif - crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; + crnlib::vector>& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; - m_progress_start = 0; - if (quality >= 1.0f) - m_progress_range = 100; - else if 
(selector_cluster_indices.empty()) - m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; - else - m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; + m_progress_start = 0; + if (quality >= 1.0f) + { + m_progress_range = 100; + } + else if (selector_cluster_indices.empty()) + { + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; + } + else + { + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; + } - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i); - m_pTask_pool->join(); + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i); + } + m_pTask_pool->join(); - if (m_canceled) - return false; + if (m_canceled) + { + return false; + } - if (quality >= 1.0f) - return true; + if (quality >= 1.0f) + { + return true; + } - if (selector_cluster_indices.empty()) { - create_selector_clusters(max_selector_clusters, selector_cluster_indices); + if (selector_cluster_indices.empty()) + { + create_selector_clusters(max_selector_clusters, selector_cluster_indices); - if (m_canceled) { - selector_cluster_indices.clear(); + if (m_canceled) + { + selector_cluster_indices.clear(); - return false; - } - } + return false; + } + } - m_progress_start += m_progress_range; - m_progress_range = 100 - m_progress_start; + m_progress_start += m_progress_range; + m_progress_range = 100 - m_progress_start; - optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); + optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params); + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + 
m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params); + } - m_pTask_pool->join(); + m_pTask_pool->join(); - return !m_canceled; -} + return !m_canceled; + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_qdxt1.h b/crnlib/crn_qdxt1.h index d5f333b..5e3109f 100644 --- a/crnlib/crn_qdxt1.h +++ b/crnlib/crn_qdxt1.h @@ -1,6 +1,28 @@ -// File: crn_qdxt1.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_dxt.h" #include "crn_hash_map.h" #include "crn_clusterizer.h" @@ -9,165 +31,199 @@ #include "crn_dxt_image.h" #include "crn_export.h" -namespace crnlib { -struct qdxt1_params { - qdxt1_params() { - clear(); - } - - void clear() { - m_quality_level = cMaxQuality; - m_dxt_quality = cCRNDXTQualityUber; - m_perceptual = true; - m_dxt1a_alpha_threshold = 0; - m_use_alpha_blocks = true; - m_pProgress_func = nullptr; - m_pProgress_data = nullptr; - m_num_mips = 0; - m_hierarchical = true; - utils::zero_object(m_mip_desc); - m_progress_start = 0; - m_progress_range = 100; - } - - void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical) { - m_dxt_quality = pp.m_quality; - m_hierarchical = hierarchical; - m_perceptual = pp.m_perceptual; - m_use_alpha_blocks = pp.m_use_both_block_types; - m_quality_level = quality_level; - m_dxt1a_alpha_threshold = pp.m_dxt1a_alpha_threshold; - } - - enum { cMaxQuality = cCRNMaxQualityLevel }; - uint m_quality_level; - - uint m_dxt1a_alpha_threshold; - crn_dxt_quality m_dxt_quality; - bool m_perceptual; - bool m_use_alpha_blocks; - bool m_hierarchical; - - struct mip_desc { - uint m_first_block; - uint m_block_width; - uint m_block_height; - }; - - uint m_num_mips; - enum { cMaxMips = 128 }; - mip_desc m_mip_desc[cMaxMips]; - - typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); - progress_callback_func m_pProgress_func; - void* m_pProgress_data; - uint m_progress_start; - uint m_progress_range; -}; - -class CRN_EXPORT qdxt1 { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt1); - - public: - qdxt1(task_pool& task_pool); - ~qdxt1(); - - void clear(); - - bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params); - - uint get_num_blocks() const { return m_num_blocks; } - const dxt_pixel_block* get_blocks() const { return m_pBlocks; } - - bool pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float 
quality_power_mul); - - private: - task_pool* m_pTask_pool; - crn_thread_id_t m_main_thread_id; - bool m_canceled; - - uint m_progress_start; - uint m_progress_range; - - uint m_num_blocks; - const dxt_pixel_block* m_pBlocks; - - dxt1_block* m_pDst_elements; - uint m_elements_per_block; - qdxt1_params m_params; - - uint m_max_selector_clusters; - - int m_prev_percentage_complete; - - typedef vec<6, float> vec6F; - typedef clusterizer vec6F_clusterizer; - vec6F_clusterizer m_endpoint_clusterizer; - - crnlib::vector > m_endpoint_cluster_indices; - - typedef vec<16, float> vec16F; - typedef threaded_clusterizer vec16F_clusterizer; - - typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; - typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; - - vec16F_clusterizer m_selector_clusterizer; - - crnlib::vector > m_cached_selector_cluster_indices[qdxt1_params::cMaxQuality + 1]; - - struct cluster_id { - cluster_id() - : m_hash(0) { - } - - cluster_id(const crnlib::vector& indices) { - set(indices); - } - - void set(const crnlib::vector& indices) { - m_cells.resize(indices.size()); - - for (uint i = 0; i < indices.size(); i++) - m_cells[i] = static_cast(indices[i]); - - std::sort(m_cells.begin(), m_cells.end()); - - m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); - } +namespace crnlib +{ + struct qdxt1_params + { + qdxt1_params() + { + clear(); + } + + void clear() + { + m_quality_level = cMaxQuality; + m_dxt_quality = cCRNDXTQualityUber; + m_perceptual = true; + m_dxt1a_alpha_threshold = 0; + m_use_alpha_blocks = true; + m_pProgress_func = nullptr; + m_pProgress_data = nullptr; + m_num_mips = 0; + m_hierarchical = true; + utils::zero_object(m_mip_desc); + m_progress_start = 0; + m_progress_range = 100; + } + + void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical) + { + m_dxt_quality = pp.m_quality; + m_hierarchical = hierarchical; + m_perceptual = pp.m_perceptual; + m_use_alpha_blocks = 
pp.m_use_both_block_types; + m_quality_level = quality_level; + m_dxt1a_alpha_threshold = pp.m_dxt1a_alpha_threshold; + } + + enum + { + cMaxQuality = cCRNMaxQualityLevel + }; + uint m_quality_level; + + uint m_dxt1a_alpha_threshold; + crn_dxt_quality m_dxt_quality; + bool m_perceptual; + bool m_use_alpha_blocks; + bool m_hierarchical; + + struct mip_desc + { + uint m_first_block; + uint m_block_width; + uint m_block_height; + }; + + uint m_num_mips; + enum + { + cMaxMips = 128 + }; + mip_desc m_mip_desc[cMaxMips]; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + progress_callback_func m_pProgress_func; + void* m_pProgress_data; + uint m_progress_start; + uint m_progress_range; + }; + + class CRN_EXPORT qdxt1 + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt1); + + public: + qdxt1(task_pool& task_pool); + ~qdxt1(); + + void clear(); + + bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params); + + uint get_num_blocks() const + { + return m_num_blocks; + } + const dxt_pixel_block* get_blocks() const + { + return m_pBlocks; + } + + bool pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul); + + private: + task_pool* m_pTask_pool; + crn_thread_id_t m_main_thread_id; + bool m_canceled; + + uint m_progress_start; + uint m_progress_range; + + uint m_num_blocks; + const dxt_pixel_block* m_pBlocks; + + dxt1_block* m_pDst_elements; + uint m_elements_per_block; + qdxt1_params m_params; + + uint m_max_selector_clusters; + + int m_prev_percentage_complete; + + typedef vec<6, float> vec6F; + typedef clusterizer vec6F_clusterizer; + vec6F_clusterizer m_endpoint_clusterizer; + + crnlib::vector> m_endpoint_cluster_indices; + + typedef vec<16, float> vec16F; + typedef threaded_clusterizer vec16F_clusterizer; + + typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; + typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; + + 
vec16F_clusterizer m_selector_clusterizer; + + crnlib::vector> m_cached_selector_cluster_indices[qdxt1_params::cMaxQuality + 1]; + + struct cluster_id + { + cluster_id() : + m_hash(0) + { + } + + cluster_id(const crnlib::vector& indices) + { + set(indices); + } + + void set(const crnlib::vector& indices) + { + m_cells.resize(indices.size()); + + for (uint i = 0; i < indices.size(); i++) + { + m_cells[i] = static_cast(indices[i]); + } - bool operator<(const cluster_id& rhs) const { - return m_cells < rhs.m_cells; - } + std::sort(m_cells.begin(), m_cells.end()); + + m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); + } - bool operator==(const cluster_id& rhs) const { - if (m_hash != rhs.m_hash) - return false; + bool operator<(const cluster_id& rhs) const + { + return m_cells < rhs.m_cells; + } - return m_cells == rhs.m_cells; - } + bool operator==(const cluster_id& rhs) const + { + if (m_hash != rhs.m_hash) + { + return false; + } - crnlib::vector m_cells; + return m_cells == rhs.m_cells; + } - size_t m_hash; + crnlib::vector m_cells; - operator size_t() const { return m_hash; } - }; + size_t m_hash; - typedef crnlib::hash_map cluster_hash; - cluster_hash m_cluster_hash; - spinlock m_cluster_hash_lock; + operator size_t() const + { + return m_hash; + } + }; - static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); - static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); - bool update_progress(uint value, uint max_value); - void pack_endpoints_task(uint64 data, void* pData_ptr); - void optimize_selectors_task(uint64 data, void* pData_ptr); - bool create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices); + typedef crnlib::hash_map cluster_hash; + cluster_hash m_cluster_hash; + spinlock m_cluster_hash_lock; - inline dxt1_block& get_block(uint index) const { return m_pDst_elements[index * m_elements_per_block]; } -}; + static bool 
generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); + bool update_progress(uint value, uint max_value); + void pack_endpoints_task(uint64 data, void* pData_ptr); + void optimize_selectors_task(uint64 data, void* pData_ptr); + bool create_selector_clusters(uint max_selector_clusters, crnlib::vector>& selector_cluster_indices); -CRNLIB_DEFINE_BITWISE_MOVABLE(qdxt1::cluster_id); + inline dxt1_block& get_block(uint index) const + { + return m_pDst_elements[index * m_elements_per_block]; + } + }; -} // namespace crnlib + CRNLIB_DEFINE_BITWISE_MOVABLE(qdxt1::cluster_id); +} // namespace crnlib diff --git a/crnlib/crn_qdxt5.cpp b/crnlib/crn_qdxt5.cpp index 3e10ff8..26c92e9 100644 --- a/crnlib/crn_qdxt5.cpp +++ b/crnlib/crn_qdxt5.cpp @@ -1,5 +1,26 @@ -// File: crn_qdxt5.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_qdxt5.h" #include "crn_dxt5a.h" @@ -10,264 +31,310 @@ #define QDXT5_DEBUGGING 0 -namespace crnlib { -qdxt5::qdxt5(task_pool& task_pool) - : m_pTask_pool(&task_pool), - m_main_thread_id(0), - m_canceled(false), - m_progress_start(0), - m_progress_range(100), - m_num_blocks(0), - m_pBlocks(nullptr), - m_pDst_elements(nullptr), - m_elements_per_block(0), - m_max_selector_clusters(0), - m_prev_percentage_complete(-1), - m_selector_clusterizer(task_pool) { -} - -qdxt5::~qdxt5() { -} - -void qdxt5::clear() { - m_main_thread_id = 0; - m_num_blocks = 0; - m_pBlocks = 0; - m_pDst_elements = nullptr; - m_elements_per_block = 0; - m_params.clear(); - m_endpoint_clusterizer.clear(); - m_endpoint_cluster_indices.clear(); - m_max_selector_clusters = 0; - m_canceled = false; - m_progress_start = 0; - m_progress_range = 100; - m_selector_clusterizer.clear(); - - for (uint i = 0; i <= qdxt5_params::cMaxQuality; i++) - m_cached_selector_cluster_indices[i].clear(); - - m_cluster_hash.clear(); - - m_prev_percentage_complete = -1; -} - -bool qdxt5::init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params) { - clear(); - - CRNLIB_ASSERT(n && pBlocks); - - m_main_thread_id = crn_get_current_thread_id(); - - m_num_blocks = n; - m_pBlocks = pBlocks; - m_params = params; - - m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); - - m_progress_start = 0; - m_progress_range = 75; - - image_u8 debug_img; - - const bool debugging = true; - - if ((m_params.m_hierarchical) && (m_params.m_num_mips)) { - vec2F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); - training_vecs.resize(m_num_blocks); - - uint encoding_hist[cNumChunkEncodings]; - utils::zero_object(encoding_hist); - - uint total_processed_blocks = 0; - uint next_progress_threshold = 512; - - for (uint level = 0; level < m_params.m_num_mips; level++) { - const qdxt5_params::mip_desc& level_desc = m_params.m_mip_desc[level]; - - 
const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; - const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; - - const uint level_width = level_desc.m_block_width * 4; - const uint level_height = level_desc.m_block_height * 4; - - if (debugging) - debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); - - for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) { - for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) { - color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; - - for (uint y = 0; y < cChunkPixelHeight; y++) { - const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); - - const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); +namespace crnlib +{ + qdxt5::qdxt5(task_pool& task_pool) : + m_pTask_pool(&task_pool), + m_main_thread_id(0), + m_canceled(false), + m_progress_start(0), + m_progress_range(100), + m_num_blocks(0), + m_pBlocks(nullptr), + m_pDst_elements(nullptr), + m_elements_per_block(0), + m_max_selector_clusters(0), + m_prev_percentage_complete(-1), + m_selector_clusterizer(task_pool) + { + } - for (uint x = 0; x < cChunkPixelWidth; x++) { - const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + qdxt5::~qdxt5() + { + } - const uint block_index = outer_block_index + (pix_x >> 2); + void qdxt5::clear() + { + m_main_thread_id = 0; + m_num_blocks = 0; + m_pBlocks = 0; + m_pDst_elements = nullptr; + m_elements_per_block = 0; + m_params.clear(); + m_endpoint_clusterizer.clear(); + m_endpoint_cluster_indices.clear(); + m_max_selector_clusters = 0; + m_canceled = false; + m_progress_start = 0; + m_progress_range = 100; + m_selector_clusterizer.clear(); + + for (uint i = 0; i <= qdxt5_params::cMaxQuality; i++) + { + m_cached_selector_cluster_indices[i].clear(); + } - const dxt_pixel_block& block = 
m_pBlocks[block_index]; + m_cluster_hash.clear(); - const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + m_prev_percentage_complete = -1; + } - chunk_pixels[x + y * 8] = p; - } - } + bool qdxt5::init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params) + { + clear(); - struct layout_results { - uint m_low_color; - uint m_high_color; - uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; - uint64 m_error; - //float m_penalty; - }; - layout_results layouts[cNumChunkTileLayouts]; + CRNLIB_ASSERT(n && pBlocks); - for (uint l = 0; l < cNumChunkTileLayouts; l++) { - const uint width = g_chunk_tile_layouts[l].m_width; - const uint height = g_chunk_tile_layouts[l].m_height; - const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; - const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; + m_main_thread_id = crn_get_current_thread_id(); - color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; - for (uint y = 0; y < height; y++) - for (uint x = 0; x < width; x++) - layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + m_num_blocks = n; + m_pBlocks = pBlocks; + m_params = params; - const uint n = width * height; - dxt_fast::compress_alpha_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors, m_params.m_comp_index); + m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); - uint c[dxt5_block::cMaxSelectorValues]; - dxt5_block::get_block_values(c, layouts[l].m_low_color, layouts[l].m_high_color); + m_progress_start = 0; + m_progress_range = 75; - uint64 error = 0; - for (uint i = 0; i < n; i++) - error += math::square((int)layout_pixels[i][m_params.m_comp_index] - (int)c[layouts[l].m_selectors[i]]); + image_u8 debug_img; - layouts[l].m_error = error; - } + const bool debugging = true; - double best_peak_snr = -1.0f; - uint best_encoding = 0; + if ((m_params.m_hierarchical) && (m_params.m_num_mips)) + { + vec2F_clusterizer::training_vec_array& training_vecs 
= m_endpoint_clusterizer.get_training_vecs(); + training_vecs.resize(m_num_blocks); - for (uint e = 0; e < cNumChunkEncodings; e++) { - const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; + uint encoding_hist[cNumChunkEncodings]; + utils::zero_object(encoding_hist); - double total_error = 0; + uint total_processed_blocks = 0; + uint next_progress_threshold = 512; - for (uint t = 0; t < encoding_desc.m_num_tiles; t++) - total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; + for (uint level = 0; level < m_params.m_num_mips; level++) + { + const qdxt5_params::mip_desc& level_desc = m_params.m_mip_desc[level]; - double mean_squared = total_error * (1.0f / 64.0f); - double root_mean_squared = sqrt(mean_squared); + const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; + const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; - double peak_snr = 999999.0f; - if (mean_squared) - peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); + const uint level_width = level_desc.m_block_width * 4; + const uint level_height = level_desc.m_block_height * 4; - float adaptive_tile_alpha_psnr_derating = 2.4f; - //if (level) - // adaptive_tile_alpha_psnr_derating = math::lerp(adaptive_tile_alpha_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); - if ((level) && (adaptive_tile_alpha_psnr_derating > .25f)) { - adaptive_tile_alpha_psnr_derating = math::maximum(.25f, adaptive_tile_alpha_psnr_derating / powf(3.0f, static_cast(level))); - } + if (debugging) + { + debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); + } + + for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) + { + for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) + { + color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < cChunkPixelHeight; y++) + { + 
const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); + + const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); + + for (uint x = 0; x < cChunkPixelWidth; x++) + { + const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + + const uint block_index = outer_block_index + (pix_x >> 2); + + const dxt_pixel_block& block = m_pBlocks[block_index]; + + const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + + chunk_pixels[x + y * 8] = p; + } + } + + struct layout_results + { + uint m_low_color; + uint m_high_color; + uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; + uint64 m_error; + //float m_penalty; + }; + layout_results layouts[cNumChunkTileLayouts]; + + for (uint l = 0; l < cNumChunkTileLayouts; l++) + { + const uint width = g_chunk_tile_layouts[l].m_width; + const uint height = g_chunk_tile_layouts[l].m_height; + const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; + const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; - float alpha_derating = math::lerp(0.0f, adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); - peak_snr = peak_snr - alpha_derating; + color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; + for (uint y = 0; y < height; y++) + { + for (uint x = 0; x < width; x++) + { + layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + } + } + + const uint n = width * height; + dxt_fast::compress_alpha_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors, m_params.m_comp_index); - //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) - // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; + uint c[dxt5_block::cMaxSelectorValues]; + dxt5_block::get_block_values(c, layouts[l].m_low_color, layouts[l].m_high_color); - if (peak_snr > best_peak_snr) { - best_peak_snr = peak_snr; - best_encoding = 
e; - } - } + uint64 error = 0; + for (uint i = 0; i < n; i++) + { + error += math::square((int)layout_pixels[i][m_params.m_comp_index] - (int)c[layouts[l].m_selectors[i]]); + } - encoding_hist[best_encoding]++; + layouts[l].m_error = error; + } - const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; + double best_peak_snr = -1.0f; + uint best_encoding = 0; + + for (uint e = 0; e < cNumChunkEncodings; e++) + { + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; + + double total_error = 0; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; + } + + double mean_squared = total_error * (1.0f / 64.0f); + double root_mean_squared = sqrt(mean_squared); + + double peak_snr = 999999.0f; + if (mean_squared) + { + peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); + } - for (uint t = 0; t < encoding_desc.m_num_tiles; t++) { - const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; + float adaptive_tile_alpha_psnr_derating = 2.4f; + //if (level) + // adaptive_tile_alpha_psnr_derating = math::lerp(adaptive_tile_alpha_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); + if ((level) && (adaptive_tile_alpha_psnr_derating > .25f)) + { + adaptive_tile_alpha_psnr_derating = math::maximum(.25f, adaptive_tile_alpha_psnr_derating / powf(3.0f, static_cast(level))); + } + + float alpha_derating = math::lerp(0.0f, adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); + peak_snr = peak_snr - alpha_derating; + + //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; - uint layout_index = tile_desc.m_layout_index; - const layout_results& layout = layouts[layout_index]; + if (peak_snr > best_peak_snr) + { + best_peak_snr = peak_snr; + best_encoding = e; + } + } - uint 
c[dxt5_block::cMaxSelectorValues]; - if (debugging) - dxt5_block::get_block_values(c, layout.m_low_color, layout.m_high_color); + encoding_hist[best_encoding]++; - color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; - for (uint y = 0; y < tile_desc.m_height; y++) { - const uint pix_y = y + tile_desc.m_y_ofs; + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; + + uint layout_index = tile_desc.m_layout_index; + const layout_results& layout = layouts[layout_index]; + + uint c[dxt5_block::cMaxSelectorValues]; + if (debugging) + { + dxt5_block::get_block_values(c, layout.m_low_color, layout.m_high_color); + } + + color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < tile_desc.m_height; y++) + { + const uint pix_y = y + tile_desc.m_y_ofs; - for (uint x = 0; x < tile_desc.m_width; x++) { - const uint pix_x = x + tile_desc.m_x_ofs; + for (uint x = 0; x < tile_desc.m_width; x++) + { + const uint pix_x = x + tile_desc.m_x_ofs; - uint a = chunk_pixels[pix_x + pix_y * cChunkPixelWidth][m_params.m_comp_index]; + uint a = chunk_pixels[pix_x + pix_y * cChunkPixelWidth][m_params.m_comp_index]; - tile_pixels[x + y * tile_desc.m_width].set(a, a, a, 255); + tile_pixels[x + y * tile_desc.m_width].set(a, a, a, 255); - if (debugging) - debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; - } - } + if (debugging) + { + debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; + } + } + } - color_quad_u8 l, h; - dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); + color_quad_u8 l, h; + dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); - const uint dist = math::square((int)l[0] - (int)h[0]); + const uint 
dist = math::square((int)l[0] - (int)h[0]); - const int cAlphaErrorToWeight = 8; - const uint cMaxWeight = 8; - uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); + const int cAlphaErrorToWeight = 8; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); - vec2F ev; + vec2F ev; - ev[0] = l[0]; - ev[1] = h[0]; + ev[0] = l[0]; + ev[1] = h[0]; - for (uint y = 0; y < (tile_desc.m_height >> 2); y++) { - uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); - if (block_y >= level_desc.m_block_height) - continue; + for (uint y = 0; y < (tile_desc.m_height >> 2); y++) + { + uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); + if (block_y >= level_desc.m_block_height) + { + continue; + } - for (uint x = 0; x < (tile_desc.m_width >> 2); x++) { - uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); - if (block_x >= level_desc.m_block_width) - break; + for (uint x = 0; x < (tile_desc.m_width >> 2); x++) + { + uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); + if (block_x >= level_desc.m_block_width) + { + break; + } - uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; + uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; - training_vecs[block_index].first = ev; - training_vecs[block_index].second = weight; + training_vecs[block_index].first = ev; + training_vecs[block_index].second = weight; - total_processed_blocks++; + total_processed_blocks++; - } // x - } // y - } //t + } // x + } // y + } //t - if (total_processed_blocks >= next_progress_threshold) { - next_progress_threshold += 512; + if (total_processed_blocks >= next_progress_threshold) + { + next_progress_threshold += 512; - if (!update_progress(total_processed_blocks, m_num_blocks - 1)) - return false; - } + if (!update_progress(total_processed_blocks, m_num_blocks - 1)) + { + return 
false; + } + } - } // chunk_x - } // chunk_y + } // chunk_x + } // chunk_y #if QDXT5_DEBUGGING - if (debugging) - image_utils::write_to_file(dynamic_wstring(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); + if (debugging) + image_utils::write_to_file(dynamic_wstring(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif - } // level + } // level #if 0 trace("chunk encoding hist: "); @@ -275,473 +342,598 @@ bool qdxt5::init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& par trace("%u ", encoding_hist[i]); trace("\n"); #endif - } else { - for (uint block_index = 0; block_index < m_num_blocks; block_index++) { - if ((block_index & 511) == 0) { - if (!update_progress(block_index, m_num_blocks - 1)) - return false; - } - - color_quad_u8 c[16]; - for (uint y = 0; y < cDXTBlockSize; y++) - for (uint x = 0; x < cDXTBlockSize; x++) - c[x + y * cDXTBlockSize].set(m_pBlocks[block_index].m_pixels[y][x][m_params.m_comp_index], 255); + } + else + { + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + { + return false; + } + } + + color_quad_u8 c[16]; + for (uint y = 0; y < cDXTBlockSize; y++) + { + for (uint x = 0; x < cDXTBlockSize; x++) + { + c[x + y * cDXTBlockSize].set(m_pBlocks[block_index].m_pixels[y][x][m_params.m_comp_index], 255); + } + } + + color_quad_u8 l, h; + dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, c, l, h); + + const uint dist = math::square((int)l[0] - (int)h[0]); + + const int cAlphaErrorToWeight = 8; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); + + vec2F ev; + + ev[0] = l[0]; + ev[1] = h[0]; + + m_endpoint_clusterizer.add_training_vec(ev, weight); + } + } - color_quad_u8 l, h; - dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, c, l, h); + const uint 
cMaxEndpointClusters = 65535U; - const uint dist = math::square((int)l[0] - (int)h[0]); + m_progress_start = 75; + m_progress_range = 20; - const int cAlphaErrorToWeight = 8; - const uint cMaxWeight = 8; - uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); + if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) + { + return false; + } - vec2F ev; + crnlib::hash_map selector_hash; - ev[0] = l[0]; - ev[1] = h[0]; + m_progress_start = 95; + m_progress_range = 5; - m_endpoint_clusterizer.add_training_vec(ev, weight); - } - } + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + { + return false; + } + } - const uint cMaxEndpointClusters = 65535U; + dxt5_block dxt_blk; + dxt_fast::compress_alpha_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0], m_params.m_comp_index); - m_progress_start = 75; - m_progress_range = 20; + uint64 selectors = 0; + for (uint i = 0; i < dxt5_block::cNumSelectorBytes; i++) + { + selectors |= static_cast(dxt_blk.m_selectors[i]) << (i * 8U); + } - if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) - return false; + selector_hash.insert(selectors); + } - crnlib::hash_map selector_hash; + m_max_selector_clusters = selector_hash.size() + 128; - m_progress_start = 95; - m_progress_range = 5; + update_progress(1, 1); - for (uint block_index = 0; block_index < m_num_blocks; block_index++) { - if ((block_index & 511) == 0) { - if (!update_progress(block_index, m_num_blocks - 1)) - return false; + return true; } - dxt5_block dxt_blk; - dxt_fast::compress_alpha_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0], m_params.m_comp_index); - - uint64 selectors = 0; - for (uint i = 0; i < dxt5_block::cNumSelectorBytes; i++) - selectors |= static_cast(dxt_blk.m_selectors[i]) << (i * 8U); + bool 
qdxt5::update_progress(uint value, uint max_value) + { + if (!m_params.m_pProgress_func) + { + return true; + } - selector_hash.insert(selectors); - } + uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; + if ((int)percentage == m_prev_percentage_complete) + { + return true; + } + m_prev_percentage_complete = percentage; - m_max_selector_clusters = selector_hash.size() + 128; + if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) + { + m_canceled = true; + return false; + } - update_progress(1, 1); + return true; + } - return true; -} + void qdxt5::pack_endpoints_task(uint64 data, void*) + { + const uint thread_index = static_cast(data); -bool qdxt5::update_progress(uint value, uint max_value) { - if (!m_params.m_pProgress_func) - return true; + crnlib::vector cluster_pixels; + cluster_pixels.reserve(1024); - uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; - if ((int)percentage == m_prev_percentage_complete) - return true; - m_prev_percentage_complete = percentage; + crnlib::vector selectors; + selectors.reserve(1024); - if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) { - m_canceled = true; - return false; - } + dxt5_endpoint_optimizer optimizer; + dxt5_endpoint_optimizer::params p; + dxt5_endpoint_optimizer::results r; - return true; -} + p.m_quality = m_params.m_dxt_quality; + p.m_comp_index = m_params.m_comp_index; + p.m_use_both_block_types = m_params.m_use_both_block_types; -void qdxt5::pack_endpoints_task(uint64 data, void*) { - const uint thread_index = static_cast(data); + uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); + cluster_index_progress_mask /= 2; + cluster_index_progress_mask = 
math::maximum(cluster_index_progress_mask, 8); + cluster_index_progress_mask -= 1; - crnlib::vector cluster_pixels; - cluster_pixels.reserve(1024); + for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + { + return; + } - crnlib::vector selectors; - selectors.reserve(1024); + if ((cluster_index & cluster_index_progress_mask) == 0) + { + if (crn_get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) + { + return; + } + } + } - dxt5_endpoint_optimizer optimizer; - dxt5_endpoint_optimizer::params p; - dxt5_endpoint_optimizer::results r; + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } - p.m_quality = m_params.m_dxt_quality; - p.m_comp_index = m_params.m_comp_index; - p.m_use_both_block_types = m_params.m_use_both_block_types; + const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; - uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); - cluster_index_progress_mask /= 2; - cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); - cluster_index_progress_mask -= 1; + selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); - for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) { - if (m_canceled) - return; + cluster_pixels.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); - if ((cluster_index & cluster_index_progress_mask) == 0) { - if (crn_get_current_thread_id() == m_main_thread_id) { - if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) - return; - } - } + color_quad_u8* pDst = &cluster_pixels[0]; - if (m_pTask_pool->get_num_threads()) { - if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; - } + for (uint 
block_iter = 0; block_iter < cluster_indices.size(); block_iter++) + { + const uint block_index = cluster_indices[block_iter]; - const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; + //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; - selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const color_quad_u8& src = pSrc_pixels[i]; - cluster_pixels.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + *pDst++ = src; + } + } - color_quad_u8* pDst = &cluster_pixels[0]; + p.m_block_index = cluster_index; + p.m_num_pixels = cluster_pixels.size(); + p.m_pPixels = cluster_pixels.begin(); - for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) { - const uint block_index = cluster_indices[block_iter]; + r.m_pSelectors = selectors.begin(); - //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; - const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; + uint low_color; + uint high_color; + if (m_params.m_dxt_quality != cCRNDXTQualitySuperFast) + { + optimizer.compute(p, r); + low_color = r.m_first_endpoint; + high_color = r.m_second_endpoint; + } + else + { + dxt_fast::compress_alpha_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), m_params.m_comp_index); + } - for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { - const color_quad_u8& src = pSrc_pixels[i]; + const uint8* pSrc_selectors = selectors.begin(); - *pDst++ = src; - } - } + for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) + { + const uint block_index = cluster_indices[block_iter]; - p.m_block_index = cluster_index; - p.m_num_pixels = cluster_pixels.size(); - p.m_pPixels = cluster_pixels.begin(); + dxt5_block& 
dxt_block = get_block(block_index); - r.m_pSelectors = selectors.begin(); + dxt_block.set_low_alpha(low_color); + dxt_block.set_high_alpha(high_color); - uint low_color; - uint high_color; - if (m_params.m_dxt_quality != cCRNDXTQualitySuperFast) { - optimizer.compute(p, r); - low_color = r.m_first_endpoint; - high_color = r.m_second_endpoint; - } else { - dxt_fast::compress_alpha_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), m_params.m_comp_index); + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + dxt_block.set_selector(x, y, *pSrc_selectors++); + } + } + } + } } - const uint8* pSrc_selectors = selectors.begin(); + struct optimize_selectors_params + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); - for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) { - const uint block_index = cluster_indices[block_iter]; + optimize_selectors_params( + crnlib::vector>& selector_cluster_indices) : + m_selector_cluster_indices(selector_cluster_indices) + { + } - dxt5_block& dxt_block = get_block(block_index); + crnlib::vector>& m_selector_cluster_indices; + }; - dxt_block.set_low_alpha(low_color); - dxt_block.set_high_alpha(high_color); + void qdxt5::optimize_selectors_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); - for (uint y = 0; y < 4; y++) - for (uint x = 0; x < 4; x++) - dxt_block.set_selector(x, y, *pSrc_selectors++); - } - } -} + optimize_selectors_params& task_params = *static_cast(pData_ptr); -struct optimize_selectors_params { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); + crnlib::vector block_categories[2]; + block_categories[0].reserve(2048); + block_categories[1].reserve(2048); - optimize_selectors_params( - crnlib::vector >& selector_cluster_indices) - : m_selector_cluster_indices(selector_cluster_indices) { - } + for (uint cluster_index = 0; cluster_index < 
task_params.m_selector_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + { + return; + } - crnlib::vector >& m_selector_cluster_indices; -}; + if ((cluster_index & 255) == 0) + { + if (crn_get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) + { + return; + } + } + } -void qdxt5::optimize_selectors_task(uint64 data, void* pData_ptr) { - const uint thread_index = static_cast(data); + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } - optimize_selectors_params& task_params = *static_cast(pData_ptr); + const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; - crnlib::vector block_categories[2]; - block_categories[0].reserve(2048); - block_categories[1].reserve(2048); + if (selector_indices.size() <= 1) + { + continue; + } - for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) { - if (m_canceled) - return; + block_categories[0].resize(0); + block_categories[1].resize(0); - if ((cluster_index & 255) == 0) { - if (crn_get_current_thread_id() == m_main_thread_id) { - if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) - return; - } - } + for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) + { + const uint block_index = selector_indices[block_iter]; - if (m_pTask_pool->get_num_threads()) { - if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; - } + const dxt5_block& src_block = get_block(block_index); - const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; + block_categories[src_block.is_alpha6_block()].push_back(block_index); + } - if (selector_indices.size() <= 1) - continue; + dxt5_block blk; + utils::zero_object(blk); - 
block_categories[0].resize(0); - block_categories[1].resize(0); + for (uint block_type = 0; block_type <= 1; block_type++) + { + const crnlib::vector& block_indices = block_categories[block_type]; + if (block_indices.size() <= 1) + { + continue; + } - for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) { - const uint block_index = selector_indices[block_iter]; + for (uint y = 0; y < cDXTBlockSize; y++) + { + for (uint x = 0; x < cDXTBlockSize; x++) + { + uint best_s = 0; + uint64 best_error = 0xFFFFFFFFFFULL; - const dxt5_block& src_block = get_block(block_index); + for (uint s = 0; s < dxt5_block::cMaxSelectorValues; s++) + { + uint64 total_error = 0; - block_categories[src_block.is_alpha6_block()].push_back(block_index); - } + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; - dxt5_block blk; - utils::zero_object(blk); + const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; - for (uint block_type = 0; block_type <= 1; block_type++) { - const crnlib::vector& block_indices = block_categories[block_type]; - if (block_indices.size() <= 1) - continue; + const dxt5_block& dst_block = get_block(block_index); - for (uint y = 0; y < cDXTBlockSize; y++) { - for (uint x = 0; x < cDXTBlockSize; x++) { - uint best_s = 0; - uint64 best_error = 0xFFFFFFFFFFULL; + uint values[dxt5_block::cMaxSelectorValues]; + dxt5_block::get_block_values(values, dst_block.get_low_alpha(), dst_block.get_high_alpha()); - for (uint s = 0; s < dxt5_block::cMaxSelectorValues; s++) { - uint64 total_error = 0; + int error = math::square((int)orig_color[m_params.m_comp_index] - (int)values[s]); - for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { - const uint block_index = block_indices[block_iter]; + total_error += error; + } - const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; + if (total_error < best_error) + { + best_error = 
total_error; + best_s = s; + } + } - const dxt5_block& dst_block = get_block(block_index); + blk.set_selector(x, y, best_s); - uint values[dxt5_block::cMaxSelectorValues]; - dxt5_block::get_block_values(values, dst_block.get_low_alpha(), dst_block.get_high_alpha()); + } // x + } // y - int error = math::square((int)orig_color[m_params.m_comp_index] - (int)values[s]); + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; - total_error += error; - } + dxt5_block& dst_block = get_block(block_index); - if (total_error < best_error) { - best_error = total_error; - best_s = s; + memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); + } } - } - - blk.set_selector(x, y, best_s); - } // x - } // y - - for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { - const uint block_index = block_indices[block_iter]; - - dxt5_block& dst_block = get_block(block_index); - - memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); - } + } // cluster_index } - } // cluster_index -} - -bool qdxt5::generate_codebook_progress_callback(uint percentage_completed, void* pData) { - return static_cast(pData)->update_progress(percentage_completed, 100U); -} - -bool qdxt5::create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices) { - weighted_selector_vec_array selector_vecs[2]; - crnlib::vector selector_vec_remap[2]; - - for (uint block_type = 0; block_type < 2; block_type++) { - for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) { - dxt5_block& dxt5_block = get_block(block_iter); - if ((uint)dxt5_block.is_alpha6_block() != block_type) - continue; - - vec16F sv; - float* pDst = &sv[0]; - - bool uses_absolute_values = false; - - for (uint y = 0; y < 4; y++) { - for (uint x = 0; x < 4; x++) { - const uint s = dxt5_block.get_selector(x, y); - - float f; - if (dxt5_block.is_alpha6_block()) { - if 
(s >= 6) { - uses_absolute_values = true; - f = 0.0f; - } else - f = g_dxt5_alpha6_to_linear[s]; - } else - f = g_dxt5_to_linear[s]; + bool qdxt5::generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + return static_cast(pData)->update_progress(percentage_completed, 100U); + } - *pDst++ = f; + bool qdxt5::create_selector_clusters(uint max_selector_clusters, crnlib::vector>& selector_cluster_indices) + { + weighted_selector_vec_array selector_vecs[2]; + crnlib::vector selector_vec_remap[2]; + + for (uint block_type = 0; block_type < 2; block_type++) + { + for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) + { + dxt5_block& dxt5_block = get_block(block_iter); + if ((uint)dxt5_block.is_alpha6_block() != block_type) + { + continue; + } + + vec16F sv; + float* pDst = &sv[0]; + + bool uses_absolute_values = false; + + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + const uint s = dxt5_block.get_selector(x, y); + + float f; + if (dxt5_block.is_alpha6_block()) + { + if (s >= 6) + { + uses_absolute_values = true; + f = 0.0f; + } + else + { + f = g_dxt5_alpha6_to_linear[s]; + } + } + else + { + f = g_dxt5_to_linear[s]; + } + + *pDst++ = f; + } + } + + if (uses_absolute_values) + { + continue; + } + + int low_alpha = dxt5_block.get_low_alpha(); + int high_alpha = dxt5_block.get_high_alpha(); + int dist = math::square(low_alpha - high_alpha); + + const uint cAlphaDistToWeight = 8; + const uint cMaxWeight = 2048; + uint weight = math::clamp(dist / cAlphaDistToWeight, 1, cMaxWeight); + + selector_vecs[block_type].resize(selector_vecs[block_type].size() + 1); + selector_vecs[block_type].back().m_vec = sv; + selector_vecs[block_type].back().m_weight = weight; + + selector_vec_remap[block_type].push_back(block_iter); + } } - } - if (uses_absolute_values) - continue; + selector_cluster_indices.clear(); - int low_alpha = dxt5_block.get_low_alpha(); - int high_alpha = dxt5_block.get_high_alpha(); - int dist = 
math::square(low_alpha - high_alpha); - - const uint cAlphaDistToWeight = 8; - const uint cMaxWeight = 2048; - uint weight = math::clamp(dist / cAlphaDistToWeight, 1, cMaxWeight); - - selector_vecs[block_type].resize(selector_vecs[block_type].size() + 1); - selector_vecs[block_type].back().m_vec = sv; - selector_vecs[block_type].back().m_weight = weight; - - selector_vec_remap[block_type].push_back(block_iter); - } - } - - selector_cluster_indices.clear(); - - for (uint block_type = 0; block_type < 2; block_type++) { - if (selector_vecs[block_type].empty()) - continue; + for (uint block_type = 0; block_type < 2; block_type++) + { + if (selector_vecs[block_type].empty()) + { + continue; + } - if ((selector_vecs[block_type].size() / (float)m_num_blocks) < .01f) - continue; - uint max_clusters = static_cast((math::emulu(selector_vecs[block_type].size(), max_selector_clusters) + (m_num_blocks - 1)) / m_num_blocks); - max_clusters = math::minimum(math::maximum(64U, max_clusters), selector_vecs[block_type].size()); - if (max_clusters >= selector_vecs[block_type].size()) - continue; + if ((selector_vecs[block_type].size() / (float)m_num_blocks) < .01f) + { + continue; + } + uint max_clusters = static_cast((math::emulu(selector_vecs[block_type].size(), max_selector_clusters) + (m_num_blocks - 1)) / m_num_blocks); + max_clusters = math::minimum(math::maximum(64U, max_clusters), selector_vecs[block_type].size()); + if (max_clusters >= selector_vecs[block_type].size()) + { + continue; + } #if QDXT5_DEBUGGING - trace("max_clusters (%u): %u\n", block_type, max_clusters); + trace("max_clusters (%u): %u\n", block_type, max_clusters); #endif - crnlib::vector > block_type_selector_cluster_indices; + crnlib::vector> block_type_selector_cluster_indices; - if (!block_type) { - m_progress_start = m_progress_range; - m_progress_range = 16; - } else { - m_progress_start = m_progress_range + 16; - m_progress_range = 17; - } + if (!block_type) + { + m_progress_start = m_progress_range; + 
m_progress_range = 16; + } + else + { + m_progress_start = m_progress_range + 16; + m_progress_range = 17; + } - if (!m_selector_clusterizer.create_clusters( - selector_vecs[block_type], max_clusters, block_type_selector_cluster_indices, generate_codebook_progress_callback, this)) { - return false; - } + if (!m_selector_clusterizer.create_clusters( + selector_vecs[block_type], max_clusters, block_type_selector_cluster_indices, generate_codebook_progress_callback, this)) + { + return false; + } - const uint first_cluster = selector_cluster_indices.size(); - selector_cluster_indices.enlarge(block_type_selector_cluster_indices.size()); + const uint first_cluster = selector_cluster_indices.size(); + selector_cluster_indices.enlarge(block_type_selector_cluster_indices.size()); - for (uint i = 0; i < block_type_selector_cluster_indices.size(); i++) { - crnlib::vector& indices = selector_cluster_indices[first_cluster + i]; - indices.swap(block_type_selector_cluster_indices[i]); + for (uint i = 0; i < block_type_selector_cluster_indices.size(); i++) + { + crnlib::vector& indices = selector_cluster_indices[first_cluster + i]; + indices.swap(block_type_selector_cluster_indices[i]); - for (uint j = 0; j < indices.size(); j++) - indices.at(j) = selector_vec_remap[block_type][indices.at(j)]; - } - } + for (uint j = 0; j < indices.size(); j++) + { + indices.at(j) = selector_vec_remap[block_type][indices.at(j)]; + } + } + } - return true; -} + return true; + } -bool qdxt5::pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params) { - CRNLIB_ASSERT(m_num_blocks); + bool qdxt5::pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params) + { + CRNLIB_ASSERT(m_num_blocks); - m_main_thread_id = crn_get_current_thread_id(); - m_canceled = false; + m_main_thread_id = crn_get_current_thread_id(); + m_canceled = false; - m_pDst_elements = pDst_elements; - m_elements_per_block = elements_per_block; - m_params = params; + m_pDst_elements = 
pDst_elements; + m_elements_per_block = elements_per_block; + m_params = params; - m_prev_percentage_complete = -1; + m_prev_percentage_complete = -1; - CRNLIB_ASSERT(m_params.m_quality_level <= qdxt5_params::cMaxQuality); - const float quality = m_params.m_quality_level / (float)qdxt5_params::cMaxQuality; - const float endpoint_quality = powf(quality, 2.1f); - const float selector_quality = powf(quality, 1.65f); + CRNLIB_ASSERT(m_params.m_quality_level <= qdxt5_params::cMaxQuality); + const float quality = m_params.m_quality_level / (float)qdxt5_params::cMaxQuality; + const float endpoint_quality = powf(quality, 2.1f); + const float selector_quality = powf(quality, 1.65f); - const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 16U, m_endpoint_clusterizer.get_codebook_size()); - const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 32U, m_max_selector_clusters); + const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 16U, m_endpoint_clusterizer.get_codebook_size()); + const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 32U, m_max_selector_clusters); #if QDXT5_DEBUGGING - trace("max endpoint clusters: %u\n", max_endpoint_clusters); - trace("max selector clusters: %u\n", max_selector_clusters); + trace("max endpoint clusters: %u\n", max_endpoint_clusters); + trace("max selector clusters: %u\n", max_selector_clusters); #endif - if (quality >= 1.0f) { - m_endpoint_cluster_indices.resize(m_num_blocks); - for (uint i = 0; i < m_num_blocks; i++) { - m_endpoint_cluster_indices[i].resize(1); - m_endpoint_cluster_indices[i][0] = i; - } - } else - m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); + if (quality >= 1.0f) + { + m_endpoint_cluster_indices.resize(m_num_blocks); + for (uint i = 0; i < 
m_num_blocks; i++) + { + m_endpoint_cluster_indices[i].resize(1); + m_endpoint_cluster_indices[i][0] = i; + } + } + else + { + m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); + } - uint total_blocks = 0; - uint max_blocks = 0; - for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) { - uint num = m_endpoint_cluster_indices[i].size(); - total_blocks += num; - max_blocks = math::maximum(max_blocks, num); - } + uint total_blocks = 0; + uint max_blocks = 0; + for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) + { + uint num = m_endpoint_cluster_indices[i].size(); + total_blocks += num; + max_blocks = math::maximum(max_blocks, num); + } - crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; + crnlib::vector>& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; - m_progress_start = 0; - if (quality >= 1.0f) - m_progress_range = 100; - else if (selector_cluster_indices.empty()) - m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; - else - m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; + m_progress_start = 0; + if (quality >= 1.0f) + { + m_progress_range = 100; + } + else if (selector_cluster_indices.empty()) + { + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; + } + else + { + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 
10 : 50; + } - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &qdxt5::pack_endpoints_task, i); - m_pTask_pool->join(); + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &qdxt5::pack_endpoints_task, i); + } + m_pTask_pool->join(); - if (m_canceled) - return false; + if (m_canceled) + { + return false; + } - if (quality >= 1.0f) - return true; + if (quality >= 1.0f) + { + return true; + } - if (selector_cluster_indices.empty()) { - create_selector_clusters(max_selector_clusters, selector_cluster_indices); + if (selector_cluster_indices.empty()) + { + create_selector_clusters(max_selector_clusters, selector_cluster_indices); - if (m_canceled) { - selector_cluster_indices.clear(); + if (m_canceled) + { + selector_cluster_indices.clear(); - return false; - } - } + return false; + } + } - m_progress_start += m_progress_range; - m_progress_range = 100 - m_progress_start; + m_progress_start += m_progress_range; + m_progress_range = 100 - m_progress_start; - optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); + optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &qdxt5::optimize_selectors_task, i, &optimize_selectors_task_params); + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &qdxt5::optimize_selectors_task, i, &optimize_selectors_task_params); + } - m_pTask_pool->join(); + m_pTask_pool->join(); - return !m_canceled; -} + return !m_canceled; + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_qdxt5.h b/crnlib/crn_qdxt5.h index f3c7b88..b66ae40 100644 --- a/crnlib/crn_qdxt5.h +++ b/crnlib/crn_qdxt5.h @@ -1,6 +1,28 @@ -// File: crn_qdxt5.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 
2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_hash_map.h" #include "crn_clusterizer.h" #include "crn_hash.h" @@ -9,164 +31,198 @@ #include "crn_dxt_image.h" #include "crn_export.h" -namespace crnlib { -struct qdxt5_params { - qdxt5_params() { - clear(); - } - - void clear() { - m_quality_level = cMaxQuality; - m_dxt_quality = cCRNDXTQualityUber; - - m_pProgress_func = nullptr; - m_pProgress_data = nullptr; - m_num_mips = 0; - m_hierarchical = true; - utils::zero_object(m_mip_desc); - - m_comp_index = 3; - m_progress_start = 0; - m_progress_range = 100; - - m_use_both_block_types = true; - } - - void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical, int comp_index = 3) { - m_dxt_quality = pp.m_quality; - m_hierarchical = hierarchical; - m_comp_index = comp_index; - m_use_both_block_types = pp.m_use_both_block_types; - m_quality_level = quality_level; - } - - enum { cMaxQuality = cCRNMaxQualityLevel }; - uint m_quality_level; - crn_dxt_quality m_dxt_quality; - bool m_hierarchical; - - struct 
mip_desc { - uint m_first_block; - uint m_block_width; - uint m_block_height; - }; - - uint m_num_mips; - enum { cMaxMips = 128 }; - mip_desc m_mip_desc[cMaxMips]; - - typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); - progress_callback_func m_pProgress_func; - void* m_pProgress_data; - uint m_progress_start; - uint m_progress_range; - - uint m_comp_index; - - bool m_use_both_block_types; -}; - -class CRN_EXPORT qdxt5 { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt5); - - public: - qdxt5(task_pool& task_pool); - ~qdxt5(); - - void clear(); - - bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params); - - uint get_num_blocks() const { return m_num_blocks; } - const dxt_pixel_block* get_blocks() const { return m_pBlocks; } - - bool pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params); - - private: - task_pool* m_pTask_pool; - crn_thread_id_t m_main_thread_id; - bool m_canceled; - - uint m_progress_start; - uint m_progress_range; - - uint m_num_blocks; - const dxt_pixel_block* m_pBlocks; - - dxt5_block* m_pDst_elements; - uint m_elements_per_block; - qdxt5_params m_params; - - uint m_max_selector_clusters; - - int m_prev_percentage_complete; - - typedef vec<2, float> vec2F; - typedef clusterizer vec2F_clusterizer; - vec2F_clusterizer m_endpoint_clusterizer; - - crnlib::vector > m_endpoint_cluster_indices; - - typedef vec<16, float> vec16F; - typedef threaded_clusterizer vec16F_clusterizer; - - typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; - typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; - - vec16F_clusterizer m_selector_clusterizer; - - crnlib::vector > m_cached_selector_cluster_indices[qdxt5_params::cMaxQuality + 1]; - - struct cluster_id { - cluster_id() - : m_hash(0) { - } - - cluster_id(const crnlib::vector& indices) { - set(indices); - } - - void set(const crnlib::vector& indices) { - m_cells.resize(indices.size()); +namespace 
crnlib +{ + struct qdxt5_params + { + qdxt5_params() + { + clear(); + } + + void clear() + { + m_quality_level = cMaxQuality; + m_dxt_quality = cCRNDXTQualityUber; + + m_pProgress_func = nullptr; + m_pProgress_data = nullptr; + m_num_mips = 0; + m_hierarchical = true; + utils::zero_object(m_mip_desc); + + m_comp_index = 3; + m_progress_start = 0; + m_progress_range = 100; + + m_use_both_block_types = true; + } + + void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical, int comp_index = 3) + { + m_dxt_quality = pp.m_quality; + m_hierarchical = hierarchical; + m_comp_index = comp_index; + m_use_both_block_types = pp.m_use_both_block_types; + m_quality_level = quality_level; + } + + enum + { + cMaxQuality = cCRNMaxQualityLevel + }; + uint m_quality_level; + crn_dxt_quality m_dxt_quality; + bool m_hierarchical; + + struct mip_desc + { + uint m_first_block; + uint m_block_width; + uint m_block_height; + }; + + uint m_num_mips; + enum + { + cMaxMips = 128 + }; + mip_desc m_mip_desc[cMaxMips]; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + progress_callback_func m_pProgress_func; + void* m_pProgress_data; + uint m_progress_start; + uint m_progress_range; + + uint m_comp_index; + + bool m_use_both_block_types; + }; + + class CRN_EXPORT qdxt5 + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt5); + public: + qdxt5(task_pool& task_pool); + ~qdxt5(); + + void clear(); + + bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params); + + uint get_num_blocks() const + { + return m_num_blocks; + } + const dxt_pixel_block* get_blocks() const + { + return m_pBlocks; + } + + bool pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params); + + private: + task_pool* m_pTask_pool; + crn_thread_id_t m_main_thread_id; + bool m_canceled; + + uint m_progress_start; + uint m_progress_range; + + uint m_num_blocks; + const dxt_pixel_block* m_pBlocks; + + dxt5_block* m_pDst_elements; + 
uint m_elements_per_block; + qdxt5_params m_params; + + uint m_max_selector_clusters; + + int m_prev_percentage_complete; + + typedef vec<2, float> vec2F; + typedef clusterizer vec2F_clusterizer; + vec2F_clusterizer m_endpoint_clusterizer; + + crnlib::vector> m_endpoint_cluster_indices; + + typedef vec<16, float> vec16F; + typedef threaded_clusterizer vec16F_clusterizer; + + typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; + typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; + + vec16F_clusterizer m_selector_clusterizer; + + crnlib::vector> m_cached_selector_cluster_indices[qdxt5_params::cMaxQuality + 1]; + + struct cluster_id + { + cluster_id() : + m_hash(0) + { + } + + cluster_id(const crnlib::vector& indices) + { + set(indices); + } + + void set(const crnlib::vector& indices) + { + m_cells.resize(indices.size()); - for (uint i = 0; i < indices.size(); i++) - m_cells[i] = static_cast(indices[i]); + for (uint i = 0; i < indices.size(); i++) + { + m_cells[i] = static_cast(indices[i]); + } - std::sort(m_cells.begin(), m_cells.end()); + std::sort(m_cells.begin(), m_cells.end()); - m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); - } + m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); + } - bool operator<(const cluster_id& rhs) const { - return m_cells < rhs.m_cells; - } + bool operator<(const cluster_id& rhs) const + { + return m_cells < rhs.m_cells; + } - bool operator==(const cluster_id& rhs) const { - if (m_hash != rhs.m_hash) - return false; + bool operator==(const cluster_id& rhs) const + { + if (m_hash != rhs.m_hash) + { + return false; + } - return m_cells == rhs.m_cells; - } + return m_cells == rhs.m_cells; + } - crnlib::vector m_cells; + crnlib::vector m_cells; - size_t m_hash; + size_t m_hash; - operator size_t() const { return m_hash; } - }; + operator size_t() const + { + return m_hash; + } + }; - typedef crnlib::hash_map cluster_hash; - cluster_hash m_cluster_hash; - 
spinlock m_cluster_hash_lock; + typedef crnlib::hash_map cluster_hash; + cluster_hash m_cluster_hash; + spinlock m_cluster_hash_lock; - static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); - static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); - bool update_progress(uint value, uint max_value); - void pack_endpoints_task(uint64 data, void* pData_ptr); - void optimize_selectors_task(uint64 data, void* pData_ptr); - bool create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices); + static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); + bool update_progress(uint value, uint max_value); + void pack_endpoints_task(uint64 data, void* pData_ptr); + void optimize_selectors_task(uint64 data, void* pData_ptr); + bool create_selector_clusters(uint max_selector_clusters, crnlib::vector>& selector_cluster_indices); - inline dxt5_block& get_block(uint index) const { return m_pDst_elements[index * m_elements_per_block]; } -}; + inline dxt5_block& get_block(uint index) const + { + return m_pDst_elements[index * m_elements_per_block]; + } + }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_radix_sort.h b/crnlib/crn_radix_sort.h index 5ab60ed..92df4cf 100644 --- a/crnlib/crn_radix_sort.h +++ b/crnlib/crn_radix_sort.h @@ -1,12 +1,32 @@ -// File: crn_radix_sort.h -// File: crn_radix_sort.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once namespace crnlib { // Returns pointer to sorted array. - template + template T* radix_sort(uint num_vals, T* pBuf0, T* pBuf1, uint key_ofs, uint key_size) { CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); @@ -166,7 +186,7 @@ namespace crnlib #undef CRNLIB_GET_KEY // Returns pointer to sorted array. - template + template T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) { CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); @@ -184,7 +204,9 @@ namespace crnlib } if (num_indices & 1) + { *p = static_cast(i); + } } uint hist[256 * 4]; @@ -347,4 +369,4 @@ namespace crnlib #undef CRNLIB_GET_KEY #undef CRNLIB_GET_KEY_FROM_INDEX -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_rand.cpp b/crnlib/crn_rand.cpp index 753e16d..e39e20c 100644 --- a/crnlib/crn_rand.cpp +++ b/crnlib/crn_rand.cpp @@ -297,8 +297,7 @@ namespace crnlib } /* Reject P if outside outer ellipse, or outside acceptance region */ - } - while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); + } while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); /* Return ratio of P's coordinates as the normal deviate */ return (mean + stddev * v / u); @@ -308,13 +307,13 @@ namespace crnlib { } - fast_random::fast_random(): + 
fast_random::fast_random() : jsr(0xABCD917A), jcong(0x17F3DEAD) { } - fast_random::fast_random(const fast_random& other): + fast_random::fast_random(const fast_random& other) : jsr(other.jsr), jcong(other.jcong) { @@ -404,5 +403,4 @@ namespace crnlib return math::clamp(r, l, h); } - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_rand.h b/crnlib/crn_rand.h index 7ea38fe..42fa1c5 100644 --- a/crnlib/crn_rand.h +++ b/crnlib/crn_rand.h @@ -1,5 +1,25 @@ -// File: crn_rand.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -28,7 +48,10 @@ namespace crnlib public: well512(); - enum { cStateSize = 16 }; + enum + { + cStateSize = 16 + }; void seed(uint32 seed[cStateSize]); void seed(uint32 seed); void seed(uint32 seed1, uint32 seed2, uint32 seed3); @@ -122,4 +145,4 @@ namespace crnlib uint32 jcong; }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_ray.h b/crnlib/crn_ray.h index 60112c4..fa4a4ce 100644 --- a/crnlib/crn_ray.h +++ b/crnlib/crn_ray.h @@ -1,5 +1,25 @@ -// File: crn_ray.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once @@ -7,7 +27,7 @@ namespace crnlib { - template + template class ray { public: @@ -21,7 +41,7 @@ namespace crnlib { clear(); } - inline ray(const vector_type& origin, const vector_type& direction): + inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { @@ -72,4 +92,4 @@ namespace crnlib typedef ray ray2F; typedef ray ray3F; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_rect.h b/crnlib/crn_rect.h index c3e3db4..b9f552f 100644 --- a/crnlib/crn_rect.h +++ b/crnlib/crn_rect.h @@ -1,166 +1,257 @@ -// File: crn_rect.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_vec.h" #include "crn_hash.h" -namespace crnlib { -class rect { - public: - inline rect() { - } - - inline rect(eClear) { - clear(); - } - - // up to, but not including right/bottom - inline rect(int left, int top, int right, int bottom) { - set(left, top, right, bottom); - } - - inline rect(const vec2I& lo, const vec2I& hi) { - m_corner[0] = lo; - m_corner[1] = hi; - } - - inline rect(const vec2I& point) { - m_corner[0] = point; - m_corner[1].set(point[0] + 1, point[1] + 1); - } - - inline bool operator==(const rect& r) const { - return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]); - } - - inline bool operator<(const rect& r) const { - for (uint i = 0; i < 2; i++) { - if (m_corner[i] < r.m_corner[i]) - return true; - else if (!(m_corner[i] == r.m_corner[i])) - return false; - } - - return false; - } - - inline void clear() { - m_corner[0].clear(); - m_corner[1].clear(); - } - - inline void set(int left, int top, int right, int bottom) { - m_corner[0].set(left, top); - m_corner[1].set(right, bottom); - } - - inline void set(const vec2I& lo, const vec2I& hi) { - m_corner[0] = lo; - m_corner[1] = hi; - } - - inline void set(const vec2I& point) { - m_corner[0] = point; - m_corner[1].set(point[0] + 1, point[1] + 1); - } - - inline uint get_width() const { return m_corner[1][0] - m_corner[0][0]; } - inline uint get_height() const { return m_corner[1][1] - m_corner[0][1]; } - - inline int get_left() const { return m_corner[0][0]; } - inline int get_top() const { return m_corner[0][1]; } - inline int get_right() const { return m_corner[1][0]; } - inline int get_bottom() const { return m_corner[1][1]; } - - inline bool is_empty() const { return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); } - - inline uint get_dimension(uint axis) const { return m_corner[1][axis] - m_corner[0][axis]; } - inline uint get_area() const { return get_dimension(0) * get_dimension(1); } - - inline const vec2I& 
operator[](uint i) const { - CRNLIB_ASSERT(i < 2); - return m_corner[i]; - } - inline vec2I& operator[](uint i) { - CRNLIB_ASSERT(i < 2); - return m_corner[i]; - } - - inline rect& translate(int x_ofs, int y_ofs) { - m_corner[0][0] += x_ofs; - m_corner[0][1] += y_ofs; - m_corner[1][0] += x_ofs; - m_corner[1][1] += y_ofs; - return *this; - } - - inline rect& init_expand() { - m_corner[0].set(INT_MAX); - m_corner[1].set(INT_MIN); - return *this; - } - - inline rect& expand(int x, int y) { - m_corner[0][0] = math::minimum(m_corner[0][0], x); - m_corner[0][1] = math::minimum(m_corner[0][1], y); - m_corner[1][0] = math::maximum(m_corner[1][0], x + 1); - m_corner[1][1] = math::maximum(m_corner[1][1], y + 1); - return *this; - } - - inline rect& expand(const rect& r) { - m_corner[0][0] = math::minimum(m_corner[0][0], r[0][0]); - m_corner[0][1] = math::minimum(m_corner[0][1], r[0][1]); - m_corner[1][0] = math::maximum(m_corner[1][0], r[1][0]); - m_corner[1][1] = math::maximum(m_corner[1][1], r[1][1]); - return *this; - } - - inline bool touches(const rect& r) const { - for (uint i = 0; i < 2; i++) { - if (r[1][i] <= m_corner[0][i]) - return false; - else if (r[0][i] >= m_corner[1][i]) - return false; - } - - return true; - } - - inline bool within(const rect& r) const { - for (uint i = 0; i < 2; i++) { - if (m_corner[0][i] < r[0][i]) - return false; - else if (m_corner[1][i] > r[1][i]) - return false; - } - - return true; - } - - inline bool intersect(const rect& r) { - if (!touches(r)) { - clear(); - return false; - } - - for (uint i = 0; i < 2; i++) { - m_corner[0][i] = math::maximum(m_corner[0][i], r[0][i]); - m_corner[1][i] = math::minimum(m_corner[1][i], r[1][i]); - } - - return true; - } - - inline bool contains(int x, int y) const { - return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && - (y >= m_corner[0][1]) && (y < m_corner[1][1]); - } - - inline bool contains(const vec2I& p) const { return contains(p[0], p[1]); } - - private: - vec2I m_corner[2]; -}; - -} // 
namespace crnlib +namespace crnlib +{ + class rect + { + public: + inline rect() + { + } + + inline rect(eClear) + { + clear(); + } + + // up to, but not including right/bottom + inline rect(int left, int top, int right, int bottom) + { + set(left, top, right, bottom); + } + + inline rect(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline rect(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline bool operator==(const rect& r) const + { + return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]); + } + + inline bool operator<(const rect& r) const + { + for (uint i = 0; i < 2; i++) + { + if (m_corner[i] < r.m_corner[i]) + { + return true; + } + else if (!(m_corner[i] == r.m_corner[i])) + { + return false; + } + } + + return false; + } + + inline void clear() + { + m_corner[0].clear(); + m_corner[1].clear(); + } + + inline void set(int left, int top, int right, int bottom) + { + m_corner[0].set(left, top); + m_corner[1].set(right, bottom); + } + + inline void set(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline void set(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline uint get_width() const + { + return m_corner[1][0] - m_corner[0][0]; + } + inline uint get_height() const + { + return m_corner[1][1] - m_corner[0][1]; + } + + inline int get_left() const + { + return m_corner[0][0]; + } + inline int get_top() const + { + return m_corner[0][1]; + } + inline int get_right() const + { + return m_corner[1][0]; + } + inline int get_bottom() const + { + return m_corner[1][1]; + } + + inline bool is_empty() const + { + return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); + } + + inline uint get_dimension(uint axis) const + { + return m_corner[1][axis] - m_corner[0][axis]; + } + inline uint get_area() const + { + return get_dimension(0) * 
get_dimension(1); + } + + inline const vec2I& operator[](uint i) const + { + CRNLIB_ASSERT(i < 2); + return m_corner[i]; + } + inline vec2I& operator[](uint i) + { + CRNLIB_ASSERT(i < 2); + return m_corner[i]; + } + + inline rect& translate(int x_ofs, int y_ofs) + { + m_corner[0][0] += x_ofs; + m_corner[0][1] += y_ofs; + m_corner[1][0] += x_ofs; + m_corner[1][1] += y_ofs; + return *this; + } + + inline rect& init_expand() + { + m_corner[0].set(INT_MAX); + m_corner[1].set(INT_MIN); + return *this; + } + + inline rect& expand(int x, int y) + { + m_corner[0][0] = math::minimum(m_corner[0][0], x); + m_corner[0][1] = math::minimum(m_corner[0][1], y); + m_corner[1][0] = math::maximum(m_corner[1][0], x + 1); + m_corner[1][1] = math::maximum(m_corner[1][1], y + 1); + return *this; + } + + inline rect& expand(const rect& r) + { + m_corner[0][0] = math::minimum(m_corner[0][0], r[0][0]); + m_corner[0][1] = math::minimum(m_corner[0][1], r[0][1]); + m_corner[1][0] = math::maximum(m_corner[1][0], r[1][0]); + m_corner[1][1] = math::maximum(m_corner[1][1], r[1][1]); + return *this; + } + + inline bool touches(const rect& r) const + { + for (uint i = 0; i < 2; i++) + { + if (r[1][i] <= m_corner[0][i]) + { + return false; + } + else if (r[0][i] >= m_corner[1][i]) + { + return false; + } + } + + return true; + } + + inline bool within(const rect& r) const + { + for (uint i = 0; i < 2; i++) + { + if (m_corner[0][i] < r[0][i]) + { + return false; + } + else if (m_corner[1][i] > r[1][i]) + { + return false; + } + } + + return true; + } + + inline bool intersect(const rect& r) + { + if (!touches(r)) + { + clear(); + return false; + } + + for (uint i = 0; i < 2; i++) + { + m_corner[0][i] = math::maximum(m_corner[0][i], r[0][i]); + m_corner[1][i] = math::minimum(m_corner[1][i], r[1][i]); + } + + return true; + } + + inline bool contains(int x, int y) const + { + return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && + (y >= m_corner[0][1]) && (y < m_corner[1][1]); + } + + inline bool 
contains(const vec2I& p) const + { + return contains(p[0], p[1]); + } + + private: + vec2I m_corner[2]; + }; +} // namespace crnlib diff --git a/crnlib/crn_resample_filters.cpp b/crnlib/crn_resample_filters.cpp index 7b315ef..62291b2 100644 --- a/crnlib/crn_resample_filters.cpp +++ b/crnlib/crn_resample_filters.cpp @@ -230,7 +230,7 @@ namespace crnlib } #define GAUSSIAN_SUPPORT (1.25f) - static float gaussian_filter(float t) // with blackman window + static float gaussian_filter(float t) // with blackman window { if (t < 0) { @@ -248,7 +248,8 @@ namespace crnlib // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. #define LANCZOS3_SUPPORT (3.0f) - static float lanczos3_filter(float t) { + static float lanczos3_filter(float t) + { if (t < 0.0f) { t = -t; @@ -283,7 +284,8 @@ namespace crnlib } #define LANCZOS6_SUPPORT (6.0f) - static float lanczos6_filter(float t) { + static float lanczos6_filter(float t) + { if (t < 0.0f) { t = -t; @@ -300,7 +302,8 @@ namespace crnlib } #define LANCZOS12_SUPPORT (12.0f) - static float lanczos12_filter(float t) { + static float lanczos12_filter(float t) + { if (t < 0.0f) { t = -t; @@ -327,7 +330,7 @@ namespace crnlib pow = 1.0; k = 0; ds = 1.0; - while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? + while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? 
{ ++k; pow = pow * (xh / k); @@ -365,24 +368,23 @@ namespace crnlib return 0.0f; } - const resample_filter g_resample_filters[] = - { - {"box", box_filter, BOX_FILTER_SUPPORT}, - {"tent", tent_filter, TENT_FILTER_SUPPORT}, - {"bell", bell_filter, BELL_SUPPORT}, - {"b-spline", B_spline_filter, B_SPLINE_SUPPORT}, - {"mitchell", mitchell_filter, MITCHELL_SUPPORT}, - {"lanczos3", lanczos3_filter, LANCZOS3_SUPPORT}, - {"blackman", blackman_filter, BLACKMAN_SUPPORT}, - {"lanczos4", lanczos4_filter, LANCZOS4_SUPPORT}, - {"lanczos6", lanczos6_filter, LANCZOS6_SUPPORT}, - {"lanczos12", lanczos12_filter, LANCZOS12_SUPPORT}, - {"kaiser", kaiser_filter, KAISER_SUPPORT}, - {"gaussian", gaussian_filter, GAUSSIAN_SUPPORT}, - {"catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT}, - {"quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT}, - {"quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT}, - {"quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT}, + const resample_filter g_resample_filters[] = { + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, + { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, }; const int g_num_resample_filters = sizeof(g_resample_filters) / sizeof(g_resample_filters[0]); @@ 
-399,4 +401,4 @@ namespace crnlib return cInvalidIndex; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_resample_filters.h b/crnlib/crn_resample_filters.h index 1ce4928..26e71e5 100644 --- a/crnlib/crn_resample_filters.h +++ b/crnlib/crn_resample_filters.h @@ -1,4 +1,3 @@ -// File: crn_resample_filters.h // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ #pragma once @@ -20,5 +19,4 @@ namespace crnlib CRN_EXPORT extern const int g_num_resample_filters; CRN_EXPORT int find_resample_filter(const char* pName); - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_resampler.cpp b/crnlib/crn_resampler.cpp index 0071b62..547fecf 100644 --- a/crnlib/crn_resampler.cpp +++ b/crnlib/crn_resampler.cpp @@ -4,14 +4,16 @@ #include "crn_resampler.h" #include "crn_resample_filters.h" -namespace crnlib { +namespace crnlib +{ #define resampler_assert CRNLIB_ASSERT -static inline int resampler_range_check(int v, int h) { - (void)h; - resampler_assert((v >= 0) && (v < h)); - return v; -} + static inline int resampler_range_check(int v, int h) + { + (void)h; + resampler_assert((v >= 0) && (v < h)); + return v; + } #ifndef max #define max(a, b) (((a) > (b)) ? (a) : (b)) @@ -31,776 +33,941 @@ static inline int resampler_range_check(int v, int h) { #define RESAMPLER_DEBUG 0 -// (x mod y) with special handling for negative x values. -static inline int posmod(int x, int y) { - if (x >= 0) - return (x % y); - else { - int m = (-x) % y; + // (x mod y) with special handling for negative x values. + static inline int posmod(int x, int y) + { + if (x >= 0) + { + return (x % y); + } + else + { + int m = (-x) % y; - if (m != 0) - m = y - m; + if (m != 0) + { + m = y - m; + } - return (m); - } -} + return (m); + } + } -// Float to int cast with truncation. -static inline int cast_to_int(Resample_Real i) { - return (int)i; -} + // Float to int cast with truncation. 
+ static inline int cast_to_int(Resample_Real i) + { + return (int)i; + } -/* Ensure that the contributing source sample is + /* Ensure that the contributing source sample is * within bounds. If not, reflect, clamp, or wrap. */ -int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) { - int n; - - if (j < 0) { - if (boundary_op == BOUNDARY_REFLECT) { - n = -j; - - if (n >= src_x) - n = src_x - 1; - } else if (boundary_op == BOUNDARY_WRAP) - n = posmod(j, src_x); - else - n = 0; - } else if (j >= src_x) { - if (boundary_op == BOUNDARY_REFLECT) { - n = (src_x - j) + (src_x - 1); - - if (n < 0) - n = 0; - } else if (boundary_op == BOUNDARY_WRAP) - n = posmod(j, src_x); - else - n = src_x - 1; - } else - n = j; - - return n; -} - -// The make_clist() method generates, for all destination samples, -// the list of all source samples with non-zero weighted contributions. -Resampler::Contrib_List* Resampler::make_clist( - int src_x, int dst_x, Boundary_Op boundary_op, - Resample_Real (*Pfilter)(Resample_Real), - Resample_Real filter_support, - Resample_Real filter_scale, - Resample_Real src_ofs) { - typedef struct - { - // The center of the range in DISCRETE coordinates (pixel center = 0.0f). 
- Resample_Real center; - int left, right; - } Contrib_Bounds; - - int i, j, k, n, left, right; - Resample_Real total_weight; - Resample_Real xscale, center, half_width, weight; - Contrib_List* Pcontrib; - Contrib* Pcpool; - Contrib* Pcpool_next; - Contrib_Bounds* Pcontrib_bounds; - - if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == nullptr) - return nullptr; - - Pcontrib_bounds = (Contrib_Bounds*)crnlib_calloc(dst_x, sizeof(Contrib_Bounds)); - if (!Pcontrib_bounds) { - crnlib_free(Pcontrib); - return (nullptr); - } - - const Resample_Real oo_filter_scale = 1.0f / filter_scale; - - const Resample_Real NUDGE = 0.5f; - xscale = dst_x / (Resample_Real)src_x; - - if (xscale < 1.0f) { - int total; - (void)total; - - /* Handle case when there are fewer destination + int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) + { + int n; + + if (j < 0) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = -j; + + if (n >= src_x) + { + n = src_x - 1; + } + } + else if (boundary_op == BOUNDARY_WRAP) + { + n = posmod(j, src_x); + } + else + { + n = 0; + } + } + else if (j >= src_x) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = (src_x - j) + (src_x - 1); + + if (n < 0) + { + n = 0; + } + } + else if (boundary_op == BOUNDARY_WRAP) + { + n = posmod(j, src_x); + } + else + { + n = src_x - 1; + } + } + else + { + n = j; + } + + return n; + } + + // The make_clist() method generates, for all destination samples, + // the list of all source samples with non-zero weighted contributions. + Resampler::Contrib_List* Resampler::make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real (*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs) + { + typedef struct + { + // The center of the range in DISCRETE coordinates (pixel center = 0.0f). 
+ Resample_Real center; + int left, right; + } Contrib_Bounds; + + int i, j, k, n, left, right; + Resample_Real total_weight; + Resample_Real xscale, center, half_width, weight; + Contrib_List* Pcontrib; + Contrib* Pcpool; + Contrib* Pcpool_next; + Contrib_Bounds* Pcontrib_bounds; + + if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == nullptr) + { + return nullptr; + } + + Pcontrib_bounds = (Contrib_Bounds*)crnlib_calloc(dst_x, sizeof(Contrib_Bounds)); + if (!Pcontrib_bounds) + { + crnlib_free(Pcontrib); + return (nullptr); + } + + const Resample_Real oo_filter_scale = 1.0f / filter_scale; + + const Resample_Real NUDGE = 0.5f; + xscale = dst_x / (Resample_Real)src_x; + + if (xscale < 1.0f) + { + int total; + (void)total; + + /* Handle case when there are fewer destination * samples than source samples (downsampling/minification). */ - // stretched half width of filter - half_width = (filter_support / xscale) * filter_scale; + // stretched half width of filter + half_width = (filter_support / xscale) * filter_scale; - // Find the range of source sample(s) that will contribute to each destination sample. + // Find the range of source sample(s) that will contribute to each destination sample. - for (i = 0, n = 0; i < dst_x; i++) { - // Convert from discrete to continuous coordinates, scale, then convert back to discrete. - center = ((Resample_Real)i + NUDGE) / xscale; - center -= NUDGE; - center += src_ofs; + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. 
+ center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; - left = cast_to_int((Resample_Real)floor(center - half_width)); - right = cast_to_int((Resample_Real)ceil(center + half_width)); + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); - Pcontrib_bounds[i].center = center; - Pcontrib_bounds[i].left = left; - Pcontrib_bounds[i].right = right; + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; - n += (right - left + 1); - } + n += (right - left + 1); + } - /* Allocate memory for contributors. */ + /* Allocate memory for contributors. */ - if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == nullptr)) { - crnlib_free(Pcontrib); - crnlib_free(Pcontrib_bounds); - return nullptr; - } - total = n; + if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == nullptr)) + { + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return nullptr; + } + total = n; - Pcpool_next = Pcpool; + Pcpool_next = Pcpool; - /* Create the list of source samples which + /* Create the list of source samples which * contribute to each destination sample. 
*/ - for (i = 0; i < dst_x; i++) { - int max_k = -1; - Resample_Real max_w = -1e+20f; + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; - center = Pcontrib_bounds[i].center; - left = Pcontrib_bounds[i].left; - right = Pcontrib_bounds[i].right; + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; - Pcontrib[i].n = 0; - Pcontrib[i].p = Pcpool_next; - Pcpool_next += (right - left + 1); - resampler_assert((Pcpool_next - Pcpool) <= total); + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + resampler_assert((Pcpool_next - Pcpool) <= total); - total_weight = 0; + total_weight = 0; - for (j = left; j <= right; j++) - total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); - const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight); + for (j = left; j <= right; j++) + { + total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); + } + const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight); - total_weight = 0; + total_weight = 0; #if RESAMPLER_DEBUG - printf("%i: ", i); + printf("%i: ", i); #endif - for (j = left; j <= right; j++) { - weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; - if (weight == 0.0f) - continue; + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; + if (weight == 0.0f) + { + continue; + } - n = reflect(j, src_x, boundary_op); + n = reflect(j, src_x, boundary_op); #if RESAMPLER_DEBUG - printf("%i(%f), ", n, weight); + printf("%i(%f), ", n, weight); #endif - /* Increment the number of source + /* Increment the number of source * samples which contribute to the * current destination sample.
*/ - k = Pcontrib[i].n++; + k = Pcontrib[i].n++; - Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ - Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ - total_weight += weight; /* total weight of all contributors */ + total_weight += weight; /* total weight of all contributors */ - if (weight > max_w) { - max_w = weight; - max_k = k; - } - } + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } #if RESAMPLER_DEBUG - printf("\n\n"); + printf("\n\n"); #endif - //resampler_assert(Pcontrib[i].n); - //resampler_assert(max_k != -1); - if ((max_k == -1) || (Pcontrib[i].n == 0)) { - crnlib_free(Pcpool); - crnlib_free(Pcontrib); - crnlib_free(Pcontrib_bounds); - return nullptr; - } - - if (total_weight != 1.0f) - Pcontrib[i].p[max_k].weight += 1.0f - total_weight; - } - } else { - /* Handle case when there are more + //resampler_assert(Pcontrib[i].n); + //resampler_assert(max_k != -1); + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + crnlib_free(Pcpool); + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return nullptr; + } + + if (total_weight != 1.0f) + { + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + } + else + { + /* Handle case when there are more * destination samples than source * samples (upsampling). */ - half_width = filter_support * filter_scale; + half_width = filter_support * filter_scale; - // Find the source sample(s) that contribute to each destination sample. + // Find the source sample(s) that contribute to each destination sample. - for (i = 0, n = 0; i < dst_x; i++) { - // Convert from discrete to continuous coordinates, scale, then convert back to discrete. 
- center = ((Resample_Real)i + NUDGE) / xscale; - center -= NUDGE; - center += src_ofs; + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; - left = cast_to_int((Resample_Real)floor(center - half_width)); - right = cast_to_int((Resample_Real)ceil(center + half_width)); + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); - Pcontrib_bounds[i].center = center; - Pcontrib_bounds[i].left = left; - Pcontrib_bounds[i].right = right; + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; - n += (right - left + 1); - } + n += (right - left + 1); + } - /* Allocate memory for contributors. */ + /* Allocate memory for contributors. */ - int total = n; - if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == nullptr)) { - crnlib_free(Pcontrib); - crnlib_free(Pcontrib_bounds); - return nullptr; - } + int total = n; + if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == nullptr)) + { + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return nullptr; + } - Pcpool_next = Pcpool; + Pcpool_next = Pcpool; - /* Create the list of source samples which + /* Create the list of source samples which * contribute to each destination sample. 
*/ - for (i = 0; i < dst_x; i++) { - int max_k = -1; - Resample_Real max_w = -1e+20f; + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; - center = Pcontrib_bounds[i].center; - left = Pcontrib_bounds[i].left; - right = Pcontrib_bounds[i].right; + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; - Pcontrib[i].n = 0; - Pcontrib[i].p = Pcpool_next; - Pcpool_next += (right - left + 1); - resampler_assert((Pcpool_next - Pcpool) <= total); + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + resampler_assert((Pcpool_next - Pcpool) <= total); - total_weight = 0; - for (j = left; j <= right; j++) - total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); + total_weight = 0; + for (j = left; j <= right; j++) + { + total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); + } - const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight); + const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight); - total_weight = 0; + total_weight = 0; #if RESAMPLER_DEBUG - printf("%i: ", i); + printf("%i: ", i); #endif - for (j = left; j <= right; j++) { - weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; - if (weight == 0.0f) - continue; + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; + if (weight == 0.0f) + { + continue; + } - n = reflect(j, src_x, boundary_op); + n = reflect(j, src_x, boundary_op); #if RESAMPLER_DEBUG - printf("%i(%f), ", n, weight); + printf("%i(%f), ", n, weight); #endif - /* Increment the number of source + /* Increment the number of source * samples which contribute to the * current destination sample.
*/ - k = Pcontrib[i].n++; + k = Pcontrib[i].n++; - Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ - Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ - total_weight += weight; /* total weight of all contributors */ + total_weight += weight; /* total weight of all contributors */ - if (weight > max_w) { - max_w = weight; - max_k = k; - } - } + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } #if RESAMPLER_DEBUG - printf("\n\n"); + printf("\n\n"); #endif - //resampler_assert(Pcontrib[i].n); - //resampler_assert(max_k != -1); - - if ((max_k == -1) || (Pcontrib[i].n == 0)) { - crnlib_free(Pcpool); - crnlib_free(Pcontrib); - crnlib_free(Pcontrib_bounds); - return nullptr; - } - - if (total_weight != 1.0f) - Pcontrib[i].p[max_k].weight += 1.0f - total_weight; - } - } + //resampler_assert(Pcontrib[i].n); + //resampler_assert(max_k != -1); + + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + crnlib_free(Pcpool); + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return nullptr; + } + + if (total_weight != 1.0f) + { + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + } #if RESAMPLER_DEBUG - printf("*******\n"); + printf("*******\n"); #endif - crnlib_free(Pcontrib_bounds); + crnlib_free(Pcontrib_bounds); - return Pcontrib; -} + return Pcontrib; + } -void Resampler::resample_x(Sample* Pdst, const Sample* Psrc) { - resampler_assert(Pdst); - resampler_assert(Psrc); + void Resampler::resample_x(Sample* Pdst, const Sample* Psrc) + { + resampler_assert(Pdst); + resampler_assert(Psrc); - int i, j; - Sample total; - Contrib_List* Pclist = m_Pclist_x; - Contrib* p; + int i, j; + Sample total; + Contrib_List* Pclist = m_Pclist_x; + Contrib* p; - for (i = m_resample_dst_x; i > 0; i--, Pclist++) { + for (i = m_resample_dst_x; i > 0; i--, Pclist++) + { #if 
CRNLIB_RESAMPLER_DEBUG_OPS - total_ops += Pclist->n; + total_ops += Pclist->n; #endif - for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) - total += Psrc[p->pixel] * p->weight; + for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) + { + total += Psrc[p->pixel] * p->weight; + } - *Pdst++ = total; - } -} + *Pdst++ = total; + } + } -void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) { - int i; + void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) + { + int i; #if CRNLIB_RESAMPLER_DEBUG_OPS - total_ops += dst_x; + total_ops += dst_x; #endif - // Not += because temp buf wasn't cleared. - for (i = dst_x; i > 0; i--) - *Ptmp++ = *Psrc++ * weight; -} + // Not += because temp buf wasn't cleared. + for (i = dst_x; i > 0; i--) + { + *Ptmp++ = *Psrc++ * weight; + } + } -void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) { + void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) + { #if CRNLIB_RESAMPLER_DEBUG_OPS - total_ops += dst_x; + total_ops += dst_x; #endif - for (int i = dst_x; i > 0; i--) - (*Ptmp++) += *Psrc++ * weight; -} + for (int i = dst_x; i > 0; i--) + { + (*Ptmp++) += *Psrc++ * weight; + } + } -void Resampler::clamp(Sample* Pdst, int n) { - while (n > 0) { - Sample x = *Pdst; - *Pdst++ = clamp_sample(x); - n--; - } -} + void Resampler::clamp(Sample* Pdst, int n) + { + while (n > 0) + { + Sample x = *Pdst; + *Pdst++ = clamp_sample(x); + n--; + } + } -void Resampler::resample_y(Sample* Pdst) { - int i, j; - Sample* Psrc; - Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; + void Resampler::resample_y(Sample* Pdst) + { + int i, j; + Sample* Psrc; + Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; - Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst; - resampler_assert(Ptmp); + Sample* Ptmp = m_delay_x_resample ? 
m_Ptmp_buf : Pdst; + resampler_assert(Ptmp); - /* Process each contributor. */ + /* Process each contributor. */ - for (i = 0; i < Pclist->n; i++) { - /* locate the contributor's location in the scan + for (i = 0; i < Pclist->n; i++) + { + /* locate the contributor's location in the scan * buffer -- the contributor must always be found! */ - for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) - if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) - break; + for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) + { + if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) + { + break; + } + } - resampler_assert(j < MAX_SCAN_BUF_SIZE); + resampler_assert(j < MAX_SCAN_BUF_SIZE); - Psrc = m_Pscan_buf->scan_buf_l[j]; + Psrc = m_Pscan_buf->scan_buf_l[j]; - if (!i) - scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); - else - scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + if (!i) + { + scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + } + else + { + scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + } - /* If this source line doesn't contribute to any + /* If this source line doesn't contribute to any * more destination lines then mark the scanline buffer slot * which holds this source line as free. * (The max. number of slots used depends on the Y * axis sampling factor and the scaled filter width.) */ - if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) { - m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = FALSE; - m_Pscan_buf->scan_buf_y[j] = -1; - } - } + if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) + { + m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = FALSE; + m_Pscan_buf->scan_buf_y[j] = -1; + } + } - /* Now generate the destination line */ + /* Now generate the destination line */ - if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? 
- { - resampler_assert(Pdst != Ptmp); - resample_x(Pdst, Ptmp); - } else { - resampler_assert(Pdst == Ptmp); - } + if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? + { + resampler_assert(Pdst != Ptmp); + resample_x(Pdst, Ptmp); + } + else + { + resampler_assert(Pdst == Ptmp); + } - if (m_lo < m_hi) - clamp(Pdst, m_resample_dst_x); -} + if (m_lo < m_hi) + { + clamp(Pdst, m_resample_dst_x); + } + } -bool Resampler::put_line(const Sample* Psrc) { - int i; + bool Resampler::put_line(const Sample* Psrc) + { + int i; - if (m_cur_src_y >= m_resample_src_y) - return false; + if (m_cur_src_y >= m_resample_src_y) + { + return false; + } - /* Does this source line contribute + /* Does this source line contribute * to any destination line? if not, * exit now. */ - if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) { - m_cur_src_y++; - return true; - } + if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) + { + m_cur_src_y++; + return true; + } - /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ + /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ - for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) - if (m_Pscan_buf->scan_buf_y[i] == -1) - break; + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + if (m_Pscan_buf->scan_buf_y[i] == -1) + { + break; + } + } - /* If the buffer is full, exit with an error. */ + /* If the buffer is full, exit with an error. 
*/ - if (i == MAX_SCAN_BUF_SIZE) { - m_status = STATUS_SCAN_BUFFER_FULL; - return false; - } + if (i == MAX_SCAN_BUF_SIZE) + { + m_status = STATUS_SCAN_BUFFER_FULL; + return false; + } - m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = TRUE; - m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; + m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = TRUE; + m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; - /* Does this slot have any memory allocated to it? */ + /* Does this slot have any memory allocated to it? */ - if (!m_Pscan_buf->scan_buf_l[i]) { - if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return false; - } - } + if (!m_Pscan_buf->scan_buf_l[i]) + { + if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return false; + } + } - // Resampling on the X axis first? - if (m_delay_x_resample) { - resampler_assert(m_intermediate_x == m_resample_src_x); + // Resampling on the X axis first? 
+ if (m_delay_x_resample) + { + resampler_assert(m_intermediate_x == m_resample_src_x); - // Y-X resampling order - memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); - } else { - resampler_assert(m_intermediate_x == m_resample_dst_x); + // Y-X resampling order + memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); + } + else + { + resampler_assert(m_intermediate_x == m_resample_dst_x); - // X-Y resampling order - resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); - } + // X-Y resampling order + resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); + } - m_cur_src_y++; + m_cur_src_y++; - return true; -} + return true; + } -const Resampler::Sample* Resampler::get_line() { - int i; + const Resampler::Sample* Resampler::get_line() + { + int i; - /* If all the destination lines have been + /* If all the destination lines have been * generated, then always return nullptr. */ - if (m_cur_dst_y == m_resample_dst_y) - return nullptr; + if (m_cur_dst_y == m_resample_dst_y) + { + return nullptr; + } - /* Check to see if all the required + /* Check to see if all the required * contributors are present, if not, * return nullptr. 
*/ - for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) - if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) - return nullptr; + for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) + { + if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) + { + return nullptr; + } + } - resample_y(m_Pdst_buf); + resample_y(m_Pdst_buf); - m_cur_dst_y++; + m_cur_dst_y++; - return m_Pdst_buf; -} + return m_Pdst_buf; + } -Resampler::~Resampler() { - int i; + Resampler::~Resampler() + { + int i; #if CRNLIB_RESAMPLER_DEBUG_OPS - printf("actual ops: %i\n", total_ops); + printf("actual ops: %i\n", total_ops); #endif - crnlib_free(m_Pdst_buf); - m_Pdst_buf = nullptr; + crnlib_free(m_Pdst_buf); + m_Pdst_buf = nullptr; - if (m_Ptmp_buf) { - crnlib_free(m_Ptmp_buf); - m_Ptmp_buf = nullptr; - } + if (m_Ptmp_buf) + { + crnlib_free(m_Ptmp_buf); + m_Ptmp_buf = nullptr; + } - /* Don't deallocate a contibutor list + /* Don't deallocate a contibutor list * if the user passed us one of their own. 
*/ - if ((m_Pclist_x) && (!m_clist_x_forced)) { - crnlib_free(m_Pclist_x->p); - crnlib_free(m_Pclist_x); - m_Pclist_x = nullptr; - } - - if ((m_Pclist_y) && (!m_clist_y_forced)) { - crnlib_free(m_Pclist_y->p); - crnlib_free(m_Pclist_y); - m_Pclist_y = nullptr; - } - - crnlib_free(m_Psrc_y_count); - m_Psrc_y_count = nullptr; - - crnlib_free(m_Psrc_y_flag); - m_Psrc_y_flag = nullptr; - - if (m_Pscan_buf) { - for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) - crnlib_free(m_Pscan_buf->scan_buf_l[i]); - - crnlib_free(m_Pscan_buf); - m_Pscan_buf = nullptr; - } -} - -void Resampler::restart() { - if (STATUS_OKAY != m_status) - return; - - m_cur_src_y = m_cur_dst_y = 0; - - int i, j; - for (i = 0; i < m_resample_src_y; i++) { - m_Psrc_y_count[i] = 0; - m_Psrc_y_flag[i] = FALSE; - } - - for (i = 0; i < m_resample_dst_y; i++) { - for (j = 0; j < m_Pclist_y[i].n; j++) - m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; - } - - for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) { - m_Pscan_buf->scan_buf_y[i] = -1; - - crnlib_free(m_Pscan_buf->scan_buf_l[i]); - m_Pscan_buf->scan_buf_l[i] = nullptr; - } -} - -Resampler::Resampler(int src_x, int src_y, - int dst_x, int dst_y, - Boundary_Op boundary_op, - Resample_Real sample_low, Resample_Real sample_high, - const char* Pfilter_name, - Contrib_List* Pclist_x, - Contrib_List* Pclist_y, - Resample_Real filter_x_scale, - Resample_Real filter_y_scale, - Resample_Real src_x_ofs, - Resample_Real src_y_ofs) { - int i, j; - Resample_Real support, (*func)(Resample_Real); - - resampler_assert(src_x > 0); - resampler_assert(src_y > 0); - resampler_assert(dst_x > 0); - resampler_assert(dst_y > 0); + if ((m_Pclist_x) && (!m_clist_x_forced)) + { + crnlib_free(m_Pclist_x->p); + crnlib_free(m_Pclist_x); + m_Pclist_x = nullptr; + } -#if CRNLIB_RESAMPLER_DEBUG_OPS - total_ops = 0; -#endif + if ((m_Pclist_y) && (!m_clist_y_forced)) + { + crnlib_free(m_Pclist_y->p); + crnlib_free(m_Pclist_y); + m_Pclist_y = nullptr; + } + + 
crnlib_free(m_Psrc_y_count); + m_Psrc_y_count = nullptr; + + crnlib_free(m_Psrc_y_flag); + m_Psrc_y_flag = nullptr; - m_lo = sample_low; - m_hi = sample_high; - - m_delay_x_resample = false; - m_intermediate_x = 0; - m_Pdst_buf = nullptr; - m_Ptmp_buf = nullptr; - m_clist_x_forced = false; - m_Pclist_x = nullptr; - m_clist_y_forced = false; - m_Pclist_y = nullptr; - m_Psrc_y_count = nullptr; - m_Psrc_y_flag = nullptr; - m_Pscan_buf = nullptr; - m_status = STATUS_OKAY; - - m_resample_src_x = src_x; - m_resample_src_y = src_y; - m_resample_dst_x = dst_x; - m_resample_dst_y = dst_y; - - m_boundary_op = boundary_op; - - if ((m_Pdst_buf = (Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return; - } - - // Find the specified filter. - - if (Pfilter_name == nullptr) - Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; - - for (i = 0; i < g_num_resample_filters; i++) - if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) - break; - - if (i == g_num_resample_filters) { - m_status = STATUS_BAD_FILTER_NAME; - return; - } - - func = g_resample_filters[i].func; - support = g_resample_filters[i].support; - - /* Create contributor lists, unless the user supplied custom lists. 
*/ - - if (!Pclist_x) { - m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); - if (!m_Pclist_x) { - m_status = STATUS_OUT_OF_MEMORY; - return; + if (m_Pscan_buf) + { + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + crnlib_free(m_Pscan_buf->scan_buf_l[i]); + } + + crnlib_free(m_Pscan_buf); + m_Pscan_buf = nullptr; + } } - } else { - m_Pclist_x = Pclist_x; - m_clist_x_forced = true; - } - - if (!Pclist_y) { - m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); - if (!m_Pclist_y) { - m_status = STATUS_OUT_OF_MEMORY; - return; + + void Resampler::restart() + { + if (STATUS_OKAY != m_status) + { + return; + } + + m_cur_src_y = m_cur_dst_y = 0; + + int i, j; + for (i = 0; i < m_resample_src_y; i++) + { + m_Psrc_y_count[i] = 0; + m_Psrc_y_flag[i] = FALSE; + } + + for (i = 0; i < m_resample_dst_y; i++) + { + for (j = 0; j < m_Pclist_y[i].n; j++) + { + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + } + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + + crnlib_free(m_Pscan_buf->scan_buf_l[i]); + m_Pscan_buf->scan_buf_l[i] = nullptr; + } } - } else { - m_Pclist_y = Pclist_y; - m_clist_y_forced = true; - } - - if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return; - } - - if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return; - } - - /* Count how many times each source line + + Resampler::Resampler(int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op, + Resample_Real sample_low, Resample_Real sample_high, + const char* Pfilter_name, + Contrib_List* Pclist_x, + Contrib_List* Pclist_y, + Resample_Real filter_x_scale, + Resample_Real filter_y_scale, + Resample_Real src_x_ofs, + 
Resample_Real src_y_ofs) + { + int i, j; + Resample_Real support, (*func)(Resample_Real); + + resampler_assert(src_x > 0); + resampler_assert(src_y > 0); + resampler_assert(dst_x > 0); + resampler_assert(dst_y > 0); + +#if CRNLIB_RESAMPLER_DEBUG_OPS + total_ops = 0; +#endif + + m_lo = sample_low; + m_hi = sample_high; + + m_delay_x_resample = false; + m_intermediate_x = 0; + m_Pdst_buf = nullptr; + m_Ptmp_buf = nullptr; + m_clist_x_forced = false; + m_Pclist_x = nullptr; + m_clist_y_forced = false; + m_Pclist_y = nullptr; + m_Psrc_y_count = nullptr; + m_Psrc_y_flag = nullptr; + m_Pscan_buf = nullptr; + m_status = STATUS_OKAY; + + m_resample_src_x = src_x; + m_resample_src_y = src_y; + m_resample_dst_x = dst_x; + m_resample_dst_y = dst_y; + + m_boundary_op = boundary_op; + + if ((m_Pdst_buf = (Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Find the specified filter. + + if (Pfilter_name == nullptr) + { + Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; + } + + for (i = 0; i < g_num_resample_filters; i++) + { + if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) + { + break; + } + } + + if (i == g_num_resample_filters) + { + m_status = STATUS_BAD_FILTER_NAME; + return; + } + + func = g_resample_filters[i].func; + support = g_resample_filters[i].support; + + /* Create contributor lists, unless the user supplied custom lists. 
*/ + + if (!Pclist_x) + { + m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); + if (!m_Pclist_x) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_x = Pclist_x; + m_clist_x_forced = true; + } + + if (!Pclist_y) + { + m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); + if (!m_Pclist_y) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_y = Pclist_y; + m_clist_y_forced = true; + } + + if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + /* Count how many times each source line * contributes to a destination line. */ - for (i = 0; i < m_resample_dst_y; i++) - for (j = 0; j < m_Pclist_y[i].n; j++) - m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + for (i = 0; i < m_resample_dst_y; i++) + { + for (j = 0; j < m_Pclist_y[i].n; j++) + { + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + } + } - if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return; - } + if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } - for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) { - m_Pscan_buf->scan_buf_y[i] = -1; - m_Pscan_buf->scan_buf_l[i] = nullptr; - } + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + m_Pscan_buf->scan_buf_l[i] = nullptr; + } - m_cur_src_y = m_cur_dst_y = 0; - { - // Determine which axis to resample first by comparing the number of multiplies required - // for each possibility. 
- int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); - int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); + m_cur_src_y = m_cur_dst_y = 0; + { + // Determine which axis to resample first by comparing the number of multiplies required + // for each possibility. + int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); + int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); - // Hack 10/2000: Weight Y axis ops a little more than X axis ops. - // (Y axis ops use more cache resources.) - int xy_ops = x_ops * m_resample_src_y + - (4 * y_ops * m_resample_dst_x) / 3; + // Hack 10/2000: Weight Y axis ops a little more than X axis ops. + // (Y axis ops use more cache resources.) + int xy_ops = x_ops * m_resample_src_y + + (4 * y_ops * m_resample_dst_x) / 3; - int yx_ops = (4 * y_ops * m_resample_src_x) / 3 + - x_ops * m_resample_dst_y; + int yx_ops = (4 * y_ops * m_resample_src_x) / 3 + + x_ops * m_resample_dst_y; #if CRNLIB_RESAMPLER_DEBUG_OPS - printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); - printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); - printf("x_ops: %i\n", x_ops); - printf("y_ops: %i\n", y_ops); - printf("xy_ops: %i\n", xy_ops); - printf("yx_ops: %i\n", yx_ops); + printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); + printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); + printf("x_ops: %i\n", x_ops); + printf("y_ops: %i\n", y_ops); + printf("xy_ops: %i\n", xy_ops); + printf("yx_ops: %i\n", yx_ops); #endif - // Now check which resample order is better. In case of a tie, choose the order - // which buffers the least amount of data. - if ((xy_ops > yx_ops) || - ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))) { - m_delay_x_resample = true; - m_intermediate_x = m_resample_src_x; - } else { - m_delay_x_resample = false; - m_intermediate_x = m_resample_dst_x; - } + // Now check which resample order is better. In case of a tie, choose the order + // which buffers the least amount of data. 
+ if ((xy_ops > yx_ops) || + ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))) + { + m_delay_x_resample = true; + m_intermediate_x = m_resample_src_x; + } + else + { + m_delay_x_resample = false; + m_intermediate_x = m_resample_dst_x; + } #if CRNLIB_RESAMPLER_DEBUG_OPS - printf("delaying: %i\n", m_delay_x_resample); + printf("delaying: %i\n", m_delay_x_resample); #endif - } + } - if (m_delay_x_resample) { - if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) { - m_status = STATUS_OUT_OF_MEMORY; - return; + if (m_delay_x_resample) + { + if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == nullptr) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } } - } -} -void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y) { - if (ptr_clist_x) - *ptr_clist_x = m_Pclist_x; + void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y) + { + if (ptr_clist_x) + { + *ptr_clist_x = m_Pclist_x; + } - if (ptr_clist_y) - *ptr_clist_y = m_Pclist_y; -} + if (ptr_clist_y) + { + *ptr_clist_y = m_Pclist_y; + } + } -int Resampler::get_filter_num() { - return g_num_resample_filters; -} + int Resampler::get_filter_num() + { + return g_num_resample_filters; + } -const char* Resampler::get_filter_name(int filter_num) { - if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) - return nullptr; - else - return g_resample_filters[filter_num].name; -} + const char* Resampler::get_filter_name(int filter_num) + { + if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) + { + return nullptr; + } + else + { + return g_resample_filters[filter_num].name; + } + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_resampler.h b/crnlib/crn_resampler.h index aa0bd4b..8689def 100644 --- a/crnlib/crn_resampler.h +++ b/crnlib/crn_resampler.h @@ -1,165 +1,192 @@ -// File: crn_resampler.h // RG: This is public domain code, originally derived from 
Graphics Gems 3, see: http://code.google.com/p/imageresampler/ + #pragma once #include "crn_export.h" -namespace crnlib { +namespace crnlib +{ #define CRNLIB_RESAMPLER_DEBUG_OPS 0 #define CRNLIB_RESAMPLER_DEFAULT_FILTER "lanczos4" #define CRNLIB_RESAMPLER_MAX_DIMENSION 16384 -// float or double -typedef float Resample_Real; - -class CRN_EXPORT Resampler { - public: - typedef Resample_Real Sample; - - struct Contrib { - Resample_Real weight; - unsigned short pixel; - }; - - struct Contrib_List { - unsigned short n; - Contrib* p; - }; - - enum Boundary_Op { - BOUNDARY_WRAP = 0, - BOUNDARY_REFLECT = 1, - BOUNDARY_CLAMP = 2 - }; - - enum Status { - STATUS_OKAY = 0, - STATUS_OUT_OF_MEMORY = 1, - STATUS_BAD_FILTER_NAME = 2, - STATUS_SCAN_BUFFER_FULL = 3 - }; - - // src_x/src_y - Input dimensions - // dst_x/dst_y - Output dimensions - // boundary_op - How to sample pixels near the image boundaries - // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high - // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler - // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) - Resampler( - int src_x, int src_y, - int dst_x, int dst_y, - Boundary_Op boundary_op = BOUNDARY_CLAMP, - Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, - const char* Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER, - Contrib_List* Pclist_x = nullptr, - Contrib_List* Pclist_y = nullptr, - Resample_Real filter_x_scale = 1.0f, - Resample_Real filter_y_scale = 1.0f, - Resample_Real src_x_ofs = 0.0f, - Resample_Real src_y_ofs = 0.0f); - - ~Resampler(); - - // Reinits resampler so it can handle another frame. - void restart(); - - // false on out of memory. - bool put_line(const Sample* Psrc); - - // nullptr if no scanlines are currently available (give the resampler more scanlines!) 
- const Sample* get_line(); - - Status status() const { return m_status; } - - // Returned contributor lists can be shared with another Resampler. - void get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y); - Contrib_List* get_clist_x() const { return m_Pclist_x; } - Contrib_List* get_clist_y() const { return m_Pclist_y; } - - // Filter accessors. - static int get_filter_num(); - static const char* get_filter_name(int filter_num); - - static Contrib_List* make_clist( - int src_x, int dst_x, Boundary_Op boundary_op, - Resample_Real (*Pfilter)(Resample_Real), - Resample_Real filter_support, - Resample_Real filter_scale, - Resample_Real src_ofs); - - private: - Resampler(); - Resampler(const Resampler& o); - Resampler& operator=(const Resampler& o); + // float or double + typedef float Resample_Real; + + class CRN_EXPORT Resampler + { + public: + typedef Resample_Real Sample; + + struct Contrib + { + Resample_Real weight; + unsigned short pixel; + }; + + struct Contrib_List + { + unsigned short n; + Contrib* p; + }; + + enum Boundary_Op + { + BOUNDARY_WRAP = 0, + BOUNDARY_REFLECT = 1, + BOUNDARY_CLAMP = 2 + }; + + enum Status + { + STATUS_OKAY = 0, + STATUS_OUT_OF_MEMORY = 1, + STATUS_BAD_FILTER_NAME = 2, + STATUS_SCAN_BUFFER_FULL = 3 + }; + + // src_x/src_y - Input dimensions + // dst_x/dst_y - Output dimensions + // boundary_op - How to sample pixels near the image boundaries + // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high + // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler + // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) + Resampler( + int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op = BOUNDARY_CLAMP, + Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, + const char* Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER, + Contrib_List* Pclist_x = nullptr, + 
Contrib_List* Pclist_y = nullptr, + Resample_Real filter_x_scale = 1.0f, + Resample_Real filter_y_scale = 1.0f, + Resample_Real src_x_ofs = 0.0f, + Resample_Real src_y_ofs = 0.0f); + + ~Resampler(); + + // Reinits resampler so it can handle another frame. + void restart(); + + // false on out of memory. + bool put_line(const Sample* Psrc); + + // nullptr if no scanlines are currently available (give the resampler more scanlines!) + const Sample* get_line(); + + Status status() const + { + return m_status; + } + + // Returned contributor lists can be shared with another Resampler. + void get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y); + Contrib_List* get_clist_x() const + { + return m_Pclist_x; + } + Contrib_List* get_clist_y() const + { + return m_Pclist_y; + } + + // Filter accessors. + static int get_filter_num(); + static const char* get_filter_name(int filter_num); + + static Contrib_List* make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real (*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs); + + private: + Resampler(); + Resampler(const Resampler& o); + Resampler& operator=(const Resampler& o); #ifdef CRNLIB_RESAMPLER_DEBUG_OPS - int total_ops; + int total_ops; #endif - int m_intermediate_x; + int m_intermediate_x; - int m_resample_src_x; - int m_resample_src_y; - int m_resample_dst_x; - int m_resample_dst_y; + int m_resample_src_x; + int m_resample_src_y; + int m_resample_dst_x; + int m_resample_dst_y; - Boundary_Op m_boundary_op; + Boundary_Op m_boundary_op; - Sample* m_Pdst_buf; - Sample* m_Ptmp_buf; + Sample* m_Pdst_buf; + Sample* m_Ptmp_buf; - Contrib_List* m_Pclist_x; - Contrib_List* m_Pclist_y; + Contrib_List* m_Pclist_x; + Contrib_List* m_Pclist_y; - bool m_clist_x_forced; - bool m_clist_y_forced; + bool m_clist_x_forced; + bool m_clist_y_forced; - bool m_delay_x_resample; + bool m_delay_x_resample; - int* m_Psrc_y_count; - unsigned char* 
m_Psrc_y_flag; + int* m_Psrc_y_count; + unsigned char* m_Psrc_y_flag; - // The maximum number of scanlines that can be buffered at one time. - enum { MAX_SCAN_BUF_SIZE = CRNLIB_RESAMPLER_MAX_DIMENSION }; + // The maximum number of scanlines that can be buffered at one time. + enum + { + MAX_SCAN_BUF_SIZE = CRNLIB_RESAMPLER_MAX_DIMENSION + }; - struct Scan_Buf { - int scan_buf_y[MAX_SCAN_BUF_SIZE]; - Sample* scan_buf_l[MAX_SCAN_BUF_SIZE]; - }; + struct Scan_Buf + { + int scan_buf_y[MAX_SCAN_BUF_SIZE]; + Sample* scan_buf_l[MAX_SCAN_BUF_SIZE]; + }; - Scan_Buf* m_Pscan_buf; + Scan_Buf* m_Pscan_buf; - int m_cur_src_y; - int m_cur_dst_y; + int m_cur_src_y; + int m_cur_dst_y; - Status m_status; + Status m_status; - void resample_x(Sample* Pdst, const Sample* Psrc); - void scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); - void scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); - void clamp(Sample* Pdst, int n); - void resample_y(Sample* Pdst); + void resample_x(Sample* Pdst, const Sample* Psrc); + void scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); + void scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); + void clamp(Sample* Pdst, int n); + void resample_y(Sample* Pdst); - static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); + static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); - inline int count_ops(Contrib_List* Pclist, int k) { - int i, t = 0; - for (i = 0; i < k; i++) - t += Pclist[i].n; - return (t); - } + inline int count_ops(Contrib_List* Pclist, int k) + { + int i, t = 0; + for (i = 0; i < k; i++) + { + t += Pclist[i].n; + } + return (t); + } - Resample_Real m_lo; - Resample_Real m_hi; + Resample_Real m_lo; + Resample_Real m_hi; - inline Resample_Real clamp_sample(Resample_Real f) const { - if (f < m_lo) - f = m_lo; - else if (f > m_hi) - f = m_hi; - return f; - } -}; + inline 
Resample_Real clamp_sample(Resample_Real f) const + { + if (f < m_lo) + { + f = m_lo; + } + else if (f > m_hi) + { + f = m_hi; + } + return f; + } + }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_sparse_array.h b/crnlib/crn_sparse_array.h index bf7535d..c5cc7a6 100644 --- a/crnlib/crn_sparse_array.h +++ b/crnlib/crn_sparse_array.h @@ -1,346 +1,479 @@ -// File: crn_sparse_array.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once -namespace crnlib +namespace crnlib { -template -class sparse_array_traits { - public: - static inline void* alloc_space(uint size) { - return crnlib_malloc(size); - } - - static inline void free_space(void* p) { - crnlib_free(p); - } - - static inline void construct_group(T* p) { - scalar_type::construct_array(p, 1U << Log2N); - } - - static inline void destruct_group(T* p) { - scalar_type::destruct_array(p, 1U << Log2N); - } - - static inline void construct_element(T* p) { - scalar_type::construct(p); - } - - static inline void destruct_element(T* p) { - scalar_type::destruct(p); - } - - static inline void copy_group(T* pDst, const T* pSrc) { - for (uint j = 0; j < (1U << Log2N); j++) - pDst[j] = pSrc[j]; - } -}; - -template class Traits = sparse_array_traits> -class sparse_array : public Traits { - public: - enum { N = 1U << Log2N }; - - inline sparse_array() - : m_size(0), m_num_active_groups(0) { - init_default(); - } - - inline sparse_array(uint size) - : m_size(0), m_num_active_groups(0) { - init_default(); - - resize(size); - } - - inline sparse_array(const sparse_array& other) - : m_size(0), m_num_active_groups(0) { - init_default(); - - *this = other; - } - - inline ~sparse_array() { - for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) - free_group(m_groups[i]); - - deinit_default(); - } - - bool assign(const sparse_array& other) { - if (this == &other) - return true; - - if (!try_resize(other.size())) - return false; - - for (uint i = 0; i < other.m_groups.size(); i++) { - const T* p = other.m_groups[i]; - - T* q = m_groups[i]; - - if (p) { - if (!q) { - q = alloc_group(true); - if (!q) - return false; - - m_groups[i] = q; + template + class sparse_array_traits + { + public: + static inline void* alloc_space(uint size) + { + return crnlib_malloc(size); } - copy_group(q, p); - } else if (q) { - free_group(q); - m_groups[i] = nullptr; - } - } - - return true; - } - - sparse_array& operator=(const sparse_array& other) { 
- if (!assign(other)) { - CRNLIB_FAIL("Out of memory"); - } - - return *this; - } - - bool operator==(const sparse_array& other) const { - if (m_size != other.m_size) - return false; - - for (uint i = 0; i < m_size; i++) - if (!((*this)[i] == other[i])) - return false; - - return true; - } - - bool operator<(const sparse_array& rhs) const { - const uint min_size = math::minimum(m_size, rhs.m_size); - - uint i; - for (i = 0; i < min_size; i++) - if (!((*this)[i] == rhs[i])) - break; + static inline void free_space(void* p) + { + crnlib_free(p); + } - if (i < min_size) - return (*this)[i] < rhs[i]; + static inline void construct_group(T* p) + { + scalar_type::construct_array(p, 1U << Log2N); + } - return m_size < rhs.m_size; - } + static inline void destruct_group(T* p) + { + scalar_type::destruct_array(p, 1U << Log2N); + } - void clear() { - if (m_groups.size()) { - for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) - free_group(m_groups[i]); + static inline void construct_element(T* p) + { + scalar_type::construct(p); + } - m_groups.clear(); - } + static inline void destruct_element(T* p) + { + scalar_type::destruct(p); + } - m_size = 0; + static inline void copy_group(T* pDst, const T* pSrc) + { + for (uint j = 0; j < (1U << Log2N); j++) + { + pDst[j] = pSrc[j]; + } + } + }; + + template class Traits = sparse_array_traits> + class sparse_array : public Traits + { + public: + enum + { + N = 1U << Log2N + }; + + inline sparse_array() : + m_size(0), m_num_active_groups(0) + { + init_default(); + } - CRNLIB_ASSERT(!m_num_active_groups); - } + inline sparse_array(uint size) : + m_size(0), m_num_active_groups(0) + { + init_default(); - bool try_resize(uint size) { - if (m_size == size) - return true; + resize(size); + } - const uint new_num_groups = (size + N - 1) >> Log2N; - if (new_num_groups != m_groups.size()) { - for (uint i = new_num_groups; i < m_groups.size(); i++) - free_group(m_groups[i]); + inline sparse_array(const sparse_array& other) : + 
m_size(0), m_num_active_groups(0) + { + init_default(); - if (!m_groups.try_resize(new_num_groups)) - return false; - } + *this = other; + } - m_size = size; - return true; - } + inline ~sparse_array() + { + for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) + { + free_group(m_groups[i]); + } - void resize(uint size) { - if (!try_resize(size)) { - CRNLIB_FAIL("Out of memory"); - } - } + deinit_default(); + } - inline uint size() const { return m_size; } - inline bool empty() const { return 0 == m_size; } + bool assign(const sparse_array& other) + { + if (this == &other) + { + return true; + } + + if (!try_resize(other.size())) + { + return false; + } + + for (uint i = 0; i < other.m_groups.size(); i++) + { + const T* p = other.m_groups[i]; + + T* q = m_groups[i]; + + if (p) + { + if (!q) + { + q = alloc_group(true); + if (!q) + { + return false; + } + + m_groups[i] = q; + } + + copy_group(q, p); + } + else if (q) + { + free_group(q); + m_groups[i] = nullptr; + } + } + + return true; + } - inline uint capacity() const { return m_groups.size(); } + sparse_array& operator=(const sparse_array& other) + { + if (!assign(other)) + { + CRNLIB_FAIL("Out of memory"); + } - inline const T& operator[](uint i) const { - CRNLIB_ASSERT(i < m_size); - const T* p = m_groups[i >> Log2N]; - const void* t = m_default; - return p ? p[i & (N - 1)] : *reinterpret_cast(t); - } + return *this; + } - inline const T* get(uint i) const { - CRNLIB_ASSERT(i < m_size); - const T* p = m_groups[i >> Log2N]; - return p ? &p[i & (N - 1)] : nullptr; - } + bool operator==(const sparse_array& other) const + { + if (m_size != other.m_size) + { + return false; + } + + for (uint i = 0; i < m_size; i++) + { + if ((*this)[i] != other[i]) + { + return false; + } + } + + return true; + } - inline T* get(uint i) { - CRNLIB_ASSERT(i < m_size); - T* p = m_groups[i >> Log2N]; - return p ? 
&p[i & (N - 1)] : nullptr; - } + bool operator<(const sparse_array& rhs) const + { + const uint min_size = math::minimum(m_size, rhs.m_size); + + uint i; + for (i = 0; i < min_size; i++) + { + if ((*this)[i] != rhs[i]) + { + break; + } + } + + if (i < min_size) + { + return (*this)[i] < rhs[i]; + } + + return m_size < rhs.m_size; + } - inline bool is_present(uint i) const { - CRNLIB_ASSERT(i < m_size); - return m_groups[i >> Log2N] != nullptr; - } - - inline uint get_num_groups() const { return m_groups.size(); } + void clear() + { + if (m_groups.size()) + { + for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) + { + free_group(m_groups[i]); + } - inline const T* get_group(uint group_index) const { - return m_groups[group_index]; - } + m_groups.clear(); + } - inline T* get_group(uint group_index) { - return m_groups[group_index]; - } + m_size = 0; - inline uint get_group_size() const { - return N; - } + CRNLIB_ASSERT(!m_num_active_groups); + } - inline T* ensure_valid(uint index) { - CRNLIB_ASSERT(index <= m_size); + bool try_resize(uint size) + { + if (m_size == size) + { + return true; + } + + const uint new_num_groups = (size + N - 1) >> Log2N; + if (new_num_groups != m_groups.size()) + { + for (uint i = new_num_groups; i < m_groups.size(); i++) + { + free_group(m_groups[i]); + } + + if (!m_groups.try_resize(new_num_groups)) + { + return false; + } + } + + m_size = size; + return true; + } - const uint group_index = index >> Log2N; + void resize(uint size) + { + if (!try_resize(size)) + { + CRNLIB_FAIL("Out of memory"); + } + } - if (group_index >= m_groups.size()) { - T* p = alloc_group(true); - if (!p) - return nullptr; + inline uint size() const + { + return m_size; + } + inline bool empty() const + { + return 0 == m_size; + } - if (!m_groups.try_push_back(p)) { - free_group(p); - return nullptr; - } - } + inline uint capacity() const + { + return m_groups.size(); + } - T* p = m_groups[group_index]; - if (!p) { - p = alloc_group(true); - if 
(!p) - return nullptr; + inline const T& operator[](uint i) const + { + CRNLIB_ASSERT(i < m_size); + const T* p = m_groups[i >> Log2N]; + const void* t = m_default; + return p ? p[i & (N - 1)] : *reinterpret_cast(t); + } - m_groups[group_index] = p; - } + inline const T* get(uint i) const + { + CRNLIB_ASSERT(i < m_size); + const T* p = m_groups[i >> Log2N]; + return p ? &p[i & (N - 1)] : nullptr; + } - m_size = math::maximum(index + 1, m_size); + inline T* get(uint i) + { + CRNLIB_ASSERT(i < m_size); + T* p = m_groups[i >> Log2N]; + return p ? &p[i & (N - 1)] : nullptr; + } - return p + (index & (N - 1)); - } + inline bool is_present(uint i) const + { + CRNLIB_ASSERT(i < m_size); + return m_groups[i >> Log2N] != nullptr; + } - inline bool set(uint index, const T& obj) { - T* p = ensure_valid(index); - if (!p) - return false; + inline uint get_num_groups() const + { + return m_groups.size(); + } - *p = obj; + inline const T* get_group(uint group_index) const + { + return m_groups[group_index]; + } - return true; - } + inline T* get_group(uint group_index) + { + return m_groups[group_index]; + } - inline void push_back(const T& obj) { - if (!set(m_size, obj)) { - CRNLIB_FAIL("Out of memory"); - } - } + inline uint get_group_size() const + { + return N; + } - inline bool try_push_back(const T& obj) { - return set(m_size, obj); - } + inline T* ensure_valid(uint index) + { + CRNLIB_ASSERT(index <= m_size); + + const uint group_index = index >> Log2N; + + if (group_index >= m_groups.size()) + { + T* p = alloc_group(true); + if (!p) + { + return nullptr; + } + + if (!m_groups.try_push_back(p)) + { + free_group(p); + return nullptr; + } + } + + T* p = m_groups[group_index]; + if (!p) + { + p = alloc_group(true); + if (!p) + { + return nullptr; + } + + m_groups[group_index] = p; + } + + m_size = math::maximum(index + 1, m_size); + + return p + (index & (N - 1)); + } - inline void pop_back() { - CRNLIB_ASSERT(m_size); - if (m_size) - resize(m_size - 1); - } + inline bool 
set(uint index, const T& obj) + { + T* p = ensure_valid(index); + if (!p) + { + return false; + } - inline void unset_range(uint start, uint num) { - if (!num) - return; + *p = obj; - CRNLIB_ASSERT((start + num) <= capacity()); + return true; + } - const uint num_to_skip = math::minimum(math::get_align_up_value_delta(start, N), num); - num -= num_to_skip; + inline void push_back(const T& obj) + { + if (!set(m_size, obj)) + { + CRNLIB_FAIL("Out of memory"); + } + } - const uint first_group = (start + num_to_skip) >> Log2N; - const uint num_groups = num >> Log2N; + inline bool try_push_back(const T& obj) + { + return set(m_size, obj); + } - for (uint i = 0; i < num_groups; i++) { - T* p = m_groups[first_group + i]; - if (p) { - free_group(p); - m_groups[i] = nullptr; - } - } - } + inline void pop_back() + { + CRNLIB_ASSERT(m_size); + if (m_size) + { + resize(m_size - 1); + } + } - inline void unset_all() { - unset_range(0, m_groups.size() << Log2N); - } + inline void unset_range(uint start, uint num) + { + if (!num) + { + return; + } + + CRNLIB_ASSERT((start + num) <= capacity()); + + const uint num_to_skip = math::minimum(math::get_align_up_value_delta(start, N), num); + num -= num_to_skip; + + const uint first_group = (start + num_to_skip) >> Log2N; + const uint num_groups = num >> Log2N; + + for (uint i = 0; i < num_groups; i++) + { + T* p = m_groups[first_group + i]; + if (p) + { + free_group(p); + m_groups[i] = nullptr; + } + } + } - inline void swap(sparse_array& other) { - std::swap(m_size, other.m_size); - m_groups.swap(other.m_groups); - std::swap(m_num_active_groups, other.m_num_active_groups); - } + inline void unset_all() + { + unset_range(0, m_groups.size() << Log2N); + } - private: - uint m_size; - uint m_num_active_groups; + inline void swap(sparse_array& other) + { + std::swap(m_size, other.m_size); + m_groups.swap(other.m_groups); + std::swap(m_num_active_groups, other.m_num_active_groups); + } - crnlib::vector m_groups; + private: + uint m_size; + 
uint m_num_active_groups; - uint64 m_default[(sizeof(T) + sizeof(uint64) - 1) / sizeof(uint64)]; + crnlib::vector m_groups; - inline T* alloc_group(bool nofail = false) { - T* p = static_cast(sparse_array_traits::alloc_space(N * sizeof(T))); + uint64 m_default[(sizeof(T) + sizeof(uint64) - 1) / sizeof(uint64)]; - if (!p) { - if (nofail) - return nullptr; + inline T* alloc_group(bool nofail = false) + { + T* p = static_cast(sparse_array_traits::alloc_space(N * sizeof(T))); - CRNLIB_FAIL("Out of memory"); - } + if (!p) + { + if (nofail) + { + return nullptr; + } - sparse_array_traits::construct_group(p); + CRNLIB_FAIL("Out of memory"); + } - m_num_active_groups++; + sparse_array_traits::construct_group(p); - return p; - } + m_num_active_groups++; - inline void free_group(T* p) { - if (p) { - CRNLIB_ASSERT(m_num_active_groups); - m_num_active_groups--; + return p; + } - sparse_array_traits::destruct_group(p); + inline void free_group(T* p) + { + if (p) + { + CRNLIB_ASSERT(m_num_active_groups); + m_num_active_groups--; - sparse_array_traits::free_space(p); - } - } + sparse_array_traits::destruct_group(p); - inline void init_default() { - sparse_array_traits::construct_element(reinterpret_cast(m_default)); - } + sparse_array_traits::free_space(p); + } + } - inline void deinit_default() { - sparse_array_traits::destruct_element(reinterpret_cast(m_default)); - } -}; + inline void init_default() + { + sparse_array_traits::construct_element(reinterpret_cast(m_default)); + } -} // namespace crnlib + inline void deinit_default() + { + sparse_array_traits::destruct_element(reinterpret_cast(m_default)); + } + }; +} // namespace crnlib diff --git a/crnlib/crn_sparse_bit_array.cpp b/crnlib/crn_sparse_bit_array.cpp index 406a26b..0ac978f 100644 --- a/crnlib/crn_sparse_bit_array.cpp +++ b/crnlib/crn_sparse_bit_array.cpp @@ -1,454 +1,591 @@ -// File: crn_sparse_bit_array.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard 
Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_sparse_bit_array.h" -namespace crnlib { -sparse_bit_array::sparse_bit_array() - : m_num_groups(0), m_ppGroups(nullptr) { -} - -sparse_bit_array::sparse_bit_array(uint size) - : m_num_groups(0), m_ppGroups(nullptr) { - resize(size); -} - -sparse_bit_array::sparse_bit_array(sparse_bit_array& other) { - m_num_groups = other.m_num_groups; - m_ppGroups = (uint32**)crnlib_malloc(m_num_groups * sizeof(uint32*)); - CRNLIB_VERIFY(m_ppGroups); - - for (uint i = 0; i < m_num_groups; i++) { - if (other.m_ppGroups[i]) { - m_ppGroups[i] = alloc_group(false); - memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); - } else - m_ppGroups[i] = nullptr; - } -} - -sparse_bit_array::~sparse_bit_array() { - clear(); -} - -sparse_bit_array& sparse_bit_array::operator=(sparse_bit_array& other) { - if (this == &other) - return *this; - - if (m_num_groups != other.m_num_groups) { - clear(); - - m_num_groups = other.m_num_groups; - m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); - 
CRNLIB_VERIFY(m_ppGroups); - } - - for (uint i = 0; i < m_num_groups; i++) { - if (other.m_ppGroups[i]) { - if (!m_ppGroups[i]) - m_ppGroups[i] = alloc_group(false); - memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); - } else if (m_ppGroups[i]) { - free_group(m_ppGroups[i]); - m_ppGroups[i] = nullptr; +namespace crnlib +{ + sparse_bit_array::sparse_bit_array() : + m_num_groups(0), + m_ppGroups(nullptr) + { } - } - - return *this; -} - -void sparse_bit_array::clear() { - if (!m_num_groups) - return; - - for (uint i = 0; i < m_num_groups; i++) - free_group(m_ppGroups[i]); - - crnlib_free(m_ppGroups); - m_ppGroups = nullptr; - - m_num_groups = 0; -} - -void sparse_bit_array::swap(sparse_bit_array& other) { - std::swap(m_ppGroups, other.m_ppGroups); - std::swap(m_num_groups, other.m_num_groups); -} - -void sparse_bit_array::optimize() { - for (uint i = 0; i < m_num_groups; i++) { - uint32* s = m_ppGroups[i]; - if (s) { - uint j; - for (j = 0; j < cDWORDsPerGroup; j++) - if (s[j]) - break; - if (j == cDWORDsPerGroup) { - free_group(s); - m_ppGroups[i] = nullptr; - } + + sparse_bit_array::sparse_bit_array(uint size) : + m_num_groups(0), + m_ppGroups(nullptr) + { + resize(size); } - } -} - -void sparse_bit_array::set_bit_range(uint index, uint num) { - CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); - - if (!num) - return; - else if (num == 1) { - set_bit(index); - return; - } - - while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); - - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; + + sparse_bit_array::sparse_bit_array(sparse_bit_array& other) + { + m_num_groups = other.m_num_groups; + m_ppGroups = (uint32**)crnlib_malloc(m_num_groups * sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + + for (uint i = 0; i < m_num_groups; i++) + { + if (other.m_ppGroups[i]) 
+ { + m_ppGroups[i] = alloc_group(false); + memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); + } + else + { + m_ppGroups[i] = nullptr; + } + } } - const uint group_bit_ofs = index & cBitsPerGroupMask; + sparse_bit_array::~sparse_bit_array() + { + clear(); + } - const uint dword_bit_ofs = group_bit_ofs & 31; - const uint max_bits_to_set = 32 - dword_bit_ofs; + sparse_bit_array& sparse_bit_array::operator=(sparse_bit_array& other) + { + if (this == &other) + { + return *this; + } - const uint bits_to_set = math::minimum(max_bits_to_set, num); - const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + if (m_num_groups != other.m_num_groups) + { + clear(); - pGroup[group_bit_ofs >> 5] |= (msk << dword_bit_ofs); + m_num_groups = other.m_num_groups; + m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + } - num -= bits_to_set; - if (!num) - return; + for (uint i = 0; i < m_num_groups; i++) + { + if (other.m_ppGroups[i]) + { + if (!m_ppGroups[i]) + { + m_ppGroups[i] = alloc_group(false); + } + memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); + } + else if (m_ppGroups[i]) + { + free_group(m_ppGroups[i]); + m_ppGroups[i] = nullptr; + } + } - index += bits_to_set; - } + return *this; + } - while (num >= cBitsPerGroup) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + void sparse_bit_array::clear() + { + if (!m_num_groups) + { + return; + } - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; - } + for (uint i = 0; i < m_num_groups; i++) + { + free_group(m_ppGroups[i]); + } - memset(pGroup, 0xFF, sizeof(uint32) * cDWORDsPerGroup); + crnlib_free(m_ppGroups); + m_ppGroups = nullptr; - num -= cBitsPerGroup; - index += cBitsPerGroup; - } + m_num_groups = 0; + } - while (num) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + void 
sparse_bit_array::swap(sparse_bit_array& other) + { + std::swap(m_ppGroups, other.m_ppGroups); + std::swap(m_num_groups, other.m_num_groups); + } - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; + void sparse_bit_array::optimize() + { + for (uint i = 0; i < m_num_groups; i++) + { + uint32* s = m_ppGroups[i]; + if (s) + { + uint j; + for (j = 0; j < cDWORDsPerGroup; j++) + { + if (s[j]) + { + break; + } + } + if (j == cDWORDsPerGroup) + { + free_group(s); + m_ppGroups[i] = nullptr; + } + } + } } - uint group_bit_ofs = index & cBitsPerGroupMask; + void sparse_bit_array::set_bit_range(uint index, uint num) + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + + if (!num) + { + return; + } + else if (num == 1) + { + set_bit(index); + return; + } + + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - uint bits_to_set = math::minimum(32U, num); - uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - pGroup[group_bit_ofs >> 5] |= (msk << (group_bit_ofs & 31)); + const uint group_bit_ofs = index & cBitsPerGroupMask; - num -= bits_to_set; - index += bits_to_set; - } -} + const uint dword_bit_ofs = group_bit_ofs & 31; + const uint max_bits_to_set = 32 - dword_bit_ofs; -void sparse_bit_array::clear_all_bits() { - for (uint i = 0; i < m_num_groups; i++) { - uint32* pGroup = m_ppGroups[i]; - if (pGroup) - memset(pGroup, 0, sizeof(uint32) * cDWORDsPerGroup); - } -} + const uint bits_to_set = math::minimum(max_bits_to_set, num); + const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); -void sparse_bit_array::clear_bit_range(uint index, uint num) { - CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + pGroup[group_bit_ofs 
>> 5] |= (msk << dword_bit_ofs); - if (!num) - return; - else if (num == 1) { - clear_bit(index); - return; - } + num -= bits_to_set; + if (!num) + { + return; + } - while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + index += bits_to_set; + } - const uint group_bit_ofs = index & cBitsPerGroupMask; + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - const uint dword_bit_ofs = group_bit_ofs & 31; - const uint max_bits_to_set = 32 - dword_bit_ofs; + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - const uint bits_to_set = math::minimum(max_bits_to_set, num); + memset(pGroup, 0xFF, sizeof(uint32) * cDWORDsPerGroup); - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + num -= cBitsPerGroup; + index += cBitsPerGroup; + } - pGroup[group_bit_ofs >> 5] &= (~(msk << dword_bit_ofs)); - } + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - num -= bits_to_set; - if (!num) - return; + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - index += bits_to_set; - } + uint group_bit_ofs = index & cBitsPerGroupMask; - while (num >= cBitsPerGroup) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + uint bits_to_set = math::minimum(32U, num); + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - free_group(pGroup); - m_ppGroups[group_index] = nullptr; + pGroup[group_bit_ofs >> 5] |= (msk << (group_bit_ofs & 31)); + + num -= bits_to_set; + index += bits_to_set; + } } - num -= cBitsPerGroup; - index += 
cBitsPerGroup; - } + void sparse_bit_array::clear_all_bits() + { + for (uint i = 0; i < m_num_groups; i++) + { + uint32* pGroup = m_ppGroups[i]; + if (pGroup) + { + memset(pGroup, 0, sizeof(uint32) * cDWORDsPerGroup); + } + } + } - while (num) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + void sparse_bit_array::clear_bit_range(uint index, uint num) + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); - uint bits_to_set = math::minimum(32u, num); + if (!num) + { + return; + } + else if (num == 1) + { + clear_bit(index); + return; + } - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - uint group_bit_ofs = index & cBitsPerGroupMask; + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + const uint group_bit_ofs = index & cBitsPerGroupMask; - pGroup[group_bit_ofs >> 5] &= (~(msk << (group_bit_ofs & 31))); - } + const uint dword_bit_ofs = group_bit_ofs & 31; + const uint max_bits_to_set = 32 - dword_bit_ofs; - num -= bits_to_set; - index += bits_to_set; - } -} - -void sparse_bit_array::resize(uint size) { - uint num_groups = (size + cBitsPerGroup - 1) >> cBitsPerGroupShift; - if (num_groups == m_num_groups) - return; - - if (!num_groups) { - clear(); - return; - } - - sparse_bit_array temp; - temp.swap(*this); - - m_num_groups = num_groups; - m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); - CRNLIB_VERIFY(m_ppGroups); - - uint n = math::minimum(temp.m_num_groups, m_num_groups); - for (uint i = 0; i < n; i++) { - uint32* p = temp.m_ppGroups[i]; - if (p) { - m_ppGroups[i] = temp.m_ppGroups[i]; - temp.m_ppGroups[i] = nullptr; - } - } -} - -sparse_bit_array& sparse_bit_array::operator&=(const sparse_bit_array& other) { - if (this == &other) - return *this; - - CRNLIB_VERIFY(other.m_num_groups == 
m_num_groups); - - for (uint i = 0; i < m_num_groups; i++) { - uint32* d = m_ppGroups[i]; - if (!d) - continue; - uint32* s = other.m_ppGroups[i]; - - if (!s) { - free_group(d); - m_ppGroups[i] = nullptr; - } else { - uint32 oc = 0; - for (uint j = 0; j < cDWORDsPerGroup; j++) { - uint32 c = d[j] & s[j]; - d[j] = c; - oc |= c; - } - if (!oc) { - free_group(d); - m_ppGroups[i] = nullptr; - } - } - } - - return *this; -} - -sparse_bit_array& sparse_bit_array::operator|=(const sparse_bit_array& other) { - if (this == &other) - return *this; - - CRNLIB_VERIFY(other.m_num_groups == m_num_groups); - - for (uint i = 0; i < m_num_groups; i++) { - uint32* s = other.m_ppGroups[i]; - if (!s) - continue; - - uint32* d = m_ppGroups[i]; - if (!d) { - d = alloc_group(true); - m_ppGroups[i] = d; - memcpy(d, s, cBytesPerGroup); - } else { - uint32 oc = 0; - for (uint j = 0; j < cDWORDsPerGroup; j++) { - uint32 c = d[j] | s[j]; - d[j] = c; - oc |= c; - } - if (!oc) { - free_group(d); - m_ppGroups[i] = nullptr; - } - } - } - - return *this; -} - -sparse_bit_array& sparse_bit_array::and_not(const sparse_bit_array& other) { - if (this == &other) - return *this; - - CRNLIB_VERIFY(other.m_num_groups == m_num_groups); - - for (uint i = 0; i < m_num_groups; i++) { - uint32* d = m_ppGroups[i]; - if (!d) - continue; - uint32* s = other.m_ppGroups[i]; - if (!s) - continue; - - uint32 oc = 0; - for (uint j = 0; j < cDWORDsPerGroup; j++) { - uint32 c = d[j] & (~s[j]); - d[j] = c; - oc |= c; - } - if (!oc) { - free_group(d); - m_ppGroups[i] = nullptr; - } - } + const uint bits_to_set = math::minimum(max_bits_to_set, num); - return *this; -} + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + pGroup[group_bit_ofs >> 5] &= (~(msk << dword_bit_ofs)); + } -int sparse_bit_array::find_first_set_bit(uint index, uint num) const { - CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + num -= bits_to_set; + if (!num) 
+ { + return; + } - if (!num) - return -1; + index += bits_to_set; + } - while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - const uint group_bit_ofs = index & cBitsPerGroupMask; - const uint dword_bit_ofs = group_bit_ofs & 31; + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + free_group(pGroup); + m_ppGroups[group_index] = nullptr; + } - const uint max_bits_to_examine = 32 - dword_bit_ofs; - const uint bits_to_examine = math::minimum(max_bits_to_examine, num); + num -= cBitsPerGroup; + index += cBitsPerGroup; + } - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - uint bits = pGroup[group_bit_ofs >> 5] & (msk << dword_bit_ofs); - if (bits) { - uint num_trailing_zeros = math::count_trailing_zero_bits(bits); - int set_index = num_trailing_zeros + (index & ~31); - CRNLIB_ASSERT(get_bit(set_index)); - return set_index; - } - } + uint bits_to_set = math::minimum(32u, num); - num -= bits_to_examine; - if (!num) - return -1; + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + uint group_bit_ofs = index & cBitsPerGroupMask; - index += bits_to_examine; - } + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); - while (num >= cBitsPerGroup) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + pGroup[group_bit_ofs >> 5] &= (~(msk << (group_bit_ofs & 31))); + } - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - for (uint i = 0; i < cDWORDsPerGroup; i++) { - uint32 bits = pGroup[i]; - if (bits) { - uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + num -= 
bits_to_set; + index += bits_to_set; + } + } + + void sparse_bit_array::resize(uint size) + { + uint num_groups = (size + cBitsPerGroup - 1) >> cBitsPerGroupShift; + if (num_groups == m_num_groups) + { + return; + } + + if (!num_groups) + { + clear(); + return; + } - int set_index = num_trailing_zeros + index + (i << 5); - CRNLIB_ASSERT(get_bit(set_index)); - return set_index; + sparse_bit_array temp; + temp.swap(*this); + + m_num_groups = num_groups; + m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + + uint n = math::minimum(temp.m_num_groups, m_num_groups); + for (uint i = 0; i < n; i++) + { + uint32* p = temp.m_ppGroups[i]; + if (p) + { + m_ppGroups[i] = temp.m_ppGroups[i]; + temp.m_ppGroups[i] = nullptr; + } } - } } - num -= cBitsPerGroup; - index += cBitsPerGroup; - } + sparse_bit_array& sparse_bit_array::operator&=(const sparse_bit_array& other) + { + if (this == &other) + { + return *this; + } + + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* d = m_ppGroups[i]; + if (!d) + { + continue; + } + uint32* s = other.m_ppGroups[i]; + + if (!s) + { + free_group(d); + m_ppGroups[i] = nullptr; + } + else + { + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] & s[j]; + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = nullptr; + } + } + } + + return *this; + } - while (num) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + sparse_bit_array& sparse_bit_array::operator|=(const sparse_bit_array& other) + { + if (this == &other) + { + return *this; + } - uint bits_to_examine = math::minimum(32U, num); + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* s = other.m_ppGroups[i]; + if (!s) + { + continue; + } + + uint32* d = m_ppGroups[i]; + if (!d) + { + d = alloc_group(true); + m_ppGroups[i] = d; + 
memcpy(d, s, cBytesPerGroup); + } + else + { + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] | s[j]; + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = nullptr; + } + } + } - uint32* pGroup = m_ppGroups[group_index]; - if (pGroup) { - uint group_bit_ofs = index & cBitsPerGroupMask; + return *this; + } - uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); + sparse_bit_array& sparse_bit_array::and_not(const sparse_bit_array& other) + { + if (this == &other) + { + return *this; + } - uint32 bits = pGroup[group_bit_ofs >> 5] & (msk << (group_bit_ofs & 31)); - if (bits) { - uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* d = m_ppGroups[i]; + if (!d) + { + continue; + } + uint32* s = other.m_ppGroups[i]; + if (!s) + { + continue; + } + + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] & (~s[j]); + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = nullptr; + } + } - int set_index = num_trailing_zeros + (index & ~31); - CRNLIB_ASSERT(get_bit(set_index)); - return set_index; - } + return *this; } - num -= bits_to_examine; - index += bits_to_examine; - } + int sparse_bit_array::find_first_set_bit(uint index, uint num) const + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + + if (!num) + { + return -1; + } + + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + const uint group_bit_ofs = index & cBitsPerGroupMask; + const uint dword_bit_ofs = group_bit_ofs & 31; + + const uint max_bits_to_examine = 32 - dword_bit_ofs; + const uint bits_to_examine = math::minimum(max_bits_to_examine, num); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + const uint32 msk = (0xFFFFFFFFU >> (32 
- bits_to_examine)); + + uint bits = pGroup[group_bit_ofs >> 5] & (msk << dword_bit_ofs); + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + int set_index = num_trailing_zeros + (index & ~31); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + + num -= bits_to_examine; + if (!num) + { + return -1; + } + + index += bits_to_examine; + } + + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + for (uint i = 0; i < cDWORDsPerGroup; i++) + { + uint32 bits = pGroup[i]; + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + + int set_index = num_trailing_zeros + index + (i << 5); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + } + + num -= cBitsPerGroup; + index += cBitsPerGroup; + } + + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - return -1; -} + uint bits_to_examine = math::minimum(32U, num); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + uint group_bit_ofs = index & cBitsPerGroupMask; + + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); + + uint32 bits = pGroup[group_bit_ofs >> 5] & (msk << (group_bit_ofs & 31)); + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + + int set_index = num_trailing_zeros + (index & ~31); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + + num -= bits_to_examine; + index += bits_to_examine; + } + + return -1; + } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_sparse_bit_array.h b/crnlib/crn_sparse_bit_array.h index 4a962a5..d4ef4fe 100644 --- a/crnlib/crn_sparse_bit_array.h +++ b/crnlib/crn_sparse_bit_array.h @@ -1,163 +1,216 @@ -// File: crn_sparse_bit_array.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 
2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_export.h" -namespace crnlib { -class CRN_EXPORT sparse_bit_array { - public: - sparse_bit_array(); - sparse_bit_array(uint size); - sparse_bit_array(sparse_bit_array& other); - ~sparse_bit_array(); - - sparse_bit_array& operator=(sparse_bit_array& other); - - void clear(); - - inline uint get_size() { return (m_num_groups << cBitsPerGroupShift); } - - void resize(uint size); - - sparse_bit_array& operator&=(const sparse_bit_array& other); - sparse_bit_array& operator|=(const sparse_bit_array& other); - sparse_bit_array& and_not(const sparse_bit_array& other); - - void swap(sparse_bit_array& other); - - void optimize(); - - void set_bit_range(uint index, uint num); - void clear_bit_range(uint index, uint num); - - void clear_all_bits(); - - inline void set_bit(uint index) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); - - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - 
m_ppGroups[group_index] = pGroup; - } - - uint bit_ofs = index & (cBitsPerGroup - 1); - - pGroup[bit_ofs >> 5] |= (1U << (bit_ofs & 31)); - } - - inline void clear_bit(uint index) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); - - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; - } - - uint bit_ofs = index & (cBitsPerGroup - 1); - - pGroup[bit_ofs >> 5] &= (~(1U << (bit_ofs & 31))); - } - - inline void set(uint index, bool value) { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); - - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; - } - - uint bit_ofs = index & (cBitsPerGroup - 1); - - uint bit = (1U << (bit_ofs & 31)); - - uint c = pGroup[bit_ofs >> 5]; - uint mask = (uint)(-(int)value); +namespace crnlib +{ + class CRN_EXPORT sparse_bit_array + { + public: + sparse_bit_array(); + sparse_bit_array(uint size); + sparse_bit_array(sparse_bit_array& other); + ~sparse_bit_array(); - pGroup[bit_ofs >> 5] = (c & ~bit) | (mask & bit); - } + sparse_bit_array& operator=(sparse_bit_array& other); - inline bool get_bit(uint index) const { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + void clear(); - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) - return 0; + inline uint get_size() + { + return (m_num_groups << cBitsPerGroupShift); + } - uint bit_ofs = index & (cBitsPerGroup - 1); + void resize(uint size); - uint bit = (1U << (bit_ofs & 31)); + sparse_bit_array& operator&=(const sparse_bit_array& other); + sparse_bit_array& operator|=(const sparse_bit_array& other); + sparse_bit_array& and_not(const sparse_bit_array& other); - return (pGroup[bit_ofs >> 5] & bit) != 0; - } + void swap(sparse_bit_array& other); - inline uint32 get_uint32(uint index) const { - uint 
group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + void optimize(); - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) - return 0; + void set_bit_range(uint index, uint num); + void clear_bit_range(uint index, uint num); - uint bit_ofs = index & (cBitsPerGroup - 1); + void clear_all_bits(); - return pGroup[bit_ofs >> 5]; - } + inline void set_bit(uint index) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - inline void set_uint32(uint index, uint32 value) const { - uint group_index = index >> cBitsPerGroupShift; - CRNLIB_ASSERT(group_index < m_num_groups); + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - uint32* pGroup = m_ppGroups[group_index]; - if (!pGroup) { - pGroup = alloc_group(true); - m_ppGroups[group_index] = pGroup; - } + uint bit_ofs = index & (cBitsPerGroup - 1); - uint bit_ofs = index & (cBitsPerGroup - 1); + pGroup[bit_ofs >> 5] |= (1U << (bit_ofs & 31)); + } - pGroup[bit_ofs >> 5] = value; - } + inline void clear_bit(uint index) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - int find_first_set_bit(uint index, uint num) const; + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - enum { - cDWORDsPerGroupShift = 4U, - cDWORDsPerGroup = 1U << cDWORDsPerGroupShift, + uint bit_ofs = index & (cBitsPerGroup - 1); - cBitsPerGroupShift = cDWORDsPerGroupShift + 5, - cBitsPerGroup = 1U << cBitsPerGroupShift, - cBitsPerGroupMask = cBitsPerGroup - 1U, + pGroup[bit_ofs >> 5] &= (~(1U << (bit_ofs & 31))); + } - cBytesPerGroup = cDWORDsPerGroup * sizeof(uint32) - }; + inline void set(uint index, bool value) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); - uint get_num_groups() const { return 
m_num_groups; } - uint32** get_groups() { return m_ppGroups; } + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } - private: - uint m_num_groups; - uint32** m_ppGroups; + uint bit_ofs = index & (cBitsPerGroup - 1); - static inline uint32* alloc_group(bool clear) { - uint32* p = (uint32*)crnlib_malloc(cBytesPerGroup); - CRNLIB_VERIFY(p); - if (clear) - memset(p, 0, cBytesPerGroup); - return p; - } + uint bit = (1U << (bit_ofs & 31)); + + uint c = pGroup[bit_ofs >> 5]; + uint mask = (uint)(-(int)value); + + pGroup[bit_ofs >> 5] = (c & ~bit) | (mask & bit); + } + + inline bool get_bit(uint index) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + return 0; + } + + uint bit_ofs = index & (cBitsPerGroup - 1); + + uint bit = (1U << (bit_ofs & 31)); + + return (pGroup[bit_ofs >> 5] & bit) != 0; + } - static inline void free_group(void* p) { - if (p) - crnlib_free(p); - } -}; + inline uint32 get_uint32(uint index) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + return 0; + } -} // namespace crnlib + uint bit_ofs = index & (cBitsPerGroup - 1); + + return pGroup[bit_ofs >> 5]; + } + + inline void set_uint32(uint index, uint32 value) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + uint bit_ofs = index & (cBitsPerGroup - 1); + + pGroup[bit_ofs >> 5] = value; + } + + int find_first_set_bit(uint index, uint num) const; + + enum + { + cDWORDsPerGroupShift = 4U, + cDWORDsPerGroup = 1U << cDWORDsPerGroupShift, + + cBitsPerGroupShift = cDWORDsPerGroupShift + 5, + 
cBitsPerGroup = 1U << cBitsPerGroupShift, + cBitsPerGroupMask = cBitsPerGroup - 1U, + + cBytesPerGroup = cDWORDsPerGroup * sizeof(uint32) + }; + + uint get_num_groups() const + { + return m_num_groups; + } + uint32** get_groups() + { + return m_ppGroups; + } + + private: + uint m_num_groups; + uint32** m_ppGroups; + + static inline uint32* alloc_group(bool clear) + { + uint32* p = (uint32*)crnlib_malloc(cBytesPerGroup); + CRNLIB_VERIFY(p); + if (clear) + { + memset(p, 0, cBytesPerGroup); + } + return p; + } + + static inline void free_group(void* p) + { + if (p) + { + crnlib_free(p); + } + } + }; + +} // namespace crnlib diff --git a/crnlib/crn_strutils.cpp b/crnlib/crn_strutils.cpp index 1ee14f8..3095ad4 100644 --- a/crnlib/crn_strutils.cpp +++ b/crnlib/crn_strutils.cpp @@ -1,5 +1,26 @@ -// File: crn_strutils.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_strutils.h" @@ -51,8 +72,7 @@ namespace crnlib dst_len--; - } - while (c); + } while (c); CRNLIB_ASSERT((q - pDst) <= (int)dst_len); @@ -76,8 +96,7 @@ namespace crnlib { *p-- = static_cast('0' + (j % 10)); j /= 10; - } - while (j); + } while (j); if (value < 0) { @@ -113,8 +132,7 @@ namespace crnlib { *p-- = static_cast('0' + (value % 10)); value /= 10; - } - while (value); + } while (value); const size_t total_bytes = (buf + cBufSize - 1) - p; if (total_bytes > len) @@ -502,7 +520,9 @@ namespace crnlib { p = buf; if (buf >= pEnd) + { break; + } int i = *buf++; @@ -702,4 +722,4 @@ namespace crnlib return status == 0; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_strutils.h b/crnlib/crn_strutils.h index cfefeb2..f726943 100644 --- a/crnlib/crn_strutils.h +++ b/crnlib/crn_strutils.h @@ -1,37 +1,59 @@ -// File: crn_strutils.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once #include "crn_export.h" +#include "crn_core.h" + #ifdef WIN32 #define CRNLIB_PATH_SEPERATOR_CHAR '\\' #else #define CRNLIB_PATH_SEPERATOR_CHAR '/' #endif -namespace crnlib +namespace crnlib { - CRN_EXPORT char* crn_strdup(const char* pStr); - CRN_EXPORT int crn_stricmp(const char* p, const char* q); + CRN_EXPORT char* crn_strdup(const char* pStr); + CRN_EXPORT int crn_stricmp(const char* p, const char* q); - CRN_EXPORT char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc); + CRN_EXPORT char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc); - CRN_EXPORT bool int_to_string(int value, char* pDst, uint len); - CRN_EXPORT bool uint_to_string(uint value, char* pDst, uint len); + CRN_EXPORT bool int_to_string(int value, char* pDst, uint len); + CRN_EXPORT bool uint_to_string(uint value, char* pDst, uint len); - CRN_EXPORT bool string_to_int(const char*& pBuf, int& value); + CRN_EXPORT bool string_to_int(const char*& pBuf, int& value); - CRN_EXPORT bool string_to_uint(const char*& pBuf, uint& value); + CRN_EXPORT bool string_to_uint(const char*& pBuf, uint& value); - CRN_EXPORT bool string_to_int64(const char*& pBuf, int64& value); - CRN_EXPORT bool string_to_uint64(const char*& pBuf, uint64& value); + CRN_EXPORT bool string_to_int64(const char*& pBuf, int64& value); + CRN_EXPORT bool string_to_uint64(const char*& pBuf, uint64& value); - CRN_EXPORT bool string_to_bool(const char* p, bool& value); + CRN_EXPORT bool string_to_bool(const char* p, bool& value); - CRN_EXPORT bool string_to_float(const char*& p, float& value, uint round_digit = 512U); + CRN_EXPORT bool string_to_float(const char*& p, float& value, uint round_digit = 512U); - CRN_EXPORT bool string_to_double(const char*& p, double& value, uint round_digit = 512U); - CRN_EXPORT bool string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit = 512U); + CRN_EXPORT bool string_to_double(const char*& p, double& value, uint round_digit = 512U); + CRN_EXPORT bool 
string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit = 512U); } // namespace crnlib diff --git a/crnlib/crn_symbol_codec.cpp b/crnlib/crn_symbol_codec.cpp index ea74ba0..98d8e05 100644 --- a/crnlib/crn_symbol_codec.cpp +++ b/crnlib/crn_symbol_codec.cpp @@ -1,596 +1,777 @@ -// File: crn_symbol_codec.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_symbol_codec.h" #include "crn_huffman_codes.h" -namespace crnlib { -static float gProbCost[cSymbolCodecArithProbScale]; - -//const uint cArithProbMulLenSigBits = 8; -//const uint cArithProbMulLenSigScale = 1 << cArithProbMulLenSigBits; - -class arith_prob_cost_initializer { - public: - arith_prob_cost_initializer() { - const float cInvLn2 = 1.0f / 0.69314718f; - - for (uint i = 0; i < cSymbolCodecArithProbScale; i++) - gProbCost[i] = -logf(i * (1.0f / cSymbolCodecArithProbScale)) * cInvLn2; - } -}; - -static arith_prob_cost_initializer g_prob_cost_initializer; - -double symbol_histogram::calc_entropy() const { - double total = 0.0f; - for (uint i = 0; i < m_hist.size(); i++) - total += m_hist[i]; - if (total == 0.0f) - return 0.0f; - - double entropy = 0.0f; - double neg_inv_log2 = -1.0f / log(2.0f); - double inv_total = 1.0f / total; - for (uint i = 0; i < m_hist.size(); i++) { - if (m_hist[i]) { - double bits = log(m_hist[i] * inv_total) * neg_inv_log2; - entropy += bits * m_hist[i]; - } - } - - return entropy; -} - -uint64 symbol_histogram::get_total() const { - uint64 total = 0; - for (uint i = 0; i < m_hist.size(); i++) - total += m_hist[i]; - return total; -} - -adaptive_huffman_data_model::adaptive_huffman_data_model(bool encoding, uint total_syms) - : m_total_syms(0), - m_update_cycle(0), - m_symbols_until_update(0), - m_total_count(0), - m_pDecode_tables(nullptr), - m_decoder_table_bits(0), - m_encoding(encoding) { - if (total_syms) - init(encoding, total_syms); -} - -adaptive_huffman_data_model::adaptive_huffman_data_model(const adaptive_huffman_data_model& other) - : m_total_syms(0), - m_update_cycle(0), - m_symbols_until_update(0), - m_total_count(0), - m_pDecode_tables(nullptr), - m_decoder_table_bits(0), - m_encoding(false) { - *this = other; -} - -adaptive_huffman_data_model::~adaptive_huffman_data_model() { - if (m_pDecode_tables) - crnlib_delete(m_pDecode_tables); -} - -adaptive_huffman_data_model& 
adaptive_huffman_data_model::operator=(const adaptive_huffman_data_model& rhs) { - if (this == &rhs) - return *this; - - m_total_syms = rhs.m_total_syms; - - m_update_cycle = rhs.m_update_cycle; - m_symbols_until_update = rhs.m_symbols_until_update; - - m_total_count = rhs.m_total_count; - - m_sym_freq = rhs.m_sym_freq; - - m_codes = rhs.m_codes; - m_code_sizes = rhs.m_code_sizes; - - if (rhs.m_pDecode_tables) { - if (m_pDecode_tables) - *m_pDecode_tables = *rhs.m_pDecode_tables; - else - m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); - } else { - crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; - } - - m_decoder_table_bits = rhs.m_decoder_table_bits; - m_encoding = rhs.m_encoding; - - return *this; -} - -void adaptive_huffman_data_model::clear() { - m_sym_freq.clear(); - m_codes.clear(); - m_code_sizes.clear(); - - m_total_syms = 0; - m_update_cycle = 0; - m_symbols_until_update = 0; - m_decoder_table_bits = 0; - m_total_count = 0; - - if (m_pDecode_tables) { - crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; - } -} - -void adaptive_huffman_data_model::init(bool encoding, uint total_syms) { - clear(); - - m_encoding = encoding; - - m_sym_freq.resize(total_syms); - m_code_sizes.resize(total_syms); - - m_total_syms = total_syms; - - if (m_total_syms <= 16) - m_decoder_table_bits = 0; - else - m_decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); - - if (m_encoding) - m_codes.resize(total_syms); - else - m_pDecode_tables = crnlib_new(); - - reset(); -} - -void adaptive_huffman_data_model::reset() { - if (!m_total_syms) - return; - - for (uint i = 0; i < m_total_syms; i++) - m_sym_freq[i] = 1; - - m_total_count = 0; - m_update_cycle = m_total_syms; - - update(); - - m_symbols_until_update = m_update_cycle = 8; //(m_total_syms + 6) >> 1; -} - -void adaptive_huffman_data_model::rescale() { - uint total_freq = 0; - - for (uint i = 0; i < m_total_syms; i++) { - uint freq = 
(m_sym_freq[i] + 1) >> 1; - total_freq += freq; - m_sym_freq[i] = static_cast(freq); - } - - m_total_count = total_freq; -} +namespace crnlib +{ + static float gProbCost[cSymbolCodecArithProbScale]; -void adaptive_huffman_data_model::update() { - m_total_count += m_update_cycle; + //const uint cArithProbMulLenSigBits = 8; + //const uint cArithProbMulLenSigScale = 1 << cArithProbMulLenSigBits; - if (m_total_count >= 32768) - rescale(); - - void* pTables = create_generate_huffman_codes_tables(); + class arith_prob_cost_initializer + { + public: + arith_prob_cost_initializer() + { + const float cInvLn2 = 1.0f / 0.69314718f; - uint max_code_size, total_freq; - bool status = generate_huffman_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); - CRNLIB_ASSERT(status); - CRNLIB_ASSERT(total_freq == m_total_count); + for (uint i = 0; i < cSymbolCodecArithProbScale; i++) + { + gProbCost[i] = -logf(i * (1.0f / cSymbolCodecArithProbScale)) * cInvLn2; + } + } + }; - if (max_code_size > prefix_coding::cMaxExpectedCodeSize) - prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], prefix_coding::cMaxExpectedCodeSize); + static arith_prob_cost_initializer g_prob_cost_initializer; - free_generate_huffman_codes_tables(pTables); + double symbol_histogram::calc_entropy() const + { + double total = 0.0f; + for (uint i = 0; i < m_hist.size(); i++) + { + total += m_hist[i]; + } + if (total == 0.0f) + { + return 0.0f; + } + + double entropy = 0.0f; + double neg_inv_log2 = -1.0f / log(2.0f); + double inv_total = 1.0f / total; + for (uint i = 0; i < m_hist.size(); i++) + { + if (m_hist[i]) + { + double bits = log(m_hist[i] * inv_total) * neg_inv_log2; + entropy += bits * m_hist[i]; + } + } + + return entropy; + } - if (m_encoding) - status = prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]); - else - status = prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, 
m_decoder_table_bits); + uint64 symbol_histogram::get_total() const + { + uint64 total = 0; + for (uint i = 0; i < m_hist.size(); i++) + { + total += m_hist[i]; + } + return total; + } - CRNLIB_ASSERT(status); - (void)status; + adaptive_huffman_data_model::adaptive_huffman_data_model(bool encoding, uint total_syms) : + m_total_syms(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_pDecode_tables(nullptr), + m_decoder_table_bits(0), + m_encoding(encoding) + { + if (total_syms) + { + init(encoding, total_syms); + } + } - m_update_cycle = (5 * m_update_cycle) >> 2; - uint max_cycle = (m_total_syms + 6) << 3; // this was << 2 - which is ~12% slower but compresses around .5% better + adaptive_huffman_data_model::adaptive_huffman_data_model(const adaptive_huffman_data_model& other) : + m_total_syms(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_pDecode_tables(nullptr), + m_decoder_table_bits(0), + m_encoding(false) + { + *this = other; + } - if (m_update_cycle > max_cycle) - m_update_cycle = max_cycle; + adaptive_huffman_data_model::~adaptive_huffman_data_model() + { + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + } + } - m_symbols_until_update = m_update_cycle; -} + adaptive_huffman_data_model& adaptive_huffman_data_model::operator=(const adaptive_huffman_data_model& rhs) + { + if (this == &rhs) + { + return *this; + } + + m_total_syms = rhs.m_total_syms; + + m_update_cycle = rhs.m_update_cycle; + m_symbols_until_update = rhs.m_symbols_until_update; + + m_total_count = rhs.m_total_count; + + m_sym_freq = rhs.m_sym_freq; + + m_codes = rhs.m_codes; + m_code_sizes = rhs.m_code_sizes; + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + { + *m_pDecode_tables = *rhs.m_pDecode_tables; + } + else + { + m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); + } + } + else + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } + + m_decoder_table_bits = rhs.m_decoder_table_bits; + 
m_encoding = rhs.m_encoding; + + return *this; + } -static_huffman_data_model::static_huffman_data_model() - : m_total_syms(0), - m_pDecode_tables(nullptr), - m_encoding(false) { -} + void adaptive_huffman_data_model::clear() + { + m_sym_freq.clear(); + m_codes.clear(); + m_code_sizes.clear(); + + m_total_syms = 0; + m_update_cycle = 0; + m_symbols_until_update = 0; + m_decoder_table_bits = 0; + m_total_count = 0; + + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } + } -static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) - : m_total_syms(0), - m_pDecode_tables(nullptr), - m_encoding(false) { - *this = other; -} + void adaptive_huffman_data_model::init(bool encoding, uint total_syms) + { + clear(); + + m_encoding = encoding; + + m_sym_freq.resize(total_syms); + m_code_sizes.resize(total_syms); + + m_total_syms = total_syms; + + if (m_total_syms <= 16) + { + m_decoder_table_bits = 0; + } + else + { + m_decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + } + + if (m_encoding) + { + m_codes.resize(total_syms); + } + else + { + m_pDecode_tables = crnlib_new(); + } + + reset(); + } -static_huffman_data_model::~static_huffman_data_model() { - if (m_pDecode_tables) - crnlib_delete(m_pDecode_tables); -} + void adaptive_huffman_data_model::reset() + { + if (!m_total_syms) + { + return; + } -static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) { - if (this == &rhs) - return *this; + for (uint i = 0; i < m_total_syms; i++) + { + m_sym_freq[i] = 1; + } - m_total_syms = rhs.m_total_syms; - m_codes = rhs.m_codes; - m_code_sizes = rhs.m_code_sizes; + m_total_count = 0; + m_update_cycle = m_total_syms; - if (rhs.m_pDecode_tables) { - if (m_pDecode_tables) - *m_pDecode_tables = *rhs.m_pDecode_tables; - else - m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); - } else { - 
crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; - } + update(); - m_encoding = rhs.m_encoding; + m_symbols_until_update = m_update_cycle = 8; //(m_total_syms + 6) >> 1; + } - return *this; -} + void adaptive_huffman_data_model::rescale() + { + uint total_freq = 0; -void static_huffman_data_model::clear() { - m_total_syms = 0; - m_codes.clear(); - m_code_sizes.clear(); - if (m_pDecode_tables) { - crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; - } - m_encoding = false; -} + for (uint i = 0; i < m_total_syms; i++) + { + uint freq = (m_sym_freq[i] + 1) >> 1; + total_freq += freq; + m_sym_freq[i] = static_cast(freq); + } -bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit) { - CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + m_total_count = total_freq; + } - m_encoding = encoding; + void adaptive_huffman_data_model::update() + { + m_total_count += m_update_cycle; - m_total_syms = total_syms; + if (m_total_count >= 32768) + { + rescale(); + } - code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + void* pTables = create_generate_huffman_codes_tables(); - m_code_sizes.resize(total_syms); + uint max_code_size, total_freq; + bool status = generate_huffman_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); + CRNLIB_ASSERT(status); + CRNLIB_ASSERT(total_freq == m_total_count); - void* pTables = create_generate_huffman_codes_tables(); + if (max_code_size > prefix_coding::cMaxExpectedCodeSize) + { + prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], prefix_coding::cMaxExpectedCodeSize); + } - uint max_code_size = 0, total_freq; - bool status = generate_huffman_codes(pTables, m_total_syms, pSym_freq, &m_code_sizes[0], max_code_size, total_freq); + free_generate_huffman_codes_tables(pTables); - 
free_generate_huffman_codes_tables(pTables); + if (m_encoding) + { + status = prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]); + } + else + { + status = prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, m_decoder_table_bits); + } - if (!status) - return false; + CRNLIB_ASSERT(status); + (void)status; - if (max_code_size > code_size_limit) { - if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) - return false; - } + m_update_cycle = (5 * m_update_cycle) >> 2; + uint max_cycle = (m_total_syms + 6) << 3; // this was << 2 - which is ~12% slower but compresses around .5% better - if (m_encoding) { - m_codes.resize(total_syms); + if (m_update_cycle > max_cycle) + { + m_update_cycle = max_cycle; + } - if (m_pDecode_tables) { - crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; + m_symbols_until_update = m_update_cycle; } - if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) - return false; - } else { - m_codes.clear(); - - if (!m_pDecode_tables) - m_pDecode_tables = crnlib_new(); - - if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) - return false; - } - - return true; -} + static_huffman_data_model::static_huffman_data_model() : + m_total_syms(0), + m_pDecode_tables(nullptr), + m_encoding(false) + { + } -bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit) { - CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : + m_total_syms(0), + m_pDecode_tables(nullptr), + m_encoding(false) + { + *this = other; + } - crnlib::vector sym_freq16(total_syms); + static_huffman_data_model::~static_huffman_data_model() + { + if (m_pDecode_tables) + { + 
crnlib_delete(m_pDecode_tables); + } + } - uint max_freq = 0; - for (uint i = 0; i < total_syms; i++) - max_freq = math::maximum(max_freq, pSym_freq[i]); + static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) + { + if (this == &rhs) + { + return *this; + } + + m_total_syms = rhs.m_total_syms; + m_codes = rhs.m_codes; + m_code_sizes = rhs.m_code_sizes; + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + { + *m_pDecode_tables = *rhs.m_pDecode_tables; + } + else + { + m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); + } + } + else + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } + + m_encoding = rhs.m_encoding; + + return *this; + } - if (!max_freq) - return false; + void static_huffman_data_model::clear() + { + m_total_syms = 0; + m_codes.clear(); + m_code_sizes.clear(); + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } + m_encoding = false; + } - if (max_freq <= cUINT16_MAX) { - for (uint i = 0; i < total_syms; i++) - sym_freq16[i] = static_cast(pSym_freq[i]); - } else { - for (uint i = 0; i < total_syms; i++) { - uint f = pSym_freq[i]; - if (!f) - continue; + bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); - uint64 fl = f; + m_encoding = encoding; - fl = ((fl << 16) - fl) + (max_freq >> 1); - fl /= max_freq; - if (fl < 1) - fl = 1; + m_total_syms = total_syms; - CRNLIB_ASSERT(fl <= cUINT16_MAX); + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); - sym_freq16[i] = static_cast(fl); - } - } + m_code_sizes.resize(total_syms); - return init(encoding, total_syms, &sym_freq16[0], code_size_limit); -} + void* pTables = create_generate_huffman_codes_tables(); -bool static_huffman_data_model::init(bool encoding, uint 
total_syms, const uint8* pCode_sizes, uint code_size_limit) { - CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + uint max_code_size = 0, total_freq; + bool status = generate_huffman_codes(pTables, m_total_syms, pSym_freq, &m_code_sizes[0], max_code_size, total_freq); - m_encoding = encoding; + free_generate_huffman_codes_tables(pTables); - code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + if (!status) + { + return false; + } - m_code_sizes.resize(total_syms); + if (max_code_size > code_size_limit) + { + if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) + { + return false; + } + } - uint min_code_size = UINT_MAX; - uint max_code_size = 0; + if (m_encoding) + { + m_codes.resize(total_syms); - for (uint i = 0; i < total_syms; i++) { - uint s = pCode_sizes[i]; - m_code_sizes[i] = static_cast(s); - min_code_size = math::minimum(min_code_size, s); - max_code_size = math::maximum(max_code_size, s); - } + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } - if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) - return false; + if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) + { + return false; + } + } + else + { + m_codes.clear(); - if (max_code_size > code_size_limit) { - if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) - return false; - } + if (!m_pDecode_tables) + { + m_pDecode_tables = crnlib_new(); + } - if (m_encoding) { - m_codes.resize(total_syms); + if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) + { + return false; + } + } - if (m_pDecode_tables) { - crnlib_delete(m_pDecode_tables); - m_pDecode_tables = nullptr; + return true; } - if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], 
&m_codes[0])) - return false; - } else { - m_codes.clear(); - - if (!m_pDecode_tables) - m_pDecode_tables = crnlib_new(); - - if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) - return false; - } - - return true; -} + bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + crnlib::vector sym_freq16(total_syms); + + uint max_freq = 0; + for (uint i = 0; i < total_syms; i++) + { + max_freq = math::maximum(max_freq, pSym_freq[i]); + } + + if (!max_freq) + { + return false; + } + + if (max_freq <= cUINT16_MAX) + { + for (uint i = 0; i < total_syms; i++) + { + sym_freq16[i] = static_cast(pSym_freq[i]); + } + } + else + { + for (uint i = 0; i < total_syms; i++) + { + uint f = pSym_freq[i]; + if (!f) + { + continue; + } + + uint64 fl = f; + + fl = ((fl << 16) - fl) + (max_freq >> 1); + fl /= max_freq; + if (fl < 1) + { + fl = 1; + } + + CRNLIB_ASSERT(fl <= cUINT16_MAX); + + sym_freq16[i] = static_cast(fl); + } + } + + return init(encoding, total_syms, &sym_freq16[0], code_size_limit); + } -bool static_huffman_data_model::init(bool encoding, const symbol_histogram& hist, uint code_size_limit) { - return init(encoding, hist.size(), hist.get_ptr(), code_size_limit); -} + bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + m_encoding = encoding; + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + m_code_sizes.resize(total_syms); + + uint min_code_size = UINT_MAX; + uint max_code_size = 0; + + for (uint i = 0; i < total_syms; i++) + { + uint s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + 
min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } + + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + { + return false; + } + + if (max_code_size > code_size_limit) + { + if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) + { + return false; + } + } + + if (m_encoding) + { + m_codes.resize(total_syms); + + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = nullptr; + } + + if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) + { + return false; + } + } + else + { + m_codes.clear(); + + if (!m_pDecode_tables) + { + m_pDecode_tables = crnlib_new(); + } + + if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) + { + return false; + } + } + + return true; + } -bool static_huffman_data_model::prepare_decoder_tables() { - uint total_syms = m_code_sizes.size(); + bool static_huffman_data_model::init(bool encoding, const symbol_histogram& hist, uint code_size_limit) + { + return init(encoding, hist.size(), hist.get_ptr(), code_size_limit); + } - CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + bool static_huffman_data_model::prepare_decoder_tables() + { + uint total_syms = m_code_sizes.size(); - m_encoding = false; + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); - m_total_syms = total_syms; + m_encoding = false; - m_codes.clear(); + m_total_syms = total_syms; - if (!m_pDecode_tables) - m_pDecode_tables = crnlib_new(); + m_codes.clear(); - return prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits()); -} + if (!m_pDecode_tables) + { + m_pDecode_tables = crnlib_new(); + } -uint static_huffman_data_model::compute_decoder_table_bits() const { - uint decoder_table_bits = 0; - if 
(m_total_syms > 16) - decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); - return decoder_table_bits; -} + return prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits()); + } -adaptive_bit_model::adaptive_bit_model() { - clear(); -} + uint static_huffman_data_model::compute_decoder_table_bits() const + { + uint decoder_table_bits = 0; + if (m_total_syms > 16) + { + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + } + return decoder_table_bits; + } -adaptive_bit_model::adaptive_bit_model(float prob0) { - set_probability_0(prob0); -} + adaptive_bit_model::adaptive_bit_model() + { + clear(); + } -adaptive_bit_model::adaptive_bit_model(const adaptive_bit_model& other) - : m_bit_0_prob(other.m_bit_0_prob) { -} + adaptive_bit_model::adaptive_bit_model(float prob0) + { + set_probability_0(prob0); + } -adaptive_bit_model& adaptive_bit_model::operator=(const adaptive_bit_model& rhs) { - m_bit_0_prob = rhs.m_bit_0_prob; - return *this; -} + adaptive_bit_model::adaptive_bit_model(const adaptive_bit_model& other) : + m_bit_0_prob(other.m_bit_0_prob) + { + } -void adaptive_bit_model::clear() { - m_bit_0_prob = 1U << (cSymbolCodecArithProbBits - 1); -} + adaptive_bit_model& adaptive_bit_model::operator=(const adaptive_bit_model& rhs) + { + m_bit_0_prob = rhs.m_bit_0_prob; + return *this; + } -void adaptive_bit_model::set_probability_0(float prob0) { - m_bit_0_prob = static_cast(math::clamp((uint)(prob0 * cSymbolCodecArithProbScale), 1, cSymbolCodecArithProbScale - 1)); -} + void adaptive_bit_model::clear() + { + m_bit_0_prob = 1U << (cSymbolCodecArithProbBits - 1); + } -float adaptive_bit_model::get_cost(uint bit) const { - return gProbCost[bit ? 
(cSymbolCodecArithProbScale - m_bit_0_prob) : m_bit_0_prob]; -} + void adaptive_bit_model::set_probability_0(float prob0) + { + m_bit_0_prob = static_cast(math::clamp((uint)(prob0 * cSymbolCodecArithProbScale), 1, cSymbolCodecArithProbScale - 1)); + } -void adaptive_bit_model::update(uint bit) { - if (!bit) - m_bit_0_prob += ((cSymbolCodecArithProbScale - m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); - else - m_bit_0_prob -= (m_bit_0_prob >> cSymbolCodecArithProbMoveBits); - CRNLIB_ASSERT(m_bit_0_prob >= 1); - CRNLIB_ASSERT(m_bit_0_prob < cSymbolCodecArithProbScale); -} + float adaptive_bit_model::get_cost(uint bit) const + { + return gProbCost[bit ? (cSymbolCodecArithProbScale - m_bit_0_prob) : m_bit_0_prob]; + } -adaptive_arith_data_model::adaptive_arith_data_model(bool encoding, uint total_syms) { - init(encoding, total_syms); -} + void adaptive_bit_model::update(uint bit) + { + if (!bit) + { + m_bit_0_prob += ((cSymbolCodecArithProbScale - m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + } + else + { + m_bit_0_prob -= (m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + } + CRNLIB_ASSERT(m_bit_0_prob >= 1); + CRNLIB_ASSERT(m_bit_0_prob < cSymbolCodecArithProbScale); + } -adaptive_arith_data_model::adaptive_arith_data_model(const adaptive_arith_data_model& other) { - m_total_syms = other.m_total_syms; - m_probs = other.m_probs; -} + adaptive_arith_data_model::adaptive_arith_data_model(bool encoding, uint total_syms) + { + init(encoding, total_syms); + } -adaptive_arith_data_model::~adaptive_arith_data_model() { -} + adaptive_arith_data_model::adaptive_arith_data_model(const adaptive_arith_data_model& other) + { + m_total_syms = other.m_total_syms; + m_probs = other.m_probs; + } -adaptive_arith_data_model& adaptive_arith_data_model::operator=(const adaptive_arith_data_model& rhs) { - m_total_syms = rhs.m_total_syms; - m_probs = rhs.m_probs; - return *this; -} + adaptive_arith_data_model::~adaptive_arith_data_model() + { + } -void 
adaptive_arith_data_model::clear() { - m_total_syms = 0; - m_probs.clear(); -} + adaptive_arith_data_model& adaptive_arith_data_model::operator=(const adaptive_arith_data_model& rhs) + { + m_total_syms = rhs.m_total_syms; + m_probs = rhs.m_probs; + return *this; + } -void adaptive_arith_data_model::init(bool, uint total_syms) { - if (!total_syms) { - clear(); - return; - } + void adaptive_arith_data_model::clear() + { + m_total_syms = 0; + m_probs.clear(); + } - if ((total_syms < 2) || (!math::is_power_of_2(total_syms))) - total_syms = math::next_pow2(total_syms); + void adaptive_arith_data_model::init(bool, uint total_syms) + { + if (!total_syms) + { + clear(); + return; + } - m_total_syms = total_syms; + if ((total_syms < 2) || (!math::is_power_of_2(total_syms))) + { + total_syms = math::next_pow2(total_syms); + } - m_probs.resize(m_total_syms); -} + m_total_syms = total_syms; -void adaptive_arith_data_model::reset() { - for (uint i = 0; i < m_probs.size(); i++) - m_probs[i].clear(); -} + m_probs.resize(m_total_syms); + } -float adaptive_arith_data_model::get_cost(uint sym) const { - uint node = 1; + void adaptive_arith_data_model::reset() + { + for (uint i = 0; i < m_probs.size(); i++) + { + m_probs[i].clear(); + } + } - uint bitmask = m_total_syms; + float adaptive_arith_data_model::get_cost(uint sym) const + { + uint node = 1; - float cost = 0.0f; - do { - bitmask >>= 1; - - uint bit = (sym & bitmask) ? 1 : 0; - cost += m_probs[node].get_cost(bit); - node = (node << 1) + bit; - - } while (bitmask > 1); - - return cost; -} + uint bitmask = m_total_syms; -symbol_codec::symbol_codec() { - clear(); -} + float cost = 0.0f; + do + { + bitmask >>= 1; -void symbol_codec::clear() { - m_pDecode_buf = nullptr; - m_pDecode_buf_next = nullptr; - m_pDecode_buf_end = nullptr; - m_decode_buf_size = 0; + uint bit = (sym & bitmask) ? 
1 : 0; + cost += m_probs[node].get_cost(bit); + node = (node << 1) + bit; + } while (bitmask > 1); - m_bit_buf = 0; - m_bit_count = 0; - m_total_model_updates = 0; - m_mode = cNull; - m_simulate_encoding = false; - m_total_bits_written = 0; + return cost; + } - m_arith_base = 0; - m_arith_value = 0; - m_arith_length = 0; - m_arith_total_bits = 0; + symbol_codec::symbol_codec() + { + clear(); + } - m_output_buf.clear(); - m_arith_output_buf.clear(); - m_output_syms.clear(); -} + void symbol_codec::clear() + { + m_pDecode_buf = nullptr; + m_pDecode_buf_next = nullptr; + m_pDecode_buf_end = nullptr; + m_decode_buf_size = 0; + + m_bit_buf = 0; + m_bit_count = 0; + m_total_model_updates = 0; + m_mode = cNull; + m_simulate_encoding = false; + m_total_bits_written = 0; + + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = 0; + m_arith_total_bits = 0; + + m_output_buf.clear(); + m_arith_output_buf.clear(); + m_output_syms.clear(); + } -void symbol_codec::start_encoding(uint expected_file_size) { - m_mode = cEncoding; + void symbol_codec::start_encoding(uint expected_file_size) + { + m_mode = cEncoding; - m_total_model_updates = 0; - m_total_bits_written = 0; + m_total_model_updates = 0; + m_total_bits_written = 0; - put_bits_init(expected_file_size); + put_bits_init(expected_file_size); - m_output_syms.resize(0); + m_output_syms.resize(0); - arith_start_encoding(); -} + arith_start_encoding(); + } -// Code length encoding symbols: -// 0-16 - actual code lengths -const uint cMaxCodelengthCodes = 21; + // Code length encoding symbols: + // 0-16 - actual code lengths + const uint cMaxCodelengthCodes = 21; -const uint cSmallZeroRunCode = 17; -const uint cLargeZeroRunCode = 18; -const uint cSmallRepeatCode = 19; -const uint cLargeRepeatCode = 20; + const uint cSmallZeroRunCode = 17; + const uint cLargeZeroRunCode = 18; + const uint cSmallRepeatCode = 19; + const uint cLargeRepeatCode = 20; -const uint cMinSmallZeroRunSize = 3; -const uint cMaxSmallZeroRunSize = 10; 
-const uint cMinLargeZeroRunSize = 11; -const uint cMaxLargeZeroRunSize = 138; + const uint cMinSmallZeroRunSize = 3; + const uint cMaxSmallZeroRunSize = 10; + const uint cMinLargeZeroRunSize = 11; + const uint cMaxLargeZeroRunSize = 138; -const uint cSmallMinNonZeroRunSize = 3; -const uint cSmallMaxNonZeroRunSize = 6; -const uint cLargeMinNonZeroRunSize = 7; -const uint cLargeMaxNonZeroRunSize = 70; + const uint cSmallMinNonZeroRunSize = 3; + const uint cSmallMaxNonZeroRunSize = 6; + const uint cLargeMinNonZeroRunSize = 7; + const uint cLargeMaxNonZeroRunSize = 70; -const uint cSmallZeroRunExtraBits = 3; -const uint cLargeZeroRunExtraBits = 7; -const uint cSmallNonZeroRunExtraBits = 2; -const uint cLargeNonZeroRunExtraBits = 6; + const uint cSmallZeroRunExtraBits = 3; + const uint cLargeZeroRunExtraBits = 7; + const uint cSmallNonZeroRunExtraBits = 2; + const uint cLargeNonZeroRunExtraBits = 6; -static const uint8 g_most_probable_codelength_codes[] = - { + static const uint8 g_most_probable_codelength_codes[] = { cSmallZeroRunCode, cLargeZeroRunCode, cSmallRepeatCode, cLargeRepeatCode, @@ -602,1066 +783,1389 @@ static const uint8 g_most_probable_codelength_codes[] = 3, 13, 2, 14, 1, 15, - 16}; -const uint cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); - -static inline void end_zero_run(uint& size, crnlib::vector& codes) { - if (!size) - return; - - if (size < cMinSmallZeroRunSize) { - while (size--) - codes.push_back(0); - } else if (size <= cMaxSmallZeroRunSize) - codes.push_back(static_cast(cSmallZeroRunCode | ((size - cMinSmallZeroRunSize) << 8))); - else { - CRNLIB_ASSERT((size >= cMinLargeZeroRunSize) && (size <= cMaxLargeZeroRunSize)); - codes.push_back(static_cast(cLargeZeroRunCode | ((size - cMinLargeZeroRunSize) << 8))); - } - - size = 0; -} - -static inline void end_nonzero_run(uint& size, uint len, crnlib::vector& codes) { - if (!size) - return; - - if (size < 
cSmallMinNonZeroRunSize) { - while (size--) - codes.push_back(static_cast(len)); - } else if (size <= cSmallMaxNonZeroRunSize) { - codes.push_back(static_cast(cSmallRepeatCode | ((size - cSmallMinNonZeroRunSize) << 8))); - } else { - CRNLIB_ASSERT((size >= cLargeMinNonZeroRunSize) && (size <= cLargeMaxNonZeroRunSize)); - codes.push_back(static_cast(cLargeRepeatCode | ((size - cLargeMinNonZeroRunSize) << 8))); - } - - size = 0; -} + 16 + }; + const uint cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); -uint symbol_codec::encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate = false, static_huffman_data_model* pDeltaModel) { - CRNLIB_ASSERT(m_mode == cEncoding); - - uint total_used_syms = 0; - for (uint i = model.m_total_syms; i > 0; i--) { - if (model.m_code_sizes[i - 1]) { - total_used_syms = i; - break; - } - } - - if (!total_used_syms) { - if (!simulate) { - encode_bits(0, math::total_bits(prefix_coding::cMaxSupportedSyms)); - } - - return math::total_bits(prefix_coding::cMaxSupportedSyms); - } - - crnlib::vector codes; - codes.reserve(model.m_total_syms); - - uint prev_len = UINT_MAX; - uint cur_zero_run_size = 0; - uint cur_nonzero_run_size = 0; - - const uint8* pCodesizes = &model.m_code_sizes[0]; - - crnlib::vector delta_code_sizes; - if ((pDeltaModel) && (pDeltaModel->get_total_syms())) { - if (pDeltaModel->m_code_sizes.size() < total_used_syms) - return 0; - - delta_code_sizes.resize(total_used_syms); - for (uint i = 0; i < total_used_syms; i++) { - int delta = (int)model.m_code_sizes[i] - (int)pDeltaModel->m_code_sizes[i]; - if (delta < 0) - delta += 17; - delta_code_sizes[i] = static_cast(delta); + static inline void end_zero_run(uint& size, crnlib::vector& codes) + { + if (!size) + { + return; + } + + if (size < cMinSmallZeroRunSize) + { + while (size--) + { + codes.push_back(0); + } + } + else if (size <= cMaxSmallZeroRunSize) + { + 
codes.push_back(static_cast(cSmallZeroRunCode | ((size - cMinSmallZeroRunSize) << 8))); + } + else + { + CRNLIB_ASSERT((size >= cMinLargeZeroRunSize) && (size <= cMaxLargeZeroRunSize)); + codes.push_back(static_cast(cLargeZeroRunCode | ((size - cMinLargeZeroRunSize) << 8))); + } + + size = 0; } - pCodesizes = delta_code_sizes.get_ptr(); - } - - for (uint i = 0; i <= total_used_syms; i++) { - const uint len = (i < total_used_syms) ? *pCodesizes++ : 0xFF; - CRNLIB_ASSERT((len == 0xFF) || (len <= prefix_coding::cMaxExpectedCodeSize)); - - if (!len) { - end_nonzero_run(cur_nonzero_run_size, prev_len, codes); - - if (++cur_zero_run_size == cMaxLargeZeroRunSize) - end_zero_run(cur_zero_run_size, codes); - } else { - end_zero_run(cur_zero_run_size, codes); - - if (len != prev_len) { - end_nonzero_run(cur_nonzero_run_size, prev_len, codes); - - if (len != 0xFF) - codes.push_back(static_cast(len)); - } else if (++cur_nonzero_run_size == cLargeMaxNonZeroRunSize) - end_nonzero_run(cur_nonzero_run_size, prev_len, codes); - } - - prev_len = len; - } - - uint16 hist[cMaxCodelengthCodes]; - utils::zero_object(hist); - - for (uint i = 0; i < codes.size(); i++) { - uint code = codes[i] & 0xFF; - CRNLIB_ASSERT(code < cMaxCodelengthCodes); - hist[code] = static_cast(hist[code] + 1); - } - - static_huffman_data_model dm; - if (!dm.init(true, cMaxCodelengthCodes, hist, 7)) - return 0; - - uint num_codelength_codes_to_send; - for (num_codelength_codes_to_send = cNumMostProbableCodelengthCodes; num_codelength_codes_to_send > 0; num_codelength_codes_to_send--) - if (dm.get_cost(g_most_probable_codelength_codes[num_codelength_codes_to_send - 1])) - break; - - uint total_bits = math::total_bits(prefix_coding::cMaxSupportedSyms); - total_bits += 5; - total_bits += 3 * num_codelength_codes_to_send; - - if (!simulate) { - encode_bits(total_used_syms, math::total_bits(prefix_coding::cMaxSupportedSyms)); - - encode_bits(num_codelength_codes_to_send, 5); - for (uint i = 0; i < 
num_codelength_codes_to_send; i++) - encode_bits(dm.get_cost(g_most_probable_codelength_codes[i]), 3); - } - - for (uint i = 0; i < codes.size(); i++) { - uint code = codes[i]; - uint extra = code >> 8; - code &= 0xFF; - - uint extra_bits = 0; - if (code == cSmallZeroRunCode) - extra_bits = cSmallZeroRunExtraBits; - else if (code == cLargeZeroRunCode) - extra_bits = cLargeZeroRunExtraBits; - else if (code == cSmallRepeatCode) - extra_bits = cSmallNonZeroRunExtraBits; - else if (code == cLargeRepeatCode) - extra_bits = cLargeNonZeroRunExtraBits; - - total_bits += dm.get_cost(code); - - if (!simulate) - encode(code, dm); - - if (extra_bits) { - if (!simulate) - encode_bits(extra, extra_bits); - - total_bits += extra_bits; + static inline void end_nonzero_run(uint& size, uint len, crnlib::vector& codes) + { + if (!size) + { + return; + } + + if (size < cSmallMinNonZeroRunSize) + { + while (size--) + { + codes.push_back(static_cast(len)); + } + } + else if (size <= cSmallMaxNonZeroRunSize) + { + codes.push_back(static_cast(cSmallRepeatCode | ((size - cSmallMinNonZeroRunSize) << 8))); + } + else + { + CRNLIB_ASSERT((size >= cLargeMinNonZeroRunSize) && (size <= cLargeMaxNonZeroRunSize)); + codes.push_back(static_cast(cLargeRepeatCode | ((size - cLargeMinNonZeroRunSize) << 8))); + } + + size = 0; } - } - - return total_bits; -} - -void symbol_codec::encode_bits(uint bits, uint num_bits) { - CRNLIB_ASSERT(m_mode == cEncoding); - - if (!num_bits) - return; - - CRNLIB_ASSERT((num_bits == 32) || (bits <= ((1U << num_bits) - 1))); - - if (num_bits > 16) { - record_put_bits(bits >> 16, num_bits - 16); - record_put_bits(bits & 0xFFFF, 16); - } else - record_put_bits(bits, num_bits); -} - -void symbol_codec::encode_align_to_byte() { - CRNLIB_ASSERT(m_mode == cEncoding); - - if (!m_simulate_encoding) { - output_symbol sym; - sym.m_bits = 0; - sym.m_num_bits = output_symbol::cAlignToByteSym; - sym.m_arith_prob0 = 0; - m_output_syms.push_back(sym); - } else { - // We really don't 
know how many we're going to write, so just be conservative. - m_total_bits_written += 7; - } -} - -void symbol_codec::encode(uint sym, adaptive_huffman_data_model& model) { - CRNLIB_ASSERT(m_mode == cEncoding); - CRNLIB_ASSERT(model.m_encoding); - - record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); - - uint freq = model.m_sym_freq[sym]; - freq++; - model.m_sym_freq[sym] = static_cast(freq); - - if (freq == cUINT16_MAX) - model.rescale(); - if (--model.m_symbols_until_update == 0) { - m_total_model_updates++; - model.update(); - } -} - -void symbol_codec::encode(uint sym, static_huffman_data_model& model) { - CRNLIB_ASSERT(m_mode == cEncoding); - CRNLIB_ASSERT(model.m_encoding); - - CRNLIB_ASSERT(model.m_code_sizes[sym]); - - record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); -} - -void symbol_codec::encode_truncated_binary(uint v, uint n) { - CRNLIB_ASSERT((n >= 2) && (v < n)); - - uint k = math::floor_log2i(n); - uint u = (1 << (k + 1)) - n; - - if (v < u) - encode_bits(v, k); - else - encode_bits(v + u, k + 1); -} - -uint symbol_codec::encode_truncated_binary_cost(uint v, uint n) { - CRNLIB_ASSERT((n >= 2) && (v < n)); - - uint k = math::floor_log2i(n); - uint u = (1 << (k + 1)) - n; - - if (v < u) - return k; - else - return k + 1; -} - -void symbol_codec::encode_golomb(uint v, uint m) { - CRNLIB_ASSERT(m > 0); - - uint q = v / m; - uint r = v % m; - - while (q > 16) { - encode_bits(0xFFFF, 16); - q -= 16; - } - - if (q) - encode_bits((1 << q) - 1, q); - - encode_bits(0, 1); - - encode_truncated_binary(r, m); -} - -void symbol_codec::encode_rice(uint v, uint m) { - CRNLIB_ASSERT(m > 0); - - uint q = v >> m; - uint r = v & ((1 << m) - 1); - - while (q > 16) { - encode_bits(0xFFFF, 16); - q -= 16; - } - - if (q) - encode_bits((1 << q) - 1, q); - - encode_bits(0, 1); - - encode_bits(r, m); -} - -uint symbol_codec::encode_rice_get_cost(uint v, uint m) { - CRNLIB_ASSERT(m > 0); - - uint q = v >> m; - //uint r = v & ((1 << m) - 1); - - return q 
+ 1 + m; -} - -void symbol_codec::arith_propagate_carry() { - int index = m_arith_output_buf.size() - 1; - while (index >= 0) { - uint c = m_arith_output_buf[index]; - - if (c == 0xFF) - m_arith_output_buf[index] = 0; - else { - m_arith_output_buf[index]++; - break; + uint symbol_codec::encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate = false, static_huffman_data_model* pDeltaModel) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + uint total_used_syms = 0; + for (uint i = model.m_total_syms; i > 0; i--) + { + if (model.m_code_sizes[i - 1]) + { + total_used_syms = i; + break; + } + } + + if (!total_used_syms) + { + if (!simulate) + { + encode_bits(0, math::total_bits(prefix_coding::cMaxSupportedSyms)); + } + + return math::total_bits(prefix_coding::cMaxSupportedSyms); + } + + crnlib::vector codes; + codes.reserve(model.m_total_syms); + + uint prev_len = UINT_MAX; + uint cur_zero_run_size = 0; + uint cur_nonzero_run_size = 0; + + const uint8* pCodesizes = &model.m_code_sizes[0]; + + crnlib::vector delta_code_sizes; + if ((pDeltaModel) && (pDeltaModel->get_total_syms())) + { + if (pDeltaModel->m_code_sizes.size() < total_used_syms) + { + return 0; + } + + delta_code_sizes.resize(total_used_syms); + for (uint i = 0; i < total_used_syms; i++) + { + int delta = (int)model.m_code_sizes[i] - (int)pDeltaModel->m_code_sizes[i]; + if (delta < 0) + { + delta += 17; + } + delta_code_sizes[i] = static_cast(delta); + } + + pCodesizes = delta_code_sizes.get_ptr(); + } + + for (uint i = 0; i <= total_used_syms; i++) + { + const uint len = (i < total_used_syms) ? 
*pCodesizes++ : 0xFF; + CRNLIB_ASSERT((len == 0xFF) || (len <= prefix_coding::cMaxExpectedCodeSize)); + + if (!len) + { + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + + if (++cur_zero_run_size == cMaxLargeZeroRunSize) + { + end_zero_run(cur_zero_run_size, codes); + } + } + else + { + end_zero_run(cur_zero_run_size, codes); + + if (len != prev_len) + { + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + + if (len != 0xFF) + { + codes.push_back(static_cast(len)); + } + } + else if (++cur_nonzero_run_size == cLargeMaxNonZeroRunSize) + { + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + } + } + + prev_len = len; + } + + uint16 hist[cMaxCodelengthCodes]; + utils::zero_object(hist); + + for (uint i = 0; i < codes.size(); i++) + { + uint code = codes[i] & 0xFF; + CRNLIB_ASSERT(code < cMaxCodelengthCodes); + hist[code] = static_cast(hist[code] + 1); + } + + static_huffman_data_model dm; + if (!dm.init(true, cMaxCodelengthCodes, hist, 7)) + { + return 0; + } + + uint num_codelength_codes_to_send; + for (num_codelength_codes_to_send = cNumMostProbableCodelengthCodes; num_codelength_codes_to_send > 0; num_codelength_codes_to_send--) + { + if (dm.get_cost(g_most_probable_codelength_codes[num_codelength_codes_to_send - 1])) + { + break; + } + } + + uint total_bits = math::total_bits(prefix_coding::cMaxSupportedSyms); + total_bits += 5; + total_bits += 3 * num_codelength_codes_to_send; + + if (!simulate) + { + encode_bits(total_used_syms, math::total_bits(prefix_coding::cMaxSupportedSyms)); + + encode_bits(num_codelength_codes_to_send, 5); + for (uint i = 0; i < num_codelength_codes_to_send; i++) + { + encode_bits(dm.get_cost(g_most_probable_codelength_codes[i]), 3); + } + } + + for (uint i = 0; i < codes.size(); i++) + { + uint code = codes[i]; + uint extra = code >> 8; + code &= 0xFF; + + uint extra_bits = 0; + if (code == cSmallZeroRunCode) + { + extra_bits = cSmallZeroRunExtraBits; + } + else if (code == cLargeZeroRunCode) + { + extra_bits = 
cLargeZeroRunExtraBits; + } + else if (code == cSmallRepeatCode) + { + extra_bits = cSmallNonZeroRunExtraBits; + } + else if (code == cLargeRepeatCode) + { + extra_bits = cLargeNonZeroRunExtraBits; + } + + total_bits += dm.get_cost(code); + + if (!simulate) + { + encode(code, dm); + } + + if (extra_bits) + { + if (!simulate) + { + encode_bits(extra, extra_bits); + } + + total_bits += extra_bits; + } + } + + return total_bits; } - index--; - } -} - -void symbol_codec::arith_renorm_enc_interval() { - do { - m_arith_output_buf.push_back((m_arith_base >> 24) & 0xFF); - m_total_bits_written += 8; - - m_arith_base <<= 8; - } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); -} - -void symbol_codec::arith_start_encoding() { - m_arith_output_buf.resize(0); - - m_arith_base = 0; - m_arith_value = 0; - m_arith_length = cSymbolCodecArithMaxLen; - m_arith_total_bits = 0; -} - -void symbol_codec::encode(uint bit, adaptive_bit_model& model, bool update_model) { - CRNLIB_ASSERT(m_mode == cEncoding); - - m_arith_total_bits++; - - if (!m_simulate_encoding) { - output_symbol sym; - sym.m_bits = bit; - sym.m_num_bits = -1; - sym.m_arith_prob0 = model.m_bit_0_prob; - m_output_syms.push_back(sym); - } - - //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; - uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); - - if (!bit) { - if (update_model) - model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); - - m_arith_length = x; - } else { - if (update_model) - model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); - - uint orig_base = m_arith_base; - m_arith_base += x; - m_arith_length -= x; - if (orig_base > m_arith_base) - arith_propagate_carry(); - } - - if (m_arith_length < cSymbolCodecArithMinLen) - arith_renorm_enc_interval(); -} - -void 
symbol_codec::encode(uint sym, adaptive_arith_data_model& model) { - uint node = 1; - - uint bitmask = model.m_total_syms; - - do { - bitmask >>= 1; - - uint bit = (sym & bitmask) ? 1 : 0; - encode(bit, model.m_probs[node]); - node = (node << 1) + bit; - - } while (bitmask > 1); -} - -void symbol_codec::arith_stop_encoding() { - if (!m_arith_total_bits) - return; - - uint orig_base = m_arith_base; + void symbol_codec::encode_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + if (!num_bits) + { + return; + } + + CRNLIB_ASSERT((num_bits == 32) || (bits <= ((1U << num_bits) - 1))); + + if (num_bits > 16) + { + record_put_bits(bits >> 16, num_bits - 16); + record_put_bits(bits & 0xFFFF, 16); + } + else + { + record_put_bits(bits, num_bits); + } + } - if (m_arith_length > 2 * cSymbolCodecArithMinLen) { - m_arith_base += cSymbolCodecArithMinLen; - m_arith_length = (cSymbolCodecArithMinLen >> 1); - } else { - m_arith_base += (cSymbolCodecArithMinLen >> 1); - m_arith_length = (cSymbolCodecArithMinLen >> 9); - } + void symbol_codec::encode_align_to_byte() + { + CRNLIB_ASSERT(m_mode == cEncoding); + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = 0; + sym.m_num_bits = output_symbol::cAlignToByteSym; + sym.m_arith_prob0 = 0; + m_output_syms.push_back(sym); + } + else + { + // We really don't know how many we're going to write, so just be conservative. 
+ m_total_bits_written += 7; + } + } - if (orig_base > m_arith_base) - arith_propagate_carry(); + void symbol_codec::encode(uint sym, adaptive_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + CRNLIB_ASSERT(model.m_encoding); - arith_renorm_enc_interval(); + record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); - while (m_arith_output_buf.size() < 4) { - m_arith_output_buf.push_back(0); - m_total_bits_written += 8; - } -} + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); -void symbol_codec::stop_encoding(bool support_arith) { - CRNLIB_ASSERT(m_mode == cEncoding); + if (freq == cUINT16_MAX) + { + model.rescale(); + } - arith_stop_encoding(); + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + model.update(); + } + } - if (!m_simulate_encoding) - assemble_output_buf(support_arith); + void symbol_codec::encode(uint sym, static_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + CRNLIB_ASSERT(model.m_encoding); - m_mode = cNull; -} + CRNLIB_ASSERT(model.m_code_sizes[sym]); -void symbol_codec::record_put_bits(uint bits, uint num_bits) { - CRNLIB_ASSERT(m_mode == cEncoding); + record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); + } - CRNLIB_ASSERT(num_bits <= 25); - CRNLIB_ASSERT(m_bit_count >= 25); + void symbol_codec::encode_truncated_binary(uint v, uint n) + { + CRNLIB_ASSERT((n >= 2) && (v < n)); + + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; + + if (v < u) + { + encode_bits(v, k); + } + else + { + encode_bits(v + u, k + 1); + } + } - if (!num_bits) - return; + uint symbol_codec::encode_truncated_binary_cost(uint v, uint n) + { + CRNLIB_ASSERT((n >= 2) && (v < n)); + + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; + + if (v < u) + { + return k; + } + else + { + return k + 1; + } + } - m_total_bits_written += num_bits; + void symbol_codec::encode_golomb(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); - if 
(!m_simulate_encoding) { - output_symbol sym; - sym.m_bits = bits; - sym.m_num_bits = (uint16)num_bits; - sym.m_arith_prob0 = 0; - m_output_syms.push_back(sym); - } -} + uint q = v / m; + uint r = v % m; -void symbol_codec::put_bits_init(uint expected_size) { - m_bit_buf = 0; - m_bit_count = cBitBufSize; + while (q > 16) + { + encode_bits(0xFFFF, 16); + q -= 16; + } - m_output_buf.resize(0); - m_output_buf.reserve(expected_size); -} + if (q) + { + encode_bits((1 << q) - 1, q); + } -void symbol_codec::put_bits(uint bits, uint num_bits) { - CRNLIB_ASSERT(num_bits <= 25); - CRNLIB_ASSERT(m_bit_count >= 25); + encode_bits(0, 1); - if (!num_bits) - return; + encode_truncated_binary(r, m); + } - m_bit_count -= num_bits; - m_bit_buf |= (static_cast(bits) << m_bit_count); + void symbol_codec::encode_rice(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); - m_total_bits_written += num_bits; + uint q = v >> m; + uint r = v & ((1 << m) - 1); - while (m_bit_count <= (cBitBufSize - 8)) { - m_output_buf.push_back(static_cast(m_bit_buf >> (cBitBufSize - 8))); + while (q > 16) + { + encode_bits(0xFFFF, 16); + q -= 16; + } - m_bit_buf <<= 8; - m_bit_count += 8; - } -} + if (q) + { + encode_bits((1 << q) - 1, q); + } -void symbol_codec::put_bits_align_to_byte() { - uint num_bits_in = cBitBufSize - m_bit_count; - if (num_bits_in & 7) { - put_bits(0, 8 - (num_bits_in & 7)); - } -} + encode_bits(0, 1); -void symbol_codec::flush_bits() { - //put_bits(15, 4); // for table look-ahead - //put_bits(3, 3); // for table look-ahead + encode_bits(r, m); + } - put_bits(0, 7); // to ensure the last bits are flushed -} + uint symbol_codec::encode_rice_get_cost(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); -void symbol_codec::assemble_output_buf(bool support_arith) { - m_total_bits_written = 0; + uint q = v >> m; + //uint r = v & ((1 << m) - 1); - uint arith_buf_ofs = 0; + return q + 1 + m; + } - if (support_arith) { - if (m_arith_output_buf.size()) { - put_bits(1, 1); - - m_arith_length = 
cSymbolCodecArithMaxLen; - m_arith_value = 0; - for (uint i = 0; i < 4; i++) { - const uint c = m_arith_output_buf[arith_buf_ofs++]; - m_arith_value = (m_arith_value << 8) | c; - put_bits(c, 8); - } - } else { - put_bits(0, 1); + void symbol_codec::arith_propagate_carry() + { + int index = m_arith_output_buf.size() - 1; + while (index >= 0) + { + uint c = m_arith_output_buf[index]; + + if (c == 0xFF) + { + m_arith_output_buf[index] = 0; + } + else + { + m_arith_output_buf[index]++; + break; + } + + index--; + } } - } - for (uint sym_index = 0; sym_index < m_output_syms.size(); sym_index++) { - const output_symbol& sym = m_output_syms[sym_index]; + void symbol_codec::arith_renorm_enc_interval() + { + do + { + m_arith_output_buf.push_back((m_arith_base >> 24) & 0xFF); + m_total_bits_written += 8; - if (sym.m_num_bits == output_symbol::cAlignToByteSym) { - put_bits_align_to_byte(); - } else if (sym.m_num_bits == output_symbol::cArithSym) { - if (m_arith_length < cSymbolCodecArithMinLen) { - do { - const uint c = (arith_buf_ofs < m_arith_output_buf.size()) ? 
m_arith_output_buf[arith_buf_ofs++] : 0; - put_bits(c, 8); - m_arith_value = (m_arith_value << 8) | c; + m_arith_base <<= 8; } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); - } + } - //uint x = gArithProbMulTab[sym.m_arith_prob0 >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; - uint x = sym.m_arith_prob0 * (m_arith_length >> cSymbolCodecArithProbBits); - uint bit = (m_arith_value >= x); + void symbol_codec::arith_start_encoding() + { + m_arith_output_buf.resize(0); - if (bit == 0) { - m_arith_length = x; - } else { - m_arith_value -= x; - m_arith_length -= x; - } + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_total_bits = 0; + } - CRNLIB_VERIFY(bit == sym.m_bits); - } else { - put_bits(sym.m_bits, sym.m_num_bits); + void symbol_codec::encode(uint bit, adaptive_bit_model& model, bool update_model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + m_arith_total_bits++; + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = bit; + sym.m_num_bits = -1; + sym.m_arith_prob0 = model.m_bit_0_prob; + m_output_syms.push_back(sym); + } + + //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + + if (!bit) + { + if (update_model) + { + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + } + + m_arith_length = x; + } + else + { + if (update_model) + { + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + } + + uint orig_base = m_arith_base; + m_arith_base += x; + m_arith_length -= x; + if (orig_base > m_arith_base) + { + arith_propagate_carry(); + } + } + + if (m_arith_length < cSymbolCodecArithMinLen) + { + arith_renorm_enc_interval(); + } } 
- } - flush_bits(); -} - -//------------------------------------------------------------------------------------------------------------------ -// Decoding -//------------------------------------------------------------------------------------------------------------------ - -bool symbol_codec::start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag, need_bytes_func_ptr pNeed_bytes_func, void* pPrivate_data) { - if (!buf_size) - return false; - - m_total_model_updates = 0; - - m_pDecode_buf = pBuf; - m_pDecode_buf_next = pBuf; - m_decode_buf_size = buf_size; - m_pDecode_buf_end = pBuf + buf_size; - - m_pDecode_need_bytes_func = pNeed_bytes_func; - m_pDecode_private_data = pPrivate_data; - m_decode_buf_eof = eof_flag; - if (!pNeed_bytes_func) { - m_decode_buf_eof = true; - } - - m_mode = cDecoding; - - get_bits_init(); - - return true; -} + void symbol_codec::encode(uint sym, adaptive_arith_data_model& model) + { + uint node = 1; -uint symbol_codec::decode_bits(uint num_bits) { - CRNLIB_ASSERT(m_mode == cDecoding); + uint bitmask = model.m_total_syms; - if (!num_bits) - return 0; + do + { + bitmask >>= 1; - if (num_bits > 16) { - uint a = get_bits(num_bits - 16); - uint b = get_bits(16); + uint bit = (sym & bitmask) ? 
1 : 0; + encode(bit, model.m_probs[node]); + node = (node << 1) + bit; - return (a << 16) | b; - } else - return get_bits(num_bits); -} + } while (bitmask > 1); + } -void symbol_codec::decode_remove_bits(uint num_bits) { - CRNLIB_ASSERT(m_mode == cDecoding); + void symbol_codec::arith_stop_encoding() + { + if (!m_arith_total_bits) + { + return; + } + + uint orig_base = m_arith_base; + + if (m_arith_length > 2 * cSymbolCodecArithMinLen) + { + m_arith_base += cSymbolCodecArithMinLen; + m_arith_length = (cSymbolCodecArithMinLen >> 1); + } + else + { + m_arith_base += (cSymbolCodecArithMinLen >> 1); + m_arith_length = (cSymbolCodecArithMinLen >> 9); + } + + if (orig_base > m_arith_base) + { + arith_propagate_carry(); + } + + arith_renorm_enc_interval(); + + while (m_arith_output_buf.size() < 4) + { + m_arith_output_buf.push_back(0); + m_total_bits_written += 8; + } + } - while (num_bits > 16) { - remove_bits(16); - num_bits -= 16; - } + void symbol_codec::stop_encoding(bool support_arith) + { + CRNLIB_ASSERT(m_mode == cEncoding); - remove_bits(num_bits); -} + arith_stop_encoding(); -uint symbol_codec::decode_peek_bits(uint num_bits) { - CRNLIB_ASSERT(m_mode == cDecoding); - CRNLIB_ASSERT(num_bits <= 25); + if (!m_simulate_encoding) + { + assemble_output_buf(support_arith); + } - if (!num_bits) - return 0; + m_mode = cNull; + } - while (m_bit_count < (int)num_bits) { - uint c = 0; - if (m_pDecode_buf_next == m_pDecode_buf_end) { - if (!m_decode_buf_eof) { - m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - if (m_pDecode_buf_next < m_pDecode_buf_end) - c = *m_pDecode_buf_next++; - } - } else - c = *m_pDecode_buf_next++; + void symbol_codec::record_put_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + CRNLIB_ASSERT(num_bits <= 25); + CRNLIB_ASSERT(m_bit_count >= 
25); + + if (!num_bits) + { + return; + } + + m_total_bits_written += num_bits; + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = bits; + sym.m_num_bits = (uint16)num_bits; + sym.m_arith_prob0 = 0; + m_output_syms.push_back(sym); + } + } - m_bit_count += 8; - CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + void symbol_codec::put_bits_init(uint expected_size) + { + m_bit_buf = 0; + m_bit_count = cBitBufSize; - m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); - } + m_output_buf.resize(0); + m_output_buf.reserve(expected_size); + } - return static_cast(m_bit_buf >> (cBitBufSize - num_bits)); -} + void symbol_codec::put_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + CRNLIB_ASSERT(m_bit_count >= 25); -uint symbol_codec::decode(adaptive_huffman_data_model& model) { - CRNLIB_ASSERT(m_mode == cDecoding); - CRNLIB_ASSERT(!model.m_encoding); + if (!num_bits) + { + return; + } - const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + m_bit_count -= num_bits; + m_bit_buf |= (static_cast(bits) << m_bit_count); - while (m_bit_count < (cBitBufSize - 8)) { - uint c = 0; - if (m_pDecode_buf_next == m_pDecode_buf_end) { - if (!m_decode_buf_eof) { - m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - if (m_pDecode_buf_next < m_pDecode_buf_end) - c = *m_pDecode_buf_next++; - } - } else - c = *m_pDecode_buf_next++; + m_total_bits_written += num_bits; - m_bit_count += 8; - m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); - } + while (m_bit_count <= (cBitBufSize - 8)) + { + m_output_buf.push_back(static_cast(m_bit_buf >> (cBitBufSize - 8))); - uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); - uint sym, len; - - if (k <= pTables->m_table_max_code) { - uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - 
pTables->m_table_bits)]; - - CRNLIB_ASSERT(t != cUINT32_MAX); - sym = t & cUINT16_MAX; - len = t >> 16; - - CRNLIB_ASSERT(model.m_code_sizes[sym] == len); - } else { - len = pTables->m_decode_start_code_size; - - for (;;) { - if (k <= pTables->m_max_codes[len - 1]) - break; - len++; - } - - int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); - - if (((uint)val_ptr >= model.m_total_syms)) { - // corrupted stream, or a bug - CRNLIB_ASSERT(0); - return 0; - } - - sym = pTables->m_sorted_symbol_order[val_ptr]; - } - - m_bit_buf <<= len; - m_bit_count -= len; - - uint freq = model.m_sym_freq[sym]; - freq++; - model.m_sym_freq[sym] = static_cast(freq); - - if (freq == cUINT16_MAX) - model.rescale(); - - if (--model.m_symbols_until_update == 0) { - m_total_model_updates++; - model.update(); - } - - return sym; -} - -void symbol_codec::decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag) { - CRNLIB_ASSERT(m_mode == cDecoding); - - m_pDecode_buf = pBuf; - m_pDecode_buf_next = pBuf_next; - m_decode_buf_size = buf_size; - m_pDecode_buf_end = pBuf + buf_size; - - if (!m_pDecode_need_bytes_func) - m_decode_buf_eof = true; - else - m_decode_buf_eof = eof_flag; -} - -bool symbol_codec::decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel) { - CRNLIB_ASSERT(m_mode == cDecoding); - - const uint total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); - if (!total_used_syms) { - model.clear(); - return true; - } - - model.m_code_sizes.resize(total_used_syms); - memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); - - const uint num_codelength_codes_to_send = decode_bits(5); - if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) - return false; - - static_huffman_data_model dm; - dm.m_code_sizes.resize(cMaxCodelengthCodes); - - for (uint i 
= 0; i < num_codelength_codes_to_send; i++) - dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); - - if (!dm.prepare_decoder_tables()) - return false; - - uint ofs = 0; - while (ofs < total_used_syms) { - const uint num_remaining = total_used_syms - ofs; - - uint code = decode(dm); - if (code <= 16) - model.m_code_sizes[ofs++] = static_cast(code); - else if (code == cSmallZeroRunCode) { - uint len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; - if (len > num_remaining) - return false; - ofs += len; - } else if (code == cLargeZeroRunCode) { - uint len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; - if (len > num_remaining) - return false; - ofs += len; - } else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) { - uint len; - if (code == cSmallRepeatCode) - len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; - else - len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; - - if ((!ofs) || (len > num_remaining)) - return false; - const uint prev = model.m_code_sizes[ofs - 1]; - if (!prev) - return false; - const uint end = ofs + len; - while (ofs < end) - model.m_code_sizes[ofs++] = static_cast(prev); - } else { - CRNLIB_ASSERT(0); - return false; + m_bit_buf <<= 8; + m_bit_count += 8; + } } - } - if (ofs != total_used_syms) - return false; - - if ((pDeltaModel) && (pDeltaModel->get_total_syms())) { - uint n = math::minimum(pDeltaModel->m_code_sizes.size(), total_used_syms); - for (uint i = 0; i < n; i++) { - int codesize = model.m_code_sizes[i] + pDeltaModel->m_code_sizes[i]; - if (codesize > 16) - codesize -= 17; - model.m_code_sizes[i] = static_cast(codesize); + void symbol_codec::put_bits_align_to_byte() + { + uint num_bits_in = cBitBufSize - m_bit_count; + if (num_bits_in & 7) + { + put_bits(0, 8 - (num_bits_in & 7)); + } } - } - - return model.prepare_decoder_tables(); -} -uint symbol_codec::decode(static_huffman_data_model& model) { - 
CRNLIB_ASSERT(m_mode == cDecoding); - CRNLIB_ASSERT(!model.m_encoding); + void symbol_codec::flush_bits() + { + //put_bits(15, 4); // for table look-ahead + //put_bits(3, 3); // for table look-ahead - const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + put_bits(0, 7); // to ensure the last bits are flushed + } - while (m_bit_count < (cBitBufSize - 8)) { - uint c = 0; - if (m_pDecode_buf_next == m_pDecode_buf_end) { - if (!m_decode_buf_eof) { - m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - if (m_pDecode_buf_next < m_pDecode_buf_end) - c = *m_pDecode_buf_next++; - } - } else - c = *m_pDecode_buf_next++; + void symbol_codec::assemble_output_buf(bool support_arith) + { + m_total_bits_written = 0; + + uint arith_buf_ofs = 0; + + if (support_arith) + { + if (m_arith_output_buf.size()) + { + put_bits(1, 1); + + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 0; + for (uint i = 0; i < 4; i++) + { + const uint c = m_arith_output_buf[arith_buf_ofs++]; + m_arith_value = (m_arith_value << 8) | c; + put_bits(c, 8); + } + } + else + { + put_bits(0, 1); + } + } + + for (uint sym_index = 0; sym_index < m_output_syms.size(); sym_index++) + { + const output_symbol& sym = m_output_syms[sym_index]; + + if (sym.m_num_bits == output_symbol::cAlignToByteSym) + { + put_bits_align_to_byte(); + } + else if (sym.m_num_bits == output_symbol::cArithSym) + { + if (m_arith_length < cSymbolCodecArithMinLen) + { + do + { + const uint c = (arith_buf_ofs < m_arith_output_buf.size()) ? 
m_arith_output_buf[arith_buf_ofs++] : 0; + put_bits(c, 8); + m_arith_value = (m_arith_value << 8) | c; + } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); + } + + //uint x = gArithProbMulTab[sym.m_arith_prob0 >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = sym.m_arith_prob0 * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (bit == 0) + { + m_arith_length = x; + } + else + { + m_arith_value -= x; + m_arith_length -= x; + } + + CRNLIB_VERIFY(bit == sym.m_bits); + } + else + { + put_bits(sym.m_bits, sym.m_num_bits); + } + } + + flush_bits(); + } - m_bit_count += 8; - m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); - } + //------------------------------------------------------------------------------------------------------------------ + // Decoding + //------------------------------------------------------------------------------------------------------------------ - uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); - uint sym, len; + bool symbol_codec::start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag, need_bytes_func_ptr pNeed_bytes_func, void* pPrivate_data) + { + if (!buf_size) + { + return false; + } - if (k <= pTables->m_table_max_code) { - uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + m_total_model_updates = 0; - CRNLIB_ASSERT(t != cUINT32_MAX); - sym = t & cUINT16_MAX; - len = t >> 16; + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; - CRNLIB_ASSERT(model.m_code_sizes[sym] == len); - } else { - len = pTables->m_decode_start_code_size; + m_pDecode_need_bytes_func = pNeed_bytes_func; + m_pDecode_private_data = pPrivate_data; + m_decode_buf_eof = eof_flag; + if (!pNeed_bytes_func) + { + m_decode_buf_eof = true; + } - for (;;) { - if (k <= pTables->m_max_codes[len - 1]) - 
break; - len++; - } + m_mode = cDecoding; - int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + get_bits_init(); - if (((uint)val_ptr >= model.m_total_syms)) { - // corrupted stream, or a bug - CRNLIB_ASSERT(0); - return 0; + return true; } - sym = pTables->m_sorted_symbol_order[val_ptr]; - } - - m_bit_buf <<= len; - m_bit_count -= len; - - return sym; -} - -uint symbol_codec::decode_truncated_binary(uint n) { - CRNLIB_ASSERT(n >= 2); - - uint k = math::floor_log2i(n); - uint u = (1 << (k + 1)) - n; + uint symbol_codec::decode_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + if (!num_bits) + { + return 0; + } + + if (num_bits > 16) + { + uint a = get_bits(num_bits - 16); + uint b = get_bits(16); + + return (a << 16) | b; + } + else + { + return get_bits(num_bits); + } + } - uint i = decode_bits(k); + void symbol_codec::decode_remove_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); - if (i >= u) - i = ((i << 1) | decode_bits(1)) - u; + while (num_bits > 16) + { + remove_bits(16); + num_bits -= 16; + } - return i; -} + remove_bits(num_bits); + } -uint symbol_codec::decode_golomb(uint m) { - CRNLIB_ASSERT(m > 1); + uint symbol_codec::decode_peek_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + { + return 0; + } + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) + { + c = *m_pDecode_buf_next++; + } + } + } + else + { + c = *m_pDecode_buf_next++; + } + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - 
m_bit_count)); + } + + return static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + } - uint q = 0; + uint symbol_codec::decode(adaptive_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(!model.m_encoding); + + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + while (m_bit_count < (cBitBufSize - 8)) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) + { + c = *m_pDecode_buf_next++; + } + } + } + else + { + c = *m_pDecode_buf_next++; + } + + m_bit_count += 8; + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); + uint sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + + CRNLIB_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRNLIB_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for (;;) + { + if (k <= pTables->m_max_codes[len - 1]) + { + break; + } + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + + if (((uint)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRNLIB_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); + + if (freq == cUINT16_MAX) + { + model.rescale(); + } + + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + model.update(); + } + + return sym; + } - for 
(;;) { - uint k = decode_peek_bits(16); + void symbol_codec::decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf_next; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + if (!m_pDecode_need_bytes_func) + { + m_decode_buf_eof = true; + } + else + { + m_decode_buf_eof = eof_flag; + } + } - uint l = utils::count_leading_zeros16((~k) & 0xFFFF); - q += l; - if (l < 16) - break; - } + bool symbol_codec::decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + const uint total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + if (!total_used_syms) + { + model.clear(); + return true; + } + + model.m_code_sizes.resize(total_used_syms); + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + + const uint num_codelength_codes_to_send = decode_bits(5); + if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + { + return false; + } + + static_huffman_data_model dm; + dm.m_code_sizes.resize(cMaxCodelengthCodes); + + for (uint i = 0; i < num_codelength_codes_to_send; i++) + { + dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + } + + if (!dm.prepare_decoder_tables()) + { + return false; + } + + uint ofs = 0; + while (ofs < total_used_syms) + { + const uint num_remaining = total_used_syms - ofs; + + uint code = decode(dm); + if (code <= 16) + { + model.m_code_sizes[ofs++] = static_cast(code); + } + else if (code == cSmallZeroRunCode) + { + uint len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + { + return false; + } + ofs += len; + } + else if (code == cLargeZeroRunCode) + { + uint len = decode_bits(cLargeZeroRunExtraBits) + 
cMinLargeZeroRunSize; + if (len > num_remaining) + { + return false; + } + ofs += len; + } + else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) + { + uint len; + if (code == cSmallRepeatCode) + { + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + } + else + { + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + } + + if ((!ofs) || (len > num_remaining)) + { + return false; + } + const uint prev = model.m_code_sizes[ofs - 1]; + if (!prev) + { + return false; + } + const uint end = ofs + len; + while (ofs < end) + { + model.m_code_sizes[ofs++] = static_cast(prev); + } + } + else + { + CRNLIB_ASSERT(0); + return false; + } + } + + if (ofs != total_used_syms) + { + return false; + } + + if ((pDeltaModel) && (pDeltaModel->get_total_syms())) + { + uint n = math::minimum(pDeltaModel->m_code_sizes.size(), total_used_syms); + for (uint i = 0; i < n; i++) + { + int codesize = model.m_code_sizes[i] + pDeltaModel->m_code_sizes[i]; + if (codesize > 16) + { + codesize -= 17; + } + model.m_code_sizes[i] = static_cast(codesize); + } + } + + return model.prepare_decoder_tables(); + } - decode_remove_bits(q + 1); + uint symbol_codec::decode(static_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(!model.m_encoding); + + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + while (m_bit_count < (cBitBufSize - 8)) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) + { + c = *m_pDecode_buf_next++; + } + } + } + else + { + c = *m_pDecode_buf_next++; + } + + m_bit_count += 8; + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + 
uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); + uint sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + + CRNLIB_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRNLIB_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for (;;) + { + if (k <= pTables->m_max_codes[len - 1]) + { + break; + } + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + + if (((uint)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRNLIB_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; + } - uint r = decode_truncated_binary(m); + uint symbol_codec::decode_truncated_binary(uint n) + { + CRNLIB_ASSERT(n >= 2); - return (q * m) + r; -} + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; -uint symbol_codec::decode_rice(uint m) { - CRNLIB_ASSERT(m > 0); + uint i = decode_bits(k); - uint q = 0; + if (i >= u) + { + i = ((i << 1) | decode_bits(1)) - u; + } - for (;;) { - uint k = decode_peek_bits(16); + return i; + } - uint l = utils::count_leading_zeros16((~k) & 0xFFFF); + uint symbol_codec::decode_golomb(uint m) + { + CRNLIB_ASSERT(m > 1); - q += l; + uint q = 0; - decode_remove_bits(l); + for (;;) + { + uint k = decode_peek_bits(16); - if (l < 16) - break; - } + uint l = utils::count_leading_zeros16((~k) & 0xFFFF); + q += l; + if (l < 16) + { + break; + } + } - decode_remove_bits(1); + decode_remove_bits(q + 1); - uint r = decode_bits(m); + uint r = decode_truncated_binary(m); - return (q << m) + r; -} + return (q * m) + r; + } -uint64 symbol_codec::stop_decoding() { - CRNLIB_ASSERT(m_mode == cDecoding); + uint symbol_codec::decode_rice(uint m) + { + CRNLIB_ASSERT(m > 0); - uint64 n = m_pDecode_buf_next - m_pDecode_buf; + uint q = 0; - 
m_mode = cNull; + for (;;) + { + uint k = decode_peek_bits(16); - return n; -} + uint l = utils::count_leading_zeros16((~k) & 0xFFFF); -void symbol_codec::get_bits_init() { - m_bit_buf = 0; - m_bit_count = 0; -} + q += l; -uint symbol_codec::get_bits(uint num_bits) { - CRNLIB_ASSERT(num_bits <= 25); + decode_remove_bits(l); - if (!num_bits) - return 0; + if (l < 16) + { + break; + } + } - while (m_bit_count < (int)num_bits) { - uint c = 0; - if (m_pDecode_buf_next == m_pDecode_buf_end) { - if (!m_decode_buf_eof) { - m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - if (m_pDecode_buf_next < m_pDecode_buf_end) - c = *m_pDecode_buf_next++; - } - } else - c = *m_pDecode_buf_next++; + decode_remove_bits(1); - m_bit_count += 8; - CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + uint r = decode_bits(m); - m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); - } + return (q << m) + r; + } - uint result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + uint64 symbol_codec::stop_decoding() + { + CRNLIB_ASSERT(m_mode == cDecoding); - m_bit_buf <<= num_bits; - m_bit_count -= num_bits; + uint64 n = m_pDecode_buf_next - m_pDecode_buf; - return result; -} + m_mode = cNull; -void symbol_codec::remove_bits(uint num_bits) { - CRNLIB_ASSERT(num_bits <= 25); + return n; + } - if (!num_bits) - return; + void symbol_codec::get_bits_init() + { + m_bit_buf = 0; + m_bit_count = 0; + } - while (m_bit_count < (int)num_bits) { - uint c = 0; - if (m_pDecode_buf_next == m_pDecode_buf_end) { - if (!m_decode_buf_eof) { - m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - if (m_pDecode_buf_next < m_pDecode_buf_end) - c = 
*m_pDecode_buf_next++; - } - } else - c = *m_pDecode_buf_next++; + uint symbol_codec::get_bits(uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + { + return 0; + } + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) + { + c = *m_pDecode_buf_next++; + } + } + } + else + { + c = *m_pDecode_buf_next++; + } + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; + } - m_bit_count += 8; - CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + void symbol_codec::remove_bits(uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + { + return; + } + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) + { + c = *m_pDecode_buf_next++; + } + } + } + else + { + c = *m_pDecode_buf_next++; + } + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + } - m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); - } + void symbol_codec::decode_align_to_byte() + { + 
CRNLIB_ASSERT(m_mode == cDecoding); - m_bit_buf <<= num_bits; - m_bit_count -= num_bits; -} + if (m_bit_count & 7) + { + remove_bits(m_bit_count & 7); + } + } -void symbol_codec::decode_align_to_byte() { - CRNLIB_ASSERT(m_mode == cDecoding); + int symbol_codec::decode_remove_byte_from_bit_buf() + { + if (m_bit_count < 8) + { + return -1; + } + int result = static_cast(m_bit_buf >> (cBitBufSize - 8)); + m_bit_buf <<= 8; + m_bit_count -= 8; + return result; + } - if (m_bit_count & 7) { - remove_bits(m_bit_count & 7); - } -} + uint symbol_codec::decode(adaptive_bit_model& model, bool update_model) + { + if (m_arith_length < cSymbolCodecArithMinLen) + { + uint c = get_bits(8); + m_arith_value = (m_arith_value << 8) | c; + + m_arith_length <<= 8; + CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); + } + + CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); + + //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (!bit) + { + if (update_model) + { + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + } + + m_arith_length = x; + } + else + { + if (update_model) + { + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + } + + m_arith_value -= x; + m_arith_length -= x; + } + + return bit; + } -int symbol_codec::decode_remove_byte_from_bit_buf() { - if (m_bit_count < 8) - return -1; - int result = static_cast(m_bit_buf >> (cBitBufSize - 8)); - m_bit_buf <<= 8; - m_bit_count -= 8; - return result; -} + uint symbol_codec::decode(adaptive_arith_data_model& model) + { + uint node = 1; -uint symbol_codec::decode(adaptive_bit_model& model, bool update_model) { - if (m_arith_length < cSymbolCodecArithMinLen) { - uint c = get_bits(8); - 
m_arith_value = (m_arith_value << 8) | c; + do + { + uint bit = decode(model.m_probs[node]); - m_arith_length <<= 8; - CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); - } + node = (node << 1) + bit; - CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); - - //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; - uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); - uint bit = (m_arith_value >= x); - - if (!bit) { - if (update_model) - model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); - - m_arith_length = x; - } else { - if (update_model) - model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); - - m_arith_value -= x; - m_arith_length -= x; - } - - return bit; -} - -uint symbol_codec::decode(adaptive_arith_data_model& model) { - uint node = 1; - - do { - uint bit = decode(model.m_probs[node]); + } while (node < model.m_total_syms); - node = (node << 1) + bit; + return node - model.m_total_syms; + } - } while (node < model.m_total_syms); + void symbol_codec::start_arith_decoding() + { + CRNLIB_ASSERT(m_mode == cDecoding); + + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 0; + + if (get_bits(1)) + { + m_arith_value = (get_bits(8) << 24); + m_arith_value |= (get_bits(8) << 16); + m_arith_value |= (get_bits(8) << 8); + m_arith_value |= get_bits(8); + } + } - return node - model.m_total_syms; -} - -void symbol_codec::start_arith_decoding() { - CRNLIB_ASSERT(m_mode == cDecoding); - - m_arith_length = cSymbolCodecArithMaxLen; - m_arith_value = 0; - - if (get_bits(1)) { - m_arith_value = (get_bits(8) << 24); - m_arith_value |= (get_bits(8) << 16); - m_arith_value |= (get_bits(8) << 8); - m_arith_value |= get_bits(8); - } -} - -void symbol_codec::decode_need_bytes() { - if (!m_decode_buf_eof) { - 
m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); - m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; - m_pDecode_buf_next = m_pDecode_buf; - } -} - -} // namespace crnlib + void symbol_codec::decode_need_bytes() + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + } + } +} // namespace crnlib diff --git a/crnlib/crn_symbol_codec.h b/crnlib/crn_symbol_codec.h index 6f10dc7..ca7baf1 100644 --- a/crnlib/crn_symbol_codec.h +++ b/crnlib/crn_symbol_codec.h @@ -1,179 +1,248 @@ -// File: crn_symbol_codec.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_prefix_coding.h" #include "crn_export.h" -namespace crnlib { -class symbol_codec; -class adaptive_arith_data_model; - -const uint cSymbolCodecArithMinLen = 0x01000000U; -const uint cSymbolCodecArithMaxLen = 0xFFFFFFFFU; - -const uint cSymbolCodecArithProbBits = 11; -const uint cSymbolCodecArithProbScale = 1 << cSymbolCodecArithProbBits; -const uint cSymbolCodecArithProbMoveBits = 5; - -const uint cSymbolCodecArithProbMulBits = 8; -const uint cSymbolCodecArithProbMulScale = 1 << cSymbolCodecArithProbMulBits; - -class CRN_EXPORT symbol_histogram { - public: - inline symbol_histogram(uint size = 0) - : m_hist(size) {} - - inline void clear() { m_hist.clear(); } - - inline uint size() const { return static_cast(m_hist.size()); } - - inline void inc_freq(uint x, uint amount = 1) { - uint h = m_hist[x]; - CRNLIB_ASSERT(amount <= (0xFFFFFFFF - h)); - m_hist[x] = h + amount; - } - - inline void set_all(uint val) { - for (uint i = 0; i < m_hist.size(); i++) - m_hist[i] = val; - } - - inline void resize(uint new_size) { m_hist.resize(new_size); } +namespace crnlib +{ + class symbol_codec; + class adaptive_arith_data_model; + + const uint cSymbolCodecArithMinLen = 0x01000000U; + const uint cSymbolCodecArithMaxLen = 0xFFFFFFFFU; + + const uint cSymbolCodecArithProbBits = 11; + const uint cSymbolCodecArithProbScale = 1 << cSymbolCodecArithProbBits; + const uint cSymbolCodecArithProbMoveBits = 5; + + const uint cSymbolCodecArithProbMulBits = 8; + const uint cSymbolCodecArithProbMulScale = 1 << cSymbolCodecArithProbMulBits; + + class CRN_EXPORT symbol_histogram + { + public: + inline symbol_histogram(uint size = 0) : + m_hist(size) + { + } - inline const uint* get_ptr() const { return m_hist.empty() ? 
nullptr : &m_hist.front(); } + inline void clear() + { + m_hist.clear(); + } + + inline uint size() const + { + return static_cast(m_hist.size()); + } - double calc_entropy() const; + inline void inc_freq(uint x, uint amount = 1) + { + uint h = m_hist[x]; + CRNLIB_ASSERT(amount <= (0xFFFFFFFF - h)); + m_hist[x] = h + amount; + } - uint operator[](uint i) const { return m_hist[i]; } - uint& operator[](uint i) { return m_hist[i]; } + inline void set_all(uint val) + { + for (uint i = 0; i < m_hist.size(); i++) { + m_hist[i] = val; +} + } + + inline void resize(uint new_size) + { + m_hist.resize(new_size); + } + + inline const uint* get_ptr() const + { + return m_hist.empty() ? nullptr : &m_hist.front(); + } + + double calc_entropy() const; + + uint operator[](uint i) const + { + return m_hist[i]; + } + uint& operator[](uint i) + { + return m_hist[i]; + } - uint64 get_total() const; + uint64 get_total() const; - private: - crnlib::vector m_hist; -}; + private: + crnlib::vector m_hist; + }; -class CRN_EXPORT adaptive_huffman_data_model { - public: - adaptive_huffman_data_model(bool encoding = true, uint total_syms = 0); - adaptive_huffman_data_model(const adaptive_huffman_data_model& other); - ~adaptive_huffman_data_model(); + class CRN_EXPORT adaptive_huffman_data_model + { + public: + adaptive_huffman_data_model(bool encoding = true, uint total_syms = 0); + adaptive_huffman_data_model(const adaptive_huffman_data_model& other); + ~adaptive_huffman_data_model(); - adaptive_huffman_data_model& operator=(const adaptive_huffman_data_model& rhs); + adaptive_huffman_data_model& operator=(const adaptive_huffman_data_model& rhs); - void clear(); + void clear(); - void init(bool encoding, uint total_syms); - void reset(); + void init(bool encoding, uint total_syms); + void reset(); - void rescale(); + void rescale(); - uint get_total_syms() const { return m_total_syms; } - uint get_cost(uint sym) const { return m_code_sizes[sym]; } + uint get_total_syms() const + { + return 
m_total_syms; + } + uint get_cost(uint sym) const + { + return m_code_sizes[sym]; + } - public: - uint m_total_syms; + public: + uint m_total_syms; - uint m_update_cycle; - uint m_symbols_until_update; + uint m_update_cycle; + uint m_symbols_until_update; - uint m_total_count; + uint m_total_count; - crnlib::vector m_sym_freq; + crnlib::vector m_sym_freq; - crnlib::vector m_codes; - crnlib::vector m_code_sizes; + crnlib::vector m_codes; + crnlib::vector m_code_sizes; - prefix_coding::decoder_tables* m_pDecode_tables; + prefix_coding::decoder_tables* m_pDecode_tables; - uint8 m_decoder_table_bits; - bool m_encoding; + uint8 m_decoder_table_bits; + bool m_encoding; - void update(); + void update(); - friend class symbol_codec; -}; + friend class symbol_codec; + }; -class CRN_EXPORT static_huffman_data_model { - public: - static_huffman_data_model(); - static_huffman_data_model(const static_huffman_data_model& other); - ~static_huffman_data_model(); + class CRN_EXPORT static_huffman_data_model + { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); - static_huffman_data_model& operator=(const static_huffman_data_model& rhs); + static_huffman_data_model& operator=(const static_huffman_data_model& rhs); - void clear(); + void clear(); - bool init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit); - bool init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit); - bool init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit); - bool init(bool encoding, const symbol_histogram& hist, uint code_size_limit); + bool init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit); + bool init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit); + bool init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit); + bool init(bool encoding, 
const symbol_histogram& hist, uint code_size_limit); - uint get_total_syms() const { return m_total_syms; } - uint get_cost(uint sym) const { return m_code_sizes[sym]; } + uint get_total_syms() const + { + return m_total_syms; + } + uint get_cost(uint sym) const + { + return m_code_sizes[sym]; + } - const uint8* get_code_sizes() const { return m_code_sizes.empty() ? nullptr : &m_code_sizes[0]; } + const uint8* get_code_sizes() const + { + return m_code_sizes.empty() ? nullptr : &m_code_sizes[0]; + } - private: - uint m_total_syms; + private: + uint m_total_syms; - crnlib::vector m_codes; - crnlib::vector m_code_sizes; + crnlib::vector m_codes; + crnlib::vector m_code_sizes; - prefix_coding::decoder_tables* m_pDecode_tables; + prefix_coding::decoder_tables* m_pDecode_tables; - bool m_encoding; + bool m_encoding; - bool prepare_decoder_tables(); - uint compute_decoder_table_bits() const; + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; - friend class symbol_codec; -}; + friend class symbol_codec; + }; -class CRN_EXPORT adaptive_bit_model { - public: - adaptive_bit_model(); - adaptive_bit_model(float prob0); - adaptive_bit_model(const adaptive_bit_model& other); + class CRN_EXPORT adaptive_bit_model + { + public: + adaptive_bit_model(); + adaptive_bit_model(float prob0); + adaptive_bit_model(const adaptive_bit_model& other); - adaptive_bit_model& operator=(const adaptive_bit_model& rhs); + adaptive_bit_model& operator=(const adaptive_bit_model& rhs); - void clear(); - void set_probability_0(float prob0); - void update(uint bit); + void clear(); + void set_probability_0(float prob0); + void update(uint bit); - float get_cost(uint bit) const; + float get_cost(uint bit) const; - public: - uint16 m_bit_0_prob; + public: + uint16 m_bit_0_prob; - friend class symbol_codec; - friend class adaptive_arith_data_model; -}; + friend class symbol_codec; + friend class adaptive_arith_data_model; + }; -class CRN_EXPORT adaptive_arith_data_model { - public: 
- adaptive_arith_data_model(bool encoding = true, uint total_syms = 0); - adaptive_arith_data_model(const adaptive_arith_data_model& other); - ~adaptive_arith_data_model(); + class CRN_EXPORT adaptive_arith_data_model + { + public: + adaptive_arith_data_model(bool encoding = true, uint total_syms = 0); + adaptive_arith_data_model(const adaptive_arith_data_model& other); + ~adaptive_arith_data_model(); - adaptive_arith_data_model& operator=(const adaptive_arith_data_model& rhs); + adaptive_arith_data_model& operator=(const adaptive_arith_data_model& rhs); - void clear(); + void clear(); - void init(bool encoding, uint total_syms); - void reset(); + void init(bool encoding, uint total_syms); + void reset(); - uint get_total_syms() const { return m_total_syms; } - float get_cost(uint sym) const; + uint get_total_syms() const + { + return m_total_syms; + } + float get_cost(uint sym) const; - private: - uint m_total_syms; - typedef crnlib::vector adaptive_bit_model_vector; - adaptive_bit_model_vector m_probs; + private: + uint m_total_syms; + typedef crnlib::vector adaptive_bit_model_vector; + adaptive_bit_model_vector m_probs; - friend class symbol_codec; -}; + friend class symbol_codec; + }; #if defined(_WIN64) #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 1 @@ -181,134 +250,170 @@ class CRN_EXPORT adaptive_arith_data_model { #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 0 #endif -class CRN_EXPORT symbol_codec { - public: - symbol_codec(); - - void clear(); - - // Encoding - void start_encoding(uint expected_file_size); - uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = nullptr); - void encode_bits(uint bits, uint num_bits); - void encode_align_to_byte(); - void encode(uint sym, adaptive_huffman_data_model& model); - void encode(uint sym, static_huffman_data_model& model); - void encode_truncated_binary(uint v, uint n); - static uint encode_truncated_binary_cost(uint v, uint n); - 
void encode_golomb(uint v, uint m); - void encode_rice(uint v, uint m); - static uint encode_rice_get_cost(uint v, uint m); - void encode(uint bit, adaptive_bit_model& model, bool update_model = true); - void encode(uint sym, adaptive_arith_data_model& model); - - inline void encode_enable_simulation(bool enabled) { m_simulate_encoding = enabled; } - inline bool encode_get_simulation() { return m_simulate_encoding; } - inline uint encode_get_total_bits_written() const { return m_total_bits_written; } - - void stop_encoding(bool support_arith); - - const crnlib::vector& get_encoding_buf() const { return m_output_buf; } - crnlib::vector& get_encoding_buf() { return m_output_buf; } - - // Decoding - - typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void* pPrivate_data, const uint8*& pBuf, size_t& buf_size, bool& eof_flag); - - bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = nullptr, void* pPrivate_data = nullptr); - void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag = true); - inline uint64 decode_get_bytes_consumed() const { return m_pDecode_buf_next - m_pDecode_buf; } - inline uint64 decode_get_bits_remaining() const { return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; } - void start_arith_decoding(); - bool decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel); - uint decode_bits(uint num_bits); - uint decode_peek_bits(uint num_bits); - void decode_remove_bits(uint num_bits); - void decode_align_to_byte(); - int decode_remove_byte_from_bit_buf(); - uint decode(adaptive_huffman_data_model& model); - uint decode(static_huffman_data_model& model); - uint decode_truncated_binary(uint n); - uint decode_golomb(uint m); - uint decode_rice(uint m); - uint decode(adaptive_bit_model& model, bool update_model = true); - uint decode(adaptive_arith_data_model& 
model); - uint64 stop_decoding(); - - uint get_total_model_updates() const { return m_total_model_updates; } - - public: - const uint8* m_pDecode_buf; - const uint8* m_pDecode_buf_next; - const uint8* m_pDecode_buf_end; - size_t m_decode_buf_size; - bool m_decode_buf_eof; - - need_bytes_func_ptr m_pDecode_need_bytes_func; - void* m_pDecode_private_data; + class CRN_EXPORT symbol_codec + { + public: + symbol_codec(); + + void clear(); + + // Encoding + void start_encoding(uint expected_file_size); + uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = nullptr); + void encode_bits(uint bits, uint num_bits); + void encode_align_to_byte(); + void encode(uint sym, adaptive_huffman_data_model& model); + void encode(uint sym, static_huffman_data_model& model); + void encode_truncated_binary(uint v, uint n); + static uint encode_truncated_binary_cost(uint v, uint n); + void encode_golomb(uint v, uint m); + void encode_rice(uint v, uint m); + static uint encode_rice_get_cost(uint v, uint m); + void encode(uint bit, adaptive_bit_model& model, bool update_model = true); + void encode(uint sym, adaptive_arith_data_model& model); + + inline void encode_enable_simulation(bool enabled) + { + m_simulate_encoding = enabled; + } + inline bool encode_get_simulation() + { + return m_simulate_encoding; + } + inline uint encode_get_total_bits_written() const + { + return m_total_bits_written; + } + + void stop_encoding(bool support_arith); + + const crnlib::vector& get_encoding_buf() const + { + return m_output_buf; + } + crnlib::vector& get_encoding_buf() + { + return m_output_buf; + } + + // Decoding + + typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void* pPrivate_data, const uint8*& pBuf, size_t& buf_size, bool& eof_flag); + + bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = nullptr, void* pPrivate_data = nullptr); + 
void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag = true); + inline uint64 decode_get_bytes_consumed() const + { + return m_pDecode_buf_next - m_pDecode_buf; + } + inline uint64 decode_get_bits_remaining() const + { + return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; + } + void start_arith_decoding(); + bool decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel); + uint decode_bits(uint num_bits); + uint decode_peek_bits(uint num_bits); + void decode_remove_bits(uint num_bits); + void decode_align_to_byte(); + int decode_remove_byte_from_bit_buf(); + uint decode(adaptive_huffman_data_model& model); + uint decode(static_huffman_data_model& model); + uint decode_truncated_binary(uint n); + uint decode_golomb(uint m); + uint decode_rice(uint m); + uint decode(adaptive_bit_model& model, bool update_model = true); + uint decode(adaptive_arith_data_model& model); + uint64 stop_decoding(); + + uint get_total_model_updates() const + { + return m_total_model_updates; + } + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + size_t m_decode_buf_size; + bool m_decode_buf_eof; + + need_bytes_func_ptr m_pDecode_need_bytes_func; + void* m_pDecode_private_data; #if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER - typedef uint64 bit_buf_t; - enum { cBitBufSize = 64 }; + typedef uint64 bit_buf_t; + enum + { + cBitBufSize = 64 + }; #else - typedef uint32 bit_buf_t; - enum { cBitBufSize = 32 }; + typedef uint32 bit_buf_t; + enum + { + cBitBufSize = 32 + }; #endif - bit_buf_t m_bit_buf; - int m_bit_count; + bit_buf_t m_bit_buf; + int m_bit_count; - uint m_total_model_updates; + uint m_total_model_updates; - crnlib::vector m_output_buf; - crnlib::vector m_arith_output_buf; + crnlib::vector m_output_buf; + crnlib::vector m_arith_output_buf; - struct output_symbol { - uint m_bits; + struct output_symbol + { 
+ uint m_bits; - enum { cArithSym = -1, - cAlignToByteSym = -2 }; - int16 m_num_bits; + enum + { + cArithSym = -1, + cAlignToByteSym = -2 + }; + int16 m_num_bits; - uint16 m_arith_prob0; - }; - crnlib::vector m_output_syms; + uint16 m_arith_prob0; + }; + crnlib::vector m_output_syms; - uint m_total_bits_written; - bool m_simulate_encoding; + uint m_total_bits_written; + bool m_simulate_encoding; - uint m_arith_base; - uint m_arith_value; - uint m_arith_length; - uint m_arith_total_bits; + uint m_arith_base; + uint m_arith_value; + uint m_arith_length; + uint m_arith_total_bits; - bool m_support_arith; + bool m_support_arith; - void put_bits_init(uint expected_size); - void record_put_bits(uint bits, uint num_bits); + void put_bits_init(uint expected_size); + void record_put_bits(uint bits, uint num_bits); - void arith_propagate_carry(); - void arith_renorm_enc_interval(); - void arith_start_encoding(); - void arith_stop_encoding(); + void arith_propagate_carry(); + void arith_renorm_enc_interval(); + void arith_start_encoding(); + void arith_stop_encoding(); - void put_bits(uint bits, uint num_bits); - void put_bits_align_to_byte(); - void flush_bits(); - void assemble_output_buf(bool support_arith); + void put_bits(uint bits, uint num_bits); + void put_bits_align_to_byte(); + void flush_bits(); + void assemble_output_buf(bool support_arith); - void get_bits_init(); - uint get_bits(uint num_bits); - void remove_bits(uint num_bits); + void get_bits_init(); + uint get_bits(uint num_bits); + void remove_bits(uint num_bits); - void decode_need_bytes(); + void decode_need_bytes(); - enum { - cNull, - cEncoding, - cDecoding - } m_mode; -}; + enum + { + cNull, + cEncoding, + cDecoding + } m_mode; + }; #define CRNLIB_SYMBOL_CODEC_USE_MACROS 1 @@ -320,165 +425,190 @@ class CRN_EXPORT symbol_codec { #if CRNLIB_SYMBOL_CODEC_USE_MACROS #define CRNLIB_SYMBOL_CODEC_DECODE_DECLARE(codec) \ - uint arith_value; \ - uint arith_length; \ - symbol_codec::bit_buf_t bit_buf; \ - int 
bit_count; \ - const uint8* pDecode_buf_next; + uint arith_value; \ + uint arith_length; \ + symbol_codec::bit_buf_t bit_buf; \ + int bit_count; \ + const uint8* pDecode_buf_next; #define CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ - arith_value = codec.m_arith_value; \ - arith_length = codec.m_arith_length; \ - bit_buf = codec.m_bit_buf; \ - bit_count = codec.m_bit_count; \ - pDecode_buf_next = codec.m_pDecode_buf_next; + arith_value = codec.m_arith_value; \ + arith_length = codec.m_arith_length; \ + bit_buf = codec.m_bit_buf; \ + bit_count = codec.m_bit_count; \ + pDecode_buf_next = codec.m_pDecode_buf_next; #define CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ - codec.m_arith_value = arith_value; \ - codec.m_arith_length = arith_length; \ - codec.m_bit_buf = bit_buf; \ - codec.m_bit_count = bit_count; \ - codec.m_pDecode_buf_next = pDecode_buf_next; - -#define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) \ - { \ - while (bit_count < (int)(num_bits)) { \ - uint c = 0; \ - if (pDecode_buf_next == codec.m_pDecode_buf_end) { \ - CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ - codec.decode_need_bytes(); \ - CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ - if (pDecode_buf_next < codec.m_pDecode_buf_end) \ - c = *pDecode_buf_next++; \ - } else \ - c = *pDecode_buf_next++; \ - bit_count += 8; \ - bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ - } \ - result = num_bits ? 
static_cast(bit_buf >> (symbol_codec::cBitBufSize - (num_bits))) : 0; \ - bit_buf <<= (num_bits); \ - bit_count -= (num_bits); \ - } - -#define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) \ - { \ - if (arith_length < cSymbolCodecArithMinLen) { \ - uint c; \ - CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ - arith_value = (arith_value << 8) | c; \ - arith_length <<= 8; \ - } \ - uint x = model.m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ - result = (arith_value >= x); \ - if (!result) { \ - model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ - arith_length = x; \ - } else { \ - model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ - arith_value -= x; \ - arith_length -= x; \ - } \ - } + codec.m_arith_value = arith_value; \ + codec.m_arith_length = arith_length; \ + codec.m_bit_buf = bit_buf; \ + codec.m_bit_count = bit_count; \ + codec.m_pDecode_buf_next = pDecode_buf_next; + +#define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) \ + { \ + while (bit_count < (int)(num_bits)) \ + { \ + uint c = 0; \ + if (pDecode_buf_next == codec.m_pDecode_buf_end) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) \ + c = *pDecode_buf_next++; \ + } \ + else \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + result = num_bits ? 
static_cast(bit_buf >> (symbol_codec::cBitBufSize - (num_bits))) : 0; \ + bit_buf <<= (num_bits); \ + bit_count -= (num_bits); \ + } + +#define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) \ + { \ + if (arith_length < cSymbolCodecArithMinLen) \ + { \ + uint c; \ + CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ + arith_value = (arith_value << 8) | c; \ + arith_length <<= 8; \ + } \ + uint x = model.m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ + result = (arith_value >= x); \ + if (!result) \ + { \ + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ + arith_length = x; \ + } \ + else \ + { \ + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ + arith_value -= x; \ + arith_length -= x; \ + } \ + } #if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER -#define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ - { \ - const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ - if (bit_count < 24) { \ - uint c = 0; \ - pDecode_buf_next += sizeof(uint32); \ - if (pDecode_buf_next >= codec.m_pDecode_buf_end) { \ - pDecode_buf_next -= sizeof(uint32); \ - while (bit_count < 24) { \ - CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ - codec.decode_need_bytes(); \ - CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ - if (pDecode_buf_next < codec.m_pDecode_buf_end) \ - c = *pDecode_buf_next++; \ - bit_count += 8; \ - bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ - } \ - } else { \ - c = CRNLIB_READ_BIG_ENDIAN_UINT32(pDecode_buf_next - sizeof(uint32)); \ - bit_count += 32; \ - bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ - } \ - } \ - uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ - uint len; \ - if (k <= pTables->m_table_max_code) { \ - uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ - result = t & UINT16_MAX; \ - len = 
t >> 16; \ - } else { \ - len = pTables->m_decode_start_code_size; \ - for (;;) { \ - if (k <= pTables->m_max_codes[len - 1]) \ - break; \ - len++; \ - } \ - int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ - if (((uint)val_ptr >= model.m_total_syms)) \ - val_ptr = 0; \ - result = pTables->m_sorted_symbol_order[val_ptr]; \ - } \ - bit_buf <<= len; \ - bit_count -= len; \ - uint freq = model.m_sym_freq[result]; \ - freq++; \ - model.m_sym_freq[result] = static_cast(freq); \ - if (freq == UINT16_MAX) \ - model.rescale(); \ - if (--model.m_symbols_until_update == 0) { \ - model.update(); \ - } \ - } +#define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ + { \ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ + if (bit_count < 24) \ + { \ + uint c = 0; \ + pDecode_buf_next += sizeof(uint32); \ + if (pDecode_buf_next >= codec.m_pDecode_buf_end) \ + { \ + pDecode_buf_next -= sizeof(uint32); \ + while (bit_count < 24) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + else \ + { \ + c = CRNLIB_READ_BIG_ENDIAN_UINT32(pDecode_buf_next - sizeof(uint32)); \ + bit_count += 32; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (k <= pTables->m_table_max_code) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t & UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for (;;) \ + { \ + if (k <= pTables->m_max_codes[len - 1]) \ + break; \ + len++; \ + } \ + int val_ptr = 
pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (((uint)val_ptr >= model.m_total_syms)) \ + val_ptr = 0; \ + result = pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = model.m_sym_freq[result]; \ + freq++; \ + model.m_sym_freq[result] = static_cast(freq); \ + if (freq == UINT16_MAX) \ + model.rescale(); \ + if (--model.m_symbols_until_update == 0) \ + { \ + model.update(); \ + } \ + } #else -#define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ - { \ - const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ - while (bit_count < (symbol_codec::cBitBufSize - 8)) { \ - uint c = 0; \ - if (pDecode_buf_next == codec.m_pDecode_buf_end) { \ - CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ - codec.decode_need_bytes(); \ - CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ - if (pDecode_buf_next < codec.m_pDecode_buf_end) \ - c = *pDecode_buf_next++; \ - } else \ - c = *pDecode_buf_next++; \ - bit_count += 8; \ - bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ - } \ - uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ - uint len; \ - if (k <= pTables->m_table_max_code) { \ - uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ - result = t & UINT16_MAX; \ - len = t >> 16; \ - } else { \ - len = pTables->m_decode_start_code_size; \ - for (;;) { \ - if (k <= pTables->m_max_codes[len - 1]) \ - break; \ - len++; \ - } \ - int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ - if (((uint)val_ptr >= model.m_total_syms)) \ - val_ptr = 0; \ - result = pTables->m_sorted_symbol_order[val_ptr]; \ - } \ - bit_buf <<= len; \ - bit_count -= len; \ - uint freq = model.m_sym_freq[result]; \ - freq++; \ - model.m_sym_freq[result] = static_cast(freq); \ - if (freq == UINT16_MAX) \ - model.rescale(); \ - if 
(--model.m_symbols_until_update == 0) { \ - model.update(); \ - } \ - } +#define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ + { \ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ + while (bit_count < (symbol_codec::cBitBufSize - 8)) \ + { \ + uint c = 0; \ + if (pDecode_buf_next == codec.m_pDecode_buf_end) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) \ + c = *pDecode_buf_next++; \ + } \ + else \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (k <= pTables->m_table_max_code) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t & UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for (;;) \ + { \ + if (k <= pTables->m_max_codes[len - 1]) \ + break; \ + len++; \ + } \ + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (((uint)val_ptr >= model.m_total_syms)) \ + val_ptr = 0; \ + result = pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = model.m_sym_freq[result]; \ + freq++; \ + model.m_sym_freq[result] = static_cast(freq); \ + if (freq == UINT16_MAX) \ + model.rescale(); \ + if (--model.m_symbols_until_update == 0) \ + { \ + model.update(); \ + } \ + } #endif #else @@ -491,4 +621,4 @@ class CRN_EXPORT symbol_codec { #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) result = codec.decode(model); #endif -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_texture_comp.cpp b/crnlib/crn_texture_comp.cpp index a81b3bc..058d2fd 100644 --- 
a/crnlib/crn_texture_comp.cpp +++ b/crnlib/crn_texture_comp.cpp @@ -1,5 +1,25 @@ -// File: crn_texture_comp.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #include "crn_core.h" #include "crn_texture_comp.h" @@ -113,7 +133,9 @@ namespace crnlib float cached_bitrates[cNumQualityLevels]; for (int i = 0; i < cNumQualityLevels; i++) + { cached_bitrates[i] = -1.0f; + } float highest_bitrate = 0.0f; @@ -133,7 +155,9 @@ namespace crnlib { int bracket_low = trial_quality; while ((cached_bitrates[bracket_low] < 0) && (bracket_low > cLowestQuality)) + { bracket_low--; + } if (cached_bitrates[bracket_low] < 0) { @@ -336,7 +360,9 @@ namespace crnlib case cCRNMipModeUseSourceOrGenerateMips: { if (work_tex.get_num_levels() == 1) + { generate_new_mips = true; + } break; } case cCRNMipModeUseSourceMips: @@ -461,7 +487,8 @@ namespace crnlib } break; } - case cCRNSMNextPow2: { + case cCRNSMNextPow2: + { if (!is_pow2) { math::compute_upper_pow2_dim(new_width, new_height); @@ -589,4 +616,4 @@ namespace crnlib return create_compressed_texture(new_params, comp_data, pActual_quality_level, pActual_bitrate); } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_texture_comp.h b/crnlib/crn_texture_comp.h index 867b1d4..77f15ae 100644 --- a/crnlib/crn_texture_comp.h +++ b/crnlib/crn_texture_comp.h @@ -1,5 +1,25 @@ -// File: crn_texture_comp.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -13,6 +33,7 @@ namespace crnlib class itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(itexture_comp); + public: itexture_comp() { @@ -32,11 +53,10 @@ namespace crnlib }; CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, - uint32* pActual_quality_level, float* pActual_bitrate); + uint32* pActual_quality_level, float* pActual_bitrate); CRN_EXPORT bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, - const crn_mipmap_params& mipmap_params, bool generate_mipmaps); + const crn_mipmap_params& mipmap_params, bool generate_mipmaps); CRN_EXPORT bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, - crnlib::vector& comp_data, uint32* pActual_quality_level, - float* pActual_bitrate); - -} // namespace crnlib + crnlib::vector& comp_data, uint32* pActual_quality_level, + float* pActual_bitrate); +} // namespace crnlib diff --git a/crnlib/crn_texture_conversion.cpp b/crnlib/crn_texture_conversion.cpp index 11d6431..a9224e5 100644 --- a/crnlib/crn_texture_conversion.cpp +++ b/crnlib/crn_texture_conversion.cpp @@ -1,5 +1,26 @@ -// File: crn_texture_conversion.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_texture_conversion.h" #include "crn_console.h" @@ -9,656 +30,826 @@ #include "crn_texture_comp.h" #include "crn_strutils.h" -namespace crnlib { -namespace texture_conversion { -struct progress_params { - convert_params* m_pParams; - uint m_start_percentage; - bool m_canceled; -}; - -convert_stats::convert_stats() { - clear(); -} - -bool convert_stats::init( - const char* pSrc_filename, - const char* pDst_filename, - mipmapped_texture& src_tex, - texture_file_types::format dst_file_type, - bool lzma_stats) { - m_src_filename = pSrc_filename; - m_dst_filename = pDst_filename; - m_dst_file_type = dst_file_type; - - m_pInput_tex = &src_tex; - - file_utils::get_file_size(pSrc_filename, m_input_file_size); - file_utils::get_file_size(pDst_filename, m_output_file_size); - - m_total_input_pixels = 0; - for (uint i = 0; i < src_tex.get_num_levels(); i++) { - uint width = math::maximum(1, src_tex.get_width() >> i); - uint height = math::maximum(1, src_tex.get_height() >> i); - m_total_input_pixels += width * height * src_tex.get_num_faces(); - } - - m_output_comp_file_size = 0; - - m_total_output_pixels = 0; - - if (lzma_stats) { - vector dst_tex_bytes; - if (!cfile_stream::read_file_into_array(pDst_filename, dst_tex_bytes)) { - console::error("Failed loading output file: %s", 
pDst_filename); - return false; - } - if (!dst_tex_bytes.size()) { - console::error("Output file is empty: %s", pDst_filename); - return false; - } - vector cmp_tex_bytes; - lzma_codec lossless_codec; - if (lossless_codec.pack(dst_tex_bytes.get_ptr(), dst_tex_bytes.size(), cmp_tex_bytes)) { - m_output_comp_file_size = cmp_tex_bytes.size(); - } - } - - if (!m_output_tex.read_from_file(pDst_filename, m_dst_file_type)) { - console::error("Failed loading output file: %s", pDst_filename); - return false; - } - - for (uint i = 0; i < m_output_tex.get_num_levels(); i++) { - uint width = math::maximum(1, m_output_tex.get_width() >> i); - uint height = math::maximum(1, m_output_tex.get_height() >> i); - m_total_output_pixels += width * height * m_output_tex.get_num_faces(); - } - CRNLIB_ASSERT(m_total_output_pixels == m_output_tex.get_total_pixels_in_all_faces_and_mips()); - - return true; -} - -bool convert_stats::print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile) const { - if (!m_pInput_tex) - return false; - - console::info("Input texture: %ux%u, Levels: %u, Faces: %u, Format: %s", +namespace crnlib +{ + namespace texture_conversion + { + struct progress_params + { + convert_params* m_pParams; + uint m_start_percentage; + bool m_canceled; + }; + + convert_stats::convert_stats() + { + clear(); + } + + bool convert_stats::init(const char* pSrc_filename, const char* pDst_filename, mipmapped_texture& src_tex, + texture_file_types::format dst_file_type, bool lzma_stats) + { + m_src_filename = pSrc_filename; + m_dst_filename = pDst_filename; + m_dst_file_type = dst_file_type; + + m_pInput_tex = &src_tex; + + file_utils::get_file_size(pSrc_filename, m_input_file_size); + file_utils::get_file_size(pDst_filename, m_output_file_size); + + m_total_input_pixels = 0; + for (uint i = 0; i < src_tex.get_num_levels(); i++) + { + uint width = math::maximum(1, src_tex.get_width() >> i); + uint height = math::maximum(1, src_tex.get_height() >> i); 
+ m_total_input_pixels += width * height * src_tex.get_num_faces(); + } + + m_output_comp_file_size = 0; + + m_total_output_pixels = 0; + + if (lzma_stats) + { + vector dst_tex_bytes; + if (!cfile_stream::read_file_into_array(pDst_filename, dst_tex_bytes)) + { + console::error("Failed loading output file: %s", pDst_filename); + return false; + } + if (!dst_tex_bytes.size()) + { + console::error("Output file is empty: %s", pDst_filename); + return false; + } + vector cmp_tex_bytes; + lzma_codec lossless_codec; + if (lossless_codec.pack(dst_tex_bytes.get_ptr(), dst_tex_bytes.size(), cmp_tex_bytes)) + { + m_output_comp_file_size = cmp_tex_bytes.size(); + } + } + + if (!m_output_tex.read_from_file(pDst_filename, m_dst_file_type)) + { + console::error("Failed loading output file: %s", pDst_filename); + return false; + } + + for (uint i = 0; i < m_output_tex.get_num_levels(); i++) + { + uint width = math::maximum(1, m_output_tex.get_width() >> i); + uint height = math::maximum(1, m_output_tex.get_height() >> i); + m_total_output_pixels += width * height * m_output_tex.get_num_faces(); + } + CRNLIB_ASSERT(m_total_output_pixels == m_output_tex.get_total_pixels_in_all_faces_and_mips()); + + return true; + } + + bool convert_stats::print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile) const + { + if (!m_pInput_tex) + { + return false; + } + + console::info("Input texture: %ux%u, Levels: %u, Faces: %u, Format: %s", m_pInput_tex->get_width(), m_pInput_tex->get_height(), m_pInput_tex->get_num_levels(), m_pInput_tex->get_num_faces(), pixel_format_helpers::get_pixel_format_string(m_pInput_tex->get_format())); - // Just casting the uint64's filesizes to uint32 here to work around gcc issues - it's not even possible to have files that large anyway. 
- console::info("Input pixels: %u, Input file size: %u, Input bits/pixel: %1.3f", + // Just casting the uint64's filesizes to uint32 here to work around gcc issues - it's not even possible to have files that large anyway. + console::info("Input pixels: %u, Input file size: %u, Input bits/pixel: %1.3f", m_total_input_pixels, (uint32)m_input_file_size, (m_input_file_size * 8.0f) / m_total_input_pixels); - console::info("Output texture: %ux%u, Levels: %u, Faces: %u, Format: %s", + console::info("Output texture: %ux%u, Levels: %u, Faces: %u, Format: %s", m_output_tex.get_width(), m_output_tex.get_height(), m_output_tex.get_num_levels(), m_output_tex.get_num_faces(), pixel_format_helpers::get_pixel_format_string(m_output_tex.get_format())); - console::info("Output pixels: %u, Output file size: %u, Output bits/pixel: %1.3f", + console::info("Output pixels: %u, Output file size: %u, Output bits/pixel: %1.3f", m_total_output_pixels, (uint32)m_output_file_size, (m_output_file_size * 8.0f) / m_total_output_pixels); - if (m_output_comp_file_size) { - console::info("LZMA compressed output file size: %u bytes, %1.3f bits/pixel", - (uint32)m_output_comp_file_size, (m_output_comp_file_size * 8.0f) / m_total_output_pixels); - } - if (psnr_metrics) { - if ((m_pInput_tex->get_width() != m_output_tex.get_width()) || (m_pInput_tex->get_height() != m_output_tex.get_height()) || (m_pInput_tex->get_num_faces() != m_output_tex.get_num_faces())) { - console::warning("Unable to compute image statistics - input/output texture dimensions are different."); - } else { - uint num_faces = math::minimum(m_pInput_tex->get_num_faces(), m_output_tex.get_num_faces()); - uint num_levels = math::minimum(m_pInput_tex->get_num_levels(), m_output_tex.get_num_levels()); - - if (!mip_stats) - num_levels = 1; - - for (uint face = 0; face < num_faces; face++) { - for (uint level = 0; level < num_levels; level++) { - image_u8 a, b; - image_u8* pA = m_pInput_tex->get_level_image(face, level, a); - image_u8* pB = 
m_output_tex.get_level_image(face, level, b); - - if (pA && pB) { - image_u8 grayscale_a, grayscale_b; - if (grayscale_sampling) { - grayscale_a = *pA; - grayscale_a.convert_to_grayscale(); - pA = &grayscale_a; - - grayscale_b = *pB; - grayscale_b.convert_to_grayscale(); - pB = &grayscale_b; - } - - console::info("Face %u Mipmap level %u statistics:", face, level); - image_utils::print_image_metrics(*pA, *pB); - - if ((pA->has_rgb()) || (pB->has_rgb())) - image_utils::print_ssim(*pA, *pB); - } + if (m_output_comp_file_size) + { + console::info("LZMA compressed output file size: %u bytes, %1.3f bits/pixel", + (uint32)m_output_comp_file_size, (m_output_comp_file_size * 8.0f) / m_total_output_pixels); + } + if (psnr_metrics) + { + if ((m_pInput_tex->get_width() != m_output_tex.get_width()) || (m_pInput_tex->get_height() != m_output_tex.get_height()) || (m_pInput_tex->get_num_faces() != m_output_tex.get_num_faces())) + { + console::warning("Unable to compute image statistics - input/output texture dimensions are different."); + } + else + { + uint num_faces = math::minimum(m_pInput_tex->get_num_faces(), m_output_tex.get_num_faces()); + uint num_levels = math::minimum(m_pInput_tex->get_num_levels(), m_output_tex.get_num_levels()); + + if (!mip_stats) + { + num_levels = 1; + } + + for (uint face = 0; face < num_faces; face++) + { + for (uint level = 0; level < num_levels; level++) + { + image_u8 a, b; + image_u8* pA = m_pInput_tex->get_level_image(face, level, a); + image_u8* pB = m_output_tex.get_level_image(face, level, b); + + if (pA && pB) + { + image_u8 grayscale_a, grayscale_b; + if (grayscale_sampling) + { + grayscale_a = *pA; + grayscale_a.convert_to_grayscale(); + pA = &grayscale_a; + + grayscale_b = *pB; + grayscale_b.convert_to_grayscale(); + pB = &grayscale_b; + } + + console::info("Face %u Mipmap level %u statistics:", face, level); + image_utils::print_image_metrics(*pA, *pB); + + if ((pA->has_rgb()) || (pB->has_rgb())) + { + image_utils::print_ssim(*pA, 
*pB); + } + } + } + } + + if (pCSVStatsFile) + { + // FIXME: This is kind of a hack, and should be combined with the code above. + image_u8 a, b; + image_u8* pA = m_pInput_tex->get_level_image(0, 0, a); + image_u8* pB = m_output_tex.get_level_image(0, 0, b); + if (pA && pB) + { + image_u8 grayscale_a, grayscale_b; + if (grayscale_sampling) + { + grayscale_a = *pA; + grayscale_a.convert_to_grayscale(); + pA = &grayscale_a; + + grayscale_b = *pB; + grayscale_b.convert_to_grayscale(); + pB = &grayscale_b; + } + + image_utils::error_metrics rgb_error; + image_utils::error_metrics luma_error; + if (rgb_error.compute(*pA, *pB, 0, 3, false) && luma_error.compute(*pA, *pB, 0, 0, true)) + { + bool bCSVStatsFileExists = file_utils::does_file_exist(pCSVStatsFile); + FILE* pFile; + crn_fopen(&pFile, pCSVStatsFile, "a"); + if (!pFile) + { + console::warning("Unable to append to CSV stats file: %s\n", pCSVStatsFile); + } + else + { + if (!bCSVStatsFileExists) + { + fprintf(pFile, "name,width,height,miplevels,rgb_rms,luma_rms,effective_output_size,effective_bitrate\n"); + } + dynamic_string filename; + file_utils::split_path(m_src_filename.get_ptr(), nullptr, nullptr, &filename, nullptr); + + uint64 effective_output_size = m_output_comp_file_size ? 
m_output_comp_file_size : m_output_file_size; + float bitrate = (effective_output_size * 8.0f) / m_total_output_pixels; + fprintf(pFile, "%s,%u,%u,%u,%f,%f,%u,%f\n", + filename.get_ptr(), + pB->get_width(), pB->get_height(), m_output_tex.get_num_levels(), + rgb_error.mRootMeanSquared, luma_error.mRootMeanSquared, + (uint32)effective_output_size, bitrate); + fclose(pFile); + } + } + } + } + } + } + + return true; + } + + void convert_stats::clear() + { + m_src_filename.clear(); + m_dst_filename.clear(); + m_dst_file_type = texture_file_types::cFormatInvalid; + + m_pInput_tex = nullptr; + m_output_tex.clear(); + + m_input_file_size = 0; + m_total_input_pixels = 0; + + m_output_file_size = 0; + m_total_output_pixels = 0; + + m_output_comp_file_size = 0; + } + + //----------------------------------------------------------------------- + + static crn_bool crn_progress_callback(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) + { + progress_params& params = *static_cast(pUser_data_ptr); + + if (params.m_canceled) + { + return false; + } + if (!params.m_pParams->m_pProgress_func) + { + return true; + } + + int percentage_complete = params.m_start_percentage + (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * (100.0f - params.m_start_percentage) / total_phases); + + percentage_complete = math::clamp(percentage_complete, 0, 100); + + if (!params.m_pParams->m_pProgress_func(percentage_complete, params.m_pParams->m_pProgress_user_data)) + { + params.m_canceled = true; + return false; + } + + return true; + } + + static bool dxt_progress_callback_func(uint percentage_complete, void* pUser_data_ptr) + { + progress_params& params = *static_cast(pUser_data_ptr); + + if (params.m_canceled) + { + return false; + } + + if (!params.m_pParams->m_pProgress_func) + { + return true; + } + + int scaled_percentage_complete = params.m_start_percentage + (percentage_complete * (100 - 
params.m_start_percentage)) / 100; + + scaled_percentage_complete = math::clamp(scaled_percentage_complete, 0, 100); + + if (!params.m_pParams->m_pProgress_func(scaled_percentage_complete, params.m_pParams->m_pProgress_user_data)) + { + params.m_canceled = true; + return false; + } + + return true; + } + + static bool convert_error(const convert_params& params, const char* pError_msg) + { + params.m_status = false; + params.m_error_message = pError_msg; + + remove(params.m_dst_filename.get_ptr()); + + return false; + } + + static pixel_format choose_pixel_format(convert_params& params, const crn_comp_params& comp_params, const mipmapped_texture& src_tex, texture_type tex_type) + { + const pixel_format src_fmt = src_tex.get_format(); + const texture_file_types::format src_file_type = src_tex.get_source_file_type(); + const bool is_normal_map = (tex_type == cTextureTypeNormalMap); + + if (params.m_always_use_source_pixel_format) + { + return src_fmt; + } + + // Attempt to choose a reasonable/sane output pixel format. + if (params.m_dst_file_type == texture_file_types::cFormatCRN) + { + if (is_normal_map) + { + if (pixel_format_helpers::is_dxt(src_fmt)) + { + return src_fmt; + } + else + { + return PIXEL_FMT_DXT5_AGBR; + } + } + } + else if (params.m_dst_file_type == texture_file_types::cFormatKTX) + { + if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) + { + if (is_normal_map) + { + return pixel_format_helpers::has_alpha(src_fmt) ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; + } + else if (pixel_format_helpers::is_grayscale(src_fmt)) + { + if (pixel_format_helpers::has_alpha(src_fmt)) + { + return PIXEL_FMT_ETC2A; + } + else + { + return PIXEL_FMT_ETC1; + } + } + else if (pixel_format_helpers::has_alpha(src_fmt)) + { + return PIXEL_FMT_ETC2A; + } + else + { + return PIXEL_FMT_ETC1; + } + } + } + else if (params.m_dst_file_type == texture_file_types::cFormatDDS) + { + if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) + { + if (is_normal_map) + { + return PIXEL_FMT_DXT5_AGBR; + } + else if (pixel_format_helpers::is_grayscale(src_fmt)) + { + if (pixel_format_helpers::has_alpha(src_fmt)) + { + return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; + } + else + { + return PIXEL_FMT_DXT1; + } + } + else if (pixel_format_helpers::has_alpha(src_fmt)) + { + return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; + } + else + { + return PIXEL_FMT_DXT1; + } + } + } + else + { + // Destination is a regular image format. 
+ if (pixel_format_helpers::is_grayscale(src_fmt)) + { + if (pixel_format_helpers::has_alpha(src_fmt)) + { + return PIXEL_FMT_A8L8; + } + else + { + return PIXEL_FMT_L8; + } + } + else if (pixel_format_helpers::has_alpha(src_fmt)) + { + return PIXEL_FMT_A8R8G8B8; + } + else + { + return PIXEL_FMT_R8G8B8; + } + } + + return src_fmt; + } + + static void print_comp_params(const crn_comp_params& comp_params) + { + console::debug("\nTexture conversion compression parameters:"); + console::debug(" Desired bitrate: %3.3f", comp_params.m_target_bitrate); + console::debug(" CRN Quality: %i", comp_params.m_quality_level); + console::debug("CRN C endpoints/selectors: %u %u", comp_params.m_crn_color_endpoint_palette_size, comp_params.m_crn_color_selector_palette_size); + console::debug("CRN A endpoints/selectors: %u %u", comp_params.m_crn_alpha_endpoint_palette_size, comp_params.m_crn_alpha_selector_palette_size); + console::debug(" DXT both block types: %u, Alpha threshold: %u", comp_params.get_flag(cCRNCompFlagUseBothBlockTypes), comp_params.m_dxt1a_alpha_threshold); + console::debug(" DXT compression quality: %s", crn_get_dxt_quality_string(comp_params.m_dxt_quality)); + console::debug(" Perceptual: %u, Large Blocks: %u", comp_params.get_flag(cCRNCompFlagPerceptual), comp_params.get_flag(cCRNCompFlagHierarchical)); + console::debug(" Compressor: %s", get_dxt_compressor_name(comp_params.m_dxt_compressor_type)); + console::debug(" Disable endpoint caching: %u", comp_params.get_flag(cCRNCompFlagDisableEndpointCaching)); + console::debug(" Grayscale sampling: %u", comp_params.get_flag(cCRNCompFlagGrayscaleSampling)); + console::debug(" Max helper threads: %u", comp_params.m_num_helper_threads); + console::debug(""); + } + + static void print_mipmap_params(const crn_mipmap_params& mipmap_params) + { + console::debug("\nTexture conversion MIP-map parameters:"); + console::debug(" Mode: %s", crn_get_mip_mode_name(mipmap_params.m_mode)); + console::debug(" Filter: %s", 
crn_get_mip_filter_name(mipmap_params.m_filter)); + console::debug("Gamma filtering: %u, Gamma: %2.2f", mipmap_params.m_gamma_filtering, mipmap_params.m_gamma); + console::debug(" Blurriness: %2.2f", mipmap_params.m_blurriness); + console::debug(" Renormalize: %u", mipmap_params.m_renormalize); + console::debug("Renorm. top mip: %u", mipmap_params.m_rtopmip); + console::debug(" Tiled: %u", mipmap_params.m_tiled); + console::debug(" Max Levels: %u", mipmap_params.m_max_levels); + console::debug(" Min level size: %u", mipmap_params.m_min_mip_size); + console::debug(" window: %u %u %u %u", mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); + console::debug(" scale mode: %s", crn_get_scale_mode_desc(mipmap_params.m_scale_mode)); + console::debug(" scale: %f %f", mipmap_params.m_scale_x, mipmap_params.m_scale_y); + console::debug(" clamp: %u %u, clamp_scale: %u", mipmap_params.m_clamp_width, mipmap_params.m_clamp_height, mipmap_params.m_clamp_scale); + console::debug(""); + } + + void convert_params::print() + { + console::debug("\nTexture conversion parameters:"); + console::debug(" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s, X Flipped: %u, Y Flipped: %u", + m_pInput_texture->get_width(), + m_pInput_texture->get_height(), + m_pInput_texture->get_num_faces(), + m_pInput_texture->get_num_levels(), + pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format()), + m_pInput_texture->is_x_flipped(), + m_pInput_texture->is_y_flipped()); + + console::debug(" texture_type: %s", get_texture_type_desc(m_texture_type)); + console::debug(" dst_filename: %s", m_dst_filename.get_ptr()); + console::debug(" dst_file_type: %s", texture_file_types::get_extension(m_dst_file_type)); + console::debug(" dst_format: %s", pixel_format_helpers::get_pixel_format_string(m_dst_format)); + console::debug(" quick: %u", m_quick); + console::debug(" use_source_format: %u", m_always_use_source_pixel_format); 
+ console::debug(" Y Flip: %u", m_y_flip); + console::debug(" Unflip: %u", m_unflip); + } + + static bool write_compressed_texture( + mipmapped_texture& work_tex, convert_params& params, crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats& stats) + { + comp_params.m_file_type = (params.m_dst_file_type == texture_file_types::cFormatCRN) ? cCRNFileTypeCRN : cCRNFileTypeDDS; + + comp_params.m_pProgress_func = crn_progress_callback; + comp_params.m_pProgress_func_data = &progress_state; + comp_params.set_flag(cCRNCompFlagPerceptual, perceptual); + + crn_format crn_fmt = pixel_format_helpers::convert_pixel_format_to_best_crn_format(dst_format); + comp_params.m_format = crn_fmt; + + console::message("Writing %s texture to file: \"%s\"", crn_get_format_string(crn_fmt), params.m_dst_filename.get_ptr()); + + uint32 actual_quality_level; + float actual_bitrate; + bool status = work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, &comp_params, &actual_quality_level, &actual_bitrate); + if (!status) + { + return convert_error(params, "Failed writing output file!"); + } + + if (!params.m_no_stats) + { + if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) + { + console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); + } + } + + return true; } - } - - if (pCSVStatsFile) { - // FIXME: This is kind of a hack, and should be combined with the code above. 
- image_u8 a, b; - image_u8* pA = m_pInput_tex->get_level_image(0, 0, a); - image_u8* pB = m_output_tex.get_level_image(0, 0, b); - if (pA && pB) { - image_u8 grayscale_a, grayscale_b; - if (grayscale_sampling) { - grayscale_a = *pA; - grayscale_a.convert_to_grayscale(); - pA = &grayscale_a; - - grayscale_b = *pB; - grayscale_b.convert_to_grayscale(); - pB = &grayscale_b; - } - - image_utils::error_metrics rgb_error; - image_utils::error_metrics luma_error; - if (rgb_error.compute(*pA, *pB, 0, 3, false) && luma_error.compute(*pA, *pB, 0, 0, true)) { - bool bCSVStatsFileExists = file_utils::does_file_exist(pCSVStatsFile); - FILE* pFile; - crn_fopen(&pFile, pCSVStatsFile, "a"); - if (!pFile) - console::warning("Unable to append to CSV stats file: %s\n", pCSVStatsFile); - else { - if (!bCSVStatsFileExists) - fprintf(pFile, "name,width,height,miplevels,rgb_rms,luma_rms,effective_output_size,effective_bitrate\n"); - dynamic_string filename; - file_utils::split_path(m_src_filename.get_ptr(), nullptr, nullptr, &filename, nullptr); - - uint64 effective_output_size = m_output_comp_file_size ? 
m_output_comp_file_size : m_output_file_size; - float bitrate = (effective_output_size * 8.0f) / m_total_output_pixels; - fprintf(pFile, "%s,%u,%u,%u,%f,%f,%u,%f\n", - filename.get_ptr(), - pB->get_width(), pB->get_height(), m_output_tex.get_num_levels(), - rgb_error.mRootMeanSquared, luma_error.mRootMeanSquared, - (uint32)effective_output_size, bitrate); - fclose(pFile); - } - } + + static bool convert_and_write_normal_texture(mipmapped_texture& work_tex, convert_params& params, const crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) + { + if (formats_differ) + { + dxt_image::pack_params pack_params; + + pack_params.m_perceptual = perceptual; + pack_params.m_compressor = comp_params.m_dxt_compressor_type; + pack_params.m_pProgress_callback = dxt_progress_callback_func; + pack_params.m_pProgress_callback_user_data_ptr = &progress_state; + pack_params.m_dxt1a_alpha_threshold = comp_params.m_dxt1a_alpha_threshold; + pack_params.m_quality = comp_params.m_dxt_quality; + pack_params.m_endpoint_caching = !comp_params.get_flag(cCRNCompFlagDisableEndpointCaching); + pack_params.m_grayscale_sampling = comp_params.get_flag(cCRNCompFlagGrayscaleSampling); + if ((!comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) && (!comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) + { + pack_params.m_use_both_block_types = false; + } + + pack_params.m_num_helper_threads = comp_params.m_num_helper_threads; + pack_params.m_use_transparent_indices_for_black = comp_params.get_flag(cCRNCompFlagUseTransparentIndicesForBlack); + + console::info("Converting texture format from %s to %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); + + timer tm; + tm.start(); + + bool status = work_tex.convert(dst_format, pack_params); + + double t = tm.get_elapsed_secs(); + + console::info(""); + + if (!status) + { + if 
(progress_state.m_canceled) + { + params.m_canceled = true; + return false; + } + else + { + return convert_error(params, "Failed converting texture to output format!"); + } + } + + console::info("Texture format conversion took %3.3fs", t); + } + + if (params.m_write_mipmaps_to_multiple_files) + { + for (uint f = 0; f < work_tex.get_num_faces(); f++) + { + for (uint l = 0; l < work_tex.get_num_levels(); l++) + { + dynamic_string filename(params.m_dst_filename.get_ptr()); + + dynamic_string drv, dir, fn, ext; + if (!file_utils::split_path(params.m_dst_filename.get_ptr(), &drv, &dir, &fn, &ext)) + { + return false; + } + + fn += dynamic_string(cVarArg, "_face%u_mip%u", f, l).get_ptr(); + filename = drv + dir + fn + ext; + + mip_level* pLevel = work_tex.get_level(f, l); + + face_vec face(1); + face[0].push_back(crnlib_new(*pLevel)); + + mipmapped_texture new_tex; + new_tex.assign(face); + + console::info("Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); + + if (!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, nullptr, nullptr, nullptr)) + { + return convert_error(params, "Failed writing output file!"); + } + } + } + } + else + { + console::message("Writing texture to file: \"%s\"", params.m_dst_filename.get_ptr()); + + if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, nullptr, nullptr, nullptr)) + { + return convert_error(params, "Failed writing output file!"); + } + + if (!params.m_no_stats) + { + if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) + { + console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); + } + } + } + + return true; } - } - } - } - return true; -} + bool process(convert_params& params, convert_stats& stats) + { + texture_type tex_type = params.m_texture_type; + + 
crn_comp_params comp_params(params.m_comp_params); + crn_mipmap_params mipmap_params(params.m_mipmap_params); + + progress_params progress_state; + progress_state.m_pParams = ¶ms; + progress_state.m_canceled = false; + progress_state.m_start_percentage = 0; + + params.m_status = false; + params.m_error_message.clear(); + + if (params.m_pIntermediate_texture) + { + crnlib_delete(params.m_pIntermediate_texture); + params.m_pIntermediate_texture = nullptr; + } + + params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); + + mipmapped_texture& work_tex = *params.m_pInput_texture; + + if ((params.m_unflip) && (work_tex.is_flipped())) + { + console::info("Unflipping texture"); + work_tex.unflip(true, true); + } + + if (params.m_y_flip) + { + console::info("Flipping texture on Y axis"); + + // This is awkward - if we're writing to KTX, then go ahead and properly update the work texture's orientation flags. + // Otherwise, don't bother updating the orientation flags because the writer may then attempt to unflip the texture before writing to formats + // that don't support flipped textures (ugh). 
+ const bool bOutputFormatSupportsFlippedTextures = params.m_dst_file_type == texture_file_types::cFormatKTX; + if (!work_tex.flip_y(bOutputFormatSupportsFlippedTextures)) + { + console::warning("Failed flipping texture on Y axis"); + } + } + + if ((params.m_dst_format != PIXEL_FMT_INVALID) && (pixel_format_helpers::is_alpha_only(params.m_dst_format))) + { + if ((work_tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) == 0) + { + console::warning("Output format is alpha-only, but input doesn't have alpha, so setting alpha to luminance."); + + work_tex.convert(PIXEL_FMT_A8, crnlib::dxt_image::pack_params()); + + if (tex_type == cTextureTypeNormalMap) + { + tex_type = cTextureTypeRegularMap; + } + } + } + + pixel_format dst_format = params.m_dst_format; + if (pixel_format_helpers::is_dxt(dst_format)) + { + if ((params.m_dst_file_type != texture_file_types::cFormatCRN) && + (params.m_dst_file_type != texture_file_types::cFormatDDS) && + (params.m_dst_file_type != texture_file_types::cFormatKTX)) + { + console::warning("Output file format does not support DXTc - automatically choosing a non-DXT pixel format."); + dst_format = PIXEL_FMT_INVALID; + } + } + + if (dst_format == PIXEL_FMT_INVALID) + { + // Caller didn't specify a format to use, so try to pick something reasonable. + // This is actually much trickier than it seems, and the current approach kind of sucks. 
+ dst_format = choose_pixel_format(params, comp_params, work_tex, tex_type); + } + + if ((dst_format == PIXEL_FMT_DXT1) && (comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) + { + dst_format = PIXEL_FMT_DXT1A; + } + else if (dst_format == PIXEL_FMT_DXT1A) + { + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); + } + + if ((dst_format == PIXEL_FMT_DXT1A) && (params.m_dst_file_type == texture_file_types::cFormatCRN)) + { + console::warning("CRN file format does not support DXT1A compressed textures - converting to DXT5 instead."); + dst_format = PIXEL_FMT_DXT5; + } + + const bool is_normal_map = (tex_type == cTextureTypeNormalMap); + bool perceptual = comp_params.get_flag(cCRNCompFlagPerceptual); + if (is_normal_map) + { + perceptual = false; + mipmap_params.m_gamma_filtering = false; + } + + if (pixel_format_helpers::is_pixel_format_non_srgb(dst_format)) + { + if (perceptual) + { + console::message("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); + perceptual = false; + } + } + + if (pixel_format_helpers::is_normal_map(dst_format)) + { + if (perceptual) + { + console::message("Output pixel format is intended for normal maps, disabling perceptual color metrics"); + } + + perceptual = false; + } + + bool generate_mipmaps = texture_file_types::supports_mipmaps(params.m_dst_file_type); + if ((params.m_write_mipmaps_to_multiple_files) && + ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS) && (params.m_dst_file_type != texture_file_types::cFormatKTX))) + { + generate_mipmaps = true; + } + + if (params.m_param_debugging) + { + params.print(); + + print_comp_params(comp_params); + print_mipmap_params(mipmap_params); + } + + if (!create_texture_mipmaps(work_tex, comp_params, mipmap_params, generate_mipmaps)) + { + return convert_error(params, "Failed creating texture mipmaps!"); + } -void convert_stats::clear() { - m_src_filename.clear(); - 
m_dst_filename.clear(); - m_dst_file_type = texture_file_types::cFormatInvalid; - - m_pInput_tex = nullptr; - m_output_tex.clear(); - - m_input_file_size = 0; - m_total_input_pixels = 0; - - m_output_file_size = 0; - m_total_output_pixels = 0; - - m_output_comp_file_size = 0; -} - -//----------------------------------------------------------------------- - -static crn_bool crn_progress_callback(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) { - progress_params& params = *static_cast(pUser_data_ptr); - - if (params.m_canceled) - return false; - if (!params.m_pParams->m_pProgress_func) - return true; - - int percentage_complete = params.m_start_percentage + (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * (100.0f - params.m_start_percentage) / total_phases); - - percentage_complete = math::clamp(percentage_complete, 0, 100); - - if (!params.m_pParams->m_pProgress_func(percentage_complete, params.m_pParams->m_pProgress_user_data)) { - params.m_canceled = true; - return false; - } - - return true; -} - -static bool dxt_progress_callback_func(uint percentage_complete, void* pUser_data_ptr) { - progress_params& params = *static_cast(pUser_data_ptr); - - if (params.m_canceled) - return false; - - if (!params.m_pParams->m_pProgress_func) - return true; - - int scaled_percentage_complete = params.m_start_percentage + (percentage_complete * (100 - params.m_start_percentage)) / 100; - - scaled_percentage_complete = math::clamp(scaled_percentage_complete, 0, 100); - - if (!params.m_pParams->m_pProgress_func(scaled_percentage_complete, params.m_pParams->m_pProgress_user_data)) { - params.m_canceled = true; - return false; - } - - return true; -} - -static bool convert_error(const convert_params& params, const char* pError_msg) { - params.m_status = false; - params.m_error_message = pError_msg; - - remove(params.m_dst_filename.get_ptr()); - - return false; -} - -static 
pixel_format choose_pixel_format(convert_params& params, const crn_comp_params& comp_params, const mipmapped_texture& src_tex, texture_type tex_type) { - const pixel_format src_fmt = src_tex.get_format(); - const texture_file_types::format src_file_type = src_tex.get_source_file_type(); - const bool is_normal_map = (tex_type == cTextureTypeNormalMap); - - if (params.m_always_use_source_pixel_format) - return src_fmt; - - // Attempt to choose a reasonable/sane output pixel format. - if (params.m_dst_file_type == texture_file_types::cFormatCRN) { - if (is_normal_map) { - if (pixel_format_helpers::is_dxt(src_fmt)) - return src_fmt; - else - return PIXEL_FMT_DXT5_AGBR; - } - } else if (params.m_dst_file_type == texture_file_types::cFormatKTX) { - if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) { - if (is_normal_map) { - return pixel_format_helpers::has_alpha(src_fmt) ? PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; - } else if (pixel_format_helpers::is_grayscale(src_fmt)) { - if (pixel_format_helpers::has_alpha(src_fmt)) - return PIXEL_FMT_ETC2A; - else - return PIXEL_FMT_ETC1; - } else if (pixel_format_helpers::has_alpha(src_fmt)) - return PIXEL_FMT_ETC2A; - else - return PIXEL_FMT_ETC1; - } - } else if (params.m_dst_file_type == texture_file_types::cFormatDDS) { - if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) { - if (is_normal_map) { - return PIXEL_FMT_DXT5_AGBR; - } else if (pixel_format_helpers::is_grayscale(src_fmt)) { - if (pixel_format_helpers::has_alpha(src_fmt)) - return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; - else - return PIXEL_FMT_DXT1; - } else if (pixel_format_helpers::has_alpha(src_fmt)) - return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? 
PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; - else - return PIXEL_FMT_DXT1; - } - } else { - // Destination is a regular image format. - if (pixel_format_helpers::is_grayscale(src_fmt)) { - if (pixel_format_helpers::has_alpha(src_fmt)) - return PIXEL_FMT_A8L8; - else - return PIXEL_FMT_L8; - } else if (pixel_format_helpers::has_alpha(src_fmt)) - return PIXEL_FMT_A8R8G8B8; - else - return PIXEL_FMT_R8G8B8; - } - - return src_fmt; -} - -static void print_comp_params(const crn_comp_params& comp_params) { - console::debug("\nTexture conversion compression parameters:"); - console::debug(" Desired bitrate: %3.3f", comp_params.m_target_bitrate); - console::debug(" CRN Quality: %i", comp_params.m_quality_level); - console::debug("CRN C endpoints/selectors: %u %u", comp_params.m_crn_color_endpoint_palette_size, comp_params.m_crn_color_selector_palette_size); - console::debug("CRN A endpoints/selectors: %u %u", comp_params.m_crn_alpha_endpoint_palette_size, comp_params.m_crn_alpha_selector_palette_size); - console::debug(" DXT both block types: %u, Alpha threshold: %u", comp_params.get_flag(cCRNCompFlagUseBothBlockTypes), comp_params.m_dxt1a_alpha_threshold); - console::debug(" DXT compression quality: %s", crn_get_dxt_quality_string(comp_params.m_dxt_quality)); - console::debug(" Perceptual: %u, Large Blocks: %u", comp_params.get_flag(cCRNCompFlagPerceptual), comp_params.get_flag(cCRNCompFlagHierarchical)); - console::debug(" Compressor: %s", get_dxt_compressor_name(comp_params.m_dxt_compressor_type)); - console::debug(" Disable endpoint caching: %u", comp_params.get_flag(cCRNCompFlagDisableEndpointCaching)); - console::debug(" Grayscale sampling: %u", comp_params.get_flag(cCRNCompFlagGrayscaleSampling)); - console::debug(" Max helper threads: %u", comp_params.m_num_helper_threads); - console::debug(""); -} - -static void print_mipmap_params(const crn_mipmap_params& mipmap_params) { - console::debug("\nTexture conversion MIP-map parameters:"); - console::debug(" Mode: %s", 
crn_get_mip_mode_name(mipmap_params.m_mode)); - console::debug(" Filter: %s", crn_get_mip_filter_name(mipmap_params.m_filter)); - console::debug("Gamma filtering: %u, Gamma: %2.2f", mipmap_params.m_gamma_filtering, mipmap_params.m_gamma); - console::debug(" Blurriness: %2.2f", mipmap_params.m_blurriness); - console::debug(" Renormalize: %u", mipmap_params.m_renormalize); - console::debug("Renorm. top mip: %u", mipmap_params.m_rtopmip); - console::debug(" Tiled: %u", mipmap_params.m_tiled); - console::debug(" Max Levels: %u", mipmap_params.m_max_levels); - console::debug(" Min level size: %u", mipmap_params.m_min_mip_size); - console::debug(" window: %u %u %u %u", mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); - console::debug(" scale mode: %s", crn_get_scale_mode_desc(mipmap_params.m_scale_mode)); - console::debug(" scale: %f %f", mipmap_params.m_scale_x, mipmap_params.m_scale_y); - console::debug(" clamp: %u %u, clamp_scale: %u", mipmap_params.m_clamp_width, mipmap_params.m_clamp_height, mipmap_params.m_clamp_scale); - console::debug(""); -} - -void convert_params::print() { - console::debug("\nTexture conversion parameters:"); - console::debug(" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s, X Flipped: %u, Y Flipped: %u", - m_pInput_texture->get_width(), - m_pInput_texture->get_height(), - m_pInput_texture->get_num_faces(), - m_pInput_texture->get_num_levels(), - pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format()), - m_pInput_texture->is_x_flipped(), - m_pInput_texture->is_y_flipped()); - - console::debug(" texture_type: %s", get_texture_type_desc(m_texture_type)); - console::debug(" dst_filename: %s", m_dst_filename.get_ptr()); - console::debug(" dst_file_type: %s", texture_file_types::get_extension(m_dst_file_type)); - console::debug(" dst_format: %s", pixel_format_helpers::get_pixel_format_string(m_dst_format)); - console::debug(" quick: %u", m_quick); - 
console::debug(" use_source_format: %u", m_always_use_source_pixel_format); - console::debug(" Y Flip: %u", m_y_flip); - console::debug(" Unflip: %u", m_unflip); -} - -static bool write_compressed_texture( - mipmapped_texture& work_tex, convert_params& params, crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats& stats) { - comp_params.m_file_type = (params.m_dst_file_type == texture_file_types::cFormatCRN) ? cCRNFileTypeCRN : cCRNFileTypeDDS; - - comp_params.m_pProgress_func = crn_progress_callback; - comp_params.m_pProgress_func_data = &progress_state; - comp_params.set_flag(cCRNCompFlagPerceptual, perceptual); - - crn_format crn_fmt = pixel_format_helpers::convert_pixel_format_to_best_crn_format(dst_format); - comp_params.m_format = crn_fmt; - - console::message("Writing %s texture to file: \"%s\"", crn_get_format_string(crn_fmt), params.m_dst_filename.get_ptr()); - - uint32 actual_quality_level; - float actual_bitrate; - bool status = work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, &comp_params, &actual_quality_level, &actual_bitrate); - if (!status) - return convert_error(params, "Failed writing output file!"); - - if (!params.m_no_stats) { - if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) { - console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); - } - } - - return true; -} - -static bool convert_and_write_normal_texture(mipmapped_texture& work_tex, convert_params& params, const crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) { - if (formats_differ) { - dxt_image::pack_params pack_params; - - pack_params.m_perceptual = perceptual; - pack_params.m_compressor 
= comp_params.m_dxt_compressor_type; - pack_params.m_pProgress_callback = dxt_progress_callback_func; - pack_params.m_pProgress_callback_user_data_ptr = &progress_state; - pack_params.m_dxt1a_alpha_threshold = comp_params.m_dxt1a_alpha_threshold; - pack_params.m_quality = comp_params.m_dxt_quality; - pack_params.m_endpoint_caching = !comp_params.get_flag(cCRNCompFlagDisableEndpointCaching); - pack_params.m_grayscale_sampling = comp_params.get_flag(cCRNCompFlagGrayscaleSampling); - if ((!comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) && (!comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) - pack_params.m_use_both_block_types = false; - - pack_params.m_num_helper_threads = comp_params.m_num_helper_threads; - pack_params.m_use_transparent_indices_for_black = comp_params.get_flag(cCRNCompFlagUseTransparentIndicesForBlack); - - console::info("Converting texture format from %s to %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); - - timer tm; - tm.start(); - - bool status = work_tex.convert(dst_format, pack_params); - - double t = tm.get_elapsed_secs(); - - console::info(""); - - if (!status) { - if (progress_state.m_canceled) { - params.m_canceled = true; - return false; - } else { - return convert_error(params, "Failed converting texture to output format!"); - } - } - - console::info("Texture format conversion took %3.3fs", t); - } - - if (params.m_write_mipmaps_to_multiple_files) { - for (uint f = 0; f < work_tex.get_num_faces(); f++) { - for (uint l = 0; l < work_tex.get_num_levels(); l++) { - dynamic_string filename(params.m_dst_filename.get_ptr()); - - dynamic_string drv, dir, fn, ext; - if (!file_utils::split_path(params.m_dst_filename.get_ptr(), &drv, &dir, &fn, &ext)) - return false; - - fn += dynamic_string(cVarArg, "_face%u_mip%u", f, l).get_ptr(); - filename = drv + dir + fn + ext; - - mip_level* pLevel = work_tex.get_level(f, l); - - face_vec face(1); - 
face[0].push_back(crnlib_new(*pLevel)); - - mipmapped_texture new_tex; - new_tex.assign(face); - - console::info("Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); - - if (!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, nullptr, nullptr, nullptr)) - return convert_error(params, "Failed writing output file!"); - } - } - } else { - console::message("Writing texture to file: \"%s\"", params.m_dst_filename.get_ptr()); - - if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, nullptr, nullptr, nullptr)) - return convert_error(params, "Failed writing output file!"); - - if (!params.m_no_stats) { - if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) { - console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); - } - } - } - - return true; -} - -bool process(convert_params& params, convert_stats& stats) { - texture_type tex_type = params.m_texture_type; - - crn_comp_params comp_params(params.m_comp_params); - crn_mipmap_params mipmap_params(params.m_mipmap_params); - - progress_params progress_state; - progress_state.m_pParams = ¶ms; - progress_state.m_canceled = false; - progress_state.m_start_percentage = 0; - - params.m_status = false; - params.m_error_message.clear(); - - if (params.m_pIntermediate_texture) { - crnlib_delete(params.m_pIntermediate_texture); - params.m_pIntermediate_texture = nullptr; - } - - params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); - - mipmapped_texture& work_tex = *params.m_pInput_texture; - - if ((params.m_unflip) && (work_tex.is_flipped())) { - console::info("Unflipping texture"); - work_tex.unflip(true, true); - } - - if (params.m_y_flip) { - console::info("Flipping texture on Y axis"); - - // This is awkward - if we're writing to KTX, then go 
ahead and properly update the work texture's orientation flags. - // Otherwise, don't bother updating the orientation flags because the writer may then attempt to unflip the texture before writing to formats - // that don't support flipped textures (ugh). - const bool bOutputFormatSupportsFlippedTextures = params.m_dst_file_type == texture_file_types::cFormatKTX; - if (!work_tex.flip_y(bOutputFormatSupportsFlippedTextures)) { - console::warning("Failed flipping texture on Y axis"); - } - } - - if ((params.m_dst_format != PIXEL_FMT_INVALID) && (pixel_format_helpers::is_alpha_only(params.m_dst_format))) { - if ((work_tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) == 0) { - console::warning("Output format is alpha-only, but input doesn't have alpha, so setting alpha to luminance."); - - work_tex.convert(PIXEL_FMT_A8, crnlib::dxt_image::pack_params()); - - if (tex_type == cTextureTypeNormalMap) - tex_type = cTextureTypeRegularMap; - } - } - - pixel_format dst_format = params.m_dst_format; - if (pixel_format_helpers::is_dxt(dst_format)) { - if ((params.m_dst_file_type != texture_file_types::cFormatCRN) && - (params.m_dst_file_type != texture_file_types::cFormatDDS) && - (params.m_dst_file_type != texture_file_types::cFormatKTX)) { - console::warning("Output file format does not support DXTc - automatically choosing a non-DXT pixel format."); - dst_format = PIXEL_FMT_INVALID; - } - } - - if (dst_format == PIXEL_FMT_INVALID) { - // Caller didn't specify a format to use, so try to pick something reasonable. - // This is actually much trickier than it seems, and the current approach kind of sucks. 
- dst_format = choose_pixel_format(params, comp_params, work_tex, tex_type); - } - - if ((dst_format == PIXEL_FMT_DXT1) && (comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) - dst_format = PIXEL_FMT_DXT1A; - else if (dst_format == PIXEL_FMT_DXT1A) - comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); - - if ((dst_format == PIXEL_FMT_DXT1A) && (params.m_dst_file_type == texture_file_types::cFormatCRN)) { - console::warning("CRN file format does not support DXT1A compressed textures - converting to DXT5 instead."); - dst_format = PIXEL_FMT_DXT5; - } - - const bool is_normal_map = (tex_type == cTextureTypeNormalMap); - bool perceptual = comp_params.get_flag(cCRNCompFlagPerceptual); - if (is_normal_map) { - perceptual = false; - mipmap_params.m_gamma_filtering = false; - } - - if (pixel_format_helpers::is_pixel_format_non_srgb(dst_format)) { - if (perceptual) { - console::message("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); - perceptual = false; - } - } - - if (pixel_format_helpers::is_normal_map(dst_format)) { - if (perceptual) - console::message("Output pixel format is intended for normal maps, disabling perceptual color metrics"); - - perceptual = false; - } - - bool generate_mipmaps = texture_file_types::supports_mipmaps(params.m_dst_file_type); - if ((params.m_write_mipmaps_to_multiple_files) && - ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS) && (params.m_dst_file_type != texture_file_types::cFormatKTX))) { - generate_mipmaps = true; - } - - if (params.m_param_debugging) { - params.print(); - - print_comp_params(comp_params); - print_mipmap_params(mipmap_params); - } - - if (!create_texture_mipmaps(work_tex, comp_params, mipmap_params, generate_mipmaps)) - return convert_error(params, "Failed creating texture mipmaps!"); - - bool formats_differ = work_tex.get_format() != dst_format; - if (formats_differ) { - if 
(pixel_format_helpers::is_dxt1(work_tex.get_format()) && pixel_format_helpers::is_dxt1(dst_format)) - formats_differ = false; - } - - bool status = false; - - timer t; - t.start(); - - if ((params.m_dst_file_type == texture_file_types::cFormatCRN) || - ((params.m_dst_file_type == texture_file_types::cFormatDDS) && (pixel_format_helpers::is_dxt(dst_format)) && - //((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) - ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)))) { - status = write_compressed_texture(work_tex, params, comp_params, dst_format, progress_state, perceptual, stats); - } else { - if ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) { - console::warning("Target bitrate/quality level is not supported for this output file format.\n"); - } - status = convert_and_write_normal_texture(work_tex, params, comp_params, dst_format, progress_state, formats_differ, perceptual, stats); - } - - console::progress(""); - - if (progress_state.m_canceled) { - params.m_canceled = true; - return false; - } - - double total_write_time = t.get_elapsed_secs(); - - if (status) { - if (params.m_param_debugging) - console::info("Work texture format: %s, desired destination format: %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); - - console::message("Texture successfully written in %3.3fs", total_write_time); - } else { - dynamic_string str; - - if (work_tex.get_last_error().is_empty()) - str.format("Failed writing texture to file \"%s\"", params.m_dst_filename.get_ptr()); - else - str.format("Failed writing texture to file \"%s\", Reason: %s", params.m_dst_filename.get_ptr(), work_tex.get_last_error().get_ptr()); - - return convert_error(params, str.get_ptr()); - } - - if (params.m_debugging) { - crnlib_print_mem_stats(); - } - - 
params.m_status = true; - return true; -} - -} // namespace texture_conversion - -} // namespace crnlib + bool formats_differ = work_tex.get_format() != dst_format; + if (formats_differ) + { + if (pixel_format_helpers::is_dxt1(work_tex.get_format()) && pixel_format_helpers::is_dxt1(dst_format)) + { + formats_differ = false; + } + } + + bool status = false; + + timer t; + t.start(); + + if ((params.m_dst_file_type == texture_file_types::cFormatCRN) || + ((params.m_dst_file_type == texture_file_types::cFormatDDS) && (pixel_format_helpers::is_dxt(dst_format)) && + //((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)))) + { + status = write_compressed_texture(work_tex, params, comp_params, dst_format, progress_state, perceptual, stats); + } + else + { + if ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + { + console::warning("Target bitrate/quality level is not supported for this output file format.\n"); + } + status = convert_and_write_normal_texture(work_tex, params, comp_params, dst_format, progress_state, formats_differ, perceptual, stats); + } + + console::progress(""); + + if (progress_state.m_canceled) + { + params.m_canceled = true; + return false; + } + + double total_write_time = t.get_elapsed_secs(); + + if (status) + { + if (params.m_param_debugging) + { + console::info("Work texture format: %s, desired destination format: %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); + } + + console::message("Texture successfully written in %3.3fs", total_write_time); + } + else + { + dynamic_string str; + + if (work_tex.get_last_error().is_empty()) + { + str.format("Failed writing texture to file \"%s\"", params.m_dst_filename.get_ptr()); + } + else + { + str.format("Failed 
writing texture to file \"%s\", Reason: %s", params.m_dst_filename.get_ptr(), work_tex.get_last_error().get_ptr()); + } + + return convert_error(params, str.get_ptr()); + } + + if (params.m_debugging) + { + crnlib_print_mem_stats(); + } + + params.m_status = true; + return true; + } + } // namespace texture_conversion +} // namespace crnlib diff --git a/crnlib/crn_texture_conversion.h b/crnlib/crn_texture_conversion.h index 16d82de..c9b39f3 100644 --- a/crnlib/crn_texture_conversion.h +++ b/crnlib/crn_texture_conversion.h @@ -1,110 +1,132 @@ -// File: crn_texture_conversion.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once + #include "crn_dxt_image.h" #include "crn_mipmapped_texture.h" #include "crn_rect.h" #include "crn_lzma_codec.h" #include "crn_export.h" -namespace crnlib { -namespace texture_conversion { -class CRN_EXPORT convert_stats { - public: - convert_stats(); - - bool init( - const char* pSrc_filename, - const char* pDst_filename, - mipmapped_texture& src_tex, - texture_file_types::format dst_file_type, - bool lzma_stats); - - bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile = nullptr) const; - - void clear(); - - dynamic_string m_src_filename; - dynamic_string m_dst_filename; - texture_file_types::format m_dst_file_type; - - mipmapped_texture* m_pInput_tex; - mipmapped_texture m_output_tex; - - uint64 m_input_file_size; - uint m_total_input_pixels; - - uint64 m_output_file_size; - uint m_total_output_pixels; - - uint64 m_output_comp_file_size; -}; - -class CRN_EXPORT convert_params { - public: - convert_params() - : m_pInput_texture(nullptr), - m_texture_type(cTextureTypeUnknown), - m_dst_file_type(texture_file_types::cFormatInvalid), - m_dst_format(PIXEL_FMT_INVALID), - m_pProgress_func(nullptr), - m_pProgress_user_data(nullptr), - m_pIntermediate_texture(nullptr), - m_y_flip(false), - m_unflip(false), - m_always_use_source_pixel_format(false), - m_write_mipmaps_to_multiple_files(false), - m_quick(false), - m_debugging(false), - m_param_debugging(false), - m_no_stats(false), - m_lzma_stats(false), - m_status(false), - m_canceled(false) { - } - - ~convert_params() { - crnlib_delete(m_pIntermediate_texture); - } - - void print(); - - // Input parameters - mipmapped_texture* m_pInput_texture; - - texture_type m_texture_type; - - dynamic_string m_dst_filename; - texture_file_types::format m_dst_file_type; - pixel_format m_dst_format; - - crn_comp_params m_comp_params; - crn_mipmap_params m_mipmap_params; - - typedef bool (*progress_callback_func_ptr)(uint percentage_complete, void* pUser_data_ptr); - 
progress_callback_func_ptr m_pProgress_func; - void* m_pProgress_user_data; - - // Return parameters - mipmapped_texture* m_pIntermediate_texture; - mutable dynamic_string m_error_message; - - bool m_y_flip; - bool m_unflip; - bool m_always_use_source_pixel_format; - bool m_write_mipmaps_to_multiple_files; - bool m_quick; - bool m_debugging; - bool m_param_debugging; - bool m_no_stats; - - bool m_lzma_stats; - mutable bool m_status; - mutable bool m_canceled; -}; - -CRN_EXPORT bool process(convert_params& params, convert_stats& stats); - -} // namespace texture_conversion - -} // namespace crnlib +namespace crnlib +{ + namespace texture_conversion + { + class CRN_EXPORT convert_stats + { + public: + convert_stats(); + + bool init(const char* pSrc_filename, const char* pDst_filename, mipmapped_texture& src_tex, + texture_file_types::format dst_file_type, bool lzma_stats); + + bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile = nullptr) const; + + void clear(); + + dynamic_string m_src_filename; + dynamic_string m_dst_filename; + texture_file_types::format m_dst_file_type; + + mipmapped_texture* m_pInput_tex; + mipmapped_texture m_output_tex; + + uint64 m_input_file_size; + uint m_total_input_pixels; + + uint64 m_output_file_size; + uint m_total_output_pixels; + + uint64 m_output_comp_file_size; + }; + + class CRN_EXPORT convert_params + { + public: + convert_params() : + m_pInput_texture(nullptr), + m_texture_type(cTextureTypeUnknown), + m_dst_file_type(texture_file_types::cFormatInvalid), + m_dst_format(PIXEL_FMT_INVALID), + m_pProgress_func(nullptr), + m_pProgress_user_data(nullptr), + m_pIntermediate_texture(nullptr), + m_y_flip(false), + m_unflip(false), + m_always_use_source_pixel_format(false), + m_write_mipmaps_to_multiple_files(false), + m_quick(false), + m_debugging(false), + m_param_debugging(false), + m_no_stats(false), + m_lzma_stats(false), + m_status(false), + m_canceled(false) + { + } + + 
~convert_params() + { + crnlib_delete(m_pIntermediate_texture); + } + + void print(); + + // Input parameters + mipmapped_texture* m_pInput_texture; + + texture_type m_texture_type; + + dynamic_string m_dst_filename; + texture_file_types::format m_dst_file_type; + pixel_format m_dst_format; + + crn_comp_params m_comp_params; + crn_mipmap_params m_mipmap_params; + + typedef bool (*progress_callback_func_ptr)(uint percentage_complete, void* pUser_data_ptr); + progress_callback_func_ptr m_pProgress_func; + void* m_pProgress_user_data; + + // Return parameters + mipmapped_texture* m_pIntermediate_texture; + mutable dynamic_string m_error_message; + + bool m_y_flip; + bool m_unflip; + bool m_always_use_source_pixel_format; + bool m_write_mipmaps_to_multiple_files; + bool m_quick; + bool m_debugging; + bool m_param_debugging; + bool m_no_stats; + + bool m_lzma_stats; + mutable bool m_status; + mutable bool m_canceled; + }; + + CRN_EXPORT bool process(convert_params& params, convert_stats& stats); + } // namespace texture_conversion +} // namespace crnlib diff --git a/crnlib/crn_texture_file_types.cpp b/crnlib/crn_texture_file_types.cpp index 60b7404..8b1c265 100644 --- a/crnlib/crn_texture_file_types.cpp +++ b/crnlib/crn_texture_file_types.cpp @@ -1,5 +1,25 @@ -// File: crn_texture_file_types.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_texture_file_types.h" @@ -15,8 +35,7 @@ namespace crnlib return nullptr; } - static const char* extensions[cNumFileFormats] = - { + static const char* extensions[cNumFileFormats] = { "dds", "crn", "ktx", @@ -70,7 +89,8 @@ namespace crnlib bool texture_file_types::supports_mipmaps(format fmt) { - switch (fmt) { + switch (fmt) + { case cFormatCRN: case cFormatDDS: case cFormatKTX: @@ -84,7 +104,8 @@ namespace crnlib bool texture_file_types::supports_alpha(format fmt) { - switch (fmt) { + switch (fmt) + { case cFormatJPG: case cFormatJPEG: case cFormatGIF: @@ -99,7 +120,8 @@ namespace crnlib const char* get_texture_type_desc(texture_type t) { - switch (t) { + switch (t) + { case cTextureTypeUnknown: return "Unknown"; case cTextureTypeRegularMap: @@ -118,4 +140,4 @@ namespace crnlib return "?"; } -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_texture_file_types.h b/crnlib/crn_texture_file_types.h index ca0e8d2..6bf78fa 100644 --- a/crnlib/crn_texture_file_types.h +++ b/crnlib/crn_texture_file_types.h @@ -1,5 +1,25 @@ -// File: crn_texture_file_types.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once @@ -66,5 +86,4 @@ namespace crnlib }; CRN_EXPORT const char* get_texture_type_desc(texture_type t); - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_threaded_clusterizer.h b/crnlib/crn_threaded_clusterizer.h index a9be100..8fb45b5 100644 --- a/crnlib/crn_threaded_clusterizer.h +++ b/crnlib/crn_threaded_clusterizer.h @@ -1,330 +1,440 @@ -// File: crn_threaded_clusterizer.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once + #include "crn_clusterizer.h" #include "crn_threading.h" -namespace crnlib { -template -class threaded_clusterizer { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer); - - public: - threaded_clusterizer(task_pool& tp) - : m_pTask_pool(&tp), - m_pProgress_callback(nullptr), - m_pProgress_callback_data(nullptr), - m_canceled(false) { - } - - void clear() { - for (uint i = 0; i < cMaxClusterizers; i++) - m_clusterizers[i].clear(); - } - - struct weighted_vec { - weighted_vec() {} - weighted_vec(const VectorType& v, uint w) - : m_vec(v), m_weight(w) {} - - VectorType m_vec; - uint m_weight; - }; - typedef crnlib::vector weighted_vec_array; - - typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); - - bool create_clusters( - const weighted_vec_array& weighted_vecs, - uint max_clusters, crnlib::vector >& cluster_indices, - progress_callback_func pProgress_callback, - void* pProgress_callback_data) { - m_main_thread_id = crn_get_current_thread_id(); - m_canceled = false; - m_pProgress_callback = pProgress_callback; - m_pProgress_callback_data = pProgress_callback_data; - - if (max_clusters >= 128) { - crnlib::vector primary_indices(weighted_vecs.size()); - for (uint i = 0; i < weighted_vecs.size(); i++) - primary_indices[i] = i; - - CRNLIB_ASSUME(cMaxClusterizers == 4); - - crnlib::vector indices[6]; - - compute_split(weighted_vecs, primary_indices, indices[0], indices[1]); - compute_split(weighted_vecs, indices[0], indices[2], indices[3]); - compute_split(weighted_vecs, indices[1], indices[4], indices[5]); - - create_clusters_task_state task_state[4]; - - m_cluster_task_displayed_progress = false; - - uint total_partitions = 0; - for (uint i = 0; i < 4; i++) { - const uint num_indices = indices[2 + 
i].size(); - if (num_indices) - total_partitions++; - } - - for (uint i = 0; i < 4; i++) { - const uint num_indices = indices[2 + i].size(); - if (!num_indices) - continue; - - task_state[i].m_pWeighted_vecs = &weighted_vecs; - task_state[i].m_pIndices = &indices[2 + i]; - task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions; - - m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]); - } - - m_pTask_pool->join(); - - if (m_canceled) - return false; - - uint total_clusters = 0; - for (uint i = 0; i < 4; i++) - total_clusters += task_state[i].m_cluster_indices.size(); - - cluster_indices.reserve(total_clusters); - cluster_indices.resize(0); - - for (uint i = 0; i < 4; i++) { - const uint ofs = cluster_indices.size(); - - cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size()); - - for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) { - cluster_indices[ofs + j].swap(task_state[i].m_cluster_indices[j]); +namespace crnlib +{ + template + class threaded_clusterizer + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer); + public: + threaded_clusterizer(task_pool& tp): + m_pTask_pool(&tp), + m_pProgress_callback(nullptr), + m_pProgress_callback_data(nullptr), + m_canceled(false) + { } - } - } else { - m_clusterizers[0].clear(); - m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size()); - - for (uint i = 0; i < weighted_vecs.size(); i++) { - const weighted_vec& v = weighted_vecs[i]; - - m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight); - } - - m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false); //m_params.m_dxt_quality <= cCRNDXTQualityFast); - - const uint num_clusters = m_clusterizers[0].get_codebook_size(); - - m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices); - } - - return !m_canceled; - } - - private: - task_pool* m_pTask_pool; - - crn_thread_id_t m_main_thread_id; - 
- struct create_clusters_task_state { - create_clusters_task_state() - : m_pWeighted_vecs(nullptr), m_pIndices(nullptr), m_max_clusters(0) { - } - - const weighted_vec_array* m_pWeighted_vecs; - crnlib::vector* m_pIndices; - crnlib::vector > m_cluster_indices; - uint m_max_clusters; - }; - - typedef clusterizer vector_clusterizer; - enum { cMaxClusterizers = 4 }; - vector_clusterizer m_clusterizers[cMaxClusterizers]; - bool m_cluster_task_displayed_progress; - - progress_callback_func m_pProgress_callback; - void* m_pProgress_callback_data; - bool m_canceled; - - static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) { - threaded_clusterizer* pClusterizer = static_cast(pData); - - if (!pClusterizer->m_pProgress_callback) - return true; - - if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data)) { - pClusterizer->m_canceled = true; - return false; - } - return true; - } - - void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector& indices) { - const uint N = VectorType::num_elements; - - VectorType centroid(0.0f); - double total_weight = 0.0f; - for (uint i = 0; i < indices.size(); i++) { - const weighted_vec& v = vecs[indices[i]]; - centroid += v.m_vec * static_cast(v.m_weight); - total_weight += v.m_weight; - } - - if (total_weight == 0.0f) { - axis_res.clear(); - centroid_res = centroid; - return; - } - - double one_over_total_weight = 1.0f / total_weight; - for (uint i = 0; i < N; i++) - centroid[i] = static_cast(centroid[i] * one_over_total_weight); - - matrix covar; - covar.clear(); - - for (uint i = 0; i < indices.size(); i++) { - const weighted_vec& weighted_vec = vecs[indices[i]]; - - const VectorType v(weighted_vec.m_vec - centroid); - const VectorType w(v * static_cast(weighted_vec.m_weight)); - - for (uint x = 0; x < N; x++) - for (uint y = x; y < N; y++) - covar[x][y] = covar[x][y] + v[x] * w[y]; - } - - for (uint x = 0; 
x < N; x++) - for (uint y = x; y < N; y++) - covar[x][y] = static_cast(covar[x][y] * one_over_total_weight); - - for (uint x = 0; x < (N - 1); x++) - for (uint y = x + 1; y < N; y++) - covar[y][x] = covar[x][y]; - - VectorType axis; - for (uint i = 0; i < N; i++) - axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1))); - - VectorType prev_axis(axis); - - const uint cMaxIterations = 10; - for (uint iter = 0; iter < cMaxIterations; iter++) { - VectorType x; - - double max_sum = 0; - - for (uint i = 0; i < N; i++) { - double sum = 0; - - for (uint j = 0; j < N; j++) - sum += axis[j] * covar[i][j]; - - x[i] = static_cast(sum); - - max_sum = math::maximum(max_sum, fabs(sum)); - } - - if (max_sum != 0.0f) - x *= static_cast(1.0f / max_sum); - - VectorType delta_axis(prev_axis - x); - - prev_axis = axis; - axis = x; - - if (delta_axis.norm() < .0025f) - break; - } - - axis.normalize(); - - axis_res = axis; - centroid_res = centroid; - } + void clear() + { + for (uint i = 0; i < cMaxClusterizers; i++) + { + m_clusterizers[i].clear(); + } + } - void compute_division( - const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector& indices, - vector& left_indices, - vector& right_indices) { - left_indices.resize(0); - right_indices.resize(0); + struct weighted_vec + { + weighted_vec() + { + } + weighted_vec(const VectorType& v, uint w) : + m_vec(v), + m_weight(w) + { + } + + VectorType m_vec; + uint m_weight; + }; + typedef crnlib::vector weighted_vec_array; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + + bool create_clusters( + const weighted_vec_array& weighted_vecs, + uint max_clusters, crnlib::vector >& cluster_indices, + progress_callback_func pProgress_callback, + void* pProgress_callback_data) + { + m_main_thread_id = crn_get_current_thread_id(); + m_canceled = false; + m_pProgress_callback = pProgress_callback; + m_pProgress_callback_data = pProgress_callback_data; + + if 
(max_clusters >= 128) + { + crnlib::vector primary_indices(weighted_vecs.size()); + for (uint i = 0; i < weighted_vecs.size(); i++) + { + primary_indices[i] = i; + } + + CRNLIB_ASSUME(cMaxClusterizers == 4); + + crnlib::vector indices[6]; + + compute_split(weighted_vecs, primary_indices, indices[0], indices[1]); + compute_split(weighted_vecs, indices[0], indices[2], indices[3]); + compute_split(weighted_vecs, indices[1], indices[4], indices[5]); + + create_clusters_task_state task_state[4]; + + m_cluster_task_displayed_progress = false; + + uint total_partitions = 0; + for (uint i = 0; i < 4; i++) + { + const uint num_indices = indices[2 + i].size(); + if (num_indices) + { + total_partitions++; + } + } + + for (uint i = 0; i < 4; i++) + { + const uint num_indices = indices[2 + i].size(); + if (!num_indices) + { + continue; + } + + task_state[i].m_pWeighted_vecs = &weighted_vecs; + task_state[i].m_pIndices = &indices[2 + i]; + task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions; + + m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]); + } + + m_pTask_pool->join(); + + if (m_canceled) + { + return false; + } + + uint total_clusters = 0; + for (uint i = 0; i < 4; i++) + { + total_clusters += task_state[i].m_cluster_indices.size(); + } + + cluster_indices.reserve(total_clusters); + cluster_indices.resize(0); + + for (uint i = 0; i < 4; i++) + { + const uint ofs = cluster_indices.size(); + + cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size()); + + for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) + { + cluster_indices[ofs + j].swap(task_state[i].m_cluster_indices[j]); + } + } + } + else + { + m_clusterizers[0].clear(); + m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size()); + + for (uint i = 0; i < weighted_vecs.size(); i++) + { + const weighted_vec& v = weighted_vecs[i]; + + m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight); + } 
+ + m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false); //m_params.m_dxt_quality <= cCRNDXTQualityFast); + + const uint num_clusters = m_clusterizers[0].get_codebook_size(); + + m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices); + } + + return !m_canceled; + } - for (uint i = 0; i < indices.size(); i++) { - const uint vec_index = indices[i]; - const VectorType v(vecs[vec_index].m_vec - centroid); + private: + task_pool* m_pTask_pool; + + crn_thread_id_t m_main_thread_id; + + struct create_clusters_task_state + { + create_clusters_task_state(): + m_pWeighted_vecs(nullptr), + m_pIndices(nullptr), + m_max_clusters(0) + { + } + + const weighted_vec_array* m_pWeighted_vecs; + crnlib::vector* m_pIndices; + crnlib::vector > m_cluster_indices; + uint m_max_clusters; + }; + + typedef clusterizer vector_clusterizer; + + enum { cMaxClusterizers = 4 }; + vector_clusterizer m_clusterizers[cMaxClusterizers]; + bool m_cluster_task_displayed_progress; + + progress_callback_func m_pProgress_callback; + void* m_pProgress_callback_data; + bool m_canceled; + + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + threaded_clusterizer* pClusterizer = static_cast(pData); + + if (!pClusterizer->m_pProgress_callback) + { + return true; + } + + if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data)) + { + pClusterizer->m_canceled = true; + return false; + } + return true; + } - float t = v * axis; - if (t < 0.0f) - left_indices.push_back(vec_index); - else - right_indices.push_back(vec_index); - } - } + void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector& indices) + { + const uint N = VectorType::num_elements; + + VectorType centroid(0.0f); + double total_weight = 0.0f; + for (uint i = 0; i < indices.size(); i++) + { + const weighted_vec& v = vecs[indices[i]]; + centroid += v.m_vec 
* static_cast(v.m_weight); + total_weight += v.m_weight; + } + + if (total_weight == 0.0f) + { + axis_res.clear(); + centroid_res = centroid; + return; + } + + double one_over_total_weight = 1.0f / total_weight; + for (uint i = 0; i < N; i++) + { + centroid[i] = static_cast(centroid[i] * one_over_total_weight); + } + + matrix covar; + covar.clear(); + + for (uint i = 0; i < indices.size(); i++) + { + const weighted_vec& weighted_vec = vecs[indices[i]]; + + const VectorType v(weighted_vec.m_vec - centroid); + const VectorType w(v * static_cast(weighted_vec.m_weight)); + + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + } + } + + for (uint x = 0; x < N; x++) + { + for (uint y = x; y < N; y++) + { + covar[x][y] = static_cast(covar[x][y] * one_over_total_weight); + } + } + + for (uint x = 0; x < (N - 1); x++) + { + for (uint y = x + 1; y < N; y++) + { + covar[y][x] = covar[x][y]; + } + } + + VectorType axis; + for (uint i = 0; i < N; i++) + { + axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1))); + } + + VectorType prev_axis(axis); + + const uint cMaxIterations = 10; + for (uint iter = 0; iter < cMaxIterations; iter++) + { + VectorType x; + + double max_sum = 0; + + for (uint i = 0; i < N; i++) + { + double sum = 0; + + for (uint j = 0; j < N; j++) + { + sum += axis[j] * covar[i][j]; + } + + x[i] = static_cast(sum); + + max_sum = math::maximum(max_sum, fabs(sum)); + } + + if (max_sum != 0.0f) + { + x *= static_cast(1.0f / max_sum); + } + + VectorType delta_axis(prev_axis - x); + + prev_axis = axis; + axis = x; + + if (delta_axis.norm() < .0025f) + { + break; + } + } + + axis.normalize(); + + axis_res = axis; + centroid_res = centroid; + } - void compute_split( - const weighted_vec_array& vecs, const vector& indices, - vector& left_indices, - vector& right_indices) { - VectorType axis, centroid; - compute_pca(axis, centroid, vecs, indices); + void compute_division( + const VectorType& axis, const 
VectorType& centroid, const weighted_vec_array& vecs, const vector& indices, + vector& left_indices, + vector& right_indices) + { + left_indices.resize(0); + right_indices.resize(0); + + for (uint i = 0; i < indices.size(); i++) + { + const uint vec_index = indices[i]; + const VectorType v(vecs[vec_index].m_vec - centroid); + + float t = v * axis; + if (t < 0.0f) + { + left_indices.push_back(vec_index); + } + else + { + right_indices.push_back(vec_index); + } + } + } - compute_division(axis, centroid, vecs, indices, left_indices, right_indices); - } + void compute_split( + const weighted_vec_array& vecs, const vector& indices, + vector& left_indices, + vector& right_indices) + { + VectorType axis, centroid; + compute_pca(axis, centroid, vecs, indices); - static bool generate_codebook_dummy_progress_callback(uint, void* pData) { - if (static_cast(pData)->m_canceled) - return false; + compute_division(axis, centroid, vecs, indices, left_indices, right_indices); + } - return true; - } + static bool generate_codebook_dummy_progress_callback(uint, void* pData) + { + if (static_cast(pData)->m_canceled) + { + return false; + } - void create_clusters_task(uint64 data, void* pData_ptr) { - if (m_canceled) - return; + return true; + } - const uint partition_index = static_cast(data); - create_clusters_task_state& state = *static_cast(pData_ptr); + void create_clusters_task(uint64 data, void* pData_ptr) + { + if (m_canceled) + { + return; + } - m_clusterizers[partition_index].clear(); + const uint partition_index = static_cast(data); + create_clusters_task_state& state = *static_cast(pData_ptr); - for (uint i = 0; i < state.m_pIndices->size(); i++) { - const uint index = (*state.m_pIndices)[i]; - const weighted_vec& v = (*state.m_pWeighted_vecs)[index]; + m_clusterizers[partition_index].clear(); - m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight); - } + for (uint i = 0; i < state.m_pIndices->size(); i++) + { + const uint index = (*state.m_pIndices)[i]; + 
const weighted_vec& v = (*state.m_pWeighted_vecs)[index]; - if (m_canceled) - return; + m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight); + } - const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id); + if (m_canceled) + { + return; + } - const bool quick = false; - m_clusterizers[partition_index].generate_codebook( - state.m_max_clusters, - (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback, - this, - quick); + const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id); - if (is_main_thread) - m_cluster_task_displayed_progress = true; + const bool quick = false; + m_clusterizers[partition_index].generate_codebook( + state.m_max_clusters, + (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback, + this, + quick); - if (m_canceled) - return; + if (is_main_thread) + { + m_cluster_task_displayed_progress = true; + } - const uint num_clusters = m_clusterizers[partition_index].get_codebook_size(); + if (m_canceled) + { + return; + } - m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices); + const uint num_clusters = m_clusterizers[partition_index].get_codebook_size(); - for (uint i = 0; i < state.m_cluster_indices.size(); i++) { - crnlib::vector& indices = state.m_cluster_indices[i]; + m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices); - for (uint j = 0; j < indices.size(); j++) - indices[j] = (*state.m_pIndices)[indices[j]]; - } - } -}; + for (uint i = 0; i < state.m_cluster_indices.size(); i++) + { + crnlib::vector& indices = state.m_cluster_indices[i]; + for (uint j = 0; j < indices.size(); j++) + { + indices[j] = (*state.m_pIndices)[indices[j]]; + } + } + } + }; } // namespace crnlib diff --git a/crnlib/crn_threaded_resampler.cpp b/crnlib/crn_threaded_resampler.cpp 
index bc5fa81..79cedf4 100644 --- a/crnlib/crn_threaded_resampler.cpp +++ b/crnlib/crn_threaded_resampler.cpp @@ -1,282 +1,360 @@ -// File: crn_threaded_resampler.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_threaded_resampler.h" #include "crn_resample_filters.h" #include "crn_threading.h" -namespace crnlib { -threaded_resampler::threaded_resampler(task_pool& tp) - : m_pTask_pool(&tp), - m_pParams(nullptr), - m_pX_contribs(nullptr), - m_pY_contribs(nullptr), - m_bytes_per_pixel(0) { -} - -threaded_resampler::~threaded_resampler() { - free_contrib_lists(); -} - -void threaded_resampler::free_contrib_lists() { - if (m_pX_contribs) { - crnlib_free(m_pX_contribs->p); - m_pX_contribs->p = nullptr; - - crnlib_free(m_pX_contribs); - m_pX_contribs = nullptr; - } - - if (m_pY_contribs) { - crnlib_free(m_pY_contribs->p); - m_pY_contribs->p = nullptr; - - crnlib_free(m_pY_contribs); - m_pY_contribs = nullptr; - } -} - -void threaded_resampler::resample_x_task(uint64 data, void*) { - const uint thread_index = (uint)data; - - for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++) { - if (m_pTask_pool->get_num_threads()) { - if ((src_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; +namespace crnlib +{ + threaded_resampler::threaded_resampler(task_pool& tp) : + m_pTask_pool(&tp), + m_pParams(nullptr), + m_pX_contribs(nullptr), + m_pY_contribs(nullptr), + m_bytes_per_pixel(0) + { } - const Resampler::Contrib_List* pContribs = m_pX_contribs; - const Resampler::Contrib_List* pContribs_end = m_pX_contribs + m_pParams->m_dst_width; - - switch (m_pParams->m_fmt) { - case cPF_Y_F32: { - const float* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); - vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; - - do { - const Resampler::Contrib* p = pContribs->p; - const Resampler::Contrib* p_end = pContribs->p + pContribs->n; - - vec4F s(0.0f); - - while (p != p_end) { - const uint src_pixel = p->pixel; - const float src_weight = p->weight; - - s[0] += pSrc[src_pixel] * src_weight; - - p++; - } - - *pDst++ = s; - pContribs++; - } while (pContribs != 
pContribs_end); - - break; - } - case cPF_RGBX_F32: { - const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); - vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; - - do { - const Resampler::Contrib* p = pContribs->p; - const Resampler::Contrib* p_end = pContribs->p + pContribs->n; - - vec4F s(0.0f); - - while (p != p_end) { - const float src_weight = p->weight; - - const vec4F& src_pixel = pSrc[p->pixel]; - - s[0] += src_pixel[0] * src_weight; - s[1] += src_pixel[1] * src_weight; - s[2] += src_pixel[2] * src_weight; - - p++; - } - - *pDst++ = s; - pContribs++; - } while (pContribs != pContribs_end); - - break; - } - case cPF_RGBA_F32: { - const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); - vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; - - do { - Resampler::Contrib* p = pContribs->p; - Resampler::Contrib* p_end = pContribs->p + pContribs->n; - - vec4F s(0.0f); - - while (p != p_end) { - const float src_weight = p->weight; - - const vec4F& src_pixel = pSrc[p->pixel]; + threaded_resampler::~threaded_resampler() + { + free_contrib_lists(); + } - s[0] += src_pixel[0] * src_weight; - s[1] += src_pixel[1] * src_weight; - s[2] += src_pixel[2] * src_weight; - s[3] += src_pixel[3] * src_weight; + void threaded_resampler::free_contrib_lists() + { + if (m_pX_contribs) + { + crnlib_free(m_pX_contribs->p); + m_pX_contribs->p = nullptr; - p++; - } + crnlib_free(m_pX_contribs); + m_pX_contribs = nullptr; + } - *pDst++ = s; - pContribs++; - } while (pContribs != pContribs_end); + if (m_pY_contribs) + { + crnlib_free(m_pY_contribs->p); + m_pY_contribs->p = nullptr; - break; - } - default: - break; + crnlib_free(m_pY_contribs); + m_pY_contribs = nullptr; + } } - } -} - -void threaded_resampler::resample_y_task(uint64 data, void*) { - const uint thread_index = (uint)data; - crnlib::vector tmp(m_pParams->m_dst_width); + void 
threaded_resampler::resample_x_task(uint64 data, void*) + { + const uint thread_index = (uint)data; - for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++) { - if (m_pTask_pool->get_num_threads()) { - if ((dst_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) - continue; + for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++) + { + if (m_pTask_pool->get_num_threads()) + { + if ((src_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } + + const Resampler::Contrib_List* pContribs = m_pX_contribs; + const Resampler::Contrib_List* pContribs_end = m_pX_contribs + m_pParams->m_dst_width; + + switch (m_pParams->m_fmt) + { + case cPF_Y_F32: + { + const float* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + const Resampler::Contrib* p = pContribs->p; + const Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const uint src_pixel = p->pixel; + const float src_weight = p->weight; + + s[0] += pSrc[src_pixel] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + case cPF_RGBX_F32: + { + const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + const Resampler::Contrib* p = pContribs->p; + const Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const float src_weight = p->weight; + + const vec4F& src_pixel = pSrc[p->pixel]; + + s[0] += src_pixel[0] * src_weight; + s[1] += src_pixel[1] * src_weight; + s[2] += src_pixel[2] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + case cPF_RGBA_F32: + { + const vec4F* pSrc = 
reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + Resampler::Contrib* p = pContribs->p; + Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const float src_weight = p->weight; + + const vec4F& src_pixel = pSrc[p->pixel]; + + s[0] += src_pixel[0] * src_weight; + s[1] += src_pixel[1] * src_weight; + s[2] += src_pixel[2] * src_weight; + s[3] += src_pixel[3] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + default: + break; + } + } } - const Resampler::Contrib_List& contribs = m_pY_contribs[dst_y]; - - const vec4F* pSrc; - - if (contribs.n == 1) { - pSrc = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[0].pixel; - } else { - for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++) { - const vec4F* p = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[src_y_iter].pixel; - const float weight = contribs.p[src_y_iter].weight; - - if (!src_y_iter) { - for (uint i = 0; i < m_pParams->m_dst_width; i++) - tmp[i] = p[i] * weight; - } else { - for (uint i = 0; i < m_pParams->m_dst_width; i++) - tmp[i] += p[i] * weight; + void threaded_resampler::resample_y_task(uint64 data, void*) + { + const uint thread_index = (uint)data; + + crnlib::vector tmp(m_pParams->m_dst_width); + + for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++) + { + if (m_pTask_pool->get_num_threads()) + { + if ((dst_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + { + continue; + } + } + + const Resampler::Contrib_List& contribs = m_pY_contribs[dst_y]; + + const vec4F* pSrc; + + if (contribs.n == 1) + { + pSrc = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[0].pixel; + } + else + { + for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++) + { + const vec4F* p = m_tmp_img.get_ptr() + 
m_pParams->m_dst_width * contribs.p[src_y_iter].pixel; + const float weight = contribs.p[src_y_iter].weight; + + if (!src_y_iter) + { + for (uint i = 0; i < m_pParams->m_dst_width; i++) + { + tmp[i] = p[i] * weight; + } + } + else + { + for (uint i = 0; i < m_pParams->m_dst_width; i++) + { + tmp[i] += p[i] * weight; + } + } + } + + pSrc = tmp.get_ptr(); + } + + const vec4F* pSrc_end = pSrc + m_pParams->m_dst_width; + + const float l = m_pParams->m_sample_low; + const float h = m_pParams->m_sample_high; + + switch (m_pParams->m_fmt) + { + case cPF_Y_F32: + { + float* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + *pDst++ = math::clamp((*pSrc)[0], l, h); + + pSrc++; + } while (pSrc != pSrc_end); + + break; + } + case cPF_RGBX_F32: + { + vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + (*pDst)[0] = math::clamp((*pSrc)[0], l, h); + (*pDst)[1] = math::clamp((*pSrc)[1], l, h); + (*pDst)[2] = math::clamp((*pSrc)[2], l, h); + (*pDst)[3] = h; + + pSrc++; + pDst++; + } while (pSrc != pSrc_end); + + break; + } + case cPF_RGBA_F32: + { + vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + (*pDst)[0] = math::clamp((*pSrc)[0], l, h); + (*pDst)[1] = math::clamp((*pSrc)[1], l, h); + (*pDst)[2] = math::clamp((*pSrc)[2], l, h); + (*pDst)[3] = math::clamp((*pSrc)[3], l, h); + + pSrc++; + pDst++; + } while (pSrc != pSrc_end); + + break; + } + default: + break; + } } - } - - pSrc = tmp.get_ptr(); } - const vec4F* pSrc_end = pSrc + m_pParams->m_dst_width; - - const float l = m_pParams->m_sample_low; - const float h = m_pParams->m_sample_high; - - switch (m_pParams->m_fmt) { - case cPF_Y_F32: { - float* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); - - do { - *pDst++ = math::clamp((*pSrc)[0], l, h); - - pSrc++; - - } while (pSrc != pSrc_end); + 
bool threaded_resampler::resample(const params& p) + { + free_contrib_lists(); + + m_pParams = &p; + + CRNLIB_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height); + CRNLIB_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height); + + switch (p.m_fmt) + { + case cPF_Y_F32: + m_bytes_per_pixel = 4; + break; + case cPF_RGBX_F32: + case cPF_RGBA_F32: + m_bytes_per_pixel = 16; + break; + default: + CRNLIB_ASSERT(false); + return false; + } - break; - } - case cPF_RGBX_F32: { - vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + int filter_index = find_resample_filter(p.m_Pfilter_name); + if (filter_index < 0) + { + return false; + } - do { - (*pDst)[0] = math::clamp((*pSrc)[0], l, h); - (*pDst)[1] = math::clamp((*pSrc)[1], l, h); - (*pDst)[2] = math::clamp((*pSrc)[2], l, h); - (*pDst)[3] = h; + const resample_filter& filter = g_resample_filters[filter_index]; - pSrc++; - pDst++; + m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, + filter.func, filter.support, p.m_filter_x_scale, 0.0f); + if (!m_pX_contribs) + { + return false; + } - } while (pSrc != pSrc_end); + m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, + m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, + 0.0f); + if (!m_pY_contribs) + { + return false; + } - break; - } - case cPF_RGBA_F32: { - vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height)) + { + return false; + } - do { - (*pDst)[0] = math::clamp((*pSrc)[0], l, h); - (*pDst)[1] = math::clamp((*pSrc)[1], l, h); - (*pDst)[2] = math::clamp((*pSrc)[2], l, h); - (*pDst)[3] = math::clamp((*pSrc)[3], l, h); + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, 
i, nullptr); + } + m_pTask_pool->join(); - pSrc++; - pDst++; + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + { + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, nullptr); + } + m_pTask_pool->join(); - } while (pSrc != pSrc_end); + m_tmp_img.clear(); + free_contrib_lists(); - break; - } - default: - break; + return true; } - } -} - -bool threaded_resampler::resample(const params& p) { - free_contrib_lists(); - - m_pParams = &p; - - CRNLIB_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height); - CRNLIB_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height); - - switch (p.m_fmt) { - case cPF_Y_F32: - m_bytes_per_pixel = 4; - break; - case cPF_RGBX_F32: - case cPF_RGBA_F32: - m_bytes_per_pixel = 16; - break; - default: - CRNLIB_ASSERT(false); - return false; - } - - int filter_index = find_resample_filter(p.m_Pfilter_name); - if (filter_index < 0) - return false; - - const resample_filter& filter = g_resample_filters[filter_index]; - - m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_x_scale, 0.0f); - if (!m_pX_contribs) - return false; - - m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, 0.0f); - if (!m_pY_contribs) - return false; - - if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height)) - return false; - - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, nullptr); - m_pTask_pool->join(); - - for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) - m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, nullptr); - m_pTask_pool->join(); - - m_tmp_img.clear(); - free_contrib_lists(); - - return true; -} - -} // namespace crnlib +} // namespace crnlib diff --git 
a/crnlib/crn_threaded_resampler.h b/crnlib/crn_threaded_resampler.h index 91b3c02..528c5f5 100644 --- a/crnlib/crn_threaded_resampler.h +++ b/crnlib/crn_threaded_resampler.h @@ -1,83 +1,109 @@ -// File: crn_threaded_resampler.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_resampler.h" #include "crn_vec.h" #include "crn_export.h" -namespace crnlib { -class task_pool; -class CRN_EXPORT threaded_resampler { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_resampler); - - public: - threaded_resampler(task_pool& tp); - ~threaded_resampler(); +namespace crnlib +{ + class task_pool; + class CRN_EXPORT threaded_resampler + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_resampler); - enum pixel_format { - cPF_Y_F32, - cPF_RGBX_F32, - cPF_RGBA_F32, + public: + threaded_resampler(task_pool& tp); + ~threaded_resampler(); - cPF_Total - }; + enum pixel_format + { + cPF_Y_F32, + cPF_RGBX_F32, + cPF_RGBA_F32, - struct params { - params() { - clear(); - } + cPF_Total + }; - void clear() { - utils::zero_object(*this); + struct params + { + params() + { + clear(); + } - m_boundary_op = Resampler::BOUNDARY_CLAMP; - m_sample_low = 0.0f; - m_sample_high = 255.0f; - m_Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; - m_filter_x_scale = 1.0f; - m_filter_y_scale = 1.0f; - } + void clear() + { + utils::zero_object(*this); - pixel_format m_fmt; + m_boundary_op = Resampler::BOUNDARY_CLAMP; + m_sample_low = 0.0f; + m_sample_high = 255.0f; + m_Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; + m_filter_x_scale = 1.0f; + m_filter_y_scale = 1.0f; + } - const void* m_pSrc_pixels; - uint m_src_width; - uint m_src_height; - uint m_src_pitch; + pixel_format m_fmt; - void* m_pDst_pixels; - uint m_dst_width; - uint m_dst_height; - uint m_dst_pitch; + const void* m_pSrc_pixels; + uint m_src_width; + uint m_src_height; + uint m_src_pitch; - Resampler::Boundary_Op m_boundary_op; + void* m_pDst_pixels; + uint m_dst_width; + uint m_dst_height; + uint m_dst_pitch; - float m_sample_low; - float m_sample_high; + Resampler::Boundary_Op m_boundary_op; - const char* m_Pfilter_name; - float m_filter_x_scale; - float m_filter_y_scale; - }; + float m_sample_low; + float m_sample_high; - bool resample(const params& p); + const char* m_Pfilter_name; + float 
m_filter_x_scale; + float m_filter_y_scale; + }; - private: - task_pool* m_pTask_pool; + bool resample(const params& p); - const params* m_pParams; + private: + task_pool* m_pTask_pool; - Resampler::Contrib_List* m_pX_contribs; - Resampler::Contrib_List* m_pY_contribs; - uint m_bytes_per_pixel; + const params* m_pParams; - crnlib::vector m_tmp_img; + Resampler::Contrib_List* m_pX_contribs; + Resampler::Contrib_List* m_pY_contribs; + uint m_bytes_per_pixel; - void free_contrib_lists(); + crnlib::vector m_tmp_img; - void resample_x_task(uint64 data, void* pData_ptr); - void resample_y_task(uint64 data, void* pData_ptr); -}; + void free_contrib_lists(); -} // namespace crnlib + void resample_x_task(uint64 data, void* pData_ptr); + void resample_y_task(uint64 data, void* pData_ptr); + }; +} // namespace crnlib diff --git a/crnlib/crn_threading.h b/crnlib/crn_threading.h index 14d2add..d497c1d 100644 --- a/crnlib/crn_threading.h +++ b/crnlib/crn_threading.h @@ -1,5 +1,25 @@ -// File: crn_threading.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ #if CRNLIB_USE_WIN32_API #include "crn_threading_win32.h" diff --git a/crnlib/crn_threading_null.h b/crnlib/crn_threading_null.h index 730b896..9af326b 100644 --- a/crnlib/crn_threading_null.h +++ b/crnlib/crn_threading_null.h @@ -1,5 +1,25 @@ -// File: crn_threading_null.h -// See Copyright Notice and license at the end of include/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once diff --git a/crnlib/crn_threading_pthreads.cpp b/crnlib/crn_threading_pthreads.cpp index 7be12e3..904a5b3 100644 --- a/crnlib/crn_threading_pthreads.cpp +++ b/crnlib/crn_threading_pthreads.cpp @@ -1,5 +1,25 @@ -// File: crn_threading_pthreads.cpp -// See Copyright Notice and license at the end of include/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_threading_pthreads.h" @@ -8,7 +28,6 @@ #if CRNLIB_USE_PTHREADS_API #ifdef WIN32 -#pragma comment(lib, "../ext/libpthread/lib/pthreadVC2.lib") #include "crn_winhdr.h" #include #else @@ -20,402 +39,473 @@ namespace crnlib { -uint g_number_of_processors = 1; + uint g_number_of_processors = 1; -void crn_threading_init() { + void crn_threading_init() + { #ifdef WIN32 - SYSTEM_INFO g_system_info; - GetSystemInfo(&g_system_info); - g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); + SYSTEM_INFO g_system_info; + GetSystemInfo(&g_system_info); + g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); #else - g_number_of_processors = math::maximum(1, sysconf(_SC_NPROCESSORS_ONLN)); + g_number_of_processors = math::maximum(1, sysconf(_SC_NPROCESSORS_ONLN)); #endif -} + } -crn_thread_id_t crn_get_current_thread_id() -{ + crn_thread_id_t crn_get_current_thread_id() + { #if defined(CRN_OS_BSD4) || defined(CRN_OS_DARWIN) - crn_thread_id_t id; - pthread_threadid_np(pthread_self(), &id); - return id; + crn_thread_id_t id; + pthread_threadid_np(pthread_self(), &id); + return id; #else - return static_cast(pthread_self()); + return 
static_cast(pthread_self()); #endif -} + } -void crn_sleep(unsigned int milliseconds) { + void crn_sleep(unsigned int milliseconds) + { #ifdef WIN32 - struct timespec interval; - interval.tv_sec = milliseconds / 1000; - interval.tv_nsec = (milliseconds % 1000) * 1000000L; - pthread_delay_np(&interval); + struct timespec interval; + interval.tv_sec = milliseconds / 1000; + interval.tv_nsec = (milliseconds % 1000) * 1000000L; + pthread_delay_np(&interval); #else - while (milliseconds) { - int msecs_to_sleep = CRNLIB_MIN(milliseconds, 1000); - usleep(msecs_to_sleep * 1000); - milliseconds -= msecs_to_sleep; - } + while (milliseconds) + { + int msecs_to_sleep = CRNLIB_MIN(milliseconds, 1000); + usleep(msecs_to_sleep * 1000); + milliseconds -= msecs_to_sleep; + } #endif -} + } -mutex::mutex(unsigned int spin_count) { - spin_count; + mutex::mutex(unsigned int spin_count) + { + spin_count; - if (pthread_mutex_init(&m_mutex, nullptr)) - crnlib_fail("mutex::mutex: pthread_mutex_init() failed", __FILE__, __LINE__); + if (pthread_mutex_init(&m_mutex, nullptr)) + { + crnlib_fail("mutex::mutex: pthread_mutex_init() failed", __FILE__, __LINE__); + } #ifdef CRNLIB_BUILD_DEBUG - m_lock_count = 0; + m_lock_count = 0; #endif -} + } -mutex::~mutex() { + mutex::~mutex() + { #ifdef CRNLIB_BUILD_DEBUG - if (m_lock_count) - crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); + if (m_lock_count) + { + crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); + } #endif - if (pthread_mutex_destroy(&m_mutex)) - crnlib_assert("mutex::~mutex: pthread_mutex_destroy() failed", __FILE__, __LINE__); -} + if (pthread_mutex_destroy(&m_mutex)) + { + crnlib_assert("mutex::~mutex: pthread_mutex_destroy() failed", __FILE__, __LINE__); + } + } -void mutex::lock() { - pthread_mutex_lock(&m_mutex); + void mutex::lock() + { + pthread_mutex_lock(&m_mutex); #ifdef CRNLIB_BUILD_DEBUG - m_lock_count++; + m_lock_count++; #endif -} + } -void mutex::unlock() { + void 
mutex::unlock() + { #ifdef CRNLIB_BUILD_DEBUG - if (!m_lock_count) - crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); - m_lock_count--; + if (!m_lock_count) + { + crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); + } + m_lock_count--; #endif - pthread_mutex_unlock(&m_mutex); -} + pthread_mutex_unlock(&m_mutex); + } -void mutex::set_spin_count(unsigned int count) { - count; -} + void mutex::set_spin_count(unsigned int count) + { + count; + } -semaphore::semaphore(long initialCount, long maximumCount, const char* pName) { - maximumCount, pName; - CRNLIB_ASSERT(maximumCount >= initialCount); - if (sem_init(&m_sem, 0, initialCount)) { - CRNLIB_FAIL("semaphore: sem_init() failed"); - } -} + semaphore::semaphore(long initialCount, long maximumCount, const char* pName) + { + maximumCount, pName; + CRNLIB_ASSERT(maximumCount >= initialCount); + if (sem_init(&m_sem, 0, initialCount)) + { + CRNLIB_FAIL("semaphore: sem_init() failed"); + } + } -semaphore::~semaphore() { - sem_destroy(&m_sem); -} + semaphore::~semaphore() + { + sem_destroy(&m_sem); + } -void semaphore::release(long releaseCount) { - CRNLIB_ASSERT(releaseCount >= 1); + void semaphore::release(long releaseCount) + { + CRNLIB_ASSERT(releaseCount >= 1); - int status = 0; + int status = 0; #ifdef WIN32 - if (1 == releaseCount) - status = sem_post(&m_sem); - else - status = sem_post_multiple(&m_sem, releaseCount); + if (1 == releaseCount) + { + status = sem_post(&m_sem); + } + else + { + status = sem_post_multiple(&m_sem, releaseCount); + } #else - while (releaseCount > 0) { - status = sem_post(&m_sem); - if (status) - break; - releaseCount--; - } + while (releaseCount > 0) + { + status = sem_post(&m_sem); + if (status) + { + break; + } + releaseCount--; + } #endif - if (status) { - CRNLIB_FAIL("semaphore: sem_post() or sem_post_multiple() failed"); - } -} + if (status) + { + CRNLIB_FAIL("semaphore: sem_post() or sem_post_multiple() failed"); + } + } -void 
semaphore::try_release(long releaseCount) { - CRNLIB_ASSERT(releaseCount >= 1); + void semaphore::try_release(long releaseCount) + { + CRNLIB_ASSERT(releaseCount >= 1); #ifdef WIN32 - if (1 == releaseCount) - sem_post(&m_sem); - else - sem_post_multiple(&m_sem, releaseCount); + if (1 == releaseCount) + { + sem_post(&m_sem); + } + else + { + sem_post_multiple(&m_sem, releaseCount); + } #else - while (releaseCount > 0) { - sem_post(&m_sem); - releaseCount--; - } + while (releaseCount > 0) + { + sem_post(&m_sem); + releaseCount--; + } #endif -} + } + + bool semaphore::wait(uint32 milliseconds) + { + int status; + if (milliseconds == cUINT32_MAX) + { + status = sem_wait(&m_sem); + } + else + { + struct timespec interval; + interval.tv_sec = milliseconds / 1000; + interval.tv_nsec = (milliseconds % 1000) * 1000000L; + status = sem_timedwait(&m_sem, &interval); + } -bool semaphore::wait(uint32 milliseconds) { - int status; - if (milliseconds == cUINT32_MAX) { - status = sem_wait(&m_sem); - } else { - struct timespec interval; - interval.tv_sec = milliseconds / 1000; - interval.tv_nsec = (milliseconds % 1000) * 1000000L; - status = sem_timedwait(&m_sem, &interval); - } + if (status) + { + if (errno != ETIMEDOUT) + { + CRNLIB_FAIL("semaphore: sem_wait() or sem_timedwait() failed"); + } + return false; + } - if (status) { - if (errno != ETIMEDOUT) { - CRNLIB_FAIL("semaphore: sem_wait() or sem_timedwait() failed"); + return true; } - return false; - } - - return true; -} #if defined(CRN_OS_LINUX) -spinlock::spinlock() -{ - if (pthread_spin_init(&m_spinlock, 0)) + spinlock::spinlock() { - CRNLIB_FAIL("spinlock: pthread_spin_init() failed"); + if (pthread_spin_init(&m_spinlock, 0)) + { + CRNLIB_FAIL("spinlock: pthread_spin_init() failed"); + } } -} -spinlock::~spinlock() -{ - pthread_spin_destroy(&m_spinlock); -} + spinlock::~spinlock() + { + pthread_spin_destroy(&m_spinlock); + } -void spinlock::lock() -{ - if (pthread_spin_lock(&m_spinlock)) + void spinlock::lock() { - 
CRNLIB_FAIL("spinlock: pthread_spin_lock() failed"); + if (pthread_spin_lock(&m_spinlock)) + { + CRNLIB_FAIL("spinlock: pthread_spin_lock() failed"); + } } -} -void spinlock::unlock() -{ - if (pthread_spin_unlock(&m_spinlock)) + void spinlock::unlock() { - CRNLIB_FAIL("spinlock: pthread_spin_unlock() failed"); + if (pthread_spin_unlock(&m_spinlock)) + { + CRNLIB_FAIL("spinlock: pthread_spin_unlock() failed"); + } } -} #elif defined(CRN_OS_DARWIN) -spinlock::spinlock(): - m_spinlock(OS_UNFAIR_LOCK_INIT) -{ -} + spinlock::spinlock() : + m_spinlock(OS_UNFAIR_LOCK_INIT) + { + } -spinlock::~spinlock() -{ -} + spinlock::~spinlock() + { + } -void spinlock::lock() -{ - os_unfair_lock_lock(&m_spinlock); -} + void spinlock::lock() + { + os_unfair_lock_lock(&m_spinlock); + } -void spinlock::unlock() -{ - os_unfair_lock_unlock(&m_spinlock); -} + void spinlock::unlock() + { + os_unfair_lock_unlock(&m_spinlock); + } #else -spinlock::spinlock() -{ - __asm__ __volatile__("" ::: "memory"); - m_spinlock = 0; -} + spinlock::spinlock() + { + __asm__ __volatile__("" ::: "memory"); + m_spinlock = 0; + } -spinlock::~spinlock() -{ -} + spinlock::~spinlock() + { + } -void spinlock::lock() -{ - while (1) + void spinlock::lock() { - int i; - for (i = 0; i < 10000; i++) + while (1) { - if (__sync_bool_compare_and_swap(&m_spinlock, 0, 1)) + int i; + for (i = 0; i < 10000; i++) { - return 0; + if (__sync_bool_compare_and_swap(&m_spinlock, 0, 1)) + { + return 0; + } } + sched_yield(); } - sched_yield(); } -} -void spinlock::unlock() -{ - __asm__ __volatile__("" ::: "memory"); - m_spinlock = 0; -} + void spinlock::unlock() + { + __asm__ __volatile__("" ::: "memory"); + m_spinlock = 0; + } #endif -task_pool::task_pool() - : m_num_threads(0), - m_tasks_available(0, 32767), - m_all_tasks_completed(0, 1), - m_total_submitted_tasks(0), - m_total_completed_tasks(0), - m_exit_flag(false) { - utils::zero_object(m_threads); -} + task_pool::task_pool(): + m_num_threads(0), + m_tasks_available(0, 32767), + 
m_all_tasks_completed(0, 1), + m_total_submitted_tasks(0), + m_total_completed_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + } -task_pool::task_pool(uint num_threads) - : m_num_threads(0), - m_tasks_available(0, 32767), - m_all_tasks_completed(0, 1), - m_total_submitted_tasks(0), - m_total_completed_tasks(0), - m_exit_flag(false) { - utils::zero_object(m_threads); + task_pool::task_pool(uint num_threads): + m_num_threads(0), + m_tasks_available(0, 32767), + m_all_tasks_completed(0, 1), + m_total_submitted_tasks(0), + m_total_completed_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); - bool status = init(num_threads); - CRNLIB_VERIFY(status); -} + bool status = init(num_threads); + CRNLIB_VERIFY(status); + } -task_pool::~task_pool() { - deinit(); -} - -bool task_pool::init(uint num_threads) { - CRNLIB_ASSERT(num_threads <= cMaxThreads); - num_threads = math::minimum(num_threads, cMaxThreads); - - deinit(); - - bool succeeded = true; - - m_num_threads = 0; - while (m_num_threads < num_threads) { - int status = pthread_create(&m_threads[m_num_threads], nullptr, thread_func, this); - if (status) { - succeeded = false; - break; - } - - m_num_threads++; - } - - if (!succeeded) { - deinit(); - return false; - } - - return true; -} - -void task_pool::deinit() { - if (m_num_threads) { - join(); - - atomic_exchange32(&m_exit_flag, true); - - m_tasks_available.release(m_num_threads); - - for (uint i = 0; i < m_num_threads; i++) - pthread_join(m_threads[i], nullptr); - - m_num_threads = 0; - - atomic_exchange32(&m_exit_flag, false); - } - - m_task_stack.clear(); - m_total_submitted_tasks = 0; - m_total_completed_tasks = 0; -} - -bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(pFunc); - - task tsk; - tsk.m_callback = pFunc; - tsk.m_data = data; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = 0; - - atomic_increment32(&m_total_submitted_tasks); - if (!m_task_stack.try_push(tsk)) { - 
atomic_increment32(&m_total_completed_tasks); - return false; - } - - m_tasks_available.release(1); - - return true; -} - -// It's the object's responsibility to delete pObj within the execute_task() method, if needed! -bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(pObj); - - task tsk; - tsk.m_pObj = pObj; - tsk.m_data = data; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = cTaskFlagObject; - - atomic_increment32(&m_total_submitted_tasks); - if (!m_task_stack.try_push(tsk)) { - atomic_increment32(&m_total_completed_tasks); - return false; - } + task_pool::~task_pool() + { + deinit(); + } - m_tasks_available.release(1); + bool task_pool::init(uint num_threads) + { + CRNLIB_ASSERT(num_threads <= cMaxThreads); + num_threads = math::minimum(num_threads, cMaxThreads); - return true; -} + deinit(); -void task_pool::process_task(task& tsk) { - if (tsk.m_flags & cTaskFlagObject) - tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); - else - tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); + bool succeeded = true; - if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) { - // Try to signal the semaphore (the max count is 1 so this may actually fail). - m_all_tasks_completed.try_release(); - } -} + m_num_threads = 0; + while (m_num_threads < num_threads) + { + int status = pthread_create(&m_threads[m_num_threads], nullptr, thread_func, this); + if (status) + { + succeeded = false; + break; + } -void task_pool::join() { - // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. - task tsk; - while (m_task_stack.pop(tsk)) - process_task(tsk); + m_num_threads++; + } - // At this point the task stack is empty. - // Now wait for all concurrent tasks to complete. 
The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks - // where issued and asynchronously completed, so this loop may iterate a few times. - const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); - while (m_total_completed_tasks != total_submitted_tasks) { - // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. - // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. - m_all_tasks_completed.wait(1); - } -} + if (!succeeded) + { + deinit(); + return false; + } -void* task_pool::thread_func(void* pContext) { - task_pool* pPool = static_cast(pContext); - task tsk; + return true; + } - for (;;) { - if (!pPool->m_tasks_available.wait()) - break; + void task_pool::deinit() + { + if (m_num_threads) + { + join(); + + atomic_exchange32(&m_exit_flag, true); + + m_tasks_available.release(m_num_threads); + + for (uint i = 0; i < m_num_threads; i++) + { + pthread_join(m_threads[i], nullptr); + } - if (pPool->m_exit_flag) - break; + m_num_threads = 0; - if (pPool->m_task_stack.pop(tsk)) { - pPool->process_task(tsk); + atomic_exchange32(&m_exit_flag, false); + } + + m_task_stack.clear(); + m_total_submitted_tasks = 0; + m_total_completed_tasks = 0; } - } - - return nullptr; -} + bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pFunc); + + task tsk; + tsk.m_callback = pFunc; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = 0; + + atomic_increment32(&m_total_submitted_tasks); + if (!m_task_stack.try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + return false; + } + + m_tasks_available.release(1); + + return true; + } + + // It's the object's responsibility to delete pObj within the execute_task() method, if needed! 
+ bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pObj); + + task tsk; + tsk.m_pObj = pObj; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + atomic_increment32(&m_total_submitted_tasks); + if (!m_task_stack.try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + return false; + } + + m_tasks_available.release(1); + + return true; + } + + void task_pool::process_task(task& tsk) + { + if (tsk.m_flags & cTaskFlagObject) + { + tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); + } + else + { + tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); + } + + if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) + { + // Try to signal the semaphore (the max count is 1 so this may actually fail). + m_all_tasks_completed.try_release(); + } + } + + void task_pool::join() + { + // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. + task tsk; + while (m_task_stack.pop(tsk)) + { + process_task(tsk); + } + + // At this point the task stack is empty. + // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks + // where issued and asynchronously completed, so this loop may iterate a few times. + const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); + while (m_total_completed_tasks != total_submitted_tasks) + { + // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. + // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. 
+ m_all_tasks_completed.wait(1); + } + } + + void* task_pool::thread_func(void* pContext) + { + task_pool* pPool = static_cast(pContext); + task tsk; + + for (;;) + { + if (!pPool->m_tasks_available.wait()) + { + break; + } + + if (pPool->m_exit_flag) + { + break; + } + + if (pPool->m_task_stack.pop(tsk)) + { + pPool->process_task(tsk); + } + } + + return nullptr; + } } // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API diff --git a/crnlib/crn_threading_pthreads.h b/crnlib/crn_threading_pthreads.h index b0cfdeb..feb2703 100644 --- a/crnlib/crn_threading_pthreads.h +++ b/crnlib/crn_threading_pthreads.h @@ -1,5 +1,26 @@ -// File: crn_threading_pthreads.h -// See Copyright Notice and license at the end of include/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" @@ -22,312 +43,371 @@ #include "crn_export.h" -namespace crnlib { -// g_number_of_processors defaults to 1. Will be higher on multicore machines. +namespace crnlib +{ + // g_number_of_processors defaults to 1. 
Will be higher on multicore machines. CRN_EXPORT extern uint g_number_of_processors; CRN_EXPORT void crn_threading_init(); -typedef uint64 crn_thread_id_t; -CRN_EXPORT crn_thread_id_t crn_get_current_thread_id(); - -CRN_EXPORT void crn_sleep(unsigned int milliseconds); + typedef uint64 crn_thread_id_t; + CRN_EXPORT crn_thread_id_t crn_get_current_thread_id(); -CRN_EXPORT uint crn_get_max_helper_threads(); + CRN_EXPORT void crn_sleep(unsigned int milliseconds); -class CRN_EXPORT mutex { - mutex(const mutex&); - mutex& operator=(const mutex&); + CRN_EXPORT uint crn_get_max_helper_threads(); - public: - mutex(unsigned int spin_count = 0); - ~mutex(); - void lock(); - void unlock(); - void set_spin_count(unsigned int count); + class CRN_EXPORT mutex + { + mutex(const mutex&); + mutex& operator=(const mutex&); + public: + mutex(unsigned int spin_count = 0); + ~mutex(); + void lock(); + void unlock(); + void set_spin_count(unsigned int count); - private: - pthread_mutex_t m_mutex; + private: + pthread_mutex_t m_mutex; #ifdef CRNLIB_BUILD_DEBUG - unsigned int m_lock_count; + unsigned int m_lock_count; #endif -}; - -class CRN_EXPORT scoped_mutex { - scoped_mutex(const scoped_mutex&); - scoped_mutex& operator=(const scoped_mutex&); - - public: - inline scoped_mutex(mutex& m) - : m_mutex(m) { m_mutex.lock(); } - inline ~scoped_mutex() { m_mutex.unlock(); } - - private: - mutex& m_mutex; -}; + }; -class CRN_EXPORT semaphore { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + class CRN_EXPORT scoped_mutex + { + scoped_mutex(const scoped_mutex&); + scoped_mutex& operator=(const scoped_mutex&); + public: + inline scoped_mutex(mutex& m): + m_mutex(m) + { + m_mutex.lock(); + } + inline ~scoped_mutex() + { + m_mutex.unlock(); + } + + private: + mutex& m_mutex; + }; - public: - semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = nullptr); - ~semaphore(); + class CRN_EXPORT semaphore + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + public: + semaphore(long 
initialCount = 0, long maximumCount = 1, const char* pName = nullptr); + ~semaphore(); - void release(long releaseCount = 1); - void try_release(long releaseCount = 1); - bool wait(uint32 milliseconds = cUINT32_MAX); + void release(long releaseCount = 1); + void try_release(long releaseCount = 1); + bool wait(uint32 milliseconds = cUINT32_MAX); - private: - sem_t m_sem; -}; + private: + sem_t m_sem; + }; -class CRN_EXPORT spinlock -{ -public: - spinlock(); - ~spinlock(); + class CRN_EXPORT spinlock + { + public: + spinlock(); + ~spinlock(); - void lock(); - void unlock(); + void lock(); + void unlock(); -private: + private: #if defined(CRN_OS_LINUX) - pthread_spinlock_t m_spinlock; + pthread_spinlock_t m_spinlock; #elif defined(CRN_OS_DARWIN) - os_unfair_lock m_spinlock; + os_unfair_lock m_spinlock; #else - int m_spinlock; + int m_spinlock; #endif -}; - -class CRN_EXPORT scoped_spinlock { - scoped_spinlock(const scoped_spinlock&); - scoped_spinlock& operator=(const scoped_spinlock&); - - public: - inline scoped_spinlock(spinlock& lock) - : m_lock(lock) { m_lock.lock(); } - inline ~scoped_spinlock() { m_lock.unlock(); } - - private: - spinlock& m_lock; -}; - -template -class tsstack { - public: - inline tsstack() - : m_top(0) { - } - - inline ~tsstack() { - } - - inline void clear() { - m_spinlock.lock(); - m_top = 0; - m_spinlock.unlock(); - } - - inline bool try_push(const T& obj) { - bool result = false; - m_spinlock.lock(); - if (m_top < (int)cMaxSize) { - m_stack[m_top++] = obj; - result = true; - } - m_spinlock.unlock(); - return result; - } - - inline bool pop(T& obj) { - bool result = false; - m_spinlock.lock(); - if (m_top > 0) { - obj = m_stack[--m_top]; - result = true; - } - m_spinlock.unlock(); - return result; - } - - private: - spinlock m_spinlock; - T m_stack[cMaxSize]; - int m_top; -}; - -class CRN_EXPORT task_pool { - public: - task_pool(); - task_pool(uint num_threads); - ~task_pool(); - - enum { cMaxThreads = 16 }; - bool init(uint num_threads); 
- void deinit(); - - inline uint get_num_threads() const { return m_num_threads; } - inline uint32 get_num_outstanding_tasks() const { return m_total_submitted_tasks - m_total_completed_tasks; } - - // C-style task callback - typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); - - class executable_task { - public: - virtual void execute_task(uint64 data, void* pData_ptr) = 0; - }; - - // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! - bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); - - template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); - - template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); - - void join(); - - private: - struct task { - inline task() - : m_data(0), m_pData_ptr(nullptr), m_pObj(nullptr), m_flags(0) {} - - uint64 m_data; - void* m_pData_ptr; - - union { - task_callback_func m_callback; - executable_task* m_pObj; }; - uint m_flags; - }; + class CRN_EXPORT scoped_spinlock + { + scoped_spinlock(const scoped_spinlock&); + scoped_spinlock& operator=(const scoped_spinlock&); + public: + inline scoped_spinlock(spinlock& lock): + m_lock(lock) + { + m_lock.lock(); + } + inline ~scoped_spinlock() + { + m_lock.unlock(); + } + + private: + spinlock& m_lock; + }; - tsstack m_task_stack; + template + class tsstack + { + public: + inline tsstack(): + m_top(0) + { + } + + inline ~tsstack() + { + } + + inline void clear() + { + m_spinlock.lock(); + m_top = 0; + m_spinlock.unlock(); + } + + inline bool try_push(const T& obj) + { + bool result = false; + m_spinlock.lock(); + if (m_top < (int)cMaxSize) + { + m_stack[m_top++] = obj; + result = true; + } + m_spinlock.unlock(); + return result; + } + + inline bool pop(T& obj) + { + 
bool result = false; + m_spinlock.lock(); + if (m_top > 0) + { + obj = m_stack[--m_top]; + result = true; + } + m_spinlock.unlock(); + return result; + } + + private: + spinlock m_spinlock; + T m_stack[cMaxSize]; + int m_top; + }; - uint m_num_threads; - pthread_t m_threads[cMaxThreads]; + class CRN_EXPORT task_pool + { + public: + task_pool(); + task_pool(uint num_threads); + ~task_pool(); - // Signalled whenever a task is queued up. - semaphore m_tasks_available; + enum { cMaxThreads = 16 }; + bool init(uint num_threads); + void deinit(); - // Signalled when all outstanding tasks are completed. - semaphore m_all_tasks_completed; + inline uint get_num_threads() const + { + return m_num_threads; + } + inline uint32 get_num_outstanding_tasks() const + { + return m_total_submitted_tasks - m_total_completed_tasks; + } - enum task_flags { - cTaskFlagObject = 1 - }; + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); - volatile atomic32_t m_total_submitted_tasks; - volatile atomic32_t m_total_completed_tasks; - volatile atomic32_t m_exit_flag; + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; - void process_task(task& tsk); + // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
+ bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); - static void* thread_func(void* pContext); -}; + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); -enum object_task_flags { - cObjectTaskFlagDefault = 0, - cObjectTaskFlagDeleteAfterExecution = 1 -}; + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); -template -class object_task : public task_pool::executable_task { - public: - object_task(uint flags = cObjectTaskFlagDefault) - : m_pObject(nullptr), - m_pMethod(nullptr), - m_flags(flags) { - } + void join(); - typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + private: + struct task + { + inline task(): + m_data(0), + m_pData_ptr(nullptr), + m_pObj(nullptr), + m_flags(0) + { + } - object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) - : m_pObject(pObject), - m_pMethod(pMethod), - m_flags(flags) { - CRNLIB_ASSERT(pObject && pMethod); - } + uint64 m_data; + void* m_pData_ptr; - void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) { - CRNLIB_ASSERT(pObject && pMethod); + union { + task_callback_func m_callback; + executable_task* m_pObj; + }; - m_pObject = pObject; - m_pMethod = pMethod; - m_flags = flags; - } + uint m_flags; + }; - T* get_object() const { return m_pObject; } - object_method_ptr get_method() const { return m_pMethod; } + tsstack m_task_stack; - virtual void execute_task(uint64 data, void* pData_ptr) { - (m_pObject->*m_pMethod)(data, pData_ptr); + uint m_num_threads; + pthread_t m_threads[cMaxThreads]; - if (m_flags & cObjectTaskFlagDeleteAfterExecution) - crnlib_delete(this); - } + // Signalled whenever a task is queued up. + semaphore m_tasks_available; - protected: - T* m_pObject; + // Signalled when all outstanding tasks are completed. 
+ semaphore m_all_tasks_completed; - object_method_ptr m_pMethod; + enum task_flags + { + cTaskFlagObject = 1 + }; - uint m_flags; -}; + volatile atomic32_t m_total_submitted_tasks; + volatile atomic32_t m_total_completed_tasks; + volatile atomic32_t m_exit_flag; -template -inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) { - object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); - if (!pTask) - return false; - return queue_task(pTask, data, pData_ptr); -} + void process_task(task& tsk); -template -inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { - CRNLIB_ASSERT(pObject); - CRNLIB_ASSERT(num_tasks); - if (!num_tasks) - return true; + static void* thread_func(void* pContext); + }; - bool status = true; + enum object_task_flags + { + cObjectTaskFlagDefault = 0, + cObjectTaskFlagDeleteAfterExecution = 1 + }; - uint i; - for (i = 0; i < num_tasks; i++) { - task tsk; + template + class object_task : public task_pool::executable_task + { + public: + object_task(uint flags = cObjectTaskFlagDefault): + m_pObject(nullptr), + m_pMethod(nullptr), + m_flags(flags) + { + } + + typedef void (T::* object_method_ptr)(uint64 data, void* pData_ptr); + + object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault): + m_pObject(pObject), + m_pMethod(pMethod), + m_flags(flags) + { + CRNLIB_ASSERT(pObject && pMethod); + } + + void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) + { + CRNLIB_ASSERT(pObject && pMethod); + + m_pObject = pObject; + m_pMethod = pMethod; + m_flags = flags; + } + + T* get_object() const + { + return m_pObject; + } + object_method_ptr get_method() const + { + return m_pMethod; + } + + virtual void execute_task(uint64 data, void* pData_ptr) + { + (m_pObject->*m_pMethod)(data, pData_ptr); + + if (m_flags & 
cObjectTaskFlagDeleteAfterExecution) + { + crnlib_delete(this); + } + } + + protected: + T* m_pObject; + + object_method_ptr m_pMethod; + + uint m_flags; + }; - tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); - if (!tsk.m_pObj) { - status = false; - break; + template + inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) + { + object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!pTask) + { + return false; + } + return queue_task(pTask, data, pData_ptr); } - tsk.m_data = first_data + i; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = cTaskFlagObject; - - atomic_increment32(&m_total_submitted_tasks); - - if (!m_task_stack.try_push(tsk)) { - atomic_increment32(&m_total_completed_tasks); - - status = false; - break; + template + inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) + { + CRNLIB_ASSERT(pObject); + CRNLIB_ASSERT(num_tasks); + if (!num_tasks) + { + return true; + } + + bool status = true; + + uint i; + for (i = 0; i < num_tasks; i++) + { + task tsk; + + tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!tsk.m_pObj) + { + status = false; + break; + } + + tsk.m_data = first_data + i; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + atomic_increment32(&m_total_submitted_tasks); + + if (!m_task_stack.try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + + status = false; + break; + } + } + + if (i) + { + m_tasks_available.release(i); + } + + return status; } - } - - if (i) { - m_tasks_available.release(i); - } - - return status; -} - } // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API diff --git a/crnlib/crn_threading_win32.cpp b/crnlib/crn_threading_win32.cpp index 3957ef0..53b9130 100644 --- a/crnlib/crn_threading_win32.cpp +++ b/crnlib/crn_threading_win32.cpp 
@@ -1,377 +1,482 @@ -// File: crn_win32_threading.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #include "crn_core.h" #include "crn_threading_win32.h" #include "crn_winhdr.h" + #include -namespace crnlib { -uint g_number_of_processors = 1; +namespace crnlib +{ + uint g_number_of_processors = 1; -void crn_threading_init() { - SYSTEM_INFO g_system_info; - GetSystemInfo(&g_system_info); + void crn_threading_init() + { + SYSTEM_INFO g_system_info; + GetSystemInfo(&g_system_info); - g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); -} + g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); + } -crn_thread_id_t crn_get_current_thread_id() { - return static_cast(GetCurrentThreadId()); -} + crn_thread_id_t crn_get_current_thread_id() + { + return static_cast(GetCurrentThreadId()); + } -void crn_sleep(unsigned int milliseconds) { - Sleep(milliseconds); -} + void crn_sleep(unsigned int milliseconds) + { + Sleep(milliseconds); + } -uint crn_get_max_helper_threads() { - if (g_number_of_processors > 1) { - // use all CPU's - return CRNLIB_MIN((int)task_pool::cMaxThreads, (int)g_number_of_processors - 1); - } + uint crn_get_max_helper_threads() + { + if (g_number_of_processors > 1) + { + // use all CPU's + return CRNLIB_MIN((int)task_pool::cMaxThreads, (int)g_number_of_processors - 1); + } - return 0; -} + return 0; + } -mutex::mutex(unsigned int spin_count) { - CRNLIB_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION)); + mutex::mutex(unsigned int spin_count) + { + CRNLIB_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION)); - void* p = m_buf; - CRITICAL_SECTION& m_cs = *static_cast(p); + void* p = m_buf; + CRITICAL_SECTION& m_cs = *static_cast(p); - BOOL status = true; - status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count); - if (!status) - crnlib_fail("mutex::mutex: InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__); + BOOL status = true; + status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count); + if (!status) + { + crnlib_fail("mutex::mutex: 
InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__); + } #ifdef CRNLIB_BUILD_DEBUG - m_lock_count = 0; + m_lock_count = 0; #endif -} + } -mutex::~mutex() { - void* p = m_buf; - CRITICAL_SECTION& m_cs = *static_cast(p); + mutex::~mutex() + { + void* p = m_buf; + CRITICAL_SECTION& m_cs = *static_cast(p); #ifdef CRNLIB_BUILD_DEBUG - if (m_lock_count) - crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); + if (m_lock_count) + { + crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); + } #endif - DeleteCriticalSection(&m_cs); -} + DeleteCriticalSection(&m_cs); + } -void mutex::lock() { - void* p = m_buf; - CRITICAL_SECTION& m_cs = *static_cast(p); + void mutex::lock() + { + void* p = m_buf; + CRITICAL_SECTION& m_cs = *static_cast(p); - EnterCriticalSection(&m_cs); + EnterCriticalSection(&m_cs); #ifdef CRNLIB_BUILD_DEBUG - m_lock_count++; + m_lock_count++; #endif -} + } -void mutex::unlock() { - void* p = m_buf; - CRITICAL_SECTION& m_cs = *static_cast(p); + void mutex::unlock() + { + void* p = m_buf; + CRITICAL_SECTION& m_cs = *static_cast(p); #ifdef CRNLIB_BUILD_DEBUG - if (!m_lock_count) - crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); - m_lock_count--; + if (!m_lock_count) + { + crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); + } + m_lock_count--; #endif - LeaveCriticalSection(&m_cs); -} - -void mutex::set_spin_count(unsigned int count) { - void* p = m_buf; - CRITICAL_SECTION& m_cs = *static_cast(p); - - SetCriticalSectionSpinCount(&m_cs, count); -} - -void spinlock::lock(uint32 max_spins, bool yielding) { - if (g_number_of_processors <= 1) - max_spins = 1; - - uint32 spinCount = 0; - uint32 yieldCount = 0; - - for (;;) { - CRNLIB_ASSUME(sizeof(long) == sizeof(int32)); - if (!InterlockedExchange((volatile long*)&m_flag, TRUE)) - break; - - YieldProcessor(); - YieldProcessor(); - YieldProcessor(); - YieldProcessor(); - YieldProcessor(); - YieldProcessor(); - 
YieldProcessor(); - YieldProcessor(); - - spinCount++; - if ((yielding) && (spinCount >= max_spins)) { - switch (yieldCount) { - case 0: { - spinCount = 0; - - Sleep(0); - - yieldCount++; - break; + LeaveCriticalSection(&m_cs); + } + + void mutex::set_spin_count(unsigned int count) + { + void* p = m_buf; + CRITICAL_SECTION& m_cs = *static_cast(p); + + SetCriticalSectionSpinCount(&m_cs, count); + } + + void spinlock::lock(uint32 max_spins, bool yielding) + { + if (g_number_of_processors <= 1) + { + max_spins = 1; + } + + uint32 spinCount = 0; + uint32 yieldCount = 0; + + for (;;) + { + CRNLIB_ASSUME(sizeof(long) == sizeof(int32)); + if (!InterlockedExchange((volatile long*)&m_flag, TRUE)) + { + break; + } + + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + YieldProcessor(); + + spinCount++; + if ((yielding) && (spinCount >= max_spins)) + { + switch (yieldCount) + { + case 0: + { + spinCount = 0; + + Sleep(0); + + yieldCount++; + break; + } + case 1: + { + if (g_number_of_processors <= 1) + { + spinCount = 0; + } + else + { + spinCount = max_spins / 2; + } + + Sleep(1); + + yieldCount++; + break; + } + case 2: + { + if (g_number_of_processors <= 1) + { + spinCount = 0; + } + else + { + spinCount = max_spins; + } + + Sleep(2); + break; + } + } + } + } + + CRNLIB_MEMORY_IMPORT_BARRIER + } + + void spinlock::unlock() + { + CRNLIB_MEMORY_EXPORT_BARRIER + + InterlockedExchange((volatile long*)&m_flag, FALSE); + } + + semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName) + { + m_handle = CreateSemaphoreA(nullptr, initialCount, maximumCount, pName); + if (nullptr == m_handle) + { + CRNLIB_FAIL("semaphore: CreateSemaphore() failed"); + } + } + + semaphore::~semaphore() + { + if (m_handle) + { + CloseHandle(m_handle); + m_handle = nullptr; + } + } + + void semaphore::release(int32 releaseCount, int32* pPreviousCount) + { + CRNLIB_ASSUME(sizeof(LONG) == 
sizeof(int32)); + if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount)) + { + CRNLIB_FAIL("semaphore: ReleaseSemaphore() failed"); + } + } + + bool semaphore::try_release(int32 releaseCount, int32* pPreviousCount) + { + CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); + return ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount) != 0; + } + + bool semaphore::wait(uint32 milliseconds) + { + uint32 result = WaitForSingleObject(m_handle, milliseconds); + + if (WAIT_FAILED == result) + { + CRNLIB_FAIL("semaphore: WaitForSingleObject() failed"); } - case 1: { - if (g_number_of_processors <= 1) - spinCount = 0; - else - spinCount = max_spins / 2; - Sleep(1); + return WAIT_OBJECT_0 == result; + } + + task_pool::task_pool() : + m_pTask_stack(crnlib_new()), + m_num_threads(0), + m_tasks_available(0, 32767), + m_all_tasks_completed(0, 1), + m_total_submitted_tasks(0), + m_total_completed_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + } + + task_pool::task_pool(uint num_threads) : + m_pTask_stack(crnlib_new()), + m_num_threads(0), + m_tasks_available(0, 32767), + m_all_tasks_completed(0, 1), + m_total_submitted_tasks(0), + m_total_completed_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + + bool status = init(num_threads); + CRNLIB_VERIFY(status); + } + + task_pool::~task_pool() + { + deinit(); + crnlib_delete(m_pTask_stack); + } + + bool task_pool::init(uint num_threads) + { + CRNLIB_ASSERT(num_threads <= cMaxThreads); + num_threads = math::minimum(num_threads, cMaxThreads); + + deinit(); + + bool succeeded = true; + + m_num_threads = 0; + while (m_num_threads < num_threads) + { + m_threads[m_num_threads] = (HANDLE)_beginthreadex(nullptr, 32768, thread_func, this, 0, nullptr); + CRNLIB_ASSERT(m_threads[m_num_threads] != 0); + + if (!m_threads[m_num_threads]) + { + succeeded = false; + break; + } + + m_num_threads++; + } + + if (!succeeded) + { + deinit(); + return false; + } + + return true; + } - 
yieldCount++; - break; + void task_pool::deinit() + { + if (m_num_threads) + { + join(); + + // Set exit flag, then release all threads. Each should wakeup and exit. + atomic_exchange32(&m_exit_flag, true); + + m_tasks_available.release(m_num_threads); + + // Now wait for each thread to exit. + for (uint i = 0; i < m_num_threads; i++) + { + if (m_threads[i]) + { + for (;;) + { + // Can be an INFINITE delay, but set at 30 seconds so this function always provably exits. + DWORD result = WaitForSingleObject(m_threads[i], 30000); + if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED)) + { + break; + } + } + + CloseHandle(m_threads[i]); + m_threads[i] = nullptr; + } + } + + m_num_threads = 0; + + atomic_exchange32(&m_exit_flag, false); } - case 2: { - if (g_number_of_processors <= 1) - spinCount = 0; - else - spinCount = max_spins; - - Sleep(2); - break; + + if (m_pTask_stack) + { + m_pTask_stack->clear(); } - } + m_total_submitted_tasks = 0; + m_total_completed_tasks = 0; } - } - - CRNLIB_MEMORY_IMPORT_BARRIER -} - -void spinlock::unlock() { - CRNLIB_MEMORY_EXPORT_BARRIER - - InterlockedExchange((volatile long*)&m_flag, FALSE); -} - -semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName) { - m_handle = CreateSemaphoreA(nullptr, initialCount, maximumCount, pName); - if (nullptr == m_handle) { - CRNLIB_FAIL("semaphore: CreateSemaphore() failed"); - } -} - -semaphore::~semaphore() { - if (m_handle) { - CloseHandle(m_handle); - m_handle = nullptr; - } -} - -void semaphore::release(int32 releaseCount, int32* pPreviousCount) { - CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); - if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount)) { - CRNLIB_FAIL("semaphore: ReleaseSemaphore() failed"); - } -} - -bool semaphore::try_release(int32 releaseCount, int32* pPreviousCount) { - CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); - return ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount) != 0; -} - -bool 
semaphore::wait(uint32 milliseconds) { - uint32 result = WaitForSingleObject(m_handle, milliseconds); - - if (WAIT_FAILED == result) { - CRNLIB_FAIL("semaphore: WaitForSingleObject() failed"); - } - - return WAIT_OBJECT_0 == result; -} - -task_pool::task_pool() - : m_pTask_stack(crnlib_new()), - m_num_threads(0), - m_tasks_available(0, 32767), - m_all_tasks_completed(0, 1), - m_total_submitted_tasks(0), - m_total_completed_tasks(0), - m_exit_flag(false) { - utils::zero_object(m_threads); -} - -task_pool::task_pool(uint num_threads) - : m_pTask_stack(crnlib_new()), - m_num_threads(0), - m_tasks_available(0, 32767), - m_all_tasks_completed(0, 1), - m_total_submitted_tasks(0), - m_total_completed_tasks(0), - m_exit_flag(false) { - utils::zero_object(m_threads); - - bool status = init(num_threads); - CRNLIB_VERIFY(status); -} - -task_pool::~task_pool() { - deinit(); - crnlib_delete(m_pTask_stack); -} - -bool task_pool::init(uint num_threads) { - CRNLIB_ASSERT(num_threads <= cMaxThreads); - num_threads = math::minimum(num_threads, cMaxThreads); - - deinit(); - - bool succeeded = true; - - m_num_threads = 0; - while (m_num_threads < num_threads) { - m_threads[m_num_threads] = (HANDLE)_beginthreadex(nullptr, 32768, thread_func, this, 0, nullptr); - CRNLIB_ASSERT(m_threads[m_num_threads] != 0); - - if (!m_threads[m_num_threads]) { - succeeded = false; - break; + + bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pFunc); + + task tsk; + tsk.m_callback = pFunc; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = 0; + + atomic_increment32(&m_total_submitted_tasks); + + if (!m_pTask_stack->try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + return false; + } + + m_tasks_available.release(1); + + return true; } - m_num_threads++; - } + // It's the object's responsibility to delete pObj within the execute_task() method, if needed! 
+ bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pObj); - if (!succeeded) { - deinit(); - return false; - } + task tsk; + tsk.m_pObj = pObj; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; - return true; -} + atomic_increment32(&m_total_submitted_tasks); -void task_pool::deinit() { - if (m_num_threads) { - join(); + if (!m_pTask_stack->try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + return false; + } - // Set exit flag, then release all threads. Each should wakeup and exit. - atomic_exchange32(&m_exit_flag, true); + m_tasks_available.release(1); - m_tasks_available.release(m_num_threads); + return true; + } - // Now wait for each thread to exit. - for (uint i = 0; i < m_num_threads; i++) { - if (m_threads[i]) { - for (;;) { - // Can be an INFINITE delay, but set at 30 seconds so this function always provably exits. - DWORD result = WaitForSingleObject(m_threads[i], 30000); - if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED)) - break; + void task_pool::process_task(task& tsk) + { + if (tsk.m_flags & cTaskFlagObject) + { + tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); + } + else + { + tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); } - CloseHandle(m_threads[i]); - m_threads[i] = nullptr; - } + if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) + { + // Try to signal the semaphore (the max count is 1 so this may actually fail). + m_all_tasks_completed.try_release(); + } } - m_num_threads = 0; + void task_pool::join() + { + // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. 
+ task tsk; + while (m_pTask_stack->pop(tsk)) + { + process_task(tsk); + } - atomic_exchange32(&m_exit_flag, false); - } - - if (m_pTask_stack) - m_pTask_stack->clear(); - m_total_submitted_tasks = 0; - m_total_completed_tasks = 0; -} - -bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(pFunc); - - task tsk; - tsk.m_callback = pFunc; - tsk.m_data = data; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = 0; - - atomic_increment32(&m_total_submitted_tasks); - - if (!m_pTask_stack->try_push(tsk)) { - atomic_increment32(&m_total_completed_tasks); - return false; - } - - m_tasks_available.release(1); - - return true; -} - -// It's the object's responsibility to delete pObj within the execute_task() method, if needed! -bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(pObj); - - task tsk; - tsk.m_pObj = pObj; - tsk.m_data = data; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = cTaskFlagObject; - - atomic_increment32(&m_total_submitted_tasks); - - if (!m_pTask_stack->try_push(tsk)) { - atomic_increment32(&m_total_completed_tasks); - return false; - } - - m_tasks_available.release(1); - - return true; -} - -void task_pool::process_task(task& tsk) { - if (tsk.m_flags & cTaskFlagObject) - tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); - else - tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); - - if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) { - // Try to signal the semaphore (the max count is 1 so this may actually fail). - m_all_tasks_completed.try_release(); - } -} - -void task_pool::join() { - // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. - task tsk; - while (m_pTask_stack->pop(tsk)) - process_task(tsk); - - // At this point the task stack is empty. - // Now wait for all concurrent tasks to complete. 
The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks - // where issued and asynchronously completed, so this loop may iterate a few times. - const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); - while (m_total_completed_tasks != total_submitted_tasks) { - // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. - // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. - m_all_tasks_completed.wait(1); - } -} - -unsigned __stdcall task_pool::thread_func(void* pContext) { - task_pool* pPool = static_cast(pContext); - - for (;;) { - if (!pPool->m_tasks_available.wait()) - break; - - if (pPool->m_exit_flag) - break; - - task tsk; - if (pPool->m_pTask_stack->pop(tsk)) - pPool->process_task(tsk); - } - - _endthreadex(0); - return 0; -} - -} // namespace crnlib + // At this point the task stack is empty. + // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks + // where issued and asynchronously completed, so this loop may iterate a few times. + const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); + while (m_total_completed_tasks != total_submitted_tasks) + { + // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. + // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. 
+ m_all_tasks_completed.wait(1); + } + } + + unsigned __stdcall task_pool::thread_func(void* pContext) + { + task_pool* pPool = static_cast(pContext); + + for (;;) + { + if (!pPool->m_tasks_available.wait()) + { + break; + } + + if (pPool->m_exit_flag) + { + break; + } + + task tsk; + if (pPool->m_pTask_stack->pop(tsk)) + { + pPool->process_task(tsk); + } + } + + _endthreadex(0); + return 0; + } +} // namespace crnlib diff --git a/crnlib/crn_threading_win32.h b/crnlib/crn_threading_win32.h index 2b4b4f2..620c78e 100644 --- a/crnlib/crn_threading_win32.h +++ b/crnlib/crn_threading_win32.h @@ -1,5 +1,26 @@ -// File: crn_win32_threading.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_atomics.h" @@ -9,380 +30,474 @@ #include "crn_export.h" -namespace crnlib { -// g_number_of_processors defaults to 1. Will be higher on multicore machines. +namespace crnlib +{ + // g_number_of_processors defaults to 1. Will be higher on multicore machines. 
CRN_EXPORT extern uint g_number_of_processors; CRN_EXPORT void crn_threading_init(); -typedef uint64 crn_thread_id_t; -CRN_EXPORT crn_thread_id_t crn_get_current_thread_id(); + typedef uint64 crn_thread_id_t; + CRN_EXPORT crn_thread_id_t crn_get_current_thread_id(); -CRN_EXPORT void crn_sleep(unsigned int milliseconds); + CRN_EXPORT void crn_sleep(unsigned int milliseconds); -CRN_EXPORT uint crn_get_max_helper_threads(); + CRN_EXPORT uint crn_get_max_helper_threads(); -class CRN_EXPORT mutex { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); + class CRN_EXPORT mutex + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); - public: - mutex(unsigned int spin_count = 0); - ~mutex(); - void lock(); - void unlock(); - void set_spin_count(unsigned int count); + public: + mutex(unsigned int spin_count = 0); + ~mutex(); + void lock(); + void unlock(); + void set_spin_count(unsigned int count); - private: - int m_buf[12]; + private: + int m_buf[12]; #ifdef CRNLIB_BUILD_DEBUG - unsigned int m_lock_count; + unsigned int m_lock_count; #endif -}; - -class CRN_EXPORT scoped_mutex { - scoped_mutex(const scoped_mutex&); - scoped_mutex& operator=(const scoped_mutex&); - - public: - inline scoped_mutex(mutex& m) - : m_mutex(m) { m_mutex.lock(); } - inline ~scoped_mutex() { m_mutex.unlock(); } - - private: - mutex& m_mutex; -}; - -// Simple non-recursive spinlock. 
-class CRN_EXPORT spinlock { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(spinlock); - - public: - inline spinlock() - : m_flag(0) {} - - void lock(uint32 max_spins = 4096, bool yielding = true); - - inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) { lock(max_spins, yielding); } - - void unlock(); - - inline void unlock_no_barrier() { m_flag = CRNLIB_FALSE; } - - private: - volatile int32 m_flag; -}; - -class CRN_EXPORT scoped_spinlock { - scoped_spinlock(const scoped_spinlock&); - scoped_spinlock& operator=(const scoped_spinlock&); - - public: - inline scoped_spinlock(spinlock& lock) - : m_lock(lock) { m_lock.lock(); } - inline ~scoped_spinlock() { m_lock.unlock(); } - - private: - spinlock& m_lock; -}; - -class CRN_EXPORT semaphore { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); - - public: - semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = nullptr); - - ~semaphore(); - - inline HANDLE get_handle(void) const { return m_handle; } - - void release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); - bool try_release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); - - bool wait(uint32 milliseconds = cUINT32_MAX); - - private: - HANDLE m_handle; -}; - -template -class tsstack { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(tsstack); - - public: - inline tsstack(bool use_freelist = true) - : m_use_freelist(use_freelist) { - CRNLIB_VERIFY(((ptr_bits_t)this & (CRNLIB_GET_ALIGNMENT(tsstack) - 1)) == 0); - InitializeSListHead(&m_stack_head); - InitializeSListHead(&m_freelist_head); - } - - inline ~tsstack() { - clear(); - } - - inline void clear() { - for (;;) { - node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); - if (!pNode) - break; - - CRNLIB_MEMORY_IMPORT_BARRIER - - helpers::destruct(&pNode->m_obj); - - crnlib_free(pNode); - } - - flush_freelist(); - } - - inline void flush_freelist() { - if (!m_use_freelist) - return; - - for (;;) { - node* pNode = (node*)InterlockedPopEntrySList(&m_freelist_head); - if 
(!pNode) - break; - - CRNLIB_MEMORY_IMPORT_BARRIER - - crnlib_free(pNode); - } - } - - inline bool try_push(const T& obj) { - node* pNode = alloc_node(); - if (!pNode) - return false; - - helpers::construct(&pNode->m_obj, obj); - - CRNLIB_MEMORY_EXPORT_BARRIER - - InterlockedPushEntrySList(&m_stack_head, &pNode->m_slist_entry); - - return true; - } - - inline bool pop(T& obj) { - node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); - if (!pNode) - return false; - - CRNLIB_MEMORY_IMPORT_BARRIER - - obj = pNode->m_obj; - - helpers::destruct(&pNode->m_obj); - - free_node(pNode); - - return true; - } - - private: - SLIST_HEADER m_stack_head; - SLIST_HEADER m_freelist_head; - - struct node { - SLIST_ENTRY m_slist_entry; - T m_obj; - }; + }; - bool m_use_freelist; + class CRN_EXPORT scoped_mutex + { + scoped_mutex(const scoped_mutex&); + scoped_mutex& operator=(const scoped_mutex&); + + public: + inline scoped_mutex(mutex& m) : + m_mutex(m) + { + m_mutex.lock(); + } + inline ~scoped_mutex() + { + m_mutex.unlock(); + } + + private: + mutex& m_mutex; + }; - inline node* alloc_node() { - node* pNode = m_use_freelist ? (node*)InterlockedPopEntrySList(&m_freelist_head) : nullptr; + // Simple non-recursive spinlock. + class CRN_EXPORT spinlock + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(spinlock); - if (!pNode) - pNode = (node*)crnlib_malloc(sizeof(node)); + public: + inline spinlock() : + m_flag(0) + { + } - return pNode; - } + void lock(uint32 max_spins = 4096, bool yielding = true); - inline void free_node(node* pNode) { - if (m_use_freelist) - InterlockedPushEntrySList(&m_freelist_head, &pNode->m_slist_entry); - else - crnlib_free(pNode); - } -}; + inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) + { + lock(max_spins, yielding); + } -// Simple multithreaded task pool. This class assumes a single global thread will be issuing tasks and joining. 
-class CRN_EXPORT task_pool { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(task_pool); + void unlock(); - public: - task_pool(); - task_pool(uint num_threads); - ~task_pool(); + inline void unlock_no_barrier() + { + m_flag = CRNLIB_FALSE; + } - enum { cMaxThreads = 16 }; - bool init(uint num_threads); - void deinit(); + private: + volatile int32 m_flag; + }; - inline uint get_num_threads() const { return m_num_threads; } - inline uint32 get_num_outstanding_tasks() const { return m_total_submitted_tasks - m_total_completed_tasks; } + class CRN_EXPORT scoped_spinlock + { + scoped_spinlock(const scoped_spinlock&); + scoped_spinlock& operator=(const scoped_spinlock&); + + public: + inline scoped_spinlock(spinlock& lock) : + m_lock(lock) + { + m_lock.lock(); + } + inline ~scoped_spinlock() + { + m_lock.unlock(); + } + + private: + spinlock& m_lock; + }; - // C-style task callback - typedef void (*task_callback_func)(uint64 data, void* pData_ptr); - bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); + class CRN_EXPORT semaphore + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); - class executable_task { - public: - virtual void execute_task(uint64 data, void* pData_ptr) = 0; - }; + public: + semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = nullptr); - // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
- bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); + ~semaphore(); - template - inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); + inline HANDLE get_handle(void) const + { + return m_handle; + } - template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); + void release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); + bool try_release(int32 releaseCount = 1, int32* pPreviousCount = nullptr); - // Waits for all outstanding tasks (if any) to complete. - // The calling thread will steal any outstanding tasks from worker threads, if possible. - void join(); + bool wait(uint32 milliseconds = cUINT32_MAX); - private: - struct task { - //inline task() : m_data(0), m_pData_ptr(nullptr), m_pObj(nullptr), m_flags(0) { } + private: + HANDLE m_handle; + }; - uint64 m_data; - void* m_pData_ptr; + template + class tsstack + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(tsstack); + + public: + inline tsstack(bool use_freelist = true) : + m_use_freelist(use_freelist) + { + CRNLIB_VERIFY(((ptr_bits_t)this & (CRNLIB_GET_ALIGNMENT(tsstack) - 1)) == 0); + InitializeSListHead(&m_stack_head); + InitializeSListHead(&m_freelist_head); + } - union { - task_callback_func m_callback; - executable_task* m_pObj; + inline ~tsstack() + { + clear(); + } + + inline void clear() + { + for (;;) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); + if (!pNode) + { + break; + } + + CRNLIB_MEMORY_IMPORT_BARRIER + + helpers::destruct(&pNode->m_obj); + + crnlib_free(pNode); + } + + flush_freelist(); + } + + inline void flush_freelist() + { + if (!m_use_freelist) + { + return; + } + + for (;;) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_freelist_head); + if (!pNode) + { + break; + } + + CRNLIB_MEMORY_IMPORT_BARRIER + + crnlib_free(pNode); + } + } + + inline bool try_push(const T& obj) + { + node* pNode = 
alloc_node(); + if (!pNode) + { + return false; + } + + helpers::construct(&pNode->m_obj, obj); + + CRNLIB_MEMORY_EXPORT_BARRIER + + InterlockedPushEntrySList(&m_stack_head, &pNode->m_slist_entry); + + return true; + } + + inline bool pop(T& obj) + { + node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); + if (!pNode) + { + return false; + } + + CRNLIB_MEMORY_IMPORT_BARRIER + + obj = pNode->m_obj; + + helpers::destruct(&pNode->m_obj); + + free_node(pNode); + + return true; + } + + private: + SLIST_HEADER m_stack_head; + SLIST_HEADER m_freelist_head; + + struct node + { + SLIST_ENTRY m_slist_entry; + T m_obj; + }; + + bool m_use_freelist; + + inline node* alloc_node() + { + node* pNode = m_use_freelist ? (node*)InterlockedPopEntrySList(&m_freelist_head) : nullptr; + + if (!pNode) + { + pNode = (node*)crnlib_malloc(sizeof(node)); + } + + return pNode; + } + + inline void free_node(node* pNode) + { + if (m_use_freelist) + { + InterlockedPushEntrySList(&m_freelist_head, &pNode->m_slist_entry); + } + else + { + crnlib_free(pNode); + } + } }; - uint m_flags; - }; - - typedef tsstack ts_task_stack_t; - ts_task_stack_t* m_pTask_stack; + // Simple multithreaded task pool. This class assumes a single global thread will be issuing tasks and joining. + class CRN_EXPORT task_pool + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(task_pool); - uint m_num_threads; - HANDLE m_threads[cMaxThreads]; + public: + task_pool(); + task_pool(uint num_threads); + ~task_pool(); - // Signalled whenever a task is queued up. - semaphore m_tasks_available; + enum + { + cMaxThreads = 16 + }; + bool init(uint num_threads); + void deinit(); - // Signalled when all outstanding tasks are completed. 
- semaphore m_all_tasks_completed; + inline uint get_num_threads() const + { + return m_num_threads; + } + inline uint32 get_num_outstanding_tasks() const + { + return m_total_submitted_tasks - m_total_completed_tasks; + } - enum task_flags { - cTaskFlagObject = 1 - }; + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = nullptr); - volatile atomic32_t m_total_submitted_tasks; - volatile atomic32_t m_total_completed_tasks; - volatile atomic32_t m_exit_flag; + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; - void process_task(task& tsk); + // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! + bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = nullptr); - static unsigned __stdcall thread_func(void* pContext); -}; + template + inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); -enum object_task_flags { - cObjectTaskFlagDefault = 0, - cObjectTaskFlagDeleteAfterExecution = 1 -}; + template + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = nullptr); -template -class object_task : public task_pool::executable_task { - public: - object_task(uint flags = cObjectTaskFlagDefault) - : m_pObject(nullptr), - m_pMethod(nullptr), - m_flags(flags) { - } + // Waits for all outstanding tasks (if any) to complete. + // The calling thread will steal any outstanding tasks from worker threads, if possible. 
+ void join(); - typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + private: + struct task + { + //inline task() : m_data(0), m_pData_ptr(nullptr), m_pObj(nullptr), m_flags(0) { } - object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) - : m_pObject(pObject), - m_pMethod(pMethod), - m_flags(flags) { - CRNLIB_ASSERT(pObject && pMethod); - } + uint64 m_data; + void* m_pData_ptr; - void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) { - CRNLIB_ASSERT(pObject && pMethod); + union + { + task_callback_func m_callback; + executable_task* m_pObj; + }; - m_pObject = pObject; - m_pMethod = pMethod; - m_flags = flags; - } + uint m_flags; + }; - T* get_object() const { return m_pObject; } - object_method_ptr get_method() const { return m_pMethod; } + typedef tsstack ts_task_stack_t; + ts_task_stack_t* m_pTask_stack; - virtual void execute_task(uint64 data, void* pData_ptr) { - (m_pObject->*m_pMethod)(data, pData_ptr); + uint m_num_threads; + HANDLE m_threads[cMaxThreads]; - if (m_flags & cObjectTaskFlagDeleteAfterExecution) - crnlib_delete(this); - } + // Signalled whenever a task is queued up. + semaphore m_tasks_available; - protected: - T* m_pObject; + // Signalled when all outstanding tasks are completed. 
+ semaphore m_all_tasks_completed; - object_method_ptr m_pMethod; + enum task_flags + { + cTaskFlagObject = 1 + }; - uint m_flags; -}; + volatile atomic32_t m_total_submitted_tasks; + volatile atomic32_t m_total_completed_tasks; + volatile atomic32_t m_exit_flag; -template -inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) { - object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); - if (!pTask) - return false; - return queue_task(pTask, data, pData_ptr); -} + void process_task(task& tsk); -template -inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { - CRNLIB_ASSERT(pObject); - CRNLIB_ASSERT(num_tasks); - if (!num_tasks) - return true; + static unsigned __stdcall thread_func(void* pContext); + }; - bool status = true; + enum object_task_flags + { + cObjectTaskFlagDefault = 0, + cObjectTaskFlagDeleteAfterExecution = 1 + }; - uint i; - for (i = 0; i < num_tasks; i++) { - task tsk; + template + class object_task : public task_pool::executable_task + { + public: + object_task(uint flags = cObjectTaskFlagDefault) : + m_pObject(nullptr), + m_pMethod(nullptr), + m_flags(flags) + { + } + + typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + + object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : + m_pObject(pObject), + m_pMethod(pMethod), + m_flags(flags) + { + CRNLIB_ASSERT(pObject && pMethod); + } + + void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) + { + CRNLIB_ASSERT(pObject && pMethod); + + m_pObject = pObject; + m_pMethod = pMethod; + m_flags = flags; + } + + T* get_object() const + { + return m_pObject; + } + object_method_ptr get_method() const + { + return m_pMethod; + } + + virtual void execute_task(uint64 data, void* pData_ptr) + { + (m_pObject->*m_pMethod)(data, pData_ptr); + + if (m_flags & 
cObjectTaskFlagDeleteAfterExecution) + { + crnlib_delete(this); + } + } + + protected: + T* m_pObject; + + object_method_ptr m_pMethod; + + uint m_flags; + }; - tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); - if (!tsk.m_pObj) { - status = false; - break; + template + inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) + { + object_task* pTask = crnlib_new>(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!pTask) + { + return false; + } + return queue_task(pTask, data, pData_ptr); } - tsk.m_data = first_data + i; - tsk.m_pData_ptr = pData_ptr; - tsk.m_flags = cTaskFlagObject; - - atomic_increment32(&m_total_submitted_tasks); - - if (!m_pTask_stack->try_push(tsk)) { - atomic_increment32(&m_total_completed_tasks); - - status = false; - break; + template + inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) + { + CRNLIB_ASSERT(pObject); + CRNLIB_ASSERT(num_tasks); + if (!num_tasks) + { + return true; + } + + bool status = true; + + uint i; + for (i = 0; i < num_tasks; i++) + { + task tsk; + + tsk.m_pObj = crnlib_new>(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + if (!tsk.m_pObj) + { + status = false; + break; + } + + tsk.m_data = first_data + i; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + + atomic_increment32(&m_total_submitted_tasks); + + if (!m_pTask_stack->try_push(tsk)) + { + atomic_increment32(&m_total_completed_tasks); + + status = false; + break; + } + } + + if (i) + { + m_tasks_available.release(i); + } + + return status; } - } - - if (i) { - m_tasks_available.release(i); - } - - return status; -} - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_timer.cpp b/crnlib/crn_timer.cpp index 0941828..eb7bd76 100644 --- a/crnlib/crn_timer.cpp +++ b/crnlib/crn_timer.cpp @@ -1,5 +1,25 @@ -// File: crn_win32_timer.cpp 
-// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include @@ -23,6 +43,7 @@ namespace crnlib { QueryPerformanceCounter(reinterpret_cast(pTicks)); } + inline void query_counter_frequency(timer_ticks* pTicks) { QueryPerformanceFrequency(reinterpret_cast(pTicks)); @@ -171,5 +192,4 @@ namespace crnlib return ticks * g_inv_freq; } - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_timer.h b/crnlib/crn_timer.h index 895b9ab..0f1ac83 100644 --- a/crnlib/crn_timer.h +++ b/crnlib/crn_timer.h @@ -1,5 +1,25 @@ -// File: crn_win32_timer.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once diff --git a/crnlib/crn_traits.h b/crnlib/crn_traits.h index 8031b03..2926f29 100644 --- a/crnlib/crn_traits.h +++ b/crnlib/crn_traits.h @@ -1,135 +1,241 @@ -// File: crn_traits.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once namespace crnlib { -template -struct int_traits { - enum { cMin = crnlib::cINT32_MIN, - cMax = crnlib::cINT32_MAX, - cSigned = true }; -}; - -template <> -struct int_traits { - enum { cMin = crnlib::cINT8_MIN, - cMax = crnlib::cINT8_MAX, - cSigned = true }; -}; -template <> -struct int_traits { - enum { cMin = crnlib::cINT16_MIN, - cMax = crnlib::cINT16_MAX, - cSigned = true }; -}; -template <> -struct int_traits { - enum { cMin = crnlib::cINT32_MIN, - cMax = crnlib::cINT32_MAX, - cSigned = true }; -}; - -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnlib::cUINT8_MAX, - cSigned = false }; -}; -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnlib::cUINT16_MAX, - cSigned = false }; -}; -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnlib::cUINT32_MAX, - cSigned = false }; -}; -template -struct scalar_type { - enum { cFlag = false }; - static inline void construct(T* p) { helpers::construct(p); } - static inline void construct(T* p, const T& init) { helpers::construct(p, init); } - static inline void construct_array(T* p, uint n) { helpers::construct_array(p, n); } - static inline void destruct(T* p) { helpers::destruct(p); } - static inline void destruct_array(T* p, uint n) { helpers::destruct_array(p, n); } -}; - -template -struct scalar_type { - enum { cFlag = true }; - static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } - static inline void construct(T** p, T* init) { *p = init; } - static inline void construct_array(T** p, uint n) { memset(p, 0, sizeof(T*) * n); } - static inline void destruct(T**) {} - static inline void destruct_array(T**, uint) {} -}; - -#define CRNLIB_DEFINE_BUILT_IN_TYPE(X) \ - template <> \ - struct scalar_type { \ - enum { cFlag = true }; \ - static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ - static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ - static inline void construct_array(X* p, uint n) { memset(p, 0, 
sizeof(X) * n); } \ - static inline void destruct(X*) {} \ - static inline void destruct_array(X*, uint) {} \ - }; - -CRNLIB_DEFINE_BUILT_IN_TYPE(bool) -CRNLIB_DEFINE_BUILT_IN_TYPE(char) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned char) -CRNLIB_DEFINE_BUILT_IN_TYPE(short) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned short) -CRNLIB_DEFINE_BUILT_IN_TYPE(int) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned int) -CRNLIB_DEFINE_BUILT_IN_TYPE(long) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long) + template + struct int_traits + { + enum + { + cMin = crnlib::cINT32_MIN, + cMax = crnlib::cINT32_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = crnlib::cINT8_MIN, + cMax = crnlib::cINT8_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = crnlib::cINT16_MIN, + cMax = crnlib::cINT16_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = crnlib::cINT32_MIN, + cMax = crnlib::cINT32_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnlib::cUINT8_MAX, + cSigned = false + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnlib::cUINT16_MAX, + cSigned = false + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnlib::cUINT32_MAX, + cSigned = false + }; + }; + + template + struct scalar_type + { + enum { cFlag = false }; + + static inline void construct(T* p) + { + helpers::construct(p); + } + + static inline void construct(T* p, const T& init) + { + helpers::construct(p, init); + } + + static inline void construct_array(T* p, uint n) + { + helpers::construct_array(p, n); + } + + static inline void destruct(T* p) + { + helpers::destruct(p); + } + + static inline void destruct_array(T* p, uint n) + { + helpers::destruct_array(p, n); + } + }; + + template + struct scalar_type + { + enum { cFlag = true }; + + static inline void construct(T** p) + { + memset(p, 0, sizeof(T*)); + } + + 
static inline void construct(T** p, T* init) + { + *p = init; + } + + static inline void construct_array(T** p, uint n) + { + memset(p, 0, sizeof(T*) * n); + } + + static inline void destruct(T**) + { + } + + static inline void destruct_array(T**, uint) + { + } + }; + +#define CRNLIB_DEFINE_BUILT_IN_TYPE(X) \ + template <> \ + struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X*) {} \ + static inline void destruct_array(X*, uint) {} \ + }; + + CRNLIB_DEFINE_BUILT_IN_TYPE(bool) + + CRNLIB_DEFINE_BUILT_IN_TYPE(char) + + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned char) + + CRNLIB_DEFINE_BUILT_IN_TYPE(short) + + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned short) + + CRNLIB_DEFINE_BUILT_IN_TYPE(int) + + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned int) + + CRNLIB_DEFINE_BUILT_IN_TYPE(long) + + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long) #ifdef __GNUC__ -CRNLIB_DEFINE_BUILT_IN_TYPE(long long) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long long) + CRNLIB_DEFINE_BUILT_IN_TYPE(long long) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long long) #else -CRNLIB_DEFINE_BUILT_IN_TYPE(__int64) -CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned __int64) + CRNLIB_DEFINE_BUILT_IN_TYPE(__int64) + + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned __int64) #endif -CRNLIB_DEFINE_BUILT_IN_TYPE(float) -CRNLIB_DEFINE_BUILT_IN_TYPE(double) -CRNLIB_DEFINE_BUILT_IN_TYPE(long double) + CRNLIB_DEFINE_BUILT_IN_TYPE(float) -#undef CRNLIB_DEFINE_BUILT_IN_TYPE + CRNLIB_DEFINE_BUILT_IN_TYPE(double) -// See: http://erdani.org/publications/cuj-2004-06.pdf + CRNLIB_DEFINE_BUILT_IN_TYPE(long double) -template -struct bitwise_movable { - enum { cFlag = false }; -}; +#undef CRNLIB_DEFINE_BUILT_IN_TYPE + + // See: http://erdani.org/publications/cuj-2004-06.pdf + template + struct bitwise_movable + 
{ + enum { cFlag = false }; + }; -// Defines type Q as bitwise movable. -// Bitwise movable: type T may be safely moved to a new location via memcpy, without requiring the old copy to be destructed. -// However, the final version of the object (wherever it winds up in memory) must be eventually destructed (a single time, of course). -// Bitwise movable is a superset of bitwise copyable (all bitwise copyable types are also bitwise movable). + // Defines type Q as bitwise movable. + // Bitwise movable: type T may be safely moved to a new location via memcpy, without requiring the old copy to be destructed. + // However, the final version of the object (wherever it winds up in memory) must be eventually destructed (a single time, of course). + // Bitwise movable is a superset of bitwise copyable (all bitwise copyable types are also bitwise movable). #define CRNLIB_DEFINE_BITWISE_MOVABLE(Q) \ - template <> \ - struct bitwise_movable { \ - enum { cFlag = true }; \ - }; - -template -struct bitwise_copyable { - enum { cFlag = false }; -}; - -// Defines type Q as bitwise copyable. -// Bitwise copyable: type T may be safely and freely copied (duplicated) via memcpy, and *does not* require destruction. + template <> \ + struct bitwise_movable { \ + enum { cFlag = true }; \ + }; + + template + struct bitwise_copyable + { + enum { cFlag = false }; + }; + + // Defines type Q as bitwise copyable. + // Bitwise copyable: type T may be safely and freely copied (duplicated) via memcpy, and *does not* require destruction. 
#define CRNLIB_DEFINE_BITWISE_COPYABLE(Q) \ - template <> \ - struct bitwise_copyable { \ - enum { cFlag = true }; \ - }; + template <> \ + struct bitwise_copyable { \ + enum { cFlag = true }; \ + }; #define CRNLIB_IS_POD(T) __is_pod(T) @@ -141,34 +247,37 @@ struct bitwise_copyable { #define CRNLIB_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) -// From yasli_traits.h: -// Credit goes to Boost; -// also found in the C++ Templates book by Vandevoorde and Josuttis + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis -typedef char (&yes_t)[1]; -typedef char (&no_t)[2]; + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; -template -yes_t class_test(int U::*); -template -no_t class_test(...); + template + yes_t class_test(int U::*); + template + no_t class_test(...); -template -struct is_class { - enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; -}; + template + struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; -template -struct is_pointer { - enum { value = false }; -}; + template + struct is_pointer + { + enum { value = false }; + }; -template -struct is_pointer { - enum { value = true }; -}; + template + struct is_pointer + { + enum { value = true }; + }; -CRNLIB_DEFINE_BITWISE_COPYABLE(empty_type); -CRNLIB_DEFINE_BITWISE_MOVABLE(empty_type); + CRNLIB_DEFINE_BITWISE_COPYABLE(empty_type); -} // namespace crnlib + CRNLIB_DEFINE_BITWISE_MOVABLE(empty_type); +} // namespace crnlib diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index a6d72d9..bcb5721 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -1,5 +1,25 @@ -// File: crn_tree_clusterizer.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once diff --git a/crnlib/crn_types.h b/crnlib/crn_types.h index 9703587..5ca3c75 100644 --- a/crnlib/crn_types.h +++ b/crnlib/crn_types.h @@ -1,5 +1,26 @@ -// File: crn_types.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" diff --git a/crnlib/crn_utils.cpp b/crnlib/crn_utils.cpp index 5bde3ad..afa6d86 100644 --- a/crnlib/crn_utils.cpp +++ b/crnlib/crn_utils.cpp @@ -1,4 +1,26 @@ -// File: crn_utils.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_core.h" #include "crn_utils.h" @@ -76,7 +98,5 @@ namespace crnlib return num_mips; } - - } // namespace utils - -} // namespace crnlib + } // namespace utils +} // namespace crnlib diff --git a/crnlib/crn_utils.h b/crnlib/crn_utils.h index 7124ffd..6a909a6 100644 --- a/crnlib/crn_utils.h +++ b/crnlib/crn_utils.h @@ -1,5 +1,26 @@ -// File: crn_utils.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" diff --git a/crnlib/crn_value.cpp b/crnlib/crn_value.cpp index fb1aaea..3dbec9b 100644 --- a/crnlib/crn_value.cpp +++ b/crnlib/crn_value.cpp @@ -1,6 +1,26 @@ -// File: crn_value.cpp -// See Copyright Notice and license at the end of inc/crnlib.h -#include "crn_core.h" +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crn_value.h" namespace crnlib @@ -18,4 +38,4 @@ namespace crnlib nullptr, }; -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_value.h b/crnlib/crn_value.h index dc8a3c2..95b0de4 100644 --- a/crnlib/crn_value.h +++ b/crnlib/crn_value.h @@ -1,5 +1,25 @@ -// File: crn_value.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ #pragma once diff --git a/crnlib/crn_vec.h b/crnlib/crn_vec.h index 088c667..3c3fc44 100644 --- a/crnlib/crn_vec.h +++ b/crnlib/crn_vec.h @@ -1,5 +1,26 @@ -// File: crn_vec.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_core.h" @@ -247,7 +268,7 @@ namespace crnlib { for (uint i = 0; i < N; i++) { - if (!(m_s[i] == rhs.m_s[i])) + if (m_s[i] != rhs.m_s[i]) { return false; } @@ -263,7 +284,7 @@ namespace crnlib { return true; } - else if (!(m_s[i] == rhs.m_s[i])) + else if (m_s[i] != rhs.m_s[i]) { return false; } diff --git a/crnlib/crn_vec_interval.h b/crnlib/crn_vec_interval.h index 7d05aba..2628a54 100644 --- a/crnlib/crn_vec_interval.h +++ b/crnlib/crn_vec_interval.h @@ -1,5 +1,25 @@ -// File: crn_vec_interval.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #pragma once diff --git a/crnlib/crn_vector.cpp b/crnlib/crn_vector.cpp index 1881eb9..4107e67 100644 --- a/crnlib/crn_vector.cpp +++ b/crnlib/crn_vector.cpp @@ -1,11 +1,28 @@ -// File: crn_vector.cpp -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "crn_vector.h" -#include "crn_rand.h" -#include "crn_color.h" -#include "crn_vec.h" namespace crnlib { @@ -93,5 +110,4 @@ namespace crnlib return true; } - -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h index 9de2336..1276ed2 100644 --- a/crnlib/crn_vector.h +++ b/crnlib/crn_vector.h @@ -1,5 +1,26 @@ -// File: crn_vector.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #include "crn_export.h" @@ -553,7 +574,7 @@ namespace crnlib const T* pDst = rhs.m_p; for (uint i = m_size; i; i--) { - if (!(*pSrc++ == *pDst++)) + if (*pSrc++ != *pDst++) { return false; } diff --git a/crnlib/crn_vector2d.h b/crnlib/crn_vector2d.h index 558e2a4..52c4551 100644 --- a/crnlib/crn_vector2d.h +++ b/crnlib/crn_vector2d.h @@ -1,4 +1,26 @@ -// File: crn_vector2d.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once namespace crnlib diff --git a/crnlib/crn_version.cpp b/crnlib/crn_version.cpp index 2b669f8..50fdfae 100644 --- a/crnlib/crn_version.cpp +++ b/crnlib/crn_version.cpp @@ -1,4 +1,26 @@ -#include "crn_core.h" +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #include "crnlib.h" #include "crn_version.h" diff --git a/crnlib/crn_winhdr.h b/crnlib/crn_winhdr.h index 7d602f5..c294a8a 100644 --- a/crnlib/crn_winhdr.h +++ b/crnlib/crn_winhdr.h @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + #pragma once #ifndef WIN32 diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp index 0a6f3bc..c956553 100644 --- a/crnlib/crnlib.cpp +++ b/crnlib/crnlib.cpp @@ -1,523 +1,543 @@ -// File: crnlib.cpp -// See Copyright Notice and license at the end of inc/crnlib.h - -#include "crn_core.h" -#include "crnlib.h" -#include "crn_comp.h" -#include "crn_dds_comp.h" -#include "crn_dynamic_stream.h" -#include "crn_buffer_stream.h" -#include "crn_ryg_dxt.hpp" -#include "crn_etc.h" -#include "crn_defs.h" -#include "crn_rg_etc1.h" - +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#include "crn_core.h" +#include "crnlib.h" +#include "crn_comp.h" +#include "crn_dds_comp.h" +#include "crn_dynamic_stream.h" +#include "crn_buffer_stream.h" +#include "crn_ryg_dxt.hpp" +#include "crn_etc.h" +#include "crn_defs.h" +#include "crn_rg_etc1.h" + namespace crnlib -{ +{ static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void*) - { - return crnlib_realloc(p, size, pActual_size, movable); - } - + { + return crnlib_realloc(p, size, pActual_size, movable); + } + static size_t msize_func(void* p, void*) - { - return crnlib_msize(p); - } - + { + return crnlib_msize(p); + } + class crnlib_global_initializer - { - public: + { + public: crnlib_global_initializer() - { - crn_threading_init(); - - crnlib_enable_fail_exceptions(true); - - // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. - crnd::crnd_set_memory_callbacks(realloc_func, msize_func, nullptr); - - ryg_dxt::sInitDXT(); - - pack_etc1_block_init(); - - rg_etc1::pack_etc1_block_init(); - } - }; - - crnlib_global_initializer g_crnlib_initializer; -} // namespace crnlib - -using namespace crnlib; - + { + crn_threading_init(); + + crnlib_enable_fail_exceptions(true); + + // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. 
+ crnd::crnd_set_memory_callbacks(realloc_func, msize_func, nullptr); + + ryg_dxt::sInitDXT(); + + pack_etc1_block_init(); + + rg_etc1::pack_etc1_block_init(); + } + }; + + crnlib_global_initializer g_crnlib_initializer; +} // namespace crnlib + +using namespace crnlib; + const char* crn_get_format_string(crn_format fmt) -{ - return pixel_format_helpers::get_crn_format_string(fmt); -} - +{ + return pixel_format_helpers::get_crn_format_string(fmt); +} + crn_uint32 crn_get_format_fourcc(crn_format fmt) -{ - return crnd::crnd_crn_format_to_fourcc(fmt); -} - +{ + return crnd::crnd_crn_format_to_fourcc(fmt); +} + crn_uint32 crn_get_format_bits_per_texel(crn_format fmt) -{ - return crnd::crnd_get_crn_format_bits_per_texel(fmt); -} - +{ + return crnd::crnd_get_crn_format_bits_per_texel(fmt); +} + crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt) -{ - return crnd::crnd_get_bytes_per_dxt_block(fmt); -} - +{ + return crnd::crnd_get_bytes_per_dxt_block(fmt); +} + crn_format crn_get_fundamental_dxt_format(crn_format fmt) -{ - return crnd::crnd_get_fundamental_dxt_format(fmt); -} - +{ + return crnd::crnd_get_fundamental_dxt_format(fmt); +} + const char* crn_get_file_type_ext(crn_file_type file_type) -{ +{ switch (file_type) - { - case cCRNFileTypeDDS: - return "dds"; - case cCRNFileTypeCRN: - return "crn"; - default: - break; - } - return "?"; -} - + { + case cCRNFileTypeDDS: + return "dds"; + case cCRNFileTypeCRN: + return "crn"; + default: + break; + } + return "?"; +} + const char* crn_get_mip_mode_desc(crn_mip_mode m) -{ +{ switch (m) - { - case cCRNMipModeUseSourceOrGenerateMips: - return "Use source/generate if none"; - case cCRNMipModeUseSourceMips: - return "Only use source MIP maps (if any)"; - case cCRNMipModeGenerateMips: - return "Always generate new MIP maps"; - case cCRNMipModeNoMips: - return "No MIP maps"; - default: - break; - } - return "?"; -} - + { + case cCRNMipModeUseSourceOrGenerateMips: + return "Use source/generate if none"; + case 
cCRNMipModeUseSourceMips: + return "Only use source MIP maps (if any)"; + case cCRNMipModeGenerateMips: + return "Always generate new MIP maps"; + case cCRNMipModeNoMips: + return "No MIP maps"; + default: + break; + } + return "?"; +} + const char* crn_get_mip_mode_name(crn_mip_mode m) -{ +{ switch (m) - { - case cCRNMipModeUseSourceOrGenerateMips: - return "UseSourceOrGenerate"; - case cCRNMipModeUseSourceMips: - return "UseSource"; - case cCRNMipModeGenerateMips: - return "Generate"; - case cCRNMipModeNoMips: - return "None"; - default: - break; - } - return "?"; -} - + { + case cCRNMipModeUseSourceOrGenerateMips: + return "UseSourceOrGenerate"; + case cCRNMipModeUseSourceMips: + return "UseSource"; + case cCRNMipModeGenerateMips: + return "Generate"; + case cCRNMipModeNoMips: + return "None"; + default: + break; + } + return "?"; +} + const char* crn_get_mip_filter_name(crn_mip_filter f) -{ +{ switch (f) - { - case cCRNMipFilterBox: - return "box"; - case cCRNMipFilterTent: - return "tent"; - case cCRNMipFilterLanczos4: - return "lanczos4"; - case cCRNMipFilterMitchell: - return "mitchell"; - case cCRNMipFilterKaiser: - return "kaiser"; - default: - break; - } - return "?"; -} - + { + case cCRNMipFilterBox: + return "box"; + case cCRNMipFilterTent: + return "tent"; + case cCRNMipFilterLanczos4: + return "lanczos4"; + case cCRNMipFilterMitchell: + return "mitchell"; + case cCRNMipFilterKaiser: + return "kaiser"; + default: + break; + } + return "?"; +} + const char* crn_get_scale_mode_desc(crn_scale_mode sm) -{ +{ switch (sm) - { - case cCRNSMDisabled: - return "disabled"; - case cCRNSMAbsolute: - return "absolute"; - case cCRNSMRelative: - return "relative"; - case cCRNSMLowerPow2: - return "lowerpow2"; - case cCRNSMNearestPow2: - return "nearestpow2"; - case cCRNSMNextPow2: - return "nextpow2"; - default: - break; - } - return "?"; -} - + { + case cCRNSMDisabled: + return "disabled"; + case cCRNSMAbsolute: + return "absolute"; + case cCRNSMRelative: + return 
"relative"; + case cCRNSMLowerPow2: + return "lowerpow2"; + case cCRNSMNearestPow2: + return "nearestpow2"; + case cCRNSMNextPow2: + return "nextpow2"; + default: + break; + } + return "?"; +} + const char* crn_get_dxt_quality_string(crn_dxt_quality q) -{ +{ switch (q) - { - case cCRNDXTQualitySuperFast: - return "SuperFast"; - case cCRNDXTQualityFast: - return "Fast"; - case cCRNDXTQualityNormal: - return "Normal"; - case cCRNDXTQualityBetter: - return "Better"; - case cCRNDXTQualityUber: - return "Uber"; - default: - break; - } - CRNLIB_ASSERT(false); - return "?"; -} - + { + case cCRNDXTQualitySuperFast: + return "SuperFast"; + case cCRNDXTQualityFast: + return "Fast"; + case cCRNDXTQualityNormal: + return "Normal"; + case cCRNDXTQualityBetter: + return "Better"; + case cCRNDXTQualityUber: + return "Uber"; + default: + break; + } + CRNLIB_ASSERT(false); + return "?"; +} + void crn_free_block(void* pBlock) -{ - crnlib_free(pBlock); -} - +{ + crnlib_free(pBlock); +} + void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) -{ - compressed_size = 0; +{ + compressed_size = 0; if (pActual_quality_level) - { - *pActual_quality_level = 0; - } + { + *pActual_quality_level = 0; + } if (pActual_bitrate) - { - *pActual_bitrate = 0.0f; - } - + { + *pActual_bitrate = 0.0f; + } + if (!comp_params.check()) - { - return nullptr; - } - - crnlib::vector crn_file_data; + { + return nullptr; + } + + crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) - { - return nullptr; - } - - compressed_size = crn_file_data.size(); - return crn_file_data.assume_ownership(); -} - + { + return nullptr; + } + + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); +} + void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* 
pActual_quality_level, float* pActual_bitrate) -{ - compressed_size = 0; +{ + compressed_size = 0; if (pActual_quality_level) - { - *pActual_quality_level = 0; - } + { + *pActual_quality_level = 0; + } if (pActual_bitrate) - { - *pActual_bitrate = 0.0f; - } - + { + *pActual_bitrate = 0.0f; + } + if ((!comp_params.check()) || (!mip_params.check())) - { - return nullptr; - } - - crnlib::vector crn_file_data; + { + return nullptr; + } + + crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) - { - return nullptr; - } - - compressed_size = crn_file_data.size(); - return crn_file_data.assume_ownership(); -} - + { + return nullptr; + } + + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); +} + void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size) -{ - mipmapped_texture tex; +{ + mipmapped_texture tex; if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) - { - file_size = 0; - return nullptr; - } - - file_size = 0; - - dynamic_stream dds_file_data; - dds_file_data.reserve(128 * 1024); - data_stream_serializer serializer(dds_file_data); + { + file_size = 0; + return nullptr; + } + + file_size = 0; + + dynamic_stream dds_file_data; + dds_file_data.reserve(128 * 1024); + data_stream_serializer serializer(dds_file_data); if (!tex.write_dds(serializer)) - { - return nullptr; - } - dds_file_data.reserve(0); - - file_size = static_cast(dds_file_data.get_size()); - return dds_file_data.get_buf().assume_ownership(); -} - + { + return nullptr; + } + dds_file_data.reserve(0); + + file_size = static_cast(dds_file_data.get_size()); + return dds_file_data.get_buf().assume_ownership(); +} + bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc) -{ - memset(&tex_desc, 0, sizeof(tex_desc)); - - mipmapped_texture tex; - buffer_stream 
in_stream(pDDS_file_data, dds_file_size); - data_stream_serializer in_serializer(in_stream); +{ + memset(&tex_desc, 0, sizeof(tex_desc)); + + mipmapped_texture tex; + buffer_stream in_stream(pDDS_file_data, dds_file_size); + data_stream_serializer in_serializer(in_stream); if (!tex.read_dds(in_serializer)) - { - return false; - } - + { + return false; + } + if (tex.is_packed()) - { - // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? - bool uncook = true; - + { + // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? + bool uncook = true; + if (!tex.unpack_from_dxt(uncook)) - { - return false; - } - } - - tex_desc.m_faces = tex.get_num_faces(); - tex_desc.m_width = tex.get_width(); - tex_desc.m_height = tex.get_height(); - tex_desc.m_levels = tex.get_num_levels(); - tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format(); - + { + return false; + } + } + + tex_desc.m_faces = tex.get_num_faces(); + tex_desc.m_width = tex.get_width(); + tex_desc.m_height = tex.get_height(); + tex_desc.m_levels = tex.get_num_levels(); + tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format(); + for (uint32 f = 0; f < tex.get_num_faces(); f++) - { + { for (uint32 l = 0; l < tex.get_num_levels(); l++) - { - mip_level* pLevel = tex.get_level(f, l); - image_u8* pImg = pLevel->get_image(); - ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership()); - } - } - - return true; -} - + { + mip_level* pLevel = tex.get_level(f, l); + image_u8* pImg = pLevel->get_image(); + ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership()); + } + } + + return true; +} + void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc) -{ +{ for (uint32 f = 0; f < desc.m_faces; f++) - { + { for (uint32 l = 0; l < desc.m_levels; l++) - { - crn_free_block(ppImages[l + desc.m_levels * f]); - } - } -} - -// Simple low-level DXTn 4x4 block compressor API. 
-// Basically just a basic wrapper over the crnlib::dxt_image class. - + { + crn_free_block(ppImages[l + desc.m_levels * f]); + } + } +} + +// Simple low-level DXTn 4x4 block compressor API. +// Basically just a basic wrapper over the crnlib::dxt_image class. + namespace crnlib -{ +{ class crn_block_compressor - { - CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor); - public: + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor); + public: crn_block_compressor() - { - } - + { + } + bool init(const crn_comp_params& params) - { - m_comp_params = params; - - m_pack_params.init(params); - - crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format); - pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt); - + { + m_comp_params = params; + + m_pack_params.init(params); + + crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format); + pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt); + if ((params.get_flag(cCRNCompFlagDXT1AForTransparency)) && (basic_pixel_fmt == PIXEL_FMT_DXT1)) - { - basic_pixel_fmt = PIXEL_FMT_DXT1A; - } - + { + basic_pixel_fmt = PIXEL_FMT_DXT1A; + } + if (!m_image.init(pixel_format_helpers::get_dxt_format(basic_pixel_fmt), cDXTBlockSize, cDXTBlockSize, false)) - { - return false; - } - - return true; - } - + { + return false; + } + + return true; + } + void compress_block(const crn_uint32* pPixels, void* pDst_block) - { + { if (m_image.is_valid()) - { - m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_set_block_pixels_context); - memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); - } - } - - private: - dxt_image m_image; - crn_comp_params m_comp_params; - dxt_image::pack_params m_pack_params; - dxt_image::set_block_pixels_context m_set_block_pixels_context; - }; -} - + { + m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), 
m_pack_params, m_set_block_pixels_context); + memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); + } + } + + private: + dxt_image m_image; + crn_comp_params m_comp_params; + dxt_image::pack_params m_pack_params; + dxt_image::set_block_pixels_context m_set_block_pixels_context; + }; +} + crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params) -{ - crn_block_compressor* pComp = crnlib_new(); +{ + crn_block_compressor* pComp = crnlib_new(); if (!pComp->init(params)) - { - crnlib_delete(pComp); - return nullptr; - } - return pComp; -} - + { + crnlib_delete(pComp); + return nullptr; + } + return pComp; +} + void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block) -{ - crn_block_compressor* pComp = static_cast(pContext); - pComp->compress_block(pPixels, pDst_block); -} - +{ + crn_block_compressor* pComp = static_cast(pContext); + pComp->compress_block(pPixels, pDst_block); +} + void crn_free_block_compressor(crn_block_compressor_context_t pContext) -{ - crnlib_delete(static_cast(pContext)); -} - +{ + crnlib_delete(static_cast(pContext)); +} + bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels_u32, crn_format crn_fmt) -{ - color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); - +{ + color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); + switch (crn_get_fundamental_dxt_format(crn_fmt)) - { + { case cCRNFmtETC1: - { - const etc1_block& block = *reinterpret_cast(pSrc_block); - unpack_etc1(block, pDst_pixels, false); - break; - } + { + const etc1_block& block = *reinterpret_cast(pSrc_block); + unpack_etc1(block, pDst_pixels, false); + break; + } case cCRNFmtDXT1: - { - const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); - - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - + { + const 
dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); + + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - - pDst_pixels[i] = colors[s]; - } - - break; - } - + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i] = colors[s]; + } + + break; + } + case cCRNFmtDXT3: - { - const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); - - const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - + { + const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); + + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); - - pDst_pixels[i] = colors[s]; - pDst_pixels[i].a = static_cast(a); - } - - break; - } - + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); + + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(a); + } + + break; + } + case cCRNFmtDXT5: - { - const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); - - const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; - color_quad_u8 colors[cDXT1SelectorValues]; - pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); - - uint 
values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - { - const uint s = pDXT1_block->get_selector(i & 3, i >> 2); - const uint a = pDXT5_block->get_selector(i & 3, i >> 2); - - pDst_pixels[i] = colors[s]; - pDst_pixels[i].a = static_cast(values[a]); - } - } - - case cCRNFmtDXN_XY: + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT5_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(values[a]); + } + } + + case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: - { - const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); - const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; - - uint values0[cDXT5SelectorValues]; - dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); - - uint values1[cDXT5SelectorValues]; - dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); - + { + const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); + const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; + + uint values0[cDXT5SelectorValues]; + dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); + + uint values1[cDXT5SelectorValues]; + dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); + for (uint i = 0; i < 
cDXTBlockSize * cDXTBlockSize; i++) - { - const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); - const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); - + { + const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); + const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); + if (crn_fmt == cCRNFmtDXN_XY) - { - pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); - } + { + pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); + } else - { - pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); - } - } - - break; - } - + { + pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); + } + } + + break; + } + case cCRNFmtDXT5A: - { - const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); - - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); - + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) - { - const uint s = pDXT5_block->get_selector(i & 3, i >> 2); - - pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); - } - - break; - } + { + const uint s = pDXT5_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); + } + + break; + } default: - { - return false; - } - } - - return true; -} + { + return false; + } + } + + return true; +} diff --git a/crunch/corpus_gen.cpp b/crunch/corpus_gen.cpp index 3c0b19e..215d397 100644 --- a/crunch/corpus_gen.cpp +++ b/crunch/corpus_gen.cpp @@ -1,6 +1,26 @@ -// File: corpus_gen.cpp - Block compression corpus generator. -// See Copyright Notice and license at the end of inc/crnlib.h -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // Example command line: // -gentest [-deep] [-blockpercentage .035] [-width 4096] [-height 4096] -in c:\temp\*.jpg [-in c:\temp\*.jpeg] [-in @blah.txt] @@ -107,7 +127,7 @@ namespace crn dst_block_index++; } -#if 0 +#if 0 //new_img.swap(img); #else crnlib::vector remaining_blocks(num_blocks_x); @@ -355,10 +375,8 @@ namespace crn num_blocks_remaining--; } - - } // file_index - - } // in_value_index + } // file_index + } // in_value_index } if (next_dst_block) @@ -378,4 +396,4 @@ namespace crn return true; } -} // namespace crnlib +} // namespace crnlib diff --git a/crunch/corpus_gen.h b/crunch/corpus_gen.h index 2e5f0ee..3d0f584 100644 --- a/crunch/corpus_gen.h +++ b/crunch/corpus_gen.h @@ -1,5 +1,26 @@ -// File: corpus_gen.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_command_line_params.h" diff --git a/crunch/corpus_test.cpp b/crunch/corpus_test.cpp index d3304db..3166f2a 100644 --- a/crunch/corpus_test.cpp +++ b/crunch/corpus_test.cpp @@ -1,4 +1,25 @@ -// File: corpus_test.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ #include "crn_core.h" #include "corpus_test.h" @@ -417,7 +438,7 @@ namespace crn em.print("Best of Both: "); } } - } // file_index + } // file_index } flush_bad_blocks(); @@ -426,4 +447,4 @@ namespace crn return true; } -} // namespace crnlib +} // namespace crnlib diff --git a/crunch/corpus_test.h b/crunch/corpus_test.h index dd31037..ae23d66 100644 --- a/crunch/corpus_test.h +++ b/crunch/corpus_test.h @@ -1,5 +1,26 @@ -// File: corpus_test.h -// See Copyright Notice and license at the end of inc/crnlib.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + #pragma once #include "crn_command_line_params.h" diff --git a/crunch/crunch.cpp b/crunch/crunch.cpp index 73cdfe2..9575031 100644 --- a/crunch/crunch.cpp +++ b/crunch/crunch.cpp @@ -1,12 +1,28 @@ -// File: crunch.cpp - Command line tool for DDS/CRN texture compression/decompression. -// This tool exposes all of crnlib's functionality. 
It also uses a bunch of internal crlib -// classes that aren't directly exposed in the main crnlib.h header. The actual tool is -// implemented as a single class "crunch" which in theory is reusable. Most of the heavy -// lifting is actually done by functions in the crnlib::texture_conversion namespace, -// which are mostly wrappers over the public crnlib.h functions. -// See Copyright Notice and license at the end of inc/crnlib.h -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing + #include "crn_core.h" #include "crn_console.h" @@ -66,14 +82,17 @@ class crunch { return m_num_processed; } + inline uint32 get_num_failed() const { return m_num_failed; } + inline uint32 get_num_succeeded() const { return m_num_succeeded; } + inline uint32 get_num_skipped() const { return m_num_skipped; @@ -364,7 +383,8 @@ class crunch uint32 total_input_specs = 0; - for (uint32 phase = 0; phase < 2; phase++) { + for (uint32 phase = 0; phase < 2; phase++) + { command_line_params::param_map_const_iterator begin, end; m_params.find(phase ? "" : "file", begin, end); for (command_line_params::param_map_const_iterator it = begin; it != end; ++it) @@ -466,7 +486,8 @@ class crunch return true; } - bool read_only_file_check(const char* pDst_filename) { + bool read_only_file_check(const char* pDst_filename) + { if (!file_utils::is_read_only(pDst_filename)) { return true; @@ -674,24 +695,20 @@ class crunch switch (status) { - case cCSSucceeded: - { + case cCSSucceeded: { console::info(""); m_num_succeeded++; break; } - case cCSSkipped: - { + case cCSSkipped: { console::info("Skipping file.\n"); m_num_skipped++; break; } - case cCSBadParam: - { + case cCSBadParam: { return false; } - default: - { + default: { if (!m_params.get_value_as_bool("ignoreerrors")) return false; @@ -803,7 +820,8 @@ class crunch { dynamic_string val; - if (m_params.get_value_as_string("mipMode", 0, val)) { + if (m_params.get_value_as_string("mipMode", 0, val)) + { uint32 i; for (i = 0; i < cCRNMipModeTotal; i++) { @@ -1319,7 +1337,6 @@ class crunch params.m_no_stats = m_params.get_value_as_bool("nostats"); - params.m_dst_format = PIXEL_FMT_INVALID; for (uint32 i = 0; i < pixel_format_helpers::get_num_formats(); i++) @@ -1471,7 +1488,8 @@ static int main_internal(int argc, char* argv[]) return status ? 
EXIT_SUCCESS : EXIT_FAILURE; } -static void pause_and_wait(void) { +static void pause_and_wait(void) +{ console::enable_output(); console::message("\nPress a key to continue."); diff --git a/emscripten/crunch_lib.cpp b/emscripten/crunch_lib.cpp index 0ca2a4e..1237a21 100644 --- a/emscripten/crunch_lib.cpp +++ b/emscripten/crunch_lib.cpp @@ -1,24 +1,26 @@ -/* Copyright (c) 2013, Evan Parker, Brandon Jones. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, Evan Parker, Brandon Jones. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #define PLATFORM_NACL // This disables use of 64 bit integers, among other things. diff --git a/examples/example1/example1.cpp b/examples/example1/example1.cpp index 9b166f4..c786c80 100644 --- a/examples/example1/example1.cpp +++ b/examples/example1/example1.cpp @@ -1,6 +1,29 @@ -// File: example1.cpp - Simple command line tool that uses the crnlib lib and the crn_decomp.h header file library +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +// Simple command line tool that uses the crnlib lib and the crn_decomp.h header file library // to compress, transcode/unpack, and inspect CRN/DDS textures. -// See Copyright Notice and license at the end of inc/crnlib.h + #include #include #include @@ -34,511 +57,566 @@ using namespace crnlib; const int cDefaultCRNQualityLevel = 128; -static int print_usage() { - printf("Description: Simple crnlib API example program.\n"); - printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); - printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); - printf("Usage: example1 [mode: i/c/d] [source_file] [options]\n"); - printf("\nModes:\n"); - printf("c: Compress to .DDS or .CRN using the crn_compress() func. 
in crnlib.h\n"); - printf(" The default output format is .DDS\n"); - printf(" Supported source image formats:\n"); - printf(" Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); - printf("d: Transcodes a .CRN file to .DDS using the crn_decompress_crn_to_dds() func.,\n"); - printf("or unpacks each face and mipmap level in a .DDS file to multiple .TGA files.\n"); - printf("i: Display info about source_file.\n"); - printf("\nOptions:\n"); - printf("-out filename - Force output filename.\n"); - printf("\nCompression mode options:\n"); - printf("-crn - Generate a .CRN file instead of .DDS\n"); - printf("-bitrate # - Specify desired CRN/DDS bits/texel, from [.1-8]\n"); - printf(" When writing .DDS: -bitrate or -quality enable clustered DXTn compression.\n"); - printf("-quality # - Specify CRN/DDS quality level factor, from [0-255]\n"); - printf("-noAdaptiveBlocks - Always use 4x4 blocks instead of up to 8x8 macroblocks\n"); - printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); - printf("-nomips - Don't generate mipmaps\n"); - printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); - printf("-converttoluma - Set RGB to luma before compression.\n"); - printf("-pixelformat fmt - Output file's crn_format: DXT1, DXT1A, DXT3, DXT5_CCxY,\n"); - printf(" DXT5_xGxR, DXT5_xGBR, DXT5_AGBR, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC),\n"); - printf(" DXT5A (ATN1N)\n"); - printf(" If no output format is specified, this example uses either DXT1 or DXT5.\n"); - return EXIT_FAILURE; +static int print_usage() +{ + printf("Description: Simple crnlib API example program.\n"); + printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); + printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); + printf("Usage: example1 [mode: i/c/d] [source_file] [options]\n"); + printf("\nModes:\n"); + printf("c: Compress to .DDS or .CRN using the crn_compress() func. 
in crnlib.h\n"); + printf(" The default output format is .DDS\n"); + printf(" Supported source image formats:\n"); + printf(" Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); + printf("d: Transcodes a .CRN file to .DDS using the crn_decompress_crn_to_dds() func.,\n"); + printf("or unpacks each face and mipmap level in a .DDS file to multiple .TGA files.\n"); + printf("i: Display info about source_file.\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename.\n"); + printf("\nCompression mode options:\n"); + printf("-crn - Generate a .CRN file instead of .DDS\n"); + printf("-bitrate # - Specify desired CRN/DDS bits/texel, from [.1-8]\n"); + printf(" When writing .DDS: -bitrate or -quality enable clustered DXTn compression.\n"); + printf("-quality # - Specify CRN/DDS quality level factor, from [0-255]\n"); + printf("-noAdaptiveBlocks - Always use 4x4 blocks instead of up to 8x8 macroblocks\n"); + printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); + printf("-nomips - Don't generate mipmaps\n"); + printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); + printf("-converttoluma - Set RGB to luma before compression.\n"); + printf("-pixelformat fmt - Output file's crn_format: DXT1, DXT1A, DXT3, DXT5_CCxY,\n"); + printf(" DXT5_xGxR, DXT5_xGBR, DXT5_AGBR, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC),\n"); + printf(" DXT5A (ATN1N)\n"); + printf(" If no output format is specified, this example uses either DXT1 or DXT5.\n"); + return EXIT_FAILURE; } -static int error(const char* pMsg, ...) { - va_list args; - va_start(args, pMsg); - char buf[512]; - vsprintf_s(buf, sizeof(buf), pMsg, args); - va_end(args); - printf("%s", buf); - return EXIT_FAILURE; +static int error(const char* pMsg, ...) +{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; } // Loads an entire file into an allocated memory block. 
-static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { - size = 0; +static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) +{ + size = 0; - FILE* pFile = nullptr; - fopen_s(&pFile, pFilename, "rb"); - if (!pFile) - return nullptr; + FILE* pFile = nullptr; + fopen_s(&pFile, pFilename, "rb"); + if (!pFile) + return nullptr; - fseek(pFile, 0, SEEK_END); - size = ftell(pFile); - fseek(pFile, 0, SEEK_SET); + fseek(pFile, 0, SEEK_END); + size = ftell(pFile); + fseek(pFile, 0, SEEK_SET); - crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); - if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) { - fclose(pFile); - free(pSrc_file_data); - size = 0; - return nullptr; - } + crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); + if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) + { + fclose(pFile); + free(pSrc_file_data); + size = 0; + return nullptr; + } - fclose(pFile); - return pSrc_file_data; + fclose(pFile); + return pSrc_file_data; } // Cracks a CRN's file header using the helper functions in crn_decomp.h. 
-static bool print_crn_info(const crn_uint8* pData, crn_uint32 data_size) { - crnd::crn_file_info file_info; - if (!crnd::crnd_validate_file(pData, data_size, &file_info)) - return false; - - printf("crnd_validate_file:\n"); - printf("File size: %u\nActualDataSize: %u\nHeaderSize: %u\nTotalPaletteSize: %u\nTablesSize: %u\nLevels: %u\n", data_size, - file_info.m_actual_data_size, file_info.m_header_size, file_info.m_total_palette_size, file_info.m_tables_size, file_info.m_levels); - - printf("LevelCompressedSize: "); - for (crn_uint32 i = 0; i < cCRNMaxLevels; i++) - printf("%u ", file_info.m_level_compressed_size[i]); - printf("\n"); - - printf("ColorEndpointPaletteSize: %u\n", file_info.m_color_endpoint_palette_entries); - printf("ColorSelectorPaletteSize: %u\n", file_info.m_color_selector_palette_entries); - printf("AlphaEndpointPaletteSize: %u\n", file_info.m_alpha_endpoint_palette_entries); - printf("AlphaSelectorPaletteSize: %u\n", file_info.m_alpha_selector_palette_entries); - - printf("crnd_get_texture_info:\n"); - crnd::crn_texture_info tex_info; - if (!crnd::crnd_get_texture_info(pData, data_size, &tex_info)) - return false; - - printf("Dimensions: %ux%u\nLevels: %u\nFaces: %u\nBytesPerBlock: %u\nUserData0: %u\nUserData1: %u\nCrnFormat: %S\n", - tex_info.m_width, tex_info.m_height, tex_info.m_levels, tex_info.m_faces, tex_info.m_bytes_per_block, tex_info.m_userdata0, tex_info.m_userdata1, crn_get_format_string(tex_info.m_format)); - - return true; +static bool print_crn_info(const crn_uint8* pData, crn_uint32 data_size) +{ + crnd::crn_file_info file_info; + if (!crnd::crnd_validate_file(pData, data_size, &file_info)) + return false; + + printf("crnd_validate_file:\n"); + printf("File size: %u\nActualDataSize: %u\nHeaderSize: %u\nTotalPaletteSize: %u\nTablesSize: %u\nLevels: %u\n", data_size, + file_info.m_actual_data_size, file_info.m_header_size, file_info.m_total_palette_size, file_info.m_tables_size, file_info.m_levels); + + printf("LevelCompressedSize: 
"); + for (crn_uint32 i = 0; i < cCRNMaxLevels; i++) + printf("%u ", file_info.m_level_compressed_size[i]); + printf("\n"); + + printf("ColorEndpointPaletteSize: %u\n", file_info.m_color_endpoint_palette_entries); + printf("ColorSelectorPaletteSize: %u\n", file_info.m_color_selector_palette_entries); + printf("AlphaEndpointPaletteSize: %u\n", file_info.m_alpha_endpoint_palette_entries); + printf("AlphaSelectorPaletteSize: %u\n", file_info.m_alpha_selector_palette_entries); + + printf("crnd_get_texture_info:\n"); + crnd::crn_texture_info tex_info; + if (!crnd::crnd_get_texture_info(pData, data_size, &tex_info)) + return false; + + printf("Dimensions: %ux%u\nLevels: %u\nFaces: %u\nBytesPerBlock: %u\nUserData0: %u\nUserData1: %u\nCrnFormat: %S\n", + tex_info.m_width, tex_info.m_height, tex_info.m_levels, tex_info.m_faces, tex_info.m_bytes_per_block, tex_info.m_userdata0, tex_info.m_userdata1, crn_get_format_string(tex_info.m_format)); + + return true; } // Cracks the DDS header and dump its contents. 
-static bool print_dds_info(const void* pData, crn_uint32 data_size) { - if ((data_size < 128) || (*reinterpret_cast(pData) != crnlib::cDDSFileSignature)) - return false; - - const crnlib::DDSURFACEDESC2& desc = *reinterpret_cast((reinterpret_cast(pData) + sizeof(crn_uint32))); - if (desc.dwSize != sizeof(crnlib::DDSURFACEDESC2)) - return false; - - printf("DDS file information:\n"); - printf("File size: %u\nDimensions: %ux%u\nPitch/LinearSize: %u\n", data_size, desc.dwWidth, desc.dwHeight, desc.dwLinearSize); - printf("MipMapCount: %u\nAlphaBitDepth: %u\n", desc.dwMipMapCount, desc.dwAlphaBitDepth); - - const char* pDDSDFlagNames[] = - { - "DDSD_CAPS", "DDSD_HEIGHT", "DDSD_WIDTH", "DDSD_PITCH", - nullptr, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", - nullptr, nullptr, nullptr, "DDSD_LPSURFACE", - "DDSD_PIXELFORMAT", "DDSD_CKDESTOVERLAY", "DDSD_CKDESTBLT", "DDSD_CKSRCOVERLAY", - "DDSD_CKSRCBLT", "DDSD_MIPMAPCOUNT", "DDSD_REFRESHRATE", "DDSD_LINEARSIZE", - "DDSD_TEXTURESTAGE", "DDSD_FVF", "DDSD_SRCVBHANDLE", "DDSD_DEPTH"}; - - printf("DDSD Flags: 0x%08X ", desc.dwFlags); - for (int i = 0; i < sizeof(pDDSDFlagNames) / sizeof(pDDSDFlagNames[0]); i++) - if ((pDDSDFlagNames[i]) && (desc.dwFlags & (1 << i))) - printf("%s ", pDDSDFlagNames[i]); - printf("\n\n"); - - printf("ddpfPixelFormat.dwFlags: 0x%08X ", desc.ddpfPixelFormat.dwFlags); - if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) - printf("DDPF_ALPHAPIXELS "); - if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) - printf("DDPF_ALPHA "); - if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) - printf("DDPF_FOURCC "); - if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) - printf("DDPF_PALETTEINDEXED8 "); - if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) - printf("DDPF_RGB "); - if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) - printf("DDPF_LUMINANCE "); - printf("\n"); - - printf("ddpfPixelFormat.dwFourCC: 0x%08X '%c' '%c' '%c' '%c'\n", - desc.ddpfPixelFormat.dwFourCC, - std::max(32U, 
desc.ddpfPixelFormat.dwFourCC & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 8) & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 16) & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 24) & 0xFF)); - - printf("dwRGBBitCount: %u 0x%08X\n", - desc.ddpfPixelFormat.dwRGBBitCount, desc.ddpfPixelFormat.dwRGBBitCount); - - printf("dwRGBBitCount as FOURCC: '%c' '%c' '%c' '%c'\n", - std::max(32U, desc.ddpfPixelFormat.dwRGBBitCount & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 8) & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 16) & 0xFF), - std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 24) & 0xFF)); - - printf("dwRBitMask: 0x%08X\ndwGBitMask: 0x%08X\ndwBBitMask: 0x%08X\ndwRGBAlphaBitMask: 0x%08X\n", - desc.ddpfPixelFormat.dwRBitMask, desc.ddpfPixelFormat.dwGBitMask, desc.ddpfPixelFormat.dwBBitMask, desc.ddpfPixelFormat.dwRGBAlphaBitMask); - - printf("\n"); - printf("ddsCaps.dwCaps: 0x%08X ", desc.ddsCaps.dwCaps); - if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) - printf("DDSCAPS_COMPLEX "); - if (desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE) - printf("DDSCAPS_TEXTURE "); - if (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) - printf("DDSCAPS_MIPMAP"); - printf("\n"); - - printf("ddsCaps.dwCaps2: 0x%08X ", desc.ddsCaps.dwCaps2); - const char* pDDCAPS2FlagNames[] = - { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", - "DDSCAPS2_CUBEMAP_POSITIVEY", "DDSCAPS2_CUBEMAP_NEGATIVEY", "DDSCAPS2_CUBEMAP_POSITIVEZ", "DDSCAPS2_CUBEMAP_NEGATIVEZ", - nullptr, nullptr, nullptr, nullptr, - nullptr, "DDSCAPS2_VOLUME"}; - for (int i = 0; i < sizeof(pDDCAPS2FlagNames) / sizeof(pDDCAPS2FlagNames[0]); i++) - if ((pDDCAPS2FlagNames[i]) && (desc.ddsCaps.dwCaps2 & (1 << i))) - printf("%s ", pDDCAPS2FlagNames[i]); - printf("\n"); - - printf("ddsCaps.dwCaps3: 0x%08X\nddsCaps.dwCaps4: 0x%08X\n", - desc.ddsCaps.dwCaps3, 
desc.ddsCaps.dwCaps4); - - return true; +static bool print_dds_info(const void* pData, crn_uint32 data_size) +{ + if ((data_size < 128) || (*reinterpret_cast(pData) != crnlib::cDDSFileSignature)) + return false; + + const crnlib::DDSURFACEDESC2& desc = *reinterpret_cast((reinterpret_cast(pData) + sizeof(crn_uint32))); + if (desc.dwSize != sizeof(crnlib::DDSURFACEDESC2)) + return false; + + printf("DDS file information:\n"); + printf("File size: %u\nDimensions: %ux%u\nPitch/LinearSize: %u\n", data_size, desc.dwWidth, desc.dwHeight, desc.dwLinearSize); + printf("MipMapCount: %u\nAlphaBitDepth: %u\n", desc.dwMipMapCount, desc.dwAlphaBitDepth); + + const char* pDDSDFlagNames[] = + { + "DDSD_CAPS", "DDSD_HEIGHT", "DDSD_WIDTH", "DDSD_PITCH", + nullptr, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", + nullptr, nullptr, nullptr, "DDSD_LPSURFACE", + "DDSD_PIXELFORMAT", "DDSD_CKDESTOVERLAY", "DDSD_CKDESTBLT", "DDSD_CKSRCOVERLAY", + "DDSD_CKSRCBLT", "DDSD_MIPMAPCOUNT", "DDSD_REFRESHRATE", "DDSD_LINEARSIZE", + "DDSD_TEXTURESTAGE", "DDSD_FVF", "DDSD_SRCVBHANDLE", "DDSD_DEPTH" }; + + printf("DDSD Flags: 0x%08X ", desc.dwFlags); + for (int i = 0; i < sizeof(pDDSDFlagNames) / sizeof(pDDSDFlagNames[0]); i++) + if ((pDDSDFlagNames[i]) && (desc.dwFlags & (1 << i))) + printf("%s ", pDDSDFlagNames[i]); + printf("\n\n"); + + printf("ddpfPixelFormat.dwFlags: 0x%08X ", desc.ddpfPixelFormat.dwFlags); + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) + printf("DDPF_ALPHAPIXELS "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) + printf("DDPF_ALPHA "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) + printf("DDPF_FOURCC "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) + printf("DDPF_PALETTEINDEXED8 "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) + printf("DDPF_RGB "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) + printf("DDPF_LUMINANCE "); + printf("\n"); + + printf("ddpfPixelFormat.dwFourCC: 0x%08X '%c' '%c' '%c' '%c'\n", + 
desc.ddpfPixelFormat.dwFourCC, + std::max(32U, desc.ddpfPixelFormat.dwFourCC & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 8) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 16) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 24) & 0xFF)); + + printf("dwRGBBitCount: %u 0x%08X\n", + desc.ddpfPixelFormat.dwRGBBitCount, desc.ddpfPixelFormat.dwRGBBitCount); + + printf("dwRGBBitCount as FOURCC: '%c' '%c' '%c' '%c'\n", + std::max(32U, desc.ddpfPixelFormat.dwRGBBitCount & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 8) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 16) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 24) & 0xFF)); + + printf("dwRBitMask: 0x%08X\ndwGBitMask: 0x%08X\ndwBBitMask: 0x%08X\ndwRGBAlphaBitMask: 0x%08X\n", + desc.ddpfPixelFormat.dwRBitMask, desc.ddpfPixelFormat.dwGBitMask, desc.ddpfPixelFormat.dwBBitMask, desc.ddpfPixelFormat.dwRGBAlphaBitMask); + + printf("\n"); + printf("ddsCaps.dwCaps: 0x%08X ", desc.ddsCaps.dwCaps); + if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) + printf("DDSCAPS_COMPLEX "); + if (desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE) + printf("DDSCAPS_TEXTURE "); + if (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) + printf("DDSCAPS_MIPMAP"); + printf("\n"); + + printf("ddsCaps.dwCaps2: 0x%08X ", desc.ddsCaps.dwCaps2); + const char* pDDCAPS2FlagNames[] = + { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", + "DDSCAPS2_CUBEMAP_POSITIVEY", "DDSCAPS2_CUBEMAP_NEGATIVEY", "DDSCAPS2_CUBEMAP_POSITIVEZ", "DDSCAPS2_CUBEMAP_NEGATIVEZ", + nullptr, nullptr, nullptr, nullptr, + nullptr, "DDSCAPS2_VOLUME" }; + for (int i = 0; i < sizeof(pDDCAPS2FlagNames) / sizeof(pDDCAPS2FlagNames[0]); i++) + if ((pDDCAPS2FlagNames[i]) && (desc.ddsCaps.dwCaps2 & (1 << i))) + printf("%s ", pDDCAPS2FlagNames[i]); + printf("\n"); + + printf("ddsCaps.dwCaps3: 0x%08X\nddsCaps.dwCaps4: 
0x%08X\n", + desc.ddsCaps.dwCaps3, desc.ddsCaps.dwCaps4); + + return true; } // CRN/DDS compression callback function. -static crn_bool progress_callback_func(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) { - int percentage_complete = (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * 100.0f) / total_phases; - printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bProcessing: %u%%", std::min(100, std::max(0, percentage_complete))); - return true; +static crn_bool progress_callback_func(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) +{ + int percentage_complete = (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * 100.0f) / total_phases; + printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bProcessing: %u%%", std::min(100, std::max(0, percentage_complete))); + return true; } -int main(int argc, char* argv[]) { - printf("example1 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); - - if (argc < 3) - return print_usage(); - - // Parse command line options - int mode = argv[1][0]; - if ((mode != 'c') && (mode != 'd') && (mode != 'i')) - return error("Invalid mode!\n"); - - const char* pSrc_filename = argv[2]; - char out_filename[FILENAME_MAX] = {'\0'}; - - float bitrate = 0.0f; - int quality_level = -1; - bool srgb_colorspace = true; - bool create_mipmaps = true; - bool output_crn = false; - crn_format fmt = cCRNFmtInvalid; - bool use_adaptive_block_sizes = true; - bool set_alpha_to_luma = false; - bool convert_to_luma = false; - bool enable_dxt1a = false; - - for (int i = 3; i < argc; i++) { - if (argv[i][0] == '/') - argv[i][0] = '-'; - - if (!_stricmp(argv[i], "-crn")) { - output_crn = true; - } else if (!_stricmp(argv[i], "-pixelformat")) { - if (++i >= argc) - return error("Expected pixel format!"); - - if (!_stricmp(argv[i], "dxt1a")) { - 
enable_dxt1a = true; - fmt = cCRNFmtDXT1; - } else { - unsigned int f; - for (f = 0; f < cCRNFmtTotal; f++) { - if (!_stricmp(argv[i], crn_get_format_string(static_cast(f)))) { - fmt = static_cast(f); - break; - } +int main(int argc, char* argv[]) +{ + printf("example1 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); + + if (argc < 3) + return print_usage(); + + // Parse command line options + int mode = argv[1][0]; + if ((mode != 'c') && (mode != 'd') && (mode != 'i')) + return error("Invalid mode!\n"); + + const char* pSrc_filename = argv[2]; + char out_filename[FILENAME_MAX] = { '\0' }; + + float bitrate = 0.0f; + int quality_level = -1; + bool srgb_colorspace = true; + bool create_mipmaps = true; + bool output_crn = false; + crn_format fmt = cCRNFmtInvalid; + bool use_adaptive_block_sizes = true; + bool set_alpha_to_luma = false; + bool convert_to_luma = false; + bool enable_dxt1a = false; + + for (int i = 3; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; + + if (!_stricmp(argv[i], "-crn")) + { + output_crn = true; } - if (f == cCRNFmtTotal) - return error("Unrecognized pixel format: %s\n", argv[i]); - } - } else if (!_stricmp(argv[i], "-bitrate")) { - if (++i >= argc) - return error("Invalid bitrate!"); - - bitrate = (float)atof(argv[i]); - if ((bitrate < .1f) || (bitrate > 8.0f)) - return error("Invalid bitrate!"); - } else if (!_stricmp(argv[i], "-quality")) { - if (++i >= argc) - return error("Invalid quality level!"); - - quality_level = atoi(argv[i]); - if ((quality_level < 0) || (quality_level > cCRNMaxQualityLevel)) - return error("Invalid quality level!"); - } else if (!_stricmp(argv[i], "-out")) { - if (++i >= argc) - return error("Expected output filename!"); - - strcpy_s(out_filename, sizeof(out_filename), argv[i]); - } else if (!_stricmp(argv[i], "-nonsrgb")) - srgb_colorspace = false; - else if (!_stricmp(argv[i], "-nomips")) - create_mipmaps = false; - else if 
(!_stricmp(argv[i], "-noAdaptiveBlocks")) - use_adaptive_block_sizes = false; - else if (!_stricmp(argv[i], "-setalphatoluma")) - set_alpha_to_luma = true; - else if (!_stricmp(argv[i], "-converttoluma")) - convert_to_luma = true; - else - return error("Invalid option: %s\n", argv[i]); - } - - char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; - if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) - return error("Invalid source filename!\n"); - - // Load the source file into memory. - printf("Loading source file: %s\n", pSrc_filename); - crn_uint32 src_file_size; - crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); - if (!pSrc_file_data) - return error("Unable to read source file\n"); - - if (mode == 'i') { - // Information - if (_stricmp(ext_buf, ".crn") == 0) { - if (!print_crn_info(pSrc_file_data, src_file_size)) { - free(pSrc_file_data); - return error("Not a CRN file!\n"); - } - } else if (_stricmp(ext_buf, ".dds") == 0) { - if (!print_dds_info(pSrc_file_data, src_file_size)) { - free(pSrc_file_data); - return error("Not a DDS file!\n"); - } - } else { - // Try parsing the source file as a regular image. - int x, y, actual_comps; - stbi_uc* p = stbi_load_from_memory(pSrc_file_data, src_file_size, &x, &y, &actual_comps, 4); - if (!p) { - free(pSrc_file_data); - return error("Failed reading image file!\n"); - } - stbi_image_free(p); - - printf("File size: %u\nDimensions: %ix%i\nActual Components: %i\n", src_file_size, x, y, actual_comps); - } - } else if (mode == 'c') { - // Compression to DDS or CRN. - - // If the user has explicitly specified an output file, check the output file's extension to ensure we write the expected format. 
- if (out_filename[0]) { - char out_fname_buf[_MAX_FNAME], out_ext_buf[_MAX_EXT]; - _splitpath_s(out_filename, nullptr, 0, nullptr, 0, out_fname_buf, _MAX_FNAME, out_ext_buf, _MAX_EXT); - if (!_stricmp(out_ext_buf, ".crn")) - output_crn = true; - else if (!_stricmp(out_ext_buf, ".dds")) - output_crn = false; - } - - // Load source image - int width, height, actual_comps; - crn_uint32* pSrc_image = (crn_uint32*)stbi_load_from_memory(pSrc_file_data, src_file_size, &width, &height, &actual_comps, 4); - if (!pSrc_image) { - free(pSrc_file_data); - return error("Failed reading image file!\n"); - } + else if (!_stricmp(argv[i], "-pixelformat")) + { + if (++i >= argc) + return error("Expected pixel format!"); + + if (!_stricmp(argv[i], "dxt1a")) + { + enable_dxt1a = true; + fmt = cCRNFmtDXT1; + } + else + { + unsigned int f; + for (f = 0; f < cCRNFmtTotal; f++) + { + if (!_stricmp(argv[i], crn_get_format_string(static_cast(f)))) + { + fmt = static_cast(f); + break; + } + } + if (f == cCRNFmtTotal) + return error("Unrecognized pixel format: %s\n", argv[i]); + } + } + else if (!_stricmp(argv[i], "-bitrate")) + { + if (++i >= argc) + return error("Invalid bitrate!"); + + bitrate = (float)atof(argv[i]); + if ((bitrate < .1f) || (bitrate > 8.0f)) + return error("Invalid bitrate!"); + } + else if (!_stricmp(argv[i], "-quality")) + { + if (++i >= argc) + return error("Invalid quality level!"); + + quality_level = atoi(argv[i]); + if ((quality_level < 0) || (quality_level > cCRNMaxQualityLevel)) + return error("Invalid quality level!"); + } + else if (!_stricmp(argv[i], "-out")) + { + if (++i >= argc) + return error("Expected output filename!"); - printf("Source file size: %u, Dimensions: %ux%u\nActual Components: %u\n", src_file_size, width, height, actual_comps); - - // Fill in compression parameters struct. 
- bool has_alpha_channel = actual_comps > 3; - - if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) - set_alpha_to_luma = true; - - if ((set_alpha_to_luma) || (convert_to_luma)) { - for (int i = 0; i < width * height; i++) { - crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; - // Compute CCIR 601 luma. - crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; - crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; - if (set_alpha_to_luma) - a = y; - if (convert_to_luma) { - r = y; - g = y; - b = y; + strcpy_s(out_filename, sizeof(out_filename), argv[i]); } - pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); - } + else if (!_stricmp(argv[i], "-nonsrgb")) + srgb_colorspace = false; + else if (!_stricmp(argv[i], "-nomips")) + create_mipmaps = false; + else if (!_stricmp(argv[i], "-noAdaptiveBlocks")) + use_adaptive_block_sizes = false; + else if (!_stricmp(argv[i], "-setalphatoluma")) + set_alpha_to_luma = true; + else if (!_stricmp(argv[i], "-converttoluma")) + convert_to_luma = true; + else + return error("Invalid option: %s\n", argv[i]); } - crn_comp_params comp_params; - comp_params.m_width = width; - comp_params.m_height = height; - comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); - comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, enable_dxt1a && has_alpha_channel); - comp_params.set_flag(cCRNCompFlagHierarchical, use_adaptive_block_sizes); - comp_params.m_file_type = output_crn ? cCRNFileTypeCRN : cCRNFileTypeDDS; - comp_params.m_format = (fmt != cCRNFmtInvalid) ? fmt : (has_alpha_channel ? cCRNFmtDXT5 : cCRNFmtDXT1); - - // Important note: This example only feeds a single source image to the compressor, and it internaly generates mipmaps from that source image. - // If you want, there's nothing stopping you from generating the mipmaps on your own, then feeding the multiple source images - // to the compressor. 
Just set the crn_mipmap_params::m_mode member (set below) to cCRNMipModeUseSourceMips. - comp_params.m_pImages[0][0] = pSrc_image; - - if (bitrate > 0.0f) - comp_params.m_target_bitrate = bitrate; - else if (quality_level >= 0) - comp_params.m_quality_level = quality_level; - else if (output_crn) { - // Set a default quality level for CRN, otherwise we'll get the default (highest quality) which leads to huge compressed palettes. - comp_params.m_quality_level = cDefaultCRNQualityLevel; + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source file into memory. + printf("Loading source file: %s\n", pSrc_filename); + crn_uint32 src_file_size; + crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); + if (!pSrc_file_data) + return error("Unable to read source file\n"); + + if (mode == 'i') + { + // Information + if (_stricmp(ext_buf, ".crn") == 0) + { + if (!print_crn_info(pSrc_file_data, src_file_size)) + { + free(pSrc_file_data); + return error("Not a CRN file!\n"); + } + } + else if (_stricmp(ext_buf, ".dds") == 0) + { + if (!print_dds_info(pSrc_file_data, src_file_size)) + { + free(pSrc_file_data); + return error("Not a DDS file!\n"); + } + } + else + { + // Try parsing the source file as a regular image. + int x, y, actual_comps; + stbi_uc* p = stbi_load_from_memory(pSrc_file_data, src_file_size, &x, &y, &actual_comps, 4); + if (!p) + { + free(pSrc_file_data); + return error("Failed reading image file!\n"); + } + stbi_image_free(p); + + printf("File size: %u\nDimensions: %ix%i\nActual Components: %i\n", src_file_size, x, y, actual_comps); + } } + else if (mode == 'c') + { + // Compression to DDS or CRN. 
+ + // If the user has explicitly specified an output file, check the output file's extension to ensure we write the expected format. + if (out_filename[0]) + { + char out_fname_buf[_MAX_FNAME], out_ext_buf[_MAX_EXT]; + _splitpath_s(out_filename, nullptr, 0, nullptr, 0, out_fname_buf, _MAX_FNAME, out_ext_buf, _MAX_EXT); + if (!_stricmp(out_ext_buf, ".crn")) + output_crn = true; + else if (!_stricmp(out_ext_buf, ".dds")) + output_crn = false; + } - // Determine the # of helper threads (in addition to the main thread) to use during compression. NumberOfCPU's-1 is reasonable. - SYSTEM_INFO g_system_info; - GetSystemInfo(&g_system_info); - int num_helper_threads = std::max(0, (int)g_system_info.dwNumberOfProcessors - 1); - comp_params.m_num_helper_threads = num_helper_threads; - - comp_params.m_pProgress_func = progress_callback_func; + // Load source image + int width, height, actual_comps; + crn_uint32* pSrc_image = (crn_uint32*)stbi_load_from_memory(pSrc_file_data, src_file_size, &width, &height, &actual_comps, 4); + if (!pSrc_image) + { + free(pSrc_file_data); + return error("Failed reading image file!\n"); + } - // Fill in mipmap parameters struct. - crn_mipmap_params mip_params; - mip_params.m_gamma_filtering = srgb_colorspace; - mip_params.m_mode = create_mipmaps ? cCRNMipModeGenerateMips : cCRNMipModeNoMips; + printf("Source file size: %u, Dimensions: %ux%u\nActual Components: %u\n", src_file_size, width, height, actual_comps); + + // Fill in compression parameters struct. + bool has_alpha_channel = actual_comps > 3; + + if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) + set_alpha_to_luma = true; + + if ((set_alpha_to_luma) || (convert_to_luma)) + { + for (int i = 0; i < width * height; i++) + { + crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; + // Compute CCIR 601 luma. 
+ crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; + crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; + if (set_alpha_to_luma) + a = y; + if (convert_to_luma) + { + r = y; + g = y; + b = y; + } + pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); + } + } - crn_uint32 actual_quality_level; - float actual_bitrate; - crn_uint32 output_file_size; + crn_comp_params comp_params; + comp_params.m_width = width; + comp_params.m_height = height; + comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, enable_dxt1a && has_alpha_channel); + comp_params.set_flag(cCRNCompFlagHierarchical, use_adaptive_block_sizes); + comp_params.m_file_type = output_crn ? cCRNFileTypeCRN : cCRNFileTypeDDS; + comp_params.m_format = (fmt != cCRNFmtInvalid) ? fmt : (has_alpha_channel ? cCRNFmtDXT5 : cCRNFmtDXT1); + + // Important note: This example only feeds a single source image to the compressor, and it internaly generates mipmaps from that source image. + // If you want, there's nothing stopping you from generating the mipmaps on your own, then feeding the multiple source images + // to the compressor. Just set the crn_mipmap_params::m_mode member (set below) to cCRNMipModeUseSourceMips. + comp_params.m_pImages[0][0] = pSrc_image; + + if (bitrate > 0.0f) + comp_params.m_target_bitrate = bitrate; + else if (quality_level >= 0) + comp_params.m_quality_level = quality_level; + else if (output_crn) + { + // Set a default quality level for CRN, otherwise we'll get the default (highest quality) which leads to huge compressed palettes. + comp_params.m_quality_level = cDefaultCRNQualityLevel; + } - printf("Compressing to %s\n", crn_get_format_string(comp_params.m_format)); + // Determine the # of helper threads (in addition to the main thread) to use during compression. NumberOfCPU's-1 is reasonable. 
+ SYSTEM_INFO g_system_info; + GetSystemInfo(&g_system_info); + int num_helper_threads = std::max(0, (int)g_system_info.dwNumberOfProcessors - 1); + comp_params.m_num_helper_threads = num_helper_threads; - // Now compress to DDS or CRN. - void* pOutput_file_data = crn_compress(comp_params, mip_params, output_file_size, &actual_quality_level, &actual_bitrate); - printf("\n"); + comp_params.m_pProgress_func = progress_callback_func; - if (!pOutput_file_data) { - stbi_image_free(pSrc_image); - free(pSrc_file_data); - return error("Compression failed!"); - } + // Fill in mipmap parameters struct. + crn_mipmap_params mip_params; + mip_params.m_gamma_filtering = srgb_colorspace; + mip_params.m_mode = create_mipmaps ? cCRNMipModeGenerateMips : cCRNMipModeNoMips; - printf("Compressed to %u bytes, quality level: %u, effective bitrate: %f\n", output_file_size, actual_quality_level, actual_bitrate); - - // Write the output file. - char dst_filename[FILENAME_MAX]; - sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s%s", drive_buf, dir_buf, fname_buf, output_crn ? ".crn" : ".dds"); - if (out_filename[0]) - strcpy(dst_filename, out_filename); - - printf("Writing %s file: %s\n", output_crn ? "CRN" : "DDS", dst_filename); - FILE* pFile = fopen(dst_filename, "wb"); - if ((!pFile) || (fwrite(pOutput_file_data, output_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) { - free(pSrc_file_data); - crn_free_block(pOutput_file_data); - stbi_image_free(pSrc_image); - return error("Failed writing to output file!\n"); - } + crn_uint32 actual_quality_level; + float actual_bitrate; + crn_uint32 output_file_size; - crn_free_block(pOutput_file_data); - stbi_image_free(pSrc_image); - } else if (_stricmp(ext_buf, ".crn") == 0) { - // Decompress/transcode CRN to DDS. - printf("Decompressing CRN to DDS\n"); - - // Transcode the CRN file to a DDS file in memory. 
- crn_uint32 dds_file_size = src_file_size; - void* pDDS_file_data = crn_decompress_crn_to_dds(pSrc_file_data, dds_file_size); - if (!pDDS_file_data) { - free(pSrc_file_data); - return error("Failed decompressing CRN file!\n"); - } + printf("Compressing to %s\n", crn_get_format_string(comp_params.m_format)); - // Now write the DDS file to disk. - char dst_filename[FILENAME_MAX]; - sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); - if (out_filename[0]) - strcpy(dst_filename, out_filename); - - printf("Writing file: %s\n", dst_filename); - FILE* pFile = fopen(dst_filename, "wb"); - if ((!pFile) || (fwrite(pDDS_file_data, dds_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) { - crn_free_block(pDDS_file_data); - free(pSrc_file_data); - return error("Failed writing to output file!\n"); - } + // Now compress to DDS or CRN. + void* pOutput_file_data = crn_compress(comp_params, mip_params, output_file_size, &actual_quality_level, &actual_bitrate); + printf("\n"); - printf("\n"); + if (!pOutput_file_data) + { + stbi_image_free(pSrc_image); + free(pSrc_file_data); + return error("Compression failed!"); + } - print_dds_info(pDDS_file_data, dds_file_size); + printf("Compressed to %u bytes, quality level: %u, effective bitrate: %f\n", output_file_size, actual_quality_level, actual_bitrate); - crn_free_block(pDDS_file_data); - } else if (_stricmp(ext_buf, ".dds") == 0) { - // Unpack DDS to one or more TGA's. - if (out_filename[0]) - _splitpath_s(out_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT); + // Write the output file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s%s", drive_buf, dir_buf, fname_buf, output_crn ? ".crn" : ".dds"); + if (out_filename[0]) + strcpy(dst_filename, out_filename); + + printf("Writing %s file: %s\n", output_crn ? 
"CRN" : "DDS", dst_filename); + FILE* pFile = fopen(dst_filename, "wb"); + if ((!pFile) || (fwrite(pOutput_file_data, output_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) + { + free(pSrc_file_data); + crn_free_block(pOutput_file_data); + stbi_image_free(pSrc_image); + return error("Failed writing to output file!\n"); + } - crn_texture_desc tex_desc; - crn_uint32* pImages[cCRNMaxFaces * cCRNMaxLevels]; - if (!crn_decompress_dds_to_images(pSrc_file_data, src_file_size, pImages, tex_desc)) { - free(pSrc_file_data); - return error("Failed unpacking DDS file!\n"); + crn_free_block(pOutput_file_data); + stbi_image_free(pSrc_image); } + else if (_stricmp(ext_buf, ".crn") == 0) + { + // Decompress/transcode CRN to DDS. + printf("Decompressing CRN to DDS\n"); + + // Transcode the CRN file to a DDS file in memory. + crn_uint32 dds_file_size = src_file_size; + void* pDDS_file_data = crn_decompress_crn_to_dds(pSrc_file_data, dds_file_size); + if (!pDDS_file_data) + { + free(pSrc_file_data); + return error("Failed decompressing CRN file!\n"); + } - printf("Decompressed texture Dimensions: %ux%u, Faces: %u, Levels: %u, FourCC: 0x%08X '%c' '%c' '%c' '%c'\n", - tex_desc.m_width, tex_desc.m_height, tex_desc.m_faces, tex_desc.m_levels, tex_desc.m_fmt_fourcc, - std::max(32U, tex_desc.m_fmt_fourcc & 0xFF), - std::max(32U, (tex_desc.m_fmt_fourcc >> 8) & 0xFF), - std::max(32U, (tex_desc.m_fmt_fourcc >> 16) & 0xFF), - std::max(32U, (tex_desc.m_fmt_fourcc >> 24) & 0xFF)); - - for (crn_uint32 face_index = 0; face_index < tex_desc.m_faces; face_index++) { - for (crn_uint32 level_index = 0; level_index < tex_desc.m_levels; level_index++) { - int width = std::max(1U, tex_desc.m_width >> level_index); - int height = std::max(1U, tex_desc.m_height >> level_index); - + // Now write the DDS file to disk. 
char dst_filename[FILENAME_MAX]; - sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s_face%u_mip%u.tga", drive_buf, dir_buf, fname_buf, face_index, level_index); + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) + strcpy(dst_filename, out_filename); printf("Writing file: %s\n", dst_filename); - if (!stbi_write_tga(dst_filename, width, height, 4, pImages[level_index + face_index * tex_desc.m_levels])) { - crn_free_all_images(pImages, tex_desc); - free(pSrc_file_data); + FILE* pFile = fopen(dst_filename, "wb"); + if ((!pFile) || (fwrite(pDDS_file_data, dds_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) + { + crn_free_block(pDDS_file_data); + free(pSrc_file_data); + return error("Failed writing to output file!\n"); + } + + printf("\n"); + + print_dds_info(pDDS_file_data, dds_file_size); - return error("Failed writing output file!\n"); + crn_free_block(pDDS_file_data); + } + else if (_stricmp(ext_buf, ".dds") == 0) + { + // Unpack DDS to one or more TGA's. 
+ if (out_filename[0]) + _splitpath_s(out_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT); + + crn_texture_desc tex_desc; + crn_uint32* pImages[cCRNMaxFaces * cCRNMaxLevels]; + if (!crn_decompress_dds_to_images(pSrc_file_data, src_file_size, pImages, tex_desc)) + { + free(pSrc_file_data); + return error("Failed unpacking DDS file!\n"); + } + + printf("Decompressed texture Dimensions: %ux%u, Faces: %u, Levels: %u, FourCC: 0x%08X '%c' '%c' '%c' '%c'\n", + tex_desc.m_width, tex_desc.m_height, tex_desc.m_faces, tex_desc.m_levels, tex_desc.m_fmt_fourcc, + std::max(32U, tex_desc.m_fmt_fourcc & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 8) & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 16) & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 24) & 0xFF)); + + for (crn_uint32 face_index = 0; face_index < tex_desc.m_faces; face_index++) + { + for (crn_uint32 level_index = 0; level_index < tex_desc.m_levels; level_index++) + { + int width = std::max(1U, tex_desc.m_width >> level_index); + int height = std::max(1U, tex_desc.m_height >> level_index); + + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s_face%u_mip%u.tga", drive_buf, dir_buf, fname_buf, face_index, level_index); + + printf("Writing file: %s\n", dst_filename); + if (!stbi_write_tga(dst_filename, width, height, 4, pImages[level_index + face_index * tex_desc.m_levels])) + { + crn_free_all_images(pImages, tex_desc); + free(pSrc_file_data); + + return error("Failed writing output file!\n"); + } + } } - } + + crn_free_all_images(pImages, tex_desc); + } + else + { + free(pSrc_file_data); + return error("Decompression mode only supports .dds or .crn files!\n"); } - crn_free_all_images(pImages, tex_desc); - } else { free(pSrc_file_data); - return error("Decompression mode only supports .dds or .crn files!\n"); - } - - free(pSrc_file_data); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git 
a/examples/example2/example2.cpp b/examples/example2/example2.cpp index 50d887b..ef16eb9 100644 --- a/examples/example2/example2.cpp +++ b/examples/example2/example2.cpp @@ -1,11 +1,36 @@ -// File: example2.cpp - This example uses the crn_decomp.h stand-alone header file library +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +// This example uses the crn_decomp.h stand-alone header file library // to transcode .CRN files directly to .DDS, with no intermediate recompression step to DXTn. +// // This tool does NOT depend on the crnlib library at all. It only needs the low-level // decompression/transcoding functionality defined in inc/crn_decomp.h. +// // This is the basic functionality a game engine would need to employ at runtime to utilize // .CRN textures (excluding writing the output DDS file - instead you would provide the DXTn // bits directly to OpenGL/D3D). 
-// See Copyright Notice and license at the end of inc/crnlib.h + #include #include #include @@ -21,247 +46,266 @@ using namespace crnlib; -static int print_usage() { - printf("Description: Transcodes .CRN to .DDS files using crn_decomp.h.\n"); - printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); - printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); - printf("Usage: example2 [source_file] [options]\n"); - printf("\nOptions:\n"); - printf("-out filename - Force output filename.\n"); - return EXIT_FAILURE; +static int print_usage() +{ + printf("Description: Transcodes .CRN to .DDS files using crn_decomp.h.\n"); + printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); + printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); + printf("Usage: example2 [source_file] [options]\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename.\n"); + return EXIT_FAILURE; } -static int error(const char* pMsg, ...) { - va_list args; - va_start(args, pMsg); - char buf[512]; - vsprintf_s(buf, sizeof(buf), pMsg, args); - va_end(args); - printf("%s", buf); - return EXIT_FAILURE; +static int error(const char* pMsg, ...) +{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; } // Loads an entire file into an allocated memory block. 
-static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { - size = 0; +static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) +{ + size = 0; - FILE* pFile = nullptr; - fopen_s(&pFile, pFilename, "rb"); - if (!pFile) - return nullptr; + FILE* pFile = nullptr; + fopen_s(&pFile, pFilename, "rb"); + if (!pFile) + return nullptr; - fseek(pFile, 0, SEEK_END); - size = ftell(pFile); - fseek(pFile, 0, SEEK_SET); + fseek(pFile, 0, SEEK_END); + size = ftell(pFile); + fseek(pFile, 0, SEEK_SET); - crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); - if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) { - fclose(pFile); - free(pSrc_file_data); - size = 0; - return nullptr; - } + crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); + if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) + { + fclose(pFile); + free(pSrc_file_data); + size = 0; + return nullptr; + } - fclose(pFile); - return pSrc_file_data; + fclose(pFile); + return pSrc_file_data; } -int main(int argc, char* argv[]) { - printf("example2 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); - - if (argc < 2) - return print_usage(); - - // Parse command line options - const char* pSrc_filename = argv[1]; - char out_filename[FILENAME_MAX] = {'\0'}; - - for (int i = 2; i < argc; i++) { - if (argv[i][0] == '/') - argv[i][0] = '-'; - - if (!_stricmp(argv[i], "-out")) { - if (++i >= argc) - return error("Expected output filename!"); - - strcpy_s(out_filename, sizeof(out_filename), argv[i]); - } else - return error("Invalid option: %s\n", argv[i]); - } - - // Split the source filename into its various components. 
- char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; - if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) - return error("Invalid source filename!\n"); - - // Load the source file into memory. - printf("Loading source file: %s\n", pSrc_filename); - crn_uint32 src_file_size; - crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); - if (!pSrc_file_data) - return error("Unable to read source file\n"); - - // Decompress/transcode CRN to DDS. - // DDS files are organized in face-major order, like this: - // Face0: Mip0, Mip1, Mip2, etc. - // Face1: Mip0, Mip1, Mip2, etc. - // etc. - // While CRN files are organized in mip-major order, like this: - // Mip0: Face0, Face1, Face2, Face3, Face4, Face5 - // Mip1: Face0, Face1, Face2, Face3, Face4, Face5 - // etc. - printf("Transcoding CRN to DDS\n"); - - crnd::crn_texture_info tex_info; - if (!crnd::crnd_get_texture_info(pSrc_file_data, src_file_size, &tex_info)) { - free(pSrc_file_data); - return error("crnd_get_texture_info() failed!\n"); - } +int main(int argc, char* argv[]) +{ + printf("example2 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); - timer tm; + if (argc < 2) + return print_usage(); - tm.start(); - crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pSrc_file_data, src_file_size); - double total_unpack_begin_time = tm.get_elapsed_ms(); + // Parse command line options + const char* pSrc_filename = argv[1]; + char out_filename[FILENAME_MAX] = { '\0' }; - if (!pContext) { - free(pSrc_file_data); - return error("crnd_unpack_begin() failed!\n"); - } + for (int i = 2; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; - // Now create the DDS file. 
- char dst_filename[FILENAME_MAX]; - sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); - if (out_filename[0]) - strcpy(dst_filename, out_filename); + if (!_stricmp(argv[i], "-out")) + { + if (++i >= argc) + return error("Expected output filename!"); - printf("Writing DDS file: %s\n", dst_filename); + strcpy_s(out_filename, sizeof(out_filename), argv[i]); + } + else + return error("Invalid option: %s\n", argv[i]); + } - FILE* pDDS_file = fopen(dst_filename, "wb"); - if (!pDDS_file) { - crnd::crnd_unpack_end(pContext); - free(pSrc_file_data); - return error("Failed creating destination file!\n"); - } - - // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). - fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); - - // Prepare the DDS header. - crnlib::DDSURFACEDESC2 dds_desc; - memset(&dds_desc, 0, sizeof(dds_desc)); - dds_desc.dwSize = sizeof(dds_desc); - dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | ((tex_info.m_levels > 1) ? DDSD_MIPMAPCOUNT : 0); - dds_desc.dwWidth = tex_info.m_width; - dds_desc.dwHeight = tex_info.m_height; - dds_desc.dwMipMapCount = (tex_info.m_levels > 1) ? tex_info.m_levels : 0; - - dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); - dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; - crn_format fundamental_fmt = crnd::crnd_get_fundamental_dxt_format(tex_info.m_format); - dds_desc.ddpfPixelFormat.dwFourCC = crnd::crnd_crn_format_to_fourcc(fundamental_fmt); - if (fundamental_fmt != tex_info.m_format) { - // It's a funky swizzled DXTn format - write its FOURCC to dwRGBBitCount. 
- dds_desc.ddpfPixelFormat.dwRGBBitCount = crnd::crnd_crn_format_to_fourcc(tex_info.m_format); - } - - dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; - if (tex_info.m_levels > 1) { - dds_desc.ddsCaps.dwCaps |= (DDSCAPS_COMPLEX | DDSCAPS_MIPMAP); - } - - if (tex_info.m_faces == 6) { - dds_desc.ddsCaps.dwCaps2 = DDSCAPS2_CUBEMAP | - DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | - DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; - } - - // Set pitch/linearsize field (some DDS readers require this field to be non-zero). - int bits_per_pixel = crnd::crnd_get_crn_format_bits_per_texel(tex_info.m_format); - dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; - dds_desc.dwFlags |= DDSD_LINEARSIZE; - - // Write the DDS header to the output file. - fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); - - // Now transcode all face and mipmap levels into memory, one mip level at a time. - void* pImages[cCRNMaxFaces][cCRNMaxLevels]; - crn_uint32 image_size_in_bytes[cCRNMaxLevels]; - memset(pImages, 0, sizeof(pImages)); - memset(image_size_in_bytes, 0, sizeof(image_size_in_bytes)); - - crn_uint32 total_unpacked_texels = 0; - - double total_unpack_time = 0.0f; - for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) { - // Compute the face's width, height, number of DXT blocks per row/col, etc. 
- const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); - const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); - const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); - const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); - const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); - const crn_uint32 total_face_size = row_pitch * blocks_y; - - image_size_in_bytes[level_index] = total_face_size; - - for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) { - void* p = malloc(total_face_size); - if (!p) { - for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) - for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) - free(pImages[f][l]); - crnd::crnd_unpack_end(pContext); + // Split the source filename into its various components. + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source file into memory. + printf("Loading source file: %s\n", pSrc_filename); + crn_uint32 src_file_size; + crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); + if (!pSrc_file_data) + return error("Unable to read source file\n"); + + // Decompress/transcode CRN to DDS. + // DDS files are organized in face-major order, like this: + // Face0: Mip0, Mip1, Mip2, etc. + // Face1: Mip0, Mip1, Mip2, etc. + // etc. + // While CRN files are organized in mip-major order, like this: + // Mip0: Face0, Face1, Face2, Face3, Face4, Face5 + // Mip1: Face0, Face1, Face2, Face3, Face4, Face5 + // etc. 
+ printf("Transcoding CRN to DDS\n"); + + crnd::crn_texture_info tex_info; + if (!crnd::crnd_get_texture_info(pSrc_file_data, src_file_size, &tex_info)) + { free(pSrc_file_data); - return error("Out of memory!"); - } - - pImages[face_index][level_index] = p; + return error("crnd_get_texture_info() failed!\n"); } - // Prepare the face pointer array needed by crnd_unpack_level(). - void* pDecomp_images[cCRNMaxFaces]; - for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) - pDecomp_images[face_index] = pImages[face_index][level_index]; + timer tm; - // Now transcode the level to raw DXTn tm.start(); - if (!crnd::crnd_unpack_level(pContext, pDecomp_images, total_face_size, row_pitch, level_index)) { - for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) - for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) - free(pImages[f][l]); + crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pSrc_file_data, src_file_size); + double total_unpack_begin_time = tm.get_elapsed_ms(); + + if (!pContext) + { + free(pSrc_file_data); + return error("crnd_unpack_begin() failed!\n"); + } - crnd::crnd_unpack_end(pContext); - free(pSrc_file_data); + // Now create the DDS file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) + strcpy(dst_filename, out_filename); - return error("Failed transcoding texture!"); + printf("Writing DDS file: %s\n", dst_filename); + + FILE* pDDS_file = fopen(dst_filename, "wb"); + if (!pDDS_file) + { + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + return error("Failed creating destination file!\n"); + } + + // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). + fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); + + // Prepare the DDS header. 
+ crnlib::DDSURFACEDESC2 dds_desc; + memset(&dds_desc, 0, sizeof(dds_desc)); + dds_desc.dwSize = sizeof(dds_desc); + dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | ((tex_info.m_levels > 1) ? DDSD_MIPMAPCOUNT : 0); + dds_desc.dwWidth = tex_info.m_width; + dds_desc.dwHeight = tex_info.m_height; + dds_desc.dwMipMapCount = (tex_info.m_levels > 1) ? tex_info.m_levels : 0; + + dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); + dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; + crn_format fundamental_fmt = crnd::crnd_get_fundamental_dxt_format(tex_info.m_format); + dds_desc.ddpfPixelFormat.dwFourCC = crnd::crnd_crn_format_to_fourcc(fundamental_fmt); + if (fundamental_fmt != tex_info.m_format) + { + // It's a funky swizzled DXTn format - write its FOURCC to dwRGBBitCount. + dds_desc.ddpfPixelFormat.dwRGBBitCount = crnd::crnd_crn_format_to_fourcc(tex_info.m_format); } - total_unpack_time += tm.get_elapsed_ms(); - total_unpacked_texels += (blocks_x * blocks_y * 16); - } + dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + if (tex_info.m_levels > 1) + { + dds_desc.ddsCaps.dwCaps |= (DDSCAPS_COMPLEX | DDSCAPS_MIPMAP); + } + + if (tex_info.m_faces == 6) + { + dds_desc.ddsCaps.dwCaps2 = DDSCAPS2_CUBEMAP | + DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | + DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; + } + + // Set pitch/linearsize field (some DDS readers require this field to be non-zero). + int bits_per_pixel = crnd::crnd_get_crn_format_bits_per_texel(tex_info.m_format); + dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; + dds_desc.dwFlags |= DDSD_LINEARSIZE; + + // Write the DDS header to the output file. 
+ fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); - printf("crnd_unpack_begin time: %3.3fms\n", total_unpack_begin_time); - printf("Total crnd_unpack_level time: %3.3fms\n", total_unpack_time); - double total_time = total_unpack_begin_time + total_unpack_time; - printf("Total transcode time: %3.3fms\n", total_time); - printf("Total texels transcoded: %u\n", total_unpacked_texels); - printf("Overall transcode throughput: %3.3f million texels/sec\n", (total_unpacked_texels / (total_time / 1000.0f)) / 1000000.0f); + // Now transcode all face and mipmap levels into memory, one mip level at a time. + void* pImages[cCRNMaxFaces][cCRNMaxLevels]; + crn_uint32 image_size_in_bytes[cCRNMaxLevels]; + memset(pImages, 0, sizeof(pImages)); + memset(image_size_in_bytes, 0, sizeof(image_size_in_bytes)); - // Now write the DXTn data to the DDS file in face-major order. - for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + crn_uint32 total_unpacked_texels = 0; + + double total_unpack_time = 0.0f; for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) - fwrite(pImages[face_index][level_index], image_size_in_bytes[level_index], 1, pDDS_file); + { + // Compute the face's width, height, number of DXT blocks per row/col, etc. 
+ const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); + const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); + const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); + const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); + const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); + const crn_uint32 total_face_size = row_pitch * blocks_y; + + image_size_in_bytes[level_index] = total_face_size; + + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + { + void* p = malloc(total_face_size); + if (!p) + { + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + return error("Out of memory!"); + } + + pImages[face_index][level_index] = p; + } + + // Prepare the face pointer array needed by crnd_unpack_level(). + void* pDecomp_images[cCRNMaxFaces]; + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + pDecomp_images[face_index] = pImages[face_index][level_index]; + + // Now transcode the level to raw DXTn + tm.start(); + if (!crnd::crnd_unpack_level(pContext, pDecomp_images, total_face_size, row_pitch, level_index)) + { + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); + + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + + return error("Failed transcoding texture!"); + } + + total_unpack_time += tm.get_elapsed_ms(); + total_unpacked_texels += (blocks_x * blocks_y * 16); + } + + printf("crnd_unpack_begin time: %3.3fms\n", total_unpack_begin_time); + printf("Total crnd_unpack_level time: %3.3fms\n", total_unpack_time); + double total_time = total_unpack_begin_time + total_unpack_time; + printf("Total transcode time: %3.3fms\n", total_time); + printf("Total texels transcoded: %u\n", total_unpacked_texels); + printf("Overall transcode 
throughput: %3.3f million texels/sec\n", (total_unpacked_texels / (total_time / 1000.0f)) / 1000000.0f); + + // Now write the DXTn data to the DDS file in face-major order. + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) + fwrite(pImages[face_index][level_index], image_size_in_bytes[level_index], 1, pDDS_file); - for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) - for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) - free(pImages[f][l]); + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); - crnd::crnd_unpack_end(pContext); - free(pSrc_file_data); + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); - if (fclose(pDDS_file) == EOF) { - return error("Failed writing to DDS file!\n"); - } + if (fclose(pDDS_file) == EOF) + { + return error("Failed writing to DDS file!\n"); + } - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/examples/example2/timer.cpp b/examples/example2/timer.cpp index 81d187c..c38d341 100644 --- a/examples/example2/timer.cpp +++ b/examples/example2/timer.cpp @@ -1,5 +1,28 @@ -// File: timer.cpp +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. 
+ * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // A simple high-precision, platform independent timer class. + #include #include #include @@ -18,119 +41,155 @@ unsigned long long timer::g_freq; double timer::g_inv_freq; #if defined(WIN32) || defined(_XBOX) -inline void query_counter(timer_ticks* pTicks) { - QueryPerformanceCounter(reinterpret_cast(pTicks)); +inline void query_counter(timer_ticks* pTicks) +{ + QueryPerformanceCounter(reinterpret_cast(pTicks)); } -inline void query_counter_frequency(timer_ticks* pTicks) { - QueryPerformanceFrequency(reinterpret_cast(pTicks)); + +inline void query_counter_frequency(timer_ticks* pTicks) +{ + QueryPerformanceFrequency(reinterpret_cast(pTicks)); } #elif defined(__GNUC__) #include -inline void query_counter(timer_ticks* pTicks) { - struct timeval cur_time; - gettimeofday(&cur_time, nullptr); - *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); +inline void query_counter(timer_ticks* pTicks) +{ + struct timeval cur_time; + gettimeofday(&cur_time, nullptr); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); } -inline void query_counter_frequency(timer_ticks* pTicks) { - *pTicks = 1000000; + +inline void query_counter_frequency(timer_ticks* pTicks) +{ + *pTicks = 1000000; } #endif -timer::timer() - : m_start_time(0), - m_stop_time(0), - m_started(false), - m_stopped(false) { - if (!g_inv_freq) - init(); +timer::timer() : + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) +{ + if (!g_inv_freq) + { + init(); + } } -timer::timer(timer_ticks start_ticks) { - if (!g_inv_freq) - init(); +timer::timer(timer_ticks start_ticks) +{ + if (!g_inv_freq) + { + init(); + } - m_start_time = start_ticks; + m_start_time = start_ticks; - m_started = true; - m_stopped = false; 
+ m_started = true; + m_stopped = false; } -void timer::start(timer_ticks start_ticks) { - m_start_time = start_ticks; +void timer::start(timer_ticks start_ticks) +{ + m_start_time = start_ticks; - m_started = true; - m_stopped = false; + m_started = true; + m_stopped = false; } -void timer::start() { - query_counter(&m_start_time); +void timer::start() +{ + query_counter(&m_start_time); - m_started = true; - m_stopped = false; + m_started = true; + m_stopped = false; } -void timer::stop() { - assert(m_started); +void timer::stop() +{ + assert(m_started); - query_counter(&m_stop_time); + query_counter(&m_stop_time); - m_stopped = true; + m_stopped = true; } -double timer::get_elapsed_secs() const { - assert(m_started); - if (!m_started) - return 0; - - timer_ticks stop_time = m_stop_time; - if (!m_stopped) - query_counter(&stop_time); - - timer_ticks delta = stop_time - m_start_time; - return delta * g_inv_freq; +double timer::get_elapsed_secs() const +{ + assert(m_started); + if (!m_started) + { + return 0; + } + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + { + query_counter(&stop_time); + } + + timer_ticks delta = stop_time - m_start_time; + return delta * g_inv_freq; } -timer_ticks timer::get_elapsed_us() const { - assert(m_started); - if (!m_started) - return 0; - - timer_ticks stop_time = m_stop_time; - if (!m_stopped) - query_counter(&stop_time); - - timer_ticks delta = stop_time - m_start_time; - return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; +timer_ticks timer::get_elapsed_us() const +{ + assert(m_started); + if (!m_started) + { + return 0; + } + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + { + query_counter(&stop_time); + } + + timer_ticks delta = stop_time - m_start_time; + return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; } -void timer::init() { - if (!g_inv_freq) { - query_counter_frequency(&g_freq); - g_inv_freq = 1.0f / g_freq; +void timer::init() +{ + if (!g_inv_freq) + { + query_counter_frequency(&g_freq); + 
g_inv_freq = 1.0f / g_freq; - query_counter(&g_init_ticks); - } + query_counter(&g_init_ticks); + } } -timer_ticks timer::get_init_ticks() { - if (!g_inv_freq) - init(); - - return g_init_ticks; +timer_ticks timer::get_init_ticks() +{ + if (!g_inv_freq) + { + init(); + } + + return g_init_ticks; } -timer_ticks timer::get_ticks() { - if (!g_inv_freq) - init(); +timer_ticks timer::get_ticks() +{ + if (!g_inv_freq) + { + init(); + } - timer_ticks ticks; - query_counter(&ticks); - return ticks - g_init_ticks; + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; } -double timer::ticks_to_secs(timer_ticks ticks) { - if (!g_inv_freq) - init(); +double timer::ticks_to_secs(timer_ticks ticks) +{ + if (!g_inv_freq) + { + init(); + } - return ticks * g_inv_freq; + return ticks * g_inv_freq; } diff --git a/examples/example2/timer.h b/examples/example2/timer.h index 3b30f42..bf2f0c8 100644 --- a/examples/example2/timer.h +++ b/examples/example2/timer.h @@ -1,40 +1,86 @@ -// File: timer.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + // A simple high-precision, platform independent timer class. + #pragma once typedef unsigned long long timer_ticks; -class timer { - public: - timer(); - timer(timer_ticks start_ticks); +class timer +{ +public: + timer(); + timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + + inline double get_elapsed_ms() const + { + return get_elapsed_secs() * 1000.0f; + } + + timer_ticks get_elapsed_us() const; + + static void init(); + + static inline timer_ticks get_ticks_per_sec() + { + return g_freq; + } - void start(); - void start(timer_ticks start_ticks); + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); - void stop(); + static inline double ticks_to_ms(timer_ticks ticks) + { + return ticks_to_secs(ticks) * 1000.0f; + } - double get_elapsed_secs() const; - inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } - timer_ticks get_elapsed_us() const; + static inline double get_secs() + { + return ticks_to_secs(get_ticks()); + } - static void init(); - static inline timer_ticks get_ticks_per_sec() { return g_freq; } - static timer_ticks get_init_ticks(); - static timer_ticks get_ticks(); - static double ticks_to_secs(timer_ticks ticks); - static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } - static inline double get_secs() { return ticks_to_secs(get_ticks()); } - static inline double get_ms() { return ticks_to_ms(get_ticks()); } + static inline double get_ms() + { + return ticks_to_ms(get_ticks()); + } - private: - static timer_ticks g_init_ticks; - static timer_ticks g_freq; - static double g_inv_freq; +private: + static timer_ticks g_init_ticks; + static timer_ticks g_freq; + static double g_inv_freq; - timer_ticks m_start_time; - timer_ticks m_stop_time; + 
timer_ticks m_start_time; + timer_ticks m_stop_time; - bool m_started : 1; - bool m_stopped : 1; + bool m_started : 1; + bool m_stopped : 1; }; diff --git a/examples/example3/example3.cpp b/examples/example3/example3.cpp index 5e0fe60..59912f3 100644 --- a/examples/example3/example3.cpp +++ b/examples/example3/example3.cpp @@ -1,9 +1,31 @@ -// File: example3.cpp - Demonstrates how to use crnlib's simple block compression +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +// This example demonstrates how to use crnlib's simple block compression // API's to manually pack images to DXTn compressed .DDS files. This example isn't multithreaded // so it's not going to be fast. // Also note that this sample only demonstrates traditional/vanilla 4x4 DXTn block compression (not CRN). 
-// See Copyright Notice and license at the end of inc/crnlib.h #include #include #include @@ -25,6 +47,7 @@ #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" + #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" @@ -32,240 +55,266 @@ using namespace crnlib; const unsigned int cDXTBlockSize = 4; -static int print_usage() { - printf("Description: Simple .DDS DXTn block compression using crnlib.\n"); - printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); - printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); - printf("Usage: example3 [source_file] [options]\n"); - printf("\n"); - printf("Note: This simple example is not multithreaded, so it's not going to be\n"); - printf("particularly fast.\n"); - printf("\n"); - printf("Supported source image formats:\n"); - printf("Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); - printf("\nOptions:\n"); - printf("-out filename - Force output filename (always use .DDS extension).\n"); - printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); - printf("-pixelformat X - Output DXTn format. Supported formats:\n"); - printf("DXT1, DXT3, DXT5, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC), DXT5A (ATN1N)\n"); - printf("If no output pixel format is specified, this example uses either DXT1 or DXT5.\n"); - printf("-dxtquality X - DXTn quality: superfast, fast, normal, better, uber (default)\n"); - printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); - printf("-converttoluma - Set RGB to luma before compression.\n"); - return EXIT_FAILURE; +static int print_usage() +{ + printf("Description: Simple .DDS DXTn block compression using crnlib.\n"); + printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC\n"); + printf("Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet\n"); + printf("Usage: example3 [source_file] [options]\n"); + printf("\n"); + printf("Note: This simple example is not multithreaded, so it's not going to be\n"); + printf("particularly fast.\n"); + printf("\n"); + printf("Supported source image formats:\n"); + printf("Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename (always use .DDS extension).\n"); + printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); + printf("-pixelformat X - Output DXTn format. Supported formats:\n"); + printf("DXT1, DXT3, DXT5, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC), DXT5A (ATN1N)\n"); + printf("If no output pixel format is specified, this example uses either DXT1 or DXT5.\n"); + printf("-dxtquality X - DXTn quality: superfast, fast, normal, better, uber (default)\n"); + printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); + printf("-converttoluma - Set RGB to luma before compression.\n"); + return EXIT_FAILURE; } -static int error(const char* pMsg, ...) { - va_list args; - va_start(args, pMsg); - char buf[512]; - vsprintf_s(buf, sizeof(buf), pMsg, args); - va_end(args); - printf("%s", buf); - return EXIT_FAILURE; +static int error(const char* pMsg, ...) 
+{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; } -int main(int argc, char* argv[]) { - printf("example3 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); - - if (argc < 2) - return print_usage(); - - // Parse command line options - const char* pSrc_filename = argv[1]; - char out_filename[FILENAME_MAX] = {'\0'}; - crn_format fmt = cCRNFmtInvalid; - bool srgb_colorspace = true; - crn_dxt_quality dxt_quality = cCRNDXTQualityUber; // best quality, but slowest - bool set_alpha_to_luma = false; - bool convert_to_luma = false; - - for (int i = 2; i < argc; i++) { - if (argv[i][0] == '/') - argv[i][0] = '-'; - - if (!_stricmp(argv[i], "-out")) { - if (++i >= argc) - return error("Expected output filename!"); - - strcpy_s(out_filename, sizeof(out_filename), argv[i]); - } else if (!_stricmp(argv[i], "-nonsrgb")) - srgb_colorspace = false; - else if (!_stricmp(argv[i], "-pixelformat")) { - if (++i >= argc) - return error("Expected pixel format!"); - - unsigned int f; - for (f = 0; f < cCRNFmtTotal; f++) { - crn_format actual_fmt = crn_get_fundamental_dxt_format(static_cast(f)); - if (!_stricmp(argv[i], crn_get_format_string(actual_fmt))) { - fmt = actual_fmt; - break; +int main(int argc, char* argv[]) +{ + printf("example3 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); + + if (argc < 2) + return print_usage(); + + // Parse command line options + const char* pSrc_filename = argv[1]; + char out_filename[FILENAME_MAX] = { '\0' }; + crn_format fmt = cCRNFmtInvalid; + bool srgb_colorspace = true; + crn_dxt_quality dxt_quality = cCRNDXTQualityUber; // best quality, but slowest + bool set_alpha_to_luma = false; + bool convert_to_luma = false; + + for (int i = 2; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; + + if (!_stricmp(argv[i], "-out")) + { 
+ if (++i >= argc) + return error("Expected output filename!"); + + strcpy_s(out_filename, sizeof(out_filename), argv[i]); + } + else if (!_stricmp(argv[i], "-nonsrgb")) + srgb_colorspace = false; + else if (!_stricmp(argv[i], "-pixelformat")) + { + if (++i >= argc) + return error("Expected pixel format!"); + + unsigned int f; + for (f = 0; f < cCRNFmtTotal; f++) + { + crn_format actual_fmt = crn_get_fundamental_dxt_format(static_cast(f)); + if (!_stricmp(argv[i], crn_get_format_string(actual_fmt))) + { + fmt = actual_fmt; + break; + } + } + if (f == cCRNFmtTotal) + return error("Unrecognized pixel format: %s\n", argv[i]); } - } - if (f == cCRNFmtTotal) - return error("Unrecognized pixel format: %s\n", argv[i]); - } else if (!_stricmp(argv[i], "-dxtquality")) { - if (++i >= argc) - return error("Expected DXTn quality!\n"); - - unsigned int q; - for (q = 0; q < cCRNDXTQualityTotal; q++) { - if (!_stricmp(argv[i], crn_get_dxt_quality_string(static_cast(q)))) { - dxt_quality = static_cast(q); - break; + else if (!_stricmp(argv[i], "-dxtquality")) + { + if (++i >= argc) + return error("Expected DXTn quality!\n"); + + unsigned int q; + for (q = 0; q < cCRNDXTQualityTotal; q++) + { + if (!_stricmp(argv[i], crn_get_dxt_quality_string(static_cast(q)))) + { + dxt_quality = static_cast(q); + break; + } + } + if (q == cCRNDXTQualityTotal) + return error("Unrecognized DXTn quality: %s\n", argv[i]); } - } - if (q == cCRNDXTQualityTotal) - return error("Unrecognized DXTn quality: %s\n", argv[i]); - } else if (!_stricmp(argv[i], "-setalphatoluma")) - set_alpha_to_luma = true; - else if (!_stricmp(argv[i], "-converttoluma")) - convert_to_luma = true; - else - return error("Invalid option: %s\n", argv[i]); - } - - // Split the source filename into its various components. 
- char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; - if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) - return error("Invalid source filename!\n"); - - // Load the source image into memory. - printf("Loading source file: %s\n", pSrc_filename); - int width, height, actual_comps; - crn_uint32* pSrc_image = (crn_uint32*)stbi_load(pSrc_filename, &width, &height, &actual_comps, 4); - if (!pSrc_image) - return error("Unable to read source file\n"); - - if (fmt == cCRNFmtInvalid) { - // Format not specified - automatically choose the DXTn format. - fmt = (actual_comps > 3) ? cCRNFmtDXT5 : cCRNFmtDXT1; - } - - if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) - set_alpha_to_luma = true; - - if ((set_alpha_to_luma) || (convert_to_luma)) { - for (int i = 0; i < width * height; i++) { - crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; - // Compute CCIR 601 luma. - crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; - crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; - if (set_alpha_to_luma) - a = y; - if (convert_to_luma) { - r = y; - g = y; - b = y; - } - pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); + else if (!_stricmp(argv[i], "-setalphatoluma")) + set_alpha_to_luma = true; + else if (!_stricmp(argv[i], "-converttoluma")) + convert_to_luma = true; + else + return error("Invalid option: %s\n", argv[i]); + } + + // Split the source filename into its various components. + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source image into memory. 
+ printf("Loading source file: %s\n", pSrc_filename); + int width, height, actual_comps; + crn_uint32* pSrc_image = (crn_uint32*)stbi_load(pSrc_filename, &width, &height, &actual_comps, 4); + if (!pSrc_image) + return error("Unable to read source file\n"); + + if (fmt == cCRNFmtInvalid) + { + // Format not specified - automatically choose the DXTn format. + fmt = (actual_comps > 3) ? cCRNFmtDXT5 : cCRNFmtDXT1; } - } - printf("Source Dimensions: %ux%u, Actual Components: %u\n", width, height, actual_comps); + if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) + set_alpha_to_luma = true; + + if ((set_alpha_to_luma) || (convert_to_luma)) + { + for (int i = 0; i < width * height; i++) + { + crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; + // Compute CCIR 601 luma. + crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; + crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; + if (set_alpha_to_luma) + a = y; + if (convert_to_luma) + { + r = y; + g = y; + b = y; + } + pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); + } + } - const unsigned int num_blocks_x = (width + cDXTBlockSize - 1) / cDXTBlockSize; - const unsigned int num_blocks_y = (height + cDXTBlockSize - 1) / cDXTBlockSize; - const unsigned int bytes_per_block = crn_get_bytes_per_dxt_block(fmt); - const unsigned int total_compressed_size = num_blocks_x * num_blocks_y * bytes_per_block; + printf("Source Dimensions: %ux%u, Actual Components: %u\n", width, height, actual_comps); - printf("Block Dimensions: %ux%u, BytesPerBlock: %u, Total Compressed Size: %u\n", num_blocks_x, num_blocks_y, bytes_per_block, total_compressed_size); + const unsigned int num_blocks_x = (width + cDXTBlockSize - 1) / cDXTBlockSize; + const unsigned int num_blocks_y = (height + cDXTBlockSize - 1) / cDXTBlockSize; + const unsigned int bytes_per_block = crn_get_bytes_per_dxt_block(fmt); + const unsigned int total_compressed_size = num_blocks_x * num_blocks_y * 
bytes_per_block; - void* pCompressed_data = malloc(total_compressed_size); - if (!pCompressed_data) { - stbi_image_free(pSrc_image); - return error("Out of memory!"); - } - - crn_comp_params comp_params; - comp_params.m_format = fmt; - comp_params.m_dxt_quality = dxt_quality; - comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); - comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, actual_comps > 3); - - crn_block_compressor_context_t pContext = crn_create_block_compressor(comp_params); - - printf("Compressing to %s: ", crn_get_format_string(fmt)); - - int prev_percentage_complete = -1; - for (crn_uint32 block_y = 0; block_y < num_blocks_y; block_y++) { - for (crn_uint32 block_x = 0; block_x < num_blocks_x; block_x++) { - crn_uint32 pixels[cDXTBlockSize * cDXTBlockSize]; - - // Exact block from image, clamping at the sides of non-divisible by 4 images to avoid artifacts. - crn_uint32* pDst_pixels = pixels; - for (int y = 0; y < cDXTBlockSize; y++) { - const unsigned int actual_y = min(height - 1U, (block_y * cDXTBlockSize) + y); - for (int x = 0; x < cDXTBlockSize; x++) { - const unsigned int actual_x = min(width - 1U, (block_x * cDXTBlockSize) + x); - *pDst_pixels++ = pSrc_image[actual_x + actual_y * width]; - } - } + printf("Block Dimensions: %ux%u, BytesPerBlock: %u, Total Compressed Size: %u\n", num_blocks_x, num_blocks_y, bytes_per_block, total_compressed_size); - // Compress the DXTn block. 
- crn_compress_block(pContext, pixels, static_cast(pCompressed_data) + (block_x + block_y * num_blocks_x) * bytes_per_block); + void* pCompressed_data = malloc(total_compressed_size); + if (!pCompressed_data) + { + stbi_image_free(pSrc_image); + return error("Out of memory!"); } - int percentage_complete = ((block_y + 1) * 100 + (num_blocks_y / 2)) / num_blocks_y; - if (percentage_complete != prev_percentage_complete) { - printf("\b\b\b\b%3u%%", percentage_complete); - prev_percentage_complete = percentage_complete; + crn_comp_params comp_params; + comp_params.m_format = fmt; + comp_params.m_dxt_quality = dxt_quality; + comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, actual_comps > 3); + + crn_block_compressor_context_t pContext = crn_create_block_compressor(comp_params); + + printf("Compressing to %s: ", crn_get_format_string(fmt)); + + int prev_percentage_complete = -1; + for (crn_uint32 block_y = 0; block_y < num_blocks_y; block_y++) + { + for (crn_uint32 block_x = 0; block_x < num_blocks_x; block_x++) + { + crn_uint32 pixels[cDXTBlockSize * cDXTBlockSize]; + + // Exact block from image, clamping at the sides of non-divisible by 4 images to avoid artifacts. + crn_uint32* pDst_pixels = pixels; + for (int y = 0; y < cDXTBlockSize; y++) + { + const unsigned int actual_y = min(height - 1U, (block_y * cDXTBlockSize) + y); + for (int x = 0; x < cDXTBlockSize; x++) + { + const unsigned int actual_x = min(width - 1U, (block_x * cDXTBlockSize) + x); + *pDst_pixels++ = pSrc_image[actual_x + actual_y * width]; + } + } + + // Compress the DXTn block. 
+ crn_compress_block(pContext, pixels, static_cast(pCompressed_data) + (block_x + block_y * num_blocks_x) * bytes_per_block); + } + + int percentage_complete = ((block_y + 1) * 100 + (num_blocks_y / 2)) / num_blocks_y; + if (percentage_complete != prev_percentage_complete) + { + printf("\b\b\b\b%3u%%", percentage_complete); + prev_percentage_complete = percentage_complete; + } } - } - printf("\n"); + printf("\n"); - // Free the block compressor. - crn_free_block_compressor(pContext); - pContext = nullptr; + // Free the block compressor. + crn_free_block_compressor(pContext); + pContext = nullptr; - // Now create the DDS file. - char dst_filename[FILENAME_MAX]; - sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); - if (out_filename[0]) - strcpy(dst_filename, out_filename); + // Now create the DDS file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) + strcpy(dst_filename, out_filename); - printf("Writing DDS file: %s\n", dst_filename); + printf("Writing DDS file: %s\n", dst_filename); - FILE* pDDS_file = fopen(dst_filename, "wb"); - if (!pDDS_file) { - free(pCompressed_data); - return error("Failed creating destination file!\n"); - } + FILE* pDDS_file = fopen(dst_filename, "wb"); + if (!pDDS_file) + { + free(pCompressed_data); + return error("Failed creating destination file!\n"); + } - // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). - fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); + // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). + fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); - // Prepare the DDS header. 
- crnlib::DDSURFACEDESC2 dds_desc; - memset(&dds_desc, 0, sizeof(dds_desc)); - dds_desc.dwSize = sizeof(dds_desc); - dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT; - dds_desc.dwWidth = width; - dds_desc.dwHeight = height; + // Prepare the DDS header. + crnlib::DDSURFACEDESC2 dds_desc; + memset(&dds_desc, 0, sizeof(dds_desc)); + dds_desc.dwSize = sizeof(dds_desc); + dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT; + dds_desc.dwWidth = width; + dds_desc.dwHeight = height; - dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); - dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; - dds_desc.ddpfPixelFormat.dwFourCC = crn_get_format_fourcc(fmt); - dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); + dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; + dds_desc.ddpfPixelFormat.dwFourCC = crn_get_format_fourcc(fmt); + dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; - // Set pitch/linearsize field (some DDS readers require this field to be non-zero). - unsigned int bits_per_pixel = crn_get_format_bits_per_texel(fmt); - dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; - dds_desc.dwFlags |= DDSD_LINEARSIZE; + // Set pitch/linearsize field (some DDS readers require this field to be non-zero). + unsigned int bits_per_pixel = crn_get_format_bits_per_texel(fmt); + dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; + dds_desc.dwFlags |= DDSD_LINEARSIZE; - // Write the DDS header to the output file. - fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); + // Write the DDS header to the output file. + fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); - // Write the image's compressed data to the output file. - fwrite(pCompressed_data, total_compressed_size, 1, pDDS_file); - free(pCompressed_data); + // Write the image's compressed data to the output file. 
+ fwrite(pCompressed_data, total_compressed_size, 1, pDDS_file); + free(pCompressed_data); - stbi_image_free(pSrc_image); + stbi_image_free(pSrc_image); - if (fclose(pDDS_file) == EOF) { - return error("Failed writing to DDS file!\n"); - } + if (fclose(pDDS_file) == EOF) + { + return error("Failed writing to DDS file!\n"); + } - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/inc/crn_decomp.h b/inc/crn_decomp.h index 1293029..6af702a 100644 --- a/inc/crn_decomp.h +++ b/inc/crn_decomp.h @@ -1,15 +1,34 @@ -// File: crn_decomp.h - Fast CRN->DXTc texture transcoder header file library -// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC -// Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet -// See Copyright Notice and license at the end of this file. -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // This single header file contains *all* of the code necessary to unpack .CRN files to raw DXTn bits. // It does NOT depend on the crn compression library. 
// // Note: This is a single file, stand-alone C++ library which is controlled by the use of the following macro: // If CRND_INCLUDE_CRND_H is NOT defined, the header is included. -// + // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing + #ifndef CRND_INCLUDE_CRND_H #define CRND_INCLUDE_CRND_H @@ -52,77 +71,111 @@ #endif // File: crnd_types.h -namespace crnd { -const crn_uint8 cUINT8_MIN = 0; -const crn_uint8 cUINT8_MAX = 0xFFU; -const uint16 cUINT16_MIN = 0; -const uint16 cUINT16_MAX = 0xFFFFU; -const uint32 cUINT32_MIN = 0; -const uint32 cUINT32_MAX = 0xFFFFFFFFU; - -const int8 cINT8_MIN = -128; -const int8 cINT8_MAX = 127; -const int16 cINT16_MIN = -32768; -const int16 cINT16_MAX = 32767; -const int32 cINT32_MIN = (-2147483647 - 1); -const int32 cINT32_MAX = 2147483647; - -enum eClear { cClear }; - -const uint32 cIntBits = 32U; - -template -struct int_traits { - enum { cMin = crnd::cINT32_MIN, - cMax = crnd::cINT32_MAX, - cSigned = true }; -}; - -template <> -struct int_traits { - enum { cMin = crnd::cINT8_MIN, - cMax = crnd::cINT8_MAX, - cSigned = true }; -}; -template <> -struct int_traits { - enum { cMin = crnd::cINT16_MIN, - cMax = crnd::cINT16_MAX, - cSigned = true }; -}; -template <> -struct int_traits { - enum { cMin = crnd::cINT32_MIN, - cMax = crnd::cINT32_MAX, - cSigned = true }; -}; - -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnd::cUINT8_MAX, - cSigned = false }; -}; -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnd::cUINT16_MAX, - cSigned = false }; -}; -template <> -struct int_traits { - enum { cMin = 0, - cMax = crnd::cUINT32_MAX, - cSigned = false }; -}; - -struct empty_type {}; - -} // namespace crnd +namespace crnd +{ + const crn_uint8 cUINT8_MIN = 0; + const crn_uint8 cUINT8_MAX = 0xFFU; + const uint16 cUINT16_MIN = 0; + const uint16 cUINT16_MAX = 0xFFFFU; + const uint32 cUINT32_MIN = 0; + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + + const int8 cINT8_MIN = -128; + 
const int8 cINT8_MAX = 127; + const int16 cINT16_MIN = -32768; + const int16 cINT16_MAX = 32767; + const int32 cINT32_MIN = (-2147483647 - 1); + const int32 cINT32_MAX = 2147483647; + + enum eClear { cClear }; + + const uint32 cIntBits = 32U; + + template + struct int_traits + { + enum + { + cMin = crnd::cINT32_MIN, + cMax = crnd::cINT32_MAX, + cSigned = true + }; + }; -// File: crnd_platform.h -namespace crnd { + template<> + struct int_traits + { + enum + { + cMin = crnd::cINT8_MIN, + cMax = crnd::cINT8_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = crnd::cINT16_MIN, + cMax = crnd::cINT16_MAX, + cSigned = true + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = crnd::cINT32_MIN, + cMax = crnd::cINT32_MAX, + cSigned = true + }; + }; + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnd::cUINT8_MAX, + cSigned = false + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnd::cUINT16_MAX, + cSigned = false + }; + }; + + template<> + struct int_traits + { + enum + { + cMin = 0, + cMax = crnd::cUINT32_MAX, + cSigned = false + }; + }; + + struct empty_type + { + }; +} // namespace crnd + +// File: crnd_platform.h +namespace crnd +{ bool crnd_is_debugger_present(); void crnd_debug_break(); void crnd_output_debug_string(const char* p); @@ -130,15 +183,15 @@ namespace crnd { // actually in crnd_assert.cpp CRN_EXPORT void crnd_assert(const char* pExp, const char* pFile, unsigned line); CRN_EXPORT void crnd_fail(const char* pExp, const char* pFile, unsigned line); - -} // namespace crnd +} // namespace crnd // File: crnd_assert.h -namespace crnd { +namespace crnd +{ CRN_EXPORT void crnd_assert(const char* pExp, const char* pFile, unsigned line); -// Define CRND_ASSERT if there is no user-supplied definition. -// Note that if there is a user-supplied definition, it is used even if NDEBUG is defined. + // Define CRND_ASSERT if there is no user-supplied definition. 
+ // Note that if there is a user-supplied definition, it is used even if NDEBUG is defined. #ifndef CRND_ASSERT #ifdef NDEBUG #define CRND_ASSERT(x) ((void)0) @@ -147,83 +200,145 @@ namespace crnd { #endif #endif // ifndef CRND_ASSERT -void crnd_trace(const char* pFmt, va_list args); -void crnd_trace(const char* pFmt, ...); - -} // namespace crnd + void crnd_trace(const char* pFmt, va_list args); + void crnd_trace(const char* pFmt, ...); +} // namespace crnd // File: crnd_helpers.h -namespace crnd { -namespace helpers { -template -struct rel_ops { - friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } - friend bool operator>(const T& x, const T& y) { return (y < x); } - friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } - friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } -}; - -template -inline T* construct(T* p) { - return new (static_cast(p)) T; -} +namespace crnd +{ + namespace helpers + { + template + struct rel_ops + { + friend bool operator!=(const T& x, const T& y) + { + return (!(x == y)); + } -template -inline T* construct(T* p, const U& init) { - return new (static_cast(p)) T(init); -} + friend bool operator>(const T& x, const T& y) + { + return (y < x); + } -template -void construct_array(T* p, uint32 n) { - T* q = p + n; - for (; p != q; ++p) - new (static_cast(p)) T; -} + friend bool operator<=(const T& x, const T& y) + { + return (!(y < x)); + } -template -void construct_array(T* p, uint32 n, const U& init) { - T* q = p + n; - for (; p != q; ++p) - new (static_cast(p)) T(init); -} + friend bool operator>=(const T& x, const T& y) + { + return (!(x < y)); + } + }; -template -inline void destruct(T* p) { - p->~T(); -} + template + inline T* construct(T* p) + { + return new(static_cast(p)) T; + } -template -inline void destruct_array(T* p, uint32 n) { - T* q = p + n; - for (; p != q; ++p) - p->~T(); -} + template + inline T* construct(T* p, const U& init) + { + return new(static_cast(p)) T(init); + 
} + + template + void construct_array(T* p, uint32 n) + { + T* q = p + n; + for (; p != q; ++p) + new(static_cast(p)) T; + } + + template + void construct_array(T* p, uint32 n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + new(static_cast(p)) T(init); + } -} // namespace helpers + template + inline void destruct(T* p) + { + p->~T(); + } -} // namespace crnd + template + inline void destruct_array(T* p, uint32 n) + { + T* q = p + n; + for (; p != q; ++p) + p->~T(); + } + } // namespace helpers +} // namespace crnd // File: crnd_traits.h -namespace crnd { -template -struct scalar_type { - enum { cFlag = false }; - static inline void construct(T* p) { helpers::construct(p); } - static inline void construct(T* p, const T& init) { helpers::construct(p, init); } - static inline void construct_array(T* p, uint32 n) { helpers::construct_array(p, n); } - static inline void destruct(T* p) { helpers::destruct(p); } - static inline void destruct_array(T* p, uint32 n) { helpers::destruct_array(p, n); } -}; - -template -struct scalar_type { - enum { cFlag = true }; - static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } - static inline void construct(T** p, T* init) { *p = init; } - static inline void construct_array(T** p, uint32 n) { memset(p, 0, sizeof(T*) * n); } - static inline void destruct(T**) {} - static inline void destruct_array(T**, uint32) {} -}; +namespace crnd +{ + template + struct scalar_type + { + enum { cFlag = false }; + + static inline void construct(T* p) + { + helpers::construct(p); + } + + static inline void construct(T* p, const T& init) + { + helpers::construct(p, init); + } + + static inline void construct_array(T* p, uint32 n) + { + helpers::construct_array(p, n); + } + + static inline void destruct(T* p) + { + helpers::destruct(p); + } + + static inline void destruct_array(T* p, uint32 n) + { + helpers::destruct_array(p, n); + } + }; + + template + struct scalar_type + { + enum { cFlag = true }; + + static inline void 
construct(T** p) + { + memset(p, 0, sizeof(T*)); + } + + static inline void construct(T** p, T* init) + { + *p = init; + } + + static inline void construct_array(T** p, uint32 n) + { + memset(p, 0, sizeof(T*) * n); + } + + static inline void destruct(T**) + { + } + + static inline void destruct_array(T**, uint32) + { + } + }; #define CRND_DEFINE_BUILT_IN_TYPE(X) \ template <> \ @@ -236,2305 +351,2681 @@ struct scalar_type { static inline void destruct_array(X*, uint32) {} \ }; -CRND_DEFINE_BUILT_IN_TYPE(bool) -CRND_DEFINE_BUILT_IN_TYPE(char) -CRND_DEFINE_BUILT_IN_TYPE(unsigned char) -CRND_DEFINE_BUILT_IN_TYPE(short) -CRND_DEFINE_BUILT_IN_TYPE(unsigned short) -CRND_DEFINE_BUILT_IN_TYPE(int) -CRND_DEFINE_BUILT_IN_TYPE(unsigned int) -CRND_DEFINE_BUILT_IN_TYPE(long) -CRND_DEFINE_BUILT_IN_TYPE(unsigned long) -CRND_DEFINE_BUILT_IN_TYPE(int64) -CRND_DEFINE_BUILT_IN_TYPE(uint64) -CRND_DEFINE_BUILT_IN_TYPE(float) -CRND_DEFINE_BUILT_IN_TYPE(double) -CRND_DEFINE_BUILT_IN_TYPE(long double) + CRND_DEFINE_BUILT_IN_TYPE(bool) + + CRND_DEFINE_BUILT_IN_TYPE(char) + + CRND_DEFINE_BUILT_IN_TYPE(unsigned char) + + CRND_DEFINE_BUILT_IN_TYPE(short) + + CRND_DEFINE_BUILT_IN_TYPE(unsigned short) + + CRND_DEFINE_BUILT_IN_TYPE(int) + + CRND_DEFINE_BUILT_IN_TYPE(unsigned int) + + CRND_DEFINE_BUILT_IN_TYPE(long) + + CRND_DEFINE_BUILT_IN_TYPE(unsigned long) + + CRND_DEFINE_BUILT_IN_TYPE(int64) + + CRND_DEFINE_BUILT_IN_TYPE(uint64) + + CRND_DEFINE_BUILT_IN_TYPE(float) + + CRND_DEFINE_BUILT_IN_TYPE(double) + + CRND_DEFINE_BUILT_IN_TYPE(long double) #undef CRND_DEFINE_BUILT_IN_TYPE -// See: http://erdani.org/publications/cuj-2004-06.pdf + // See: http://erdani.org/publications/cuj-2004-06.pdf -template -struct bitwise_movable { - enum { cFlag = false }; -}; + template + struct bitwise_movable + { + enum { cFlag = false }; + }; -// Defines type Q as bitwise movable. + // Defines type Q as bitwise movable. 
#define CRND_DEFINE_BITWISE_MOVABLE(Q) \ template <> \ struct bitwise_movable { \ enum { cFlag = true }; \ }; -// From yasli_traits.h: -// Credit goes to Boost; -// also found in the C++ Templates book by Vandevoorde and Josuttis + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis -typedef char (&yes_t)[1]; -typedef char (&no_t)[2]; + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; -template -yes_t class_test(int U::*); -template -no_t class_test(...); + template + yes_t class_test(int U::*); + template + no_t class_test(...); -template -struct is_class { - enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; -}; + template + struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; -template -struct is_pointer { - enum { value = false }; -}; + template + struct is_pointer + { + enum { value = false }; + }; -template -struct is_pointer { - enum { value = true }; -}; + template + struct is_pointer + { + enum { value = true }; + }; #define CRND_IS_POD(T) __is_pod(T) - -} // namespace crnd +} // namespace crnd // File: crnd_mem.h -namespace crnd { -void* crnd_malloc(size_t size, size_t* pActual_size = NULL); -void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); -void crnd_free(void* p); -size_t crnd_msize(void* p); - -template -inline T* crnd_new() { - T* p = static_cast(crnd_malloc(sizeof(T))); - if (!p) - return NULL; - - return helpers::construct(p); -} +namespace crnd +{ + void* crnd_malloc(size_t size, size_t* pActual_size = NULL); + void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + void crnd_free(void* p); + size_t crnd_msize(void* p); + + template + inline T* crnd_new() + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; -template -inline T* crnd_new(const T& init) { - T* p = static_cast(crnd_malloc(sizeof(T))); - if (!p) - return NULL; + return 
helpers::construct(p); + } - return helpers::construct(p, init); -} + template + inline T* crnd_new(const T& init) + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; -template -inline T* crnd_new_array(uint32 num) { - if (!num) - num = 1; + return helpers::construct(p, init); + } - uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); - if (!q) - return NULL; + template + inline T* crnd_new_array(uint32 num) + { + if (!num) + num = 1; - T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); + uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); + if (!q) + return NULL; - reinterpret_cast(p)[-1] = num; - reinterpret_cast(p)[-2] = ~num; + T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); - helpers::construct_array(p, num); - return p; -} + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; -template -inline void crnd_delete(T* p) { - if (p) { - helpers::destruct(p); - crnd_free(p); - } -} + helpers::construct_array(p, num); + return p; + } -template -inline void crnd_delete_array(T* p) { - if (p) { - const uint32 num = reinterpret_cast(p)[-1]; - CRND_ASSERT(num && (num == ~reinterpret_cast(p)[-2])); + template + inline void crnd_delete(T* p) + { + if (p) + { + helpers::destruct(p); + crnd_free(p); + } + } - helpers::destruct_array(p, num); + template + inline void crnd_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + CRND_ASSERT(num && (num == ~reinterpret_cast(p)[-2])); - crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); - } -} + helpers::destruct_array(p, num); -} // namespace crnd + crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); + } + } +} // namespace crnd // File: crnd_math.h -namespace crnd { -namespace math { -const float cNearlyInfinite = 1.0e+37f; +namespace crnd +{ + namespace math + { + const float cNearlyInfinite = 1.0e+37f; -const float cDegToRad = 0.01745329252f; -const float cRadToDeg = 57.29577951f; 
+ const float cDegToRad = 0.01745329252f; + const float cRadToDeg = 57.29577951f; -extern uint32 g_bitmasks[32]; + extern uint32 g_bitmasks[32]; -// Yes I know these should probably be pass by ref, not val: -// http://www.stepanovpapers.com/notes.pdf -// Just don't use them on non-simple (non built-in) types! -template -inline T minimum(T a, T b) { - return (a < b) ? a : b; -} + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template + inline T minimum(T a, T b) + { + return (a < b) ? a : b; + } -template -inline T minimum(T a, T b, T c) { - return minimum(minimum(a, b), c); -} + template + inline T minimum(T a, T b, T c) + { + return minimum(minimum(a, b), c); + } -template -inline T maximum(T a, T b) { - return (a > b) ? a : b; -} + template + inline T maximum(T a, T b) + { + return (a > b) ? a : b; + } -template -inline T maximum(T a, T b, T c) { - return maximum(maximum(a, b), c); -} + template + inline T maximum(T a, T b, T c) + { + return maximum(maximum(a, b), c); + } -template -inline T clamp(T value, T low, T high) { - return (value < low) ? low : ((value > high) ? high : value); -} + template + inline T clamp(T value, T low, T high) + { + return (value < low) ? low : ((value > high) ? 
high : value); + } -template -inline T square(T value) { - return value * value; -} + template + inline T square(T value) + { + return value * value; + } -inline bool is_power_of_2(uint32 x) { - return x && ((x & (x - 1U)) == 0U); -} + inline bool is_power_of_2(uint32 x) + { + return x && ((x & (x - 1U)) == 0U); + } -// From "Hackers Delight" -inline int next_pow2(uint32 val) { - val--; - val |= val >> 16; - val |= val >> 8; - val |= val >> 4; - val |= val >> 2; - val |= val >> 1; - return val + 1; -} + // From "Hackers Delight" + inline int next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } -// Returns the total number of bits needed to encode v. -inline uint32 total_bits(uint32 v) { - uint32 l = 0; - while (v > 0U) { - v >>= 1; - l++; - } - return l; -} + // Returns the total number of bits needed to encode v. + inline uint32 total_bits(uint32 v) + { + uint32 l = 0; + while (v > 0U) + { + v >>= 1; + l++; + } + return l; + } -inline uint floor_log2i(uint v) { - uint l = 0; - while (v > 1U) { - v >>= 1; - l++; - } - return l; -} + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } -inline uint ceil_log2i(uint v) { - uint l = floor_log2i(v); - if ((l != cIntBits) && (v > (1U << l))) - l++; - return l; -} -} + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; + } + } } // File: crnd_utils.h -namespace crnd { -namespace utils { -template -inline void zero_object(T& obj) { - memset(&obj, 0, sizeof(obj)); -} - -template -inline void zero_this(T* pObj) { - memset(pObj, 0, sizeof(*pObj)); -} +namespace crnd +{ + namespace utils + { + template + inline void zero_object(T& obj) + { + memset(&obj, 0, sizeof(obj)); + } -template -inline void swap(T& left, T& right) { - T temp(left); - left = right; - right = temp; -} + template + inline void 
zero_this(T* pObj) + { + memset(pObj, 0, sizeof(*pObj)); + } -inline void invert_buf(void* pBuf, uint32 size) { - uint8* p = static_cast(pBuf); + template + inline void swap(T& left, T& right) + { + T temp(left); + left = right; + right = temp; + } - const uint32 half_size = size >> 1; - for (uint32 i = 0; i < half_size; i++) - swap(p[i], p[size - 1U - i]); -} + inline void invert_buf(void* pBuf, uint32 size) + { + uint8* p = static_cast(pBuf); -static inline uint16 swap16(uint16 x) { - return static_cast((x << 8) | (x >> 8)); -} -static inline uint32 swap32(uint32 x) { - return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); -} + const uint32 half_size = size >> 1; + for (uint32 i = 0; i < half_size; i++) + swap(p[i], p[size - 1U - i]); + } -uint32 compute_max_mips(uint32 width, uint32 height); + static inline uint16 swap16(uint16 x) + { + return static_cast((x << 8) | (x >> 8)); + } -} // namespace utils + static inline uint32 swap32(uint32 x) + { + return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); + } -} // namespace crnd + uint32 compute_max_mips(uint32 width, uint32 height); + } // namespace utils +} // namespace crnd // File: crnd_vector.h -namespace crnd { -struct elemental_vector { - void* m_p; - uint32 m_size; - uint32 m_capacity; +namespace crnd +{ + struct elemental_vector + { + void* m_p; + uint32 m_size; + uint32 m_capacity; - typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); + typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); - bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); -}; + bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); + }; #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4127) // warning C4127: conditional expression is constant #endif -template -class vector : public helpers::rel_ops > { - public: - typedef T* 
iterator; - typedef const T* const_iterator; - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - - inline vector() - : m_p(NULL), - m_size(0), - m_capacity(0), - m_alloc_failed(false) { - } - - inline vector(const vector& other) - : m_p(NULL), - m_size(0), - m_capacity(0), - m_alloc_failed(false) { - *this = other; - } - - inline vector(uint32 size) - : m_p(NULL), - m_size(0), - m_capacity(0), - m_alloc_failed(false) { - resize(size); - } - - inline ~vector() { - clear(); - } - - // I don't like this. Not at all. But exceptions, or just failing suck worse. - inline bool get_alloc_failed() const { return m_alloc_failed; } - inline void clear_alloc_failed() { m_alloc_failed = false; } - - inline bool assign(const vector& other) { - if (this == &other) - return true; - - if (m_capacity == other.m_size) - resize(0); - else { - clear(); - - if (!increase_capacity(other.m_size, false)) - return false; - } + template + class vector : public helpers::rel_ops> + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + } - if (scalar_type::cFlag) - memcpy(m_p, other.m_p, other.m_size * sizeof(T)); - else { - T* pDst = m_p; - const T* pSrc = other.m_p; - for (uint32 i = other.m_size; i > 0; i--) - helpers::construct(pDst++, *pSrc++); - } - - m_size = other.m_size; - - return true; - } - - inline vector& operator=(const vector& other) { - assign(other); - return *this; - } - - inline const T* begin() const { return m_p; } - T* begin() { return m_p; } - - inline const T* end() const { return m_p + m_size; } - T* end() { return m_p + m_size; } - - inline bool empty() const { return !m_size; } - inline uint32 size() const { return m_size; } - 
inline uint32 capacity() const { return m_capacity; } - - inline const T& operator[](uint32 i) const { - CRND_ASSERT(i < m_size); - return m_p[i]; - } - inline T& operator[](uint32 i) { - CRND_ASSERT(i < m_size); - return m_p[i]; - } - - inline const T& front() const { - CRND_ASSERT(m_size); - return m_p[0]; - } - inline T& front() { - CRND_ASSERT(m_size); - return m_p[0]; - } - - inline const T& back() const { - CRND_ASSERT(m_size); - return m_p[m_size - 1]; - } - inline T& back() { - CRND_ASSERT(m_size); - return m_p[m_size - 1]; - } - - inline void clear() { - if (m_p) { - scalar_type::destruct_array(m_p, m_size); - crnd_free(m_p); - m_p = NULL; - m_size = 0; - m_capacity = 0; - } - - m_alloc_failed = false; - } - - inline bool reserve(uint32 new_capacity) { - if (!increase_capacity(new_capacity, false)) - return false; - - return true; - } - - inline bool resize(uint32 new_size) { - if (m_size != new_size) { - if (new_size < m_size) - scalar_type::destruct_array(m_p + new_size, m_size - new_size); - else { - if (new_size > m_capacity) { - if (!increase_capacity(new_size, new_size == (m_size + 1))) - return false; + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + *this = other; } - scalar_type::construct_array(m_p + m_size, new_size - m_size); - } + inline vector(uint32 size) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + resize(size); + } - m_size = new_size; - } + inline ~vector() + { + clear(); + } - return true; - } + // I don't like this. Not at all. But exceptions, or just failing suck worse. 
+ inline bool get_alloc_failed() const + { + return m_alloc_failed; + } - inline bool push_back(const T& obj) { - CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + inline void clear_alloc_failed() + { + m_alloc_failed = false; + } - if (m_size >= m_capacity) { - if (!increase_capacity(m_size + 1, true)) - return false; - } + inline bool assign(const vector& other) + { + if (this == &other) + return true; - scalar_type::construct(m_p + m_size, obj); - m_size++; + if (m_capacity == other.m_size) + resize(0); + else + { + clear(); - return true; - } + if (!increase_capacity(other.m_size, false)) + return false; + } - inline void pop_back() { - CRND_ASSERT(m_size); + if (scalar_type::cFlag) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32 i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } - if (m_size) { - m_size--; - scalar_type::destruct(&m_p[m_size]); - } - } + m_size = other.m_size; - inline void insert(uint32 index, const T* p, uint32 n) { - CRND_ASSERT(index <= m_size); - if (!n) - return; + return true; + } - const uint32 orig_size = m_size; - resize(m_size + n); + inline vector& operator=(const vector& other) + { + assign(other); + return *this; + } - const T* pSrc = m_p + orig_size - 1; - T* pDst = const_cast(pSrc) + n; + inline const T* begin() const + { + return m_p; + } - const uint32 num_to_move = orig_size - index; + T* begin() + { + return m_p; + } - for (uint32 i = 0; i < num_to_move; i++) { - CRND_ASSERT((pDst - m_p) < (int)m_size); - *pDst-- = *pSrc--; - } + inline const T* end() const + { + return m_p + m_size; + } - pSrc = p; - pDst = m_p + index; + T* end() + { + return m_p + m_size; + } - for (uint32 i = 0; i < n; i++) { - CRND_ASSERT((pDst - m_p) < (int)m_size); - *pDst++ = *p++; - } - } + inline bool empty() const + { + return !m_size; + } - inline void erase(uint32 start, uint32 n) { - CRND_ASSERT((start + n) <= m_size); + inline 
uint32 size() const + { + return m_size; + } - if (!n) - return; + inline uint32 capacity() const + { + return m_capacity; + } - const uint32 num_to_move = m_size - (start + n); + inline const T& operator[](uint32 i) const + { + CRND_ASSERT(i < m_size); + return m_p[i]; + } - T* pDst = m_p + start; - T* pDst_end = pDst + num_to_move; - const T* pSrc = m_p + start + n; + inline T& operator[](uint32 i) + { + CRND_ASSERT(i < m_size); + return m_p[i]; + } - while (pDst != pDst_end) - *pDst++ = *pSrc++; + inline const T& front() const + { + CRND_ASSERT(m_size); + return m_p[0]; + } - scalar_type::destruct_array(pDst_end, n); + inline T& front() + { + CRND_ASSERT(m_size); + return m_p[0]; + } - m_size -= n; - } + inline const T& back() const + { + CRND_ASSERT(m_size); + return m_p[m_size - 1]; + } - inline void erase(uint32 index) { - erase(index, 1); - } + inline T& back() + { + CRND_ASSERT(m_size); + return m_p[m_size - 1]; + } - inline void erase(T* p) { - CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); - erase(p - m_p); - } + inline void clear() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + crnd_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } - inline bool operator==(const vector& rhs) const { - if (m_size != rhs.m_size) - return false; - else if (m_size) { - if (scalar_type::cFlag) - return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; - else { - const T* pSrc = m_p; - const T* pDst = rhs.m_p; - for (uint32 i = m_size; i; i--) - if (!(*pSrc++ == *pDst++)) - return false; - } - } + m_alloc_failed = false; + } - return true; - } + inline bool reserve(uint32 new_capacity) + { + if (!increase_capacity(new_capacity, false)) + return false; - inline bool operator<(const vector& rhs) const { - const uint32 min_size = math::minimum(m_size, rhs.m_size); + return true; + } - const T* pSrc = m_p; - const T* pSrc_end = m_p + min_size; - const T* pDst = rhs.m_p; + inline bool resize(uint32 new_size) + { + if (m_size != new_size) + { + if 
(new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, new_size == (m_size + 1))) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } - while ((pSrc < pSrc_end) && (*pSrc == *pDst)) { - pSrc++; - pDst++; - } + return true; + } - if (pSrc < pSrc_end) - return *pSrc < *pDst; + inline bool push_back(const T& obj) + { + CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); - return m_size < rhs.m_size; - } + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true)) + return false; + } - void swap(vector& other) { - std::swap(m_p, other.m_p); - std::swap(m_size, other.m_size); - std::swap(m_capacity, other.m_capacity); - } + scalar_type::construct(m_p + m_size, obj); + m_size++; - private: - T* m_p; - uint32 m_size; - uint32 m_capacity; - bool m_alloc_failed; + return true; + } - template - struct is_vector { - enum { cFlag = false }; - }; - template - struct is_vector > { - enum { cFlag = true }; - }; + inline void pop_back() + { + CRND_ASSERT(m_size); - static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) { - T* pSrc = static_cast(pSrc_void); - T* const pSrc_end = pSrc + num; - T* pDst = static_cast(pDst_void); + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } - while (pSrc != pSrc_end) { - helpers::construct(pDst, *pSrc); - pSrc->~T(); - pSrc++; - pDst++; - } - } + inline void insert(uint32 index, const T* p, uint32 n) + { + CRND_ASSERT(index <= m_size); + if (!n) + return; - inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) { - if (!reinterpret_cast(this)->increase_capacity( - min_new_capacity, grow_hint, sizeof(T), - ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? 
NULL : object_mover)) { - m_alloc_failed = true; - return false; - } - return true; - } -}; + const uint32 orig_size = m_size; + resize(m_size + n); -#ifdef _MSC_VER -#pragma warning(pop) -#endif + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; -extern void vector_test(); + const uint32 num_to_move = orig_size - index; -} // namespace crnd + for (uint32 i = 0; i < num_to_move; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } -// File: crnd_private.h -namespace crnd { -const crn_header* crnd_get_header(const void* pData, uint32 data_size); + pSrc = p; + pDst = m_p + index; -} // namespace crnd + for (uint32 i = 0; i < n; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } -// File: checksum.h -namespace crnd { -// crc16() intended for small buffers - doesn't use an acceleration table. -const uint16 cInitCRC16 = 0; -uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); + inline void erase(uint32 start, uint32 n) + { + CRND_ASSERT((start + n) <= m_size); -} // namespace crnd + if (!n) + return; -// File: crnd_color.h -namespace crnd { -template -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT8_MIN, - cMax = cUINT8_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = true, - cFloat = false, - cMin = cINT16_MIN, - cMax = cINT16_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT16_MIN, - cMax = cUINT16_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = true, - cFloat = false, - cMin = cINT32_MIN, - cMax = cINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = false, - cMin = cUINT32_MIN, - cMax = cUINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = true, - cMin = 
cINT32_MIN, - cMax = cINT32_MAX - }; -}; - -template <> -struct color_quad_component_traits { - enum { - cSigned = false, - cFloat = true, - cMin = cINT32_MIN, - cMax = cINT32_MAX - }; -}; + const uint32 num_to_move = m_size - (start + n); -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4201) // warning C4201: nonstandard extension used : nameless struct/union -#pragma warning(disable : 4127) // warning C4127: conditional expression is constant -#endif + T* pDst = m_p + start; + T* pDst_end = pDst + num_to_move; + const T* pSrc = m_p + start + n; -template -class color_quad : public helpers::rel_ops > { - static parameter_type clamp(parameter_type v) { - if (component_traits::cFloat) - return v; - else { - if (v < component_traits::cMin) - return component_traits::cMin; - else if (v > component_traits::cMax) - return component_traits::cMax; - return v; - } - } - - public: - typedef component_type component_t; - typedef parameter_type parameter_t; - typedef color_quad_component_traits component_traits; - - enum { cNumComps = 4 }; - - union { - struct - { - component_type r; - component_type g; - component_type b; - component_type a; - }; + while (pDst != pDst_end) + *pDst++ = *pSrc++; - component_type c[cNumComps]; - }; + scalar_type::destruct_array(pDst_end, n); - inline color_quad() { - } - - inline color_quad(eClear) - : r(0), g(0), b(0), a(0) { - } - - inline color_quad(const color_quad& other) - : r(other.r), g(other.g), b(other.b), a(other.a) { - } - - inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) { - set(y, alpha); - } - - inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { - set(red, green, blue, alpha); - } - - template - inline color_quad(const color_quad& other) - : r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) { - } - - inline void clear() { - r = 0; - g = 0; - b = 0; - a = 0; - } - - inline 
color_quad& operator=(const color_quad& other) { - r = other.r; - g = other.g; - b = other.b; - a = other.a; - return *this; - } - - template - inline color_quad& operator=(const color_quad& other) { - r = clamp(other.r); - g = clamp(other.g); - b = clamp(other.b); - a = clamp(other.a); - return *this; - } - - inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { - y = clamp(y); - r = static_cast(y); - g = static_cast(y); - b = static_cast(y); - a = static_cast(alpha); - return *this; - } - - inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { - r = static_cast(clamp(red)); - g = static_cast(clamp(green)); - b = static_cast(clamp(blue)); - a = static_cast(clamp(alpha)); - return *this; - } - - inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) { - r = static_cast(red); - g = static_cast(green); - b = static_cast(blue); - a = static_cast(alpha); - return *this; - } - - inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) { - r = static_cast(red); - g = static_cast(green); - b = static_cast(blue); - return *this; - } - - static inline parameter_type get_min_comp() { return component_traits::cMin; } - static inline parameter_type get_max_comp() { return component_traits::cMax; } - static inline bool get_comps_are_signed() { return component_traits::cSigned; } - - inline component_type operator[](uint32 i) const { - CRND_ASSERT(i < cNumComps); - return c[i]; - } - inline component_type& operator[](uint32 i) { - CRND_ASSERT(i < cNumComps); - return c[i]; - } - - inline color_quad& set_component(uint32 i, parameter_type f) { - CRND_ASSERT(i < cNumComps); - - c[i] = static_cast(clamp(f)); - - return *this; - } - - inline color_quad& clamp(const color_quad& l, const color_quad& h) { - for (uint32 i = 0; i < cNumComps; i++) - c[i] = 
static_cast(math::clamp(c[i], l[i], h[i])); - return *this; - } - - inline color_quad& clamp(parameter_type l, parameter_type h) { - for (uint32 i = 0; i < cNumComps; i++) - c[i] = static_cast(math::clamp(c[i], l, h)); - return *this; - } - - // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). - inline parameter_type get_luma() const { - return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); - } - - // Returns REC 709 luma. - inline parameter_type get_luma_rec709() const { - return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); - } - - inline uint32 squared_distance(const color_quad& c, bool alpha = true) const { - return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); - } - - inline bool rgb_equals(const color_quad& rhs) const { - return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); - } - - inline bool operator==(const color_quad& rhs) const { - return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); - } - - inline bool operator<(const color_quad& rhs) const { - for (uint32 i = 0; i < cNumComps; i++) { - if (c[i] < rhs.c[i]) - return true; - else if (!(c[i] == rhs.c[i])) - return false; - } - return false; - } - - inline color_quad& operator+=(const color_quad& other) { - for (uint32 i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] + other.c[i])); - return *this; - } - - inline color_quad& operator-=(const color_quad& other) { - for (uint32 i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] - other.c[i])); - return *this; - } - - inline color_quad& operator*=(parameter_type v) { - for (uint32 i = 0; i < 4; i++) - c[i] = static_cast(clamp(c[i] * v)); - return *this; - } - - inline color_quad& operator/=(parameter_type v) { - for (uint32 i = 0; i < 4; i++) - c[i] = static_cast(c[i] / v); - return *this; - } - - inline color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const { - CRND_ASSERT((x | y | z | w) < 4); - return 
color_quad(c[x], c[y], c[z], c[w]); - } - - inline friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) { - color_quad result(lhs); - result += rhs; - return result; - } - - inline friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) { - color_quad result(lhs); - result -= rhs; - return result; - } - - inline friend color_quad operator*(const color_quad& lhs, parameter_type v) { - color_quad result(lhs); - result *= v; - return result; - } - - friend inline color_quad operator/(const color_quad& lhs, parameter_type v) { - color_quad result(lhs); - result /= v; - return result; - } - - friend inline color_quad operator*(parameter_type v, const color_quad& rhs) { - color_quad result(rhs); - result *= v; - return result; - } - - inline uint32 get_min_component_index(bool alpha = true) const { - uint32 index = 0; - uint32 limit = alpha ? cNumComps : (cNumComps - 1); - for (uint32 i = 1; i < limit; i++) - if (c[i] < c[index]) - index = i; - return index; - } - - inline uint32 get_max_component_index(bool alpha = true) const { - uint32 index = 0; - uint32 limit = alpha ? 
cNumComps : (cNumComps - 1); - for (uint32 i = 1; i < limit; i++) - if (c[i] > c[index]) - index = i; - return index; - } - - inline void get_float4(float* pDst) { - for (uint32 i = 0; i < 4; i++) - pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); - } - - inline void get_float3(float* pDst) { - for (uint32 i = 0; i < 3; i++) - pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); - } - - static inline color_quad make_black() { - return color_quad(0, 0, 0, component_traits::cMax); - } - - static inline color_quad make_white() { - return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); - } -}; // class color_quad + m_size -= n; + } -#ifdef _MSC_VER -#pragma warning(pop) -#endif + inline void erase(uint32 index) + { + erase(index, 1); + } -template -struct scalar_type > { - enum { cFlag = true }; - static inline void construct(color_quad* p) {} - static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } - static inline void construct_array(color_quad* p, uint32 n) { p, n; } - static inline void destruct(color_quad* p) { p; } - static inline void destruct_array(color_quad* p, uint32 n) { p, n; } -}; - -typedef color_quad color_quad_u8; -typedef color_quad color_quad_i16; -typedef color_quad color_quad_u16; -typedef color_quad color_quad_i32; -typedef color_quad color_quad_u32; -typedef color_quad color_quad_f; -typedef color_quad color_quad_d; - -} // namespace crnd + inline void erase(T* p) + { + CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(p - m_p); + } -// File: crnd_dxt.h -namespace crnd { -enum dxt_format { - cDXTInvalid = -1, + inline bool operator==(const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { 
+ const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32 i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } - // cDXT1/1A must appear first! - cDXT1, - cDXT1A, + return true; + } - cDXT3, - cDXT5, - cDXT5A, + inline bool operator<(const vector& rhs) const + { + const uint32 min_size = math::minimum(m_size, rhs.m_size); - cDXN_XY, // inverted relative to standard ATI2, 360's DXN - cDXN_YX // standard ATI2 -}; + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; -enum dxt_constants { - cDXTBlockShift = 2U, - cDXTBlockSize = 1U << cDXTBlockShift, + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } - cDXT1BytesPerBlock = 8U, - cDXT5NBytesPerBlock = 16U, + if (pSrc < pSrc_end) + return *pSrc < *pDst; - cDXT1SelectorBits = 2U, - cDXT1SelectorValues = 1U << cDXT1SelectorBits, - cDXT1SelectorMask = cDXT1SelectorValues - 1U, + return m_size < rhs.m_size; + } - cDXT5SelectorBits = 3U, - cDXT5SelectorValues = 1U << cDXT5SelectorBits, - cDXT5SelectorMask = cDXT5SelectorValues - 1U -}; + void swap(vector& other) + { + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); + } -const float cDXT1MaxLinearValue = 3.0f; -const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; + private: + T* m_p; + uint32 m_size; + uint32 m_capacity; + bool m_alloc_failed; + + template + struct is_vector + { + enum { cFlag = false }; + }; + + template + struct is_vector> + { + enum { cFlag = true }; + }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + helpers::construct(pDst, *pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } -const float cDXT5MaxLinearValue = 7.0f; -const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; + inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) + { + if 
(!reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? NULL : object_mover)) + { + m_alloc_failed = true; + return false; + } + return true; + } + }; -// Converts DXT1 raw color selector index to a linear value. -extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; +#ifdef _MSC_VER +#pragma warning(pop) +#endif -// Converts DXT5 raw alpha selector index to a linear value. -extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; + extern void vector_test(); +} // namespace crnd -// Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). -extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; +// File: crnd_private.h +namespace crnd +{ + const crn_header* crnd_get_header(const void* pData, uint32 data_size); +} // namespace crnd -// Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). -extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; +// File: checksum.h +namespace crnd +{ + // crc16() intended for small buffers - doesn't use an acceleration table. 
+ const uint16 cInitCRC16 = 0; + uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); +} // namespace crnd -extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; -extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; +// File: crnd_color.h +namespace crnd +{ + template + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT8_MIN, + cMax = cUINT8_MAX + }; + }; -struct dxt1_block { - uint8 m_low_color[2]; - uint8 m_high_color[2]; + template<> + struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT16_MIN, + cMax = cINT16_MAX + }; + }; + + template<> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT16_MIN, + cMax = cUINT16_MAX + }; + }; + + template<> + struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; - enum { cNumSelectorBytes = 4 }; - uint8 m_selectors[cNumSelectorBytes]; + template<> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT32_MIN, + cMax = cUINT32_MAX + }; + }; + + template<> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; - inline void clear() { - utils::zero_this(this); - } + template<> + struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; - // These methods assume the in-memory rep is in LE byte order. 
- inline uint32 get_low_color() const { - return m_low_color[0] | (m_low_color[1] << 8U); - } +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4201) // warning C4201: nonstandard extension used : nameless struct/union +#pragma warning(disable : 4127) // warning C4127: conditional expression is constant +#endif - inline uint32 get_high_color() const { - return m_high_color[0] | (m_high_color[1] << 8U); - } + template + class color_quad : public helpers::rel_ops> + { + static parameter_type clamp(parameter_type v) + { + if (component_traits::cFloat) + return v; + else + { + if (v < component_traits::cMin) + return component_traits::cMin; + else if (v > component_traits::cMax) + return component_traits::cMax; + return v; + } + } - inline void set_low_color(uint16 c) { - m_low_color[0] = static_cast(c & 0xFF); - m_low_color[1] = static_cast((c >> 8) & 0xFF); - } + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; - inline void set_high_color(uint16 c) { - m_high_color[0] = static_cast(c & 0xFF); - m_high_color[1] = static_cast((c >> 8) & 0xFF); - } + enum { cNumComps = 4 }; - inline uint32 get_selector(uint32 x, uint32 y) const { - CRND_ASSERT((x < 4U) && (y < 4U)); - return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; - } + union + { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; - inline void set_selector(uint32 x, uint32 y, uint32 val) { - CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + component_type c[cNumComps]; + }; - m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); - m_selectors[y] |= (val << (x * cDXT1SelectorBits)); - } + inline color_quad() + { + } - static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); - static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); + inline color_quad(eClear) : + 
r(0), + g(0), + b(0), + a(0) + { + } - static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); - static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); + inline color_quad(const color_quad& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } - static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); - static uint32 get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); - // pDst must point to an array at least cDXT1SelectorValues long. - static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) + { + set(y, alpha); + } - static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); - static uint32 pack_endpoints(uint32 lo, uint32 hi); -}; + inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } -CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); + template + inline color_quad(const color_quad& other) : + r(clamp(other.r)), + g(clamp(other.g)), + b(clamp(other.b)), + a(clamp(other.a)) + { + } -struct dxt3_block { - enum { cNumAlphaBytes = 8 }; - uint8 m_alpha[cNumAlphaBytes]; + inline void clear() + { + r = 0; + g = 0; + b = 0; + a = 0; + } - void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); - uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; -}; + inline color_quad& operator=(const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } -CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); + template + inline color_quad& operator=(const color_quad& other) + { + r = clamp(other.r); + g = clamp(other.g); + b = clamp(other.b); + a = clamp(other.a); + return *this; + } -struct dxt5_block { - uint8 m_endpoints[2]; + inline color_quad& 
set(parameter_type y, parameter_type alpha = component_traits::cMax) + { + y = clamp(y); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } - enum { cNumSelectorBytes = 6 }; - uint8 m_selectors[cNumSelectorBytes]; + inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } - inline void clear() { - utils::zero_this(this); - } + inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } - inline uint32 get_low_alpha() const { - return m_endpoints[0]; - } + inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } - inline uint32 get_high_alpha() const { - return m_endpoints[1]; - } + static inline parameter_type get_min_comp() + { + return component_traits::cMin; + } - inline void set_low_alpha(uint32 i) { - CRND_ASSERT(i <= cUINT8_MAX); - m_endpoints[0] = static_cast(i); - } + static inline parameter_type get_max_comp() + { + return component_traits::cMax; + } - inline void set_high_alpha(uint32 i) { - CRND_ASSERT(i <= cUINT8_MAX); - m_endpoints[1] = static_cast(i); - } + static inline bool get_comps_are_signed() + { + return component_traits::cSigned; + } - uint32 get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + inline component_type operator[](uint32 i) const + { + CRND_ASSERT(i < cNumComps); + return c[i]; + } - uint32 get_selectors_as_word(uint32 index) { - CRND_ASSERT(index < 3); - return m_selectors[index * 2] | 
(m_selectors[index * 2 + 1] << 8); - } + inline component_type& operator[](uint32 i) + { + CRND_ASSERT(i < cNumComps); + return c[i]; + } - inline uint32 get_selector(uint32 x, uint32 y) const { - CRND_ASSERT((x < 4U) && (y < 4U)); + inline color_quad& set_component(uint32 i, parameter_type f) + { + CRND_ASSERT(i < cNumComps); - uint32 selector_index = (y * 4) + x; - uint32 bit_index = selector_index * cDXT5SelectorBits; + c[i] = static_cast(clamp(f)); - uint32 byte_index = bit_index >> 3; - uint32 bit_ofs = bit_index & 7; + return *this; + } - uint32 v = m_selectors[byte_index]; - if (byte_index < (cNumSelectorBytes - 1)) - v |= (m_selectors[byte_index + 1] << 8); + inline color_quad& clamp(const color_quad& l, const color_quad& h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + return *this; + } - return (v >> bit_ofs) & 7; - } + inline color_quad& clamp(parameter_type l, parameter_type h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l, h)); + return *this; + } - inline void set_selector(uint32 x, uint32 y, uint32 val) { - CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); + } - uint32 selector_index = (y * 4) + x; - uint32 bit_index = selector_index * cDXT5SelectorBits; + // Returns REC 709 luma. + inline parameter_type get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } - uint32 byte_index = bit_index >> 3; - uint32 bit_ofs = bit_index & 7; + inline uint32 squared_distance(const color_quad& c, bool alpha = true) const + { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? 
math::square(a - c.a) : 0); + } - uint32 v = m_selectors[byte_index]; - if (byte_index < (cNumSelectorBytes - 1)) - v |= (m_selectors[byte_index + 1] << 8); + inline bool rgb_equals(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } - v &= (~(7 << bit_ofs)); - v |= (val << bit_ofs); + inline bool operator==(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } - m_selectors[byte_index] = static_cast(v); - if (byte_index < (cNumSelectorBytes - 1)) - m_selectors[byte_index + 1] = static_cast(v >> 8); - } + inline bool operator<(const color_quad& rhs) const + { + for (uint32 i = 0; i < cNumComps; i++) + { + if (c[i] < rhs.c[i]) + return true; + else if (!(c[i] == rhs.c[i])) + return false; + } + return false; + } - // Results written to alpha channel. - static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); - static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); - static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); + inline color_quad& operator+=(const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } - static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); - static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); - // pDst must point to an array at least cDXT5SelectorValues long. 
- static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); + inline color_quad& operator-=(const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } - static uint32 unpack_endpoint(uint32 packed, uint32 index); - static uint32 pack_endpoints(uint32 lo, uint32 hi); -}; + inline color_quad& operator*=(parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] * v)); + return *this; + } -CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); + inline color_quad& operator/=(parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(c[i] / v); + return *this; + } -} // namespace crnd + inline color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const + { + CRND_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } -// File: crnd_prefix_coding.h -#ifdef _XBOX -#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 -#else -#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 -#endif + inline friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result += rhs; + return result; + } -namespace crnd { -namespace prefix_coding { -const uint32 cMaxExpectedCodeSize = 16; -const uint32 cMaxSupportedSyms = 8192; -const uint32 cMaxTableBits = 11; + inline friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result -= rhs; + return result; + } -class decoder_tables { - public: - inline decoder_tables() - : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { - } + inline friend color_quad operator*(const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result *= v; + return result; + } - inline decoder_tables(const decoder_tables& other) - : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { - *this = other; - } + friend 
inline color_quad operator/(const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result /= v; + return result; + } - decoder_tables& operator=(const decoder_tables& other) { - if (this == &other) - return *this; + friend inline color_quad operator*(parameter_type v, const color_quad& rhs) + { + color_quad result(rhs); + result *= v; + return result; + } - clear(); + inline uint32 get_min_component_index(bool alpha = true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] < c[index]) + index = i; + return index; + } - memcpy(this, &other, sizeof(*this)); + inline uint32 get_max_component_index(bool alpha = true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] > c[index]) + index = i; + return index; + } - if (other.m_lookup) { - m_lookup = crnd_new_array(m_cur_lookup_size); - if (m_lookup) - memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); - } + inline void get_float4(float* pDst) + { + for (uint32 i = 0; i < 4; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } - if (other.m_sorted_symbol_order) { - m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); - if (m_sorted_symbol_order) - memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); - } + inline void get_float3(float* pDst) + { + for (uint32 i = 0; i < 3; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } - return *this; - } + static inline color_quad make_black() + { + return color_quad(0, 0, 0, component_traits::cMax); + } - inline void clear() { - if (m_lookup) { - crnd_delete_array(m_lookup); - m_lookup = 0; - m_cur_lookup_size = 0; - } + static inline color_quad make_white() + { + 
return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } + }; // class color_quad - if (m_sorted_symbol_order) { - crnd_delete_array(m_sorted_symbol_order); - m_sorted_symbol_order = NULL; - m_cur_sorted_symbol_order_size = 0; - } - } +#ifdef _MSC_VER +#pragma warning(pop) +#endif - inline ~decoder_tables() { - if (m_lookup) - crnd_delete_array(m_lookup); + template + struct scalar_type> + { + enum { cFlag = true }; - if (m_sorted_symbol_order) - crnd_delete_array(m_sorted_symbol_order); - } + static inline void construct(color_quad* p) + { + } - bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); + static inline void construct(color_quad* p, const color_quad& init) + { + memcpy(p, &init, sizeof(color_quad)); + } - // DO NOT use any complex classes here - it is bitwise copied. + static inline void construct_array(color_quad* p, uint32 n) + { + p, n; + } - uint32 m_num_syms; - uint32 m_total_used_syms; - uint32 m_table_bits; - uint32 m_table_shift; - uint32 m_table_max_code; - uint32 m_decode_start_code_size; + static inline void destruct(color_quad* p) + { + p; + } - uint8 m_min_code_size; - uint8 m_max_code_size; + static inline void destruct_array(color_quad* p, uint32 n) + { + p, n; + } + }; - uint32 m_max_codes[cMaxExpectedCodeSize + 1]; - int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; + typedef color_quad color_quad_u8; + typedef color_quad color_quad_i16; + typedef color_quad color_quad_u16; + typedef color_quad color_quad_i32; + typedef color_quad color_quad_u32; + typedef color_quad color_quad_f; + typedef color_quad color_quad_d; +} // namespace crnd - uint32 m_cur_lookup_size; - uint32* m_lookup; +// File: crnd_dxt.h +namespace crnd +{ + enum dxt_format + { + cDXTInvalid = -1, - uint32 m_cur_sorted_symbol_order_size; - uint16* m_sorted_symbol_order; + // cDXT1/1A must appear first! 
+ cDXT1, + cDXT1A, - inline uint32 get_unshifted_max_code(uint32 len) const { - CRND_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); - uint32 k = m_max_codes[len - 1]; - if (!k) - return crnd::cUINT32_MAX; - return (k - 1) >> (16 - len); - } -}; + cDXT3, + cDXT5, + cDXT5A, -} // namespace prefix_coding + cDXN_XY, + // inverted relative to standard ATI2, 360's DXN + cDXN_YX // standard ATI2 + }; -} // namespace crnd + enum dxt_constants + { + cDXTBlockShift = 2U, + cDXTBlockSize = 1U << cDXTBlockShift, -// File: crnd_symbol_codec.h -namespace crnd { -class static_huffman_data_model { - public: - static_huffman_data_model(); - static_huffman_data_model(const static_huffman_data_model& other); - ~static_huffman_data_model(); + cDXT1BytesPerBlock = 8U, + cDXT5NBytesPerBlock = 16U, - static_huffman_data_model& operator=(const static_huffman_data_model& rhs); + cDXT1SelectorBits = 2U, + cDXT1SelectorValues = 1U << cDXT1SelectorBits, + cDXT1SelectorMask = cDXT1SelectorValues - 1U, - bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); - void clear(); + cDXT5SelectorBits = 3U, + cDXT5SelectorValues = 1U << cDXT5SelectorBits, + cDXT5SelectorMask = cDXT5SelectorValues - 1U + }; - inline bool is_valid() const { return m_pDecode_tables != NULL; } + const float cDXT1MaxLinearValue = 3.0f; + const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; - inline uint32 get_total_syms() const { return m_total_syms; } + const float cDXT5MaxLinearValue = 7.0f; + const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; - inline uint32 get_code_size(uint32 sym) const { return m_code_sizes[sym]; } + // Converts DXT1 raw color selector index to a linear value. + extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; - inline const uint8* get_code_sizes() const { return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } + // Converts DXT5 raw alpha selector index to a linear value. 
+ extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; - public: - uint32 m_total_syms; - crnd::vector m_code_sizes; - prefix_coding::decoder_tables* m_pDecode_tables; + // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). + extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; - private: - bool prepare_decoder_tables(); - uint compute_decoder_table_bits() const; + // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). + extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; - friend class symbol_codec; -}; + extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; + extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; -class symbol_codec { - public: - symbol_codec(); + struct dxt1_block + { + uint8 m_low_color[2]; + uint8 m_high_color[2]; - bool start_decoding(const uint8* pBuf, uint32 buf_size); - bool decode_receive_static_data_model(static_huffman_data_model& model); + enum { cNumSelectorBytes = 4 }; - uint32 decode_bits(uint32 num_bits); - uint32 decode(const static_huffman_data_model& model); + uint8 m_selectors[cNumSelectorBytes]; - uint64 stop_decoding(); + inline void clear() + { + utils::zero_this(this); + } - public: - const uint8* m_pDecode_buf; - const uint8* m_pDecode_buf_next; - const uint8* m_pDecode_buf_end; - uint32 m_decode_buf_size; + // These methods assume the in-memory rep is in LE byte order. 
+ inline uint32 get_low_color() const + { + return m_low_color[0] | (m_low_color[1] << 8U); + } - typedef uint32 bit_buf_type; - enum { cBitBufSize = 32U }; - bit_buf_type m_bit_buf; + inline uint32 get_high_color() const + { + return m_high_color[0] | (m_high_color[1] << 8U); + } - int m_bit_count; + inline void set_low_color(uint16 c) + { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } - private: - void get_bits_init(); - uint32 get_bits(uint32 num_bits); -}; + inline void set_high_color(uint16 c) + { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } -} // namespace crnd + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } -namespace crnd { -void crnd_assert(const char* pExp, const char* pFile, unsigned line) { - char buf[512]; + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); -#if defined(WIN32) && defined(_MSC_VER) - sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); -#else - sprintf(buf, "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); -#endif + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } - crnd_output_debug_string(buf); + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); + static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); - puts(buf); + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); + static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); - if (crnd_is_debugger_present()) - crnd_debug_break(); -} + static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint32 
get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + // pDst must point to an array at least cDXT1SelectorValues long. + static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); -void crnd_trace(const char* pFmt, va_list args) { - if (crnd_is_debugger_present()) { - char buf[512]; -#if defined(WIN32) && defined(_MSC_VER) - vsprintf_s(buf, sizeof(buf), pFmt, args); -#else - vsprintf(buf, pFmt, args); -#endif + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; - crnd_output_debug_string(buf); - } -}; + CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); -void crnd_trace(const char* pFmt, ...) { - va_list args; - va_start(args, pFmt); - crnd_trace(pFmt, args); - va_end(args); -}; + struct dxt3_block + { + enum { cNumAlphaBytes = 8 }; -} // namespace crnd + uint8 m_alpha[cNumAlphaBytes]; -// File: checksum.cpp -// From the public domain stb.h header. 
-namespace crnd { -uint16 crc16(const void* pBuf, uint32 len, uint16 crc) { - crc = ~crc; - - const uint8* p = reinterpret_cast(pBuf); - while (len) { - const uint16 q = *p++ ^ (crc >> 8U); - crc <<= 8U; - - uint16 r = (q >> 4U) ^ q; - crc ^= r; - r <<= 5U; - crc ^= r; - r <<= 7U; - crc ^= r; - - len--; - } - - return static_cast(~crc); -} + void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); + uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; + }; -} // namespace crnd + CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); -// File: crnd_vector.cpp -namespace crnd { -bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) { - CRND_ASSERT(m_size <= m_capacity); - CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); - - if (m_capacity >= min_new_capacity) - return true; - - uint32 new_capacity = min_new_capacity; - if ((grow_hint) && (!math::is_power_of_2(new_capacity))) - new_capacity = math::next_pow2(new_capacity); - - CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); - - const uint32 desired_size = element_size * new_capacity; - size_t actual_size; - if (!pMover) { - void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); - if (!new_p) - return false; - m_p = new_p; - } else { - void* new_p = crnd_malloc(desired_size, &actual_size); - if (!new_p) - return false; - - (*pMover)(new_p, m_p, m_size); - - if (m_p) - crnd_free(m_p); - - m_p = new_p; - } - - if (actual_size > desired_size) - m_capacity = static_cast(actual_size / element_size); - else - m_capacity = new_capacity; - - return true; -} + struct dxt5_block + { + uint8 m_endpoints[2]; -} // namespace crnd + enum { cNumSelectorBytes = 6 }; -// File: crnd_utils.cpp -namespace crnd { -namespace utils { -uint32 compute_max_mips(uint32 width, uint32 height) { - if ((width | height) == 0) - return 0; + uint8 m_selectors[cNumSelectorBytes]; - uint32 num_mips = 1; + inline void clear() + { + utils::zero_this(this); 
+ } - while ((width > 1U) || (height > 1U)) { - width >>= 1U; - height >>= 1U; - num_mips++; - } + inline uint32 get_low_alpha() const + { + return m_endpoints[0]; + } - return num_mips; -} + inline uint32 get_high_alpha() const + { + return m_endpoints[1]; + } -} // namespace utils + inline void set_low_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[0] = static_cast(i); + } -} // namespace crnd + inline void set_high_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[1] = static_cast(i); + } -// File: crnd_prefix_coding.cpp -namespace crnd { -namespace prefix_coding { -bool decoder_tables::init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits) { - uint32 min_codes[cMaxExpectedCodeSize]; - if ((!num_syms) || (table_bits > cMaxTableBits)) - return false; + uint32 get_endpoints_as_word() const + { + return m_endpoints[0] | (m_endpoints[1] << 8); + } - m_num_syms = num_syms; + uint32 get_selectors_as_word(uint32 index) + { + CRND_ASSERT(index < 3); + return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); + } - uint32 num_codes[cMaxExpectedCodeSize + 1]; - utils::zero_object(num_codes); + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); - for (uint32 i = 0; i < num_syms; i++) { - uint32 c = pCodesizes[i]; - if (c) - num_codes[c]++; - } + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; - uint32 sorted_positions[cMaxExpectedCodeSize + 1]; + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; - uint32 cur_code = 0; + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); - uint32 total_used_syms = 0; - uint32 max_code_size = 0; - uint32 min_code_size = cUINT32_MAX; - for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) { - const uint32 n = num_codes[i]; + return (v >> bit_ofs) & 7; + } - if (!n) - m_max_codes[i - 1] = 0; //UINT_MAX; - else { - 
min_code_size = math::minimum(min_code_size, i); - max_code_size = math::maximum(max_code_size, i); + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); - min_codes[i - 1] = cur_code; + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; - m_max_codes[i - 1] = cur_code + n - 1; - m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; - m_val_ptrs[i - 1] = total_used_syms; + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); - sorted_positions[i] = total_used_syms; + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); - cur_code += n; - total_used_syms += n; - } + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } - cur_code <<= 1; - } + // Results written to alpha channel. + static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); - m_total_used_syms = total_used_syms; + static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); + // pDst must point to an array at least cDXT5SelectorValues long. 
+ static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); - if (total_used_syms > m_cur_sorted_symbol_order_size) { - m_cur_sorted_symbol_order_size = total_used_syms; + static uint32 unpack_endpoint(uint32 packed, uint32 index); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; - if (!math::is_power_of_2(total_used_syms)) - m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); +} // namespace crnd - if (m_sorted_symbol_order) - crnd_delete_array(m_sorted_symbol_order); +// File: crnd_prefix_coding.h +#ifdef _XBOX +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 +#else +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 +#endif - m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); - if (!m_sorted_symbol_order) - return false; - } +namespace crnd +{ + namespace prefix_coding + { + const uint32 cMaxExpectedCodeSize = 16; + const uint32 cMaxSupportedSyms = 8192; + const uint32 cMaxTableBits = 11; + + class decoder_tables + { + public: + inline decoder_tables() : + m_cur_lookup_size(0), + m_lookup(NULL), + m_cur_sorted_symbol_order_size(0), + m_sorted_symbol_order(NULL) + { + } - m_min_code_size = static_cast(min_code_size); - m_max_code_size = static_cast(max_code_size); + inline decoder_tables(const decoder_tables& other) : + m_cur_lookup_size(0), + m_lookup(NULL), + m_cur_sorted_symbol_order_size(0), + m_sorted_symbol_order(NULL) + { + *this = other; + } - for (uint32 i = 0; i < num_syms; i++) { - uint32 c = pCodesizes[i]; - if (c) { - CRND_ASSERT(num_codes[c]); + decoder_tables& operator=(const decoder_tables& other) + { + if (this == &other) + return *this; - uint32 sorted_pos = sorted_positions[c]++; + clear(); - CRND_ASSERT(sorted_pos < total_used_syms); + memcpy(this, &other, sizeof(*this)); - m_sorted_symbol_order[sorted_pos] = static_cast(i); - } - } + if (other.m_lookup) + { + m_lookup = crnd_new_array(m_cur_lookup_size); + if (m_lookup) + 
memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } - if (table_bits <= m_min_code_size) - table_bits = 0; - m_table_bits = table_bits; + if (other.m_sorted_symbol_order) + { + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (m_sorted_symbol_order) + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } - if (table_bits) { - uint32 table_size = 1 << table_bits; - if (table_size > m_cur_lookup_size) { - m_cur_lookup_size = table_size; + return *this; + } - if (m_lookup) - crnd_delete_array(m_lookup); + inline void clear() + { + if (m_lookup) + { + crnd_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + crnd_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } - m_lookup = crnd_new_array(table_size); - if (!m_lookup) - return false; - } + inline ~decoder_tables() + { + if (m_lookup) + crnd_delete_array(m_lookup); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + } + + bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); + + // DO NOT use any complex classes here - it is bitwise copied. 
- memset(m_lookup, 0xFF, (uint)sizeof(m_lookup[0]) * (1UL << table_bits)); + uint32 m_num_syms; + uint32 m_total_used_syms; + uint32 m_table_bits; + uint32 m_table_shift; + uint32 m_table_max_code; + uint32 m_decode_start_code_size; - for (uint32 codesize = 1; codesize <= table_bits; codesize++) { - if (!num_codes[codesize]) - continue; + uint8 m_min_code_size; + uint8 m_max_code_size; - const uint32 fillsize = table_bits - codesize; - const uint32 fillnum = 1 << fillsize; + uint32 m_max_codes[cMaxExpectedCodeSize + 1]; + int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; - const uint32 min_code = min_codes[codesize - 1]; - const uint32 max_code = get_unshifted_max_code(codesize); - const uint32 val_ptr = m_val_ptrs[codesize - 1]; + uint32 m_cur_lookup_size; + uint32* m_lookup; - for (uint32 code = min_code; code <= max_code; code++) { - const uint32 sym_index = m_sorted_symbol_order[val_ptr + code - min_code]; - CRND_ASSERT(pCodesizes[sym_index] == codesize); + uint32 m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; - for (uint32 j = 0; j < fillnum; j++) { - const uint32 t = j + (code << fillsize); + inline uint32 get_unshifted_max_code(uint32 len) const + { + CRND_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); + uint32 k = m_max_codes[len - 1]; + if (!k) + return crnd::cUINT32_MAX; + return (k - 1) >> (16 - len); + } + }; + } // namespace prefix_coding +} // namespace crnd + +// File: crnd_symbol_codec.h +namespace crnd +{ + class static_huffman_data_model + { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); - CRND_ASSERT(t < (1U << table_bits)); + static_huffman_data_model& operator=(const static_huffman_data_model& rhs); - CRND_ASSERT(m_lookup[t] == cUINT32_MAX); + bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); + void clear(); - m_lookup[t] = sym_index | (codesize << 16U); + inline bool is_valid() const + { + return 
m_pDecode_tables != NULL; } - } - } - } - for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) - m_val_ptrs[i] -= min_codes[i]; + inline uint32 get_total_syms() const + { + return m_total_syms; + } - m_table_max_code = 0; - m_decode_start_code_size = m_min_code_size; + inline uint32 get_code_size(uint32 sym) const + { + return m_code_sizes[sym]; + } - if (table_bits) { - uint32 i; - for (i = table_bits; i >= 1; i--) { - if (num_codes[i]) { - m_table_max_code = m_max_codes[i - 1]; - break; - } - } - if (i >= 1) { - m_decode_start_code_size = table_bits + 1; - for (uint32 j = table_bits + 1; j <= max_code_size; j++) { - if (num_codes[j]) { - m_decode_start_code_size = j; - break; + inline const uint8* get_code_sizes() const + { + return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } - } - } - } - // sentinels - m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; - m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + public: + uint32 m_total_syms; + crnd::vector m_code_sizes; + prefix_coding::decoder_tables* m_pDecode_tables; - m_table_shift = 32 - m_table_bits; - return true; -} + private: + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; -} // namespace prefix_codig + friend class symbol_codec; + }; -} // namespace crnd + class symbol_codec + { + public: + symbol_codec(); -// File: crnd_platform.cpp -namespace crnd { -bool crnd_is_debugger_present() { -#ifdef CRND_DEVEL - return IsDebuggerPresent() != 0; -#else - return false; -#endif -} + bool start_decoding(const uint8* pBuf, uint32 buf_size); + bool decode_receive_static_data_model(static_huffman_data_model& model); -void crnd_debug_break() { -#ifdef CRND_DEVEL - DebugBreak(); -#endif -} + uint32 decode_bits(uint32 num_bits); + uint32 decode(const static_huffman_data_model& model); -void crnd_output_debug_string(const char* p) { - (void)p; -#ifdef CRND_DEVEL - OutputDebugStringA(p); -#endif -} + uint64 stop_decoding(); -} // namespace crnd + public: + const uint8* m_pDecode_buf; + const uint8* 
m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + uint32 m_decode_buf_size; -// File: crnd_mem.cpp -namespace crnd { -const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + typedef uint32 bit_buf_type; -static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) { - void* p_new; + enum { cBitBufSize = 32U }; - if (!p) { - p_new = ::malloc(size); + bit_buf_type m_bit_buf; - if (pActual_size) { -#ifdef WIN32 - *pActual_size = p_new ? ::_msize(p_new) : 0; -#elif defined(__APPLE__) - *pActual_size = p_new ? malloc_size(p_new) : 0; -#else - *pActual_size = p_new ? malloc_usable_size(p_new) : 0; -#endif - } - } else if (!size) { - ::free(p); - p_new = NULL; + int m_bit_count; - if (pActual_size) - *pActual_size = 0; - } else { - void* p_final_block = p; -#ifdef WIN32 - p_new = ::_expand(p, size); + private: + void get_bits_init(); + uint32 get_bits(uint32 num_bits); + }; +} // namespace crnd + +namespace crnd +{ + void crnd_assert(const char* pExp, const char* pFile, unsigned line) + { + char buf[512]; + +#if defined(WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); #else - p_new = NULL; + sprintf(buf, "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); #endif - if (p_new) - p_final_block = p_new; - else if (movable) { - p_new = ::realloc(p, size); + crnd_output_debug_string(buf); - if (p_new) - p_final_block = p_new; - } + puts(buf); - if (pActual_size) { -#ifdef WIN32 - *pActual_size = ::_msize(p_final_block); -#elif defined(__APPLE__) - *pActual_size = ::malloc_size(p_final_block); -#else - *pActual_size = ::malloc_usable_size(p_final_block); -#endif + if (crnd_is_debugger_present()) + crnd_debug_break(); } - } - - return p_new; -} -static size_t crnd_default_msize(void* p, void* pUser_data) { - (void)pUser_data; -#ifdef WIN32 - return p ? _msize(p) : 0; -#elif defined(__APPLE__) - return p ? 
malloc_size(p) : 0; + void crnd_trace(const char* pFmt, va_list args) + { + if (crnd_is_debugger_present()) + { + char buf[512]; +#if defined(WIN32) && defined(_MSC_VER) + vsprintf_s(buf, sizeof(buf), pFmt, args); #else - return p ? malloc_usable_size(p) : 0; + vsprintf(buf, pFmt, args); #endif -} -static crnd_realloc_func g_pRealloc = crnd_default_realloc; -static crnd_msize_func g_pMSize = crnd_default_msize; -static void* g_pUser_data; - -void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) { - if ((!pRealloc) || (!pMSize)) { - g_pRealloc = crnd_default_realloc; - g_pMSize = crnd_default_msize; - g_pUser_data = NULL; - } else { - g_pRealloc = pRealloc; - g_pMSize = pMSize; - g_pUser_data = pUser_data; - } -} + crnd_output_debug_string(buf); + } + }; -static inline void crnd_mem_error(const char* p_msg) { - crnd_assert(p_msg, __FILE__, __LINE__); -} + void crnd_trace(const char* pFmt, ...) + { + va_list args; + va_start(args, pFmt); + crnd_trace(pFmt, args); + va_end(args); + }; +} // namespace crnd -void* crnd_malloc(size_t size, size_t* pActual_size) { - size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); - if (!size) - size = sizeof(uint32); +// File: checksum.cpp +// From the public domain stb.h header. 
+namespace crnd +{ + uint16 crc16(const void* pBuf, uint32 len, uint16 crc) + { + crc = ~crc; + + const uint8* p = reinterpret_cast(pBuf); + while (len) + { + const uint16 q = *p++ ^ (crc >> 8U); + crc <<= 8U; + + uint16 r = (q >> 4U) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + + len--; + } - if (size > MAX_POSSIBLE_BLOCK_SIZE) { - crnd_mem_error("crnd_malloc: size too big"); - return NULL; - } + return static_cast(~crc); + } +} // namespace crnd - size_t actual_size = size; - uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); +// File: crnd_vector.cpp +namespace crnd +{ + bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) + { + CRND_ASSERT(m_size <= m_capacity); + CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + uint32 new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(new_capacity))) + new_capacity = math::next_pow2(new_capacity); + + CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const uint32 desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + return false; + m_p = new_p; + } + else + { + void* new_p = crnd_malloc(desired_size, &actual_size); + if (!new_p) + return false; - if (pActual_size) - *pActual_size = actual_size; + (*pMover)(new_p, m_p, m_size); - if ((!p_new) || (actual_size < size)) { - crnd_mem_error("crnd_malloc: out of memory"); - return NULL; - } + if (m_p) + crnd_free(m_p); - CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + m_p = new_p; + } - return p_new; -} + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = new_capacity; -void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool 
movable) { - if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { - crnd_mem_error("crnd_realloc: bad ptr"); - return NULL; - } + return true; + } +} // namespace crnd - if (size > MAX_POSSIBLE_BLOCK_SIZE) { - crnd_mem_error("crnd_malloc: size too big"); - return NULL; - } +// File: crnd_utils.cpp +namespace crnd +{ + namespace utils + { + uint32 compute_max_mips(uint32 width, uint32 height) + { + if ((width | height) == 0) + return 0; + + uint32 num_mips = 1; + + while ((width > 1U) || (height > 1U)) + { + width >>= 1U; + height >>= 1U; + num_mips++; + } - size_t actual_size = size; - void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + return num_mips; + } + } // namespace utils +} // namespace crnd - if (pActual_size) - *pActual_size = actual_size; +// File: crnd_prefix_coding.cpp +namespace crnd +{ + namespace prefix_coding + { + bool decoder_tables::init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits) + { + uint32 min_codes[cMaxExpectedCodeSize]; + if ((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + m_num_syms = num_syms; + + uint32 num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint32 i = 0; i < num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + num_codes[c]++; + } - CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + uint32 sorted_positions[cMaxExpectedCodeSize + 1]; - return p_new; -} + uint32 cur_code = 0; -void crnd_free(void* p) { - if (!p) - return; + uint32 total_used_syms = 0; + uint32 max_code_size = 0; + uint32 min_code_size = cUINT32_MAX; + for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint32 n = num_codes[i]; - if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { - crnd_mem_error("crnd_free: bad ptr"); - return; - } + if (!n) + m_max_codes[i - 1] = 0; //UINT_MAX; + else + { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, 
i); - (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); -} + min_codes[i - 1] = cur_code; -size_t crnd_msize(void* p) { - if (!p) - return 0; + m_max_codes[i - 1] = cur_code + n - 1; + m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); - if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { - crnd_mem_error("crnd_msize: bad ptr"); - return 0; - } + m_val_ptrs[i - 1] = total_used_syms; - return (*g_pMSize)(p, g_pUser_data); -} + sorted_positions[i] = total_used_syms; -} // namespace crnd + cur_code += n; + total_used_syms += n; + } -// File: crnd_math.cpp -namespace crnd { -namespace math { -uint32 g_bitmasks[32] = - { - 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, - 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, - 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, - 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, - 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, - 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, - 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, - 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U}; - -} // namespace math -} // namespace crnd + cur_code <<= 1; + } -// File: crnd_info.cpp -namespace crnd { -#define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + m_total_used_syms = total_used_syms; -uint32 crnd_crn_format_to_fourcc(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT1: - return CRND_FOURCC('D', 'X', 'T', '1'); - case cCRNFmtDXT3: - return CRND_FOURCC('D', 'X', 'T', '3'); - case cCRNFmtDXT5: - return CRND_FOURCC('D', 'X', 'T', '5'); - case cCRNFmtDXN_XY: - return CRND_FOURCC('A', '2', 'X', 'Y'); - case cCRNFmtDXN_YX: - return CRND_FOURCC('A', 'T', 'I', '2'); - case cCRNFmtDXT5A: - return CRND_FOURCC('A', 'T', 'I', '1'); - case cCRNFmtDXT5_CCxY: - return CRND_FOURCC('C', 'C', 'x', 'Y'); - case cCRNFmtDXT5_xGxR: - return CRND_FOURCC('x', 'G', 'x', 'R'); - case cCRNFmtDXT5_xGBR: - return CRND_FOURCC('x', 'G', 'B', 'R'); - case cCRNFmtDXT5_AGBR: - return CRND_FOURCC('A', 'G', 'B', 'R'); - case cCRNFmtETC1: - 
return CRND_FOURCC('E', 'T', 'C', '1'); - case cCRNFmtETC2: - return CRND_FOURCC('E', 'T', 'C', '2'); - case cCRNFmtETC2A: - return CRND_FOURCC('E', 'T', '2', 'A'); - case cCRNFmtETC1S: - return CRND_FOURCC('E', 'T', '1', 'S'); - case cCRNFmtETC2AS: - return CRND_FOURCC('E', '2', 'A', 'S'); - default: - break; - } - CRND_ASSERT(false); - return 0; -} + if (total_used_syms > m_cur_sorted_symbol_order_size) + { + m_cur_sorted_symbol_order_size = total_used_syms; -crn_format crnd_get_fundamental_dxt_format(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT5_CCxY: - case cCRNFmtDXT5_xGxR: - case cCRNFmtDXT5_xGBR: - case cCRNFmtDXT5_AGBR: - return cCRNFmtDXT5; - default: - break; - } - return fmt; -} + if (!math::is_power_of_2(total_used_syms)) + m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); -uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) { - switch (fmt) { - case cCRNFmtDXT1: - case cCRNFmtDXT5A: - case cCRNFmtETC1: - case cCRNFmtETC2: - case cCRNFmtETC1S: - return 4; - case cCRNFmtDXT3: - case cCRNFmtDXT5: - case cCRNFmtDXN_XY: - case cCRNFmtDXN_YX: - case cCRNFmtDXT5_CCxY: - case cCRNFmtDXT5_xGxR: - case cCRNFmtDXT5_xGBR: - case cCRNFmtDXT5_AGBR: - case cCRNFmtETC2A: - case cCRNFmtETC2AS: - return 8; - default: - break; - } - CRND_ASSERT(false); - return 0; -} + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); -uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) { - return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; -} + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (!m_sorted_symbol_order) + return false; + } -// TODO: tmp_header isn't used/This function is a helper to support old headers. 
-const crn_header* crnd_get_header(const void* pData, uint32 data_size) { - if ((!pData) || (data_size < sizeof(crn_header))) - return NULL; + m_min_code_size = static_cast(min_code_size); + m_max_code_size = static_cast(max_code_size); - const crn_header& file_header = *static_cast(pData); - if (file_header.m_sig != crn_header::cCRNSigValue) - return NULL; + for (uint32 i = 0; i < num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + { + CRND_ASSERT(num_codes[c]); - if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) - return NULL; + uint32 sorted_pos = sorted_positions[c]++; - return &file_header; -} + CRND_ASSERT(sorted_pos < total_used_syms); -bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) { - if (pFile_info) { - if (pFile_info->m_struct_size != sizeof(crn_file_info)) - return false; + m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } - memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); - } + if (table_bits <= m_min_code_size) + table_bits = 0; + m_table_bits = table_bits; - if ((!pData) || (data_size < cCRNHeaderMinSize)) - return false; + if (table_bits) + { + uint32 table_size = 1 << table_bits; + if (table_size > m_cur_lookup_size) + { + m_cur_lookup_size = table_size; - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return false; + if (m_lookup) + crnd_delete_array(m_lookup); - const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); - if (header_crc != pHeader->m_header_crc16) - return false; + m_lookup = crnd_new_array(table_size); + if (!m_lookup) + return false; + } - const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - pHeader->m_header_size); - if (data_crc != pHeader->m_data_crc16) - return false; + memset(m_lookup, 0xFF, 
(uint)sizeof(m_lookup[0]) * (1UL << table_bits)); - if ((pHeader->m_faces != 1) && (pHeader->m_faces != 6)) - return false; - if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) - return false; - if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) - return false; - if ((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) - return false; - if (((int)pHeader->m_format < cCRNFmtDXT1) || ((int)pHeader->m_format >= cCRNFmtTotal)) - return false; + for (uint32 codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + continue; - if (pFile_info) { - pFile_info->m_actual_data_size = pHeader->m_data_size; - pFile_info->m_header_size = pHeader->m_header_size; - pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; - pFile_info->m_tables_size = pHeader->m_tables_size; + const uint32 fillsize = table_bits - codesize; + const uint32 fillnum = 1 << fillsize; - pFile_info->m_levels = pHeader->m_levels; + const uint32 min_code = min_codes[codesize - 1]; + const uint32 max_code = get_unshifted_max_code(codesize); + const uint32 val_ptr = m_val_ptrs[codesize - 1]; - for (uint32 i = 0; i < pHeader->m_levels; i++) { - uint32 next_ofs = pHeader->m_data_size; + for (uint32 code = min_code; code <= max_code; code++) + { + const uint32 sym_index = m_sorted_symbol_order[val_ptr + code - min_code]; + CRND_ASSERT(pCodesizes[sym_index] == codesize); - // assumes the levels are packed together sequentially - if ((i + 1) < pHeader->m_levels) - next_ofs = pHeader->m_level_ofs[i + 1]; + for (uint32 j = 0; j < fillnum; j++) + { + const uint32 t = j + (code << fillsize); - pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; - } + CRND_ASSERT(t < (1U << table_bits)); - pFile_info->m_color_endpoint_palette_entries = 
pHeader->m_color_endpoints.m_num; - pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num; - ; - pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num; - ; - pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num; - ; - } + CRND_ASSERT(m_lookup[t] == cUINT32_MAX); - return true; -} + m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } -bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) { - if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) - return false; + for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) + m_val_ptrs[i] -= min_codes[i]; + + m_table_max_code = 0; + m_decode_start_code_size = m_min_code_size; + + if (table_bits) + { + uint32 i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + m_table_max_code = m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + m_decode_start_code_size = table_bits + 1; + for (uint32 j = table_bits + 1; j <= max_code_size; j++) + { + if (num_codes[j]) + { + m_decode_start_code_size = j; + break; + } + } + } + } - if (pInfo->m_struct_size != sizeof(crn_texture_info)) - return false; + // sentinels + m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; + m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return false; + m_table_shift = 32 - m_table_bits; + return true; + } + } // namespace prefix_codig +} // namespace crnd - pInfo->m_width = pHeader->m_width; - pInfo->m_height = pHeader->m_height; - pInfo->m_levels = pHeader->m_levels; - pInfo->m_faces = pHeader->m_faces; - pInfo->m_format = static_cast((uint32)pHeader->m_format); - pInfo->m_bytes_per_block = pHeader->m_format == cCRNFmtDXT1 || pHeader->m_format == cCRNFmtDXT5A || pHeader->m_format == cCRNFmtETC1 || pHeader->m_format == cCRNFmtETC2 || pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; - pInfo->m_userdata0 = pHeader->m_userdata0; - pInfo->m_userdata1 = pHeader->m_userdata1; +// File: crnd_platform.cpp +namespace crnd +{ + bool crnd_is_debugger_present() + { +#ifdef CRND_DEVEL + return IsDebuggerPresent() != 0; +#else + return false; +#endif + } - return true; -} + void crnd_debug_break() + { +#ifdef CRND_DEVEL + DebugBreak(); +#endif + } -bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) { - if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) - return false; + void crnd_output_debug_string(const char* p) + { + (void)p; +#ifdef CRND_DEVEL + OutputDebugStringA(p); +#endif + } +} // namespace crnd - if (pLevel_info->m_struct_size != sizeof(crn_level_info)) - return false; +// File: crnd_mem.cpp +namespace crnd +{ + const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return false; + static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) + { + void* p_new; - if (level_index >= pHeader->m_levels) - return false; + if (!p) + { + p_new = ::malloc(size); - uint32 width = math::maximum(1U, pHeader->m_width >> level_index); - uint32 height = math::maximum(1U, pHeader->m_height >> level_index); + if (pActual_size) + { +#ifdef WIN32 + *pActual_size = p_new ? ::_msize(p_new) : 0; +#elif defined(__APPLE__) + *pActual_size = p_new ? malloc_size(p_new) : 0; +#else + *pActual_size = p_new ? malloc_usable_size(p_new) : 0; +#endif + } + } + else if (!size) + { + ::free(p); + p_new = NULL; - pLevel_info->m_width = width; - pLevel_info->m_height = height; - pLevel_info->m_faces = pHeader->m_faces; - pLevel_info->m_blocks_x = (width + 3) >> 2; - pLevel_info->m_blocks_y = (height + 3) >> 2; - pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; - pLevel_info->m_format = static_cast((uint32)pHeader->m_format); + if (pActual_size) + *pActual_size = 0; + } + else + { + void* p_final_block = p; +#ifdef WIN32 + p_new = ::_expand(p, size); +#else + p_new = NULL; +#endif - return true; -} + if (p_new) + p_final_block = p_new; + else if (movable) + { + p_new = ::realloc(p, size); -const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) { - if (pSize) - *pSize = 0; + if (p_new) + p_final_block = p_new; + } - if ((!pData) || (data_size < cCRNHeaderMinSize)) - return NULL; + if (pActual_size) + { +#ifdef WIN32 + *pActual_size = ::_msize(p_final_block); +#elif defined(__APPLE__) + *pActual_size = ::malloc_size(p_final_block); +#else + *pActual_size = ::malloc_usable_size(p_final_block); +#endif + } + } - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return NULL; + return p_new; + } - if (level_index >= pHeader->m_levels) - return NULL; + static size_t crnd_default_msize(void* p, void* pUser_data) + { + (void)pUser_data; +#ifdef WIN32 + return p ? _msize(p) : 0; +#elif defined(__APPLE__) + return p ? malloc_size(p) : 0; +#else + return p ? 
malloc_usable_size(p) : 0; +#endif + } - uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; + static crnd_realloc_func g_pRealloc = crnd_default_realloc; + static crnd_msize_func g_pMSize = crnd_default_msize; + static void* g_pUser_data; - if (pSize) { - uint32 next_level_ofs = data_size; - if ((level_index + 1) < (pHeader->m_levels)) - next_level_ofs = pHeader->m_level_ofs[level_index + 1]; + void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) + { + if ((!pRealloc) || (!pMSize)) + { + g_pRealloc = crnd_default_realloc; + g_pMSize = crnd_default_msize; + g_pUser_data = NULL; + } + else + { + g_pRealloc = pRealloc; + g_pMSize = pMSize; + g_pUser_data = pUser_data; + } + } - *pSize = next_level_ofs - cur_level_ofs; - } + static inline void crnd_mem_error(const char* p_msg) + { + crnd_assert(p_msg, __FILE__, __LINE__); + } - return static_cast(pData) + cur_level_ofs; -} + void* crnd_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } -uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) { - if ((!pData) || (data_size < cCRNHeaderMinSize)) - return NULL; + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return NULL; + if (pActual_size) + *pActual_size = actual_size; - uint32 size = pHeader->m_header_size; + if ((!p_new) || (actual_size < size)) + { + crnd_mem_error("crnd_malloc: out of memory"); + return NULL; + } - size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); - size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); - size = math::maximum(size, 
pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); - size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); - size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); + CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); - return size; -} + return p_new; + } -bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) { - if ((!pData) || (data_size < cCRNHeaderMinSize)) - return false; + void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_realloc: bad ptr"); + return NULL; + } - const crn_header* pHeader = crnd_get_header(pData, data_size); - if (!pHeader) - return false; + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } - if (pHeader->m_flags & cCRNHeaderFlagSegmented) - return false; + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); - const uint actual_base_data_size = crnd_get_segmented_file_size(pData, data_size); - if (base_data_size < actual_base_data_size) - return false; + if (pActual_size) + *pActual_size = actual_size; - memcpy(pBase_data, pData, actual_base_data_size); + CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); - crn_header& new_header = *static_cast(pBase_data); - new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; - new_header.m_data_size = actual_base_data_size; + return p_new; + } - new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); + void crnd_free(void* p) + { + if (!p) + return; - new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const 
uint8*)&new_header.m_data_size - (const uint8*)&new_header)); + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_free: bad ptr"); + return; + } - CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + } - return true; -} + size_t crnd_msize(void* p) + { + if (!p) + return 0; + + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); + } +} // namespace crnd + +// File: crnd_math.cpp +namespace crnd +{ + namespace math + { + uint32 g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U }; + } // namespace math +} // namespace crnd + +// File: crnd_info.cpp +namespace crnd +{ +#define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) -} // namespace crnd + uint32 crnd_crn_format_to_fourcc(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + return CRND_FOURCC('D', 'X', 'T', '1'); + case cCRNFmtDXT3: + return CRND_FOURCC('D', 'X', 'T', '3'); + case cCRNFmtDXT5: + return CRND_FOURCC('D', 'X', 'T', '5'); + case cCRNFmtDXN_XY: + return CRND_FOURCC('A', '2', 'X', 'Y'); + case cCRNFmtDXN_YX: + return CRND_FOURCC('A', 'T', 'I', '2'); + case cCRNFmtDXT5A: + return CRND_FOURCC('A', 'T', 'I', '1'); + case cCRNFmtDXT5_CCxY: + return CRND_FOURCC('C', 'C', 'x', 'Y'); + case cCRNFmtDXT5_xGxR: + return CRND_FOURCC('x', 'G', 'x', 'R'); + case cCRNFmtDXT5_xGBR: + return CRND_FOURCC('x', 'G', 'B', 'R'); + case cCRNFmtDXT5_AGBR: + return CRND_FOURCC('A', 'G', 'B', 'R'); + case cCRNFmtETC1: + return 
CRND_FOURCC('E', 'T', 'C', '1'); + case cCRNFmtETC2: + return CRND_FOURCC('E', 'T', 'C', '2'); + case cCRNFmtETC2A: + return CRND_FOURCC('E', 'T', '2', 'A'); + case cCRNFmtETC1S: + return CRND_FOURCC('E', 'T', '1', 'S'); + case cCRNFmtETC2AS: + return CRND_FOURCC('E', '2', 'A', 'S'); + default: + break; + } + CRND_ASSERT(false); + return 0; + } + + crn_format crnd_get_fundamental_dxt_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return cCRNFmtDXT5; + default: + break; + } + return fmt; + } + + uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + case cCRNFmtDXT5A: + case cCRNFmtETC1: + case cCRNFmtETC2: + case cCRNFmtETC1S: + return 4; + case cCRNFmtDXT3: + case cCRNFmtDXT5: + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtETC2A: + case cCRNFmtETC2AS: + return 8; + default: + break; + } + CRND_ASSERT(false); + return 0; + } + + uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) + { + return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; + } + + // TODO: tmp_header isn't used/This function is a helper to support old headers. 
+ const crn_header* crnd_get_header(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < sizeof(crn_header))) + return NULL; + + const crn_header& file_header = *static_cast(pData); + if (file_header.m_sig != crn_header::cCRNSigValue) + return NULL; + + if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) + return NULL; + + return &file_header; + } + + bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) + { + if (pFile_info) + { + if (pFile_info->m_struct_size != sizeof(crn_file_info)) + return false; + + memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); + } + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); + if (header_crc != pHeader->m_header_crc16) + return false; + + const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - pHeader->m_header_size); + if (data_crc != pHeader->m_data_crc16) + return false; + + if ((pHeader->m_faces != 1) && (pHeader->m_faces != 6)) + return false; + if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) + return false; + if (((int)pHeader->m_format < cCRNFmtDXT1) || ((int)pHeader->m_format >= cCRNFmtTotal)) + return false; + + if (pFile_info) + { + pFile_info->m_actual_data_size = pHeader->m_data_size; + pFile_info->m_header_size = pHeader->m_header_size; + pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + 
pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; + pFile_info->m_tables_size = pHeader->m_tables_size; + + pFile_info->m_levels = pHeader->m_levels; + + for (uint32 i = 0; i < pHeader->m_levels; i++) + { + uint32 next_ofs = pHeader->m_data_size; + + // assumes the levels are packed together sequentially + if ((i + 1) < pHeader->m_levels) + next_ofs = pHeader->m_level_ofs[i + 1]; + + pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; + } + + pFile_info->m_color_endpoint_palette_entries = pHeader->m_color_endpoints.m_num; + pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num;; + pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num;; + pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num;; + } + + return true; + } + + bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) + { + if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) + return false; + + if (pInfo->m_struct_size != sizeof(crn_texture_info)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + pInfo->m_width = pHeader->m_width; + pInfo->m_height = pHeader->m_height; + pInfo->m_levels = pHeader->m_levels; + pInfo->m_faces = pHeader->m_faces; + pInfo->m_format = static_cast((uint32)pHeader->m_format); + pInfo->m_bytes_per_block = pHeader->m_format == cCRNFmtDXT1 || pHeader->m_format == cCRNFmtDXT5A || pHeader->m_format == cCRNFmtETC1 || pHeader->m_format == cCRNFmtETC2 || pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; + pInfo->m_userdata0 = pHeader->m_userdata0; + pInfo->m_userdata1 = pHeader->m_userdata1; + + return true; + } + + bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) + { + if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) + return false; + + if (pLevel_info->m_struct_size != sizeof(crn_level_info)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + if (level_index >= pHeader->m_levels) + return false; + + uint32 width = math::maximum(1U, pHeader->m_width >> level_index); + uint32 height = math::maximum(1U, pHeader->m_height >> level_index); + + pLevel_info->m_width = width; + pLevel_info->m_height = height; + pLevel_info->m_faces = pHeader->m_faces; + pLevel_info->m_blocks_x = (width + 3) >> 2; + pLevel_info->m_blocks_y = (height + 3) >> 2; + pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; + pLevel_info->m_format = static_cast((uint32)pHeader->m_format); + + return true; + } + + const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) + { + if (pSize) + *pSize = 0; + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return NULL; + + if (level_index >= pHeader->m_levels) + return NULL; + + uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; + + if (pSize) + { + uint32 next_level_ofs = data_size; + if ((level_index + 1) < (pHeader->m_levels)) + next_level_ofs = pHeader->m_level_ofs[level_index + 1]; + + *pSize = next_level_ofs - cur_level_ofs; + } + + return static_cast(pData) + cur_level_ofs; + } + + uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return NULL; + + uint32 size = pHeader->m_header_size; + + size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); + size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); + size = math::maximum(size, pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); + size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); + size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); + + return size; + } + + bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + if (pHeader->m_flags & cCRNHeaderFlagSegmented) + return false; + + const uint actual_base_data_size = crnd_get_segmented_file_size(pData, 
data_size); + if (base_data_size < actual_base_data_size) + return false; + + memcpy(pBase_data, pData, actual_base_data_size); + + crn_header& new_header = *static_cast(pBase_data); + new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; + new_header.m_data_size = actual_base_data_size; + + new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); + + new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const uint8*)&new_header.m_data_size - (const uint8*)&new_header)); + + CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); + + return true; + } +} // namespace crnd // File: symbol_codec.cpp -namespace crnd { -static_huffman_data_model::static_huffman_data_model() - : m_total_syms(0), - m_pDecode_tables(NULL) { -} +namespace crnd +{ + static_huffman_data_model::static_huffman_data_model() : + m_total_syms(0), + m_pDecode_tables(NULL) + { + } -static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) - : m_total_syms(0), - m_pDecode_tables(NULL) { - *this = other; -} + static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : + m_total_syms(0), + m_pDecode_tables(NULL) + { + *this = other; + } -static_huffman_data_model::~static_huffman_data_model() { - if (m_pDecode_tables) - crnd_delete(m_pDecode_tables); -} + static_huffman_data_model::~static_huffman_data_model() + { + if (m_pDecode_tables) + crnd_delete(m_pDecode_tables); + } -static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) { - if (this == &rhs) - return *this; - - m_total_syms = rhs.m_total_syms; - m_code_sizes = rhs.m_code_sizes; - if (m_code_sizes.get_alloc_failed()) { - clear(); - return *this; - } - - if (rhs.m_pDecode_tables) { - if (m_pDecode_tables) - *m_pDecode_tables = *rhs.m_pDecode_tables; - else - m_pDecode_tables = 
crnd_new(*rhs.m_pDecode_tables); - } else { - crnd_delete(m_pDecode_tables); - m_pDecode_tables = NULL; - } - - return *this; -} + static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) + { + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + m_code_sizes = rhs.m_code_sizes; + if (m_code_sizes.get_alloc_failed()) + { + clear(); + return *this; + } -void static_huffman_data_model::clear() { - m_total_syms = 0; - m_code_sizes.clear(); - if (m_pDecode_tables) { - crnd_delete(m_pDecode_tables); - m_pDecode_tables = NULL; - } -} + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnd_new(*rhs.m_pDecode_tables); + } + else + { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } -bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit) { - CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + return *this; + } + + void static_huffman_data_model::clear() + { + m_total_syms = 0; + m_code_sizes.clear(); + if (m_pDecode_tables) + { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + } + + bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit) + { + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); - code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); - if (!m_code_sizes.resize(total_syms)) - return false; + if (!m_code_sizes.resize(total_syms)) + return false; - uint32 min_code_size = cUINT32_MAX; - uint32 max_code_size = 0; + uint32 min_code_size = cUINT32_MAX; + uint32 max_code_size = 0; - for (uint32 i = 0; i < total_syms; i++) { - uint32 s = pCode_sizes[i]; - 
m_code_sizes[i] = static_cast(s); - min_code_size = math::minimum(min_code_size, s); - max_code_size = math::maximum(max_code_size, s); - } + for (uint32 i = 0; i < total_syms; i++) + { + uint32 s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } - if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) - return false; + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + return false; - if (max_code_size > code_size_limit) - return false; + if (max_code_size > code_size_limit) + return false; - if (!m_pDecode_tables) - m_pDecode_tables = crnd_new(); + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); - if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) - return false; + if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) + return false; - return true; -} + return true; + } -bool static_huffman_data_model::prepare_decoder_tables() { - uint32 total_syms = m_code_sizes.size(); + bool static_huffman_data_model::prepare_decoder_tables() + { + uint32 total_syms = m_code_sizes.size(); - CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); - m_total_syms = total_syms; + m_total_syms = total_syms; - if (!m_pDecode_tables) - m_pDecode_tables = crnd_new(); + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); - return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); -} + return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); + } -uint static_huffman_data_model::compute_decoder_table_bits() const { + uint static_huffman_data_model::compute_decoder_table_bits() const + { #if CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE return 
prefix_coding::cMaxTableBits; #else - uint32 decoder_table_bits = 0; - if (m_total_syms > 16) - decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); - return decoder_table_bits; + uint32 decoder_table_bits = 0; + if (m_total_syms > 16) + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + return decoder_table_bits; #endif -} + } -symbol_codec::symbol_codec() - : m_pDecode_buf(NULL), - m_pDecode_buf_next(NULL), - m_pDecode_buf_end(NULL), - m_decode_buf_size(0), - m_bit_buf(0), - m_bit_count(0) { -} + symbol_codec::symbol_codec() : + m_pDecode_buf(NULL), + m_pDecode_buf_next(NULL), + m_pDecode_buf_end(NULL), + m_decode_buf_size(0), + m_bit_buf(0), + m_bit_count(0) + { + } -// Code length encoding symbols: -// 0-16 - actual code lengths -const uint32 cMaxCodelengthCodes = 21; + // Code length encoding symbols: + // 0-16 - actual code lengths + const uint32 cMaxCodelengthCodes = 21; -const uint32 cSmallZeroRunCode = 17; -const uint32 cLargeZeroRunCode = 18; -const uint32 cSmallRepeatCode = 19; -const uint32 cLargeRepeatCode = 20; + const uint32 cSmallZeroRunCode = 17; + const uint32 cLargeZeroRunCode = 18; + const uint32 cSmallRepeatCode = 19; + const uint32 cLargeRepeatCode = 20; -const uint32 cMinSmallZeroRunSize = 3; -const uint32 cMaxSmallZeroRunSize = 10; -const uint32 cMinLargeZeroRunSize = 11; -const uint32 cMaxLargeZeroRunSize = 138; + const uint32 cMinSmallZeroRunSize = 3; + const uint32 cMaxSmallZeroRunSize = 10; + const uint32 cMinLargeZeroRunSize = 11; + const uint32 cMaxLargeZeroRunSize = 138; -const uint32 cSmallMinNonZeroRunSize = 3; -const uint32 cSmallMaxNonZeroRunSize = 6; -const uint32 cLargeMinNonZeroRunSize = 7; -const uint32 cLargeMaxNonZeroRunSize = 70; + const uint32 cSmallMinNonZeroRunSize = 3; + const uint32 cSmallMaxNonZeroRunSize = 6; + const uint32 cLargeMinNonZeroRunSize = 7; + const uint32 cLargeMaxNonZeroRunSize 
= 70; -const uint32 cSmallZeroRunExtraBits = 3; -const uint32 cLargeZeroRunExtraBits = 7; -const uint32 cSmallNonZeroRunExtraBits = 2; -const uint32 cLargeNonZeroRunExtraBits = 6; + const uint32 cSmallZeroRunExtraBits = 3; + const uint32 cLargeZeroRunExtraBits = 7; + const uint32 cSmallNonZeroRunExtraBits = 2; + const uint32 cLargeNonZeroRunExtraBits = 6; -static const uint8 g_most_probable_codelength_codes[] = + static const uint8 g_most_probable_codelength_codes[] = { cSmallZeroRunCode, cLargeZeroRunCode, cSmallRepeatCode, cLargeRepeatCode, @@ -2547,1291 +3038,1453 @@ static const uint8 g_most_probable_codelength_codes[] = 3, 13, 2, 14, 1, 15, - 16}; -const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); + 16 }; + const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); -bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) { - const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) + { + const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); - if (!total_used_syms) { - model.clear(); - return true; - } + if (!total_used_syms) + { + model.clear(); + return true; + } - if (!model.m_code_sizes.resize(total_used_syms)) - return false; + if (!model.m_code_sizes.resize(total_used_syms)) + return false; - memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); - const uint32 num_codelength_codes_to_send = decode_bits(5); - if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) - return false; + const uint32 num_codelength_codes_to_send = decode_bits(5); + if 
((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + return false; - static_huffman_data_model dm; - if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) - return false; + static_huffman_data_model dm; + if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) + return false; - for (uint32 i = 0; i < num_codelength_codes_to_send; i++) - dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + for (uint32 i = 0; i < num_codelength_codes_to_send; i++) + dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); - if (!dm.prepare_decoder_tables()) - return false; + if (!dm.prepare_decoder_tables()) + return false; - uint32 ofs = 0; - while (ofs < total_used_syms) { - const uint32 num_remaining = total_used_syms - ofs; + uint32 ofs = 0; + while (ofs < total_used_syms) + { + const uint32 num_remaining = total_used_syms - ofs; + + uint32 code = decode(dm); + if (code <= 16) + model.m_code_sizes[ofs++] = static_cast(code); + else if (code == cSmallZeroRunCode) + { + uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if (code == cLargeZeroRunCode) + { + uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) + { + uint32 len; + if (code == cSmallRepeatCode) + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + else + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + + if ((!ofs) || (len > num_remaining)) + return false; + const uint32 prev = model.m_code_sizes[ofs - 1]; + if (!prev) + return false; + const uint32 end = ofs + len; + while (ofs < end) + model.m_code_sizes[ofs++] = static_cast(prev); + } + else + { + CRND_ASSERT(0); + return false; + } + } - uint32 code = decode(dm); - if (code <= 16) - 
model.m_code_sizes[ofs++] = static_cast(code); - else if (code == cSmallZeroRunCode) { - uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; - if (len > num_remaining) - return false; - ofs += len; - } else if (code == cLargeZeroRunCode) { - uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; - if (len > num_remaining) - return false; - ofs += len; - } else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) { - uint32 len; - if (code == cSmallRepeatCode) - len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; - else - len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; - - if ((!ofs) || (len > num_remaining)) - return false; - const uint32 prev = model.m_code_sizes[ofs - 1]; - if (!prev) - return false; - const uint32 end = ofs + len; - while (ofs < end) - model.m_code_sizes[ofs++] = static_cast(prev); - } else { - CRND_ASSERT(0); - return false; + if (ofs != total_used_syms) + return false; + + return model.prepare_decoder_tables(); } - } - if (ofs != total_used_syms) - return false; + bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) + { + if (!buf_size) + return false; - return model.prepare_decoder_tables(); -} + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; -bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) { - if (!buf_size) - return false; + get_bits_init(); - m_pDecode_buf = pBuf; - m_pDecode_buf_next = pBuf; - m_decode_buf_size = buf_size; - m_pDecode_buf_end = pBuf + buf_size; + return true; + } - get_bits_init(); + void symbol_codec::get_bits_init() + { + m_bit_buf = 0; + m_bit_count = 0; + } - return true; -} + uint32 symbol_codec::decode_bits(uint32 num_bits) + { + if (!num_bits) + return 0; -void symbol_codec::get_bits_init() { - m_bit_buf = 0; - m_bit_count = 0; -} + if (num_bits > 16) + { + uint32 a = get_bits(num_bits - 16); + uint32 b = 
get_bits(16); -uint32 symbol_codec::decode_bits(uint32 num_bits) { - if (!num_bits) - return 0; + return (a << 16) | b; + } + else + return get_bits(num_bits); + } - if (num_bits > 16) { - uint32 a = get_bits(num_bits - 16); - uint32 b = get_bits(16); + uint32 symbol_codec::get_bits(uint32 num_bits) + { + CRND_ASSERT(num_bits <= 32U); - return (a << 16) | b; - } else - return get_bits(num_bits); -} + while (m_bit_count < (int)num_bits) + { + bit_buf_type c = 0; + if (m_pDecode_buf_next != m_pDecode_buf_end) + c = *m_pDecode_buf_next++; -uint32 symbol_codec::get_bits(uint32 num_bits) { - CRND_ASSERT(num_bits <= 32U); + m_bit_count += 8; + CRND_ASSERT(m_bit_count <= cBitBufSize); - while (m_bit_count < (int)num_bits) { - bit_buf_type c = 0; - if (m_pDecode_buf_next != m_pDecode_buf_end) - c = *m_pDecode_buf_next++; + m_bit_buf |= (c << (cBitBufSize - m_bit_count)); + } - m_bit_count += 8; - CRND_ASSERT(m_bit_count <= cBitBufSize); + uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); - m_bit_buf |= (c << (cBitBufSize - m_bit_count)); - } + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; - uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + return result; + } - m_bit_buf <<= num_bits; - m_bit_count -= num_bits; + uint32 symbol_codec::decode(const static_huffman_data_model& model) + { + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + if (m_bit_count < 24) + { + if (m_bit_count < 16) + { + uint32 c0 = 0, c1 = 0; + const uint8* p = m_pDecode_buf_next; + if (p < m_pDecode_buf_end) + c0 = *p++; + if (p < m_pDecode_buf_end) + c1 = *p++; + m_pDecode_buf_next = p; + m_bit_count += 16; + uint32 c = (c0 << 8) | c1; + m_bit_buf |= (c << (32 - m_bit_count)); + } + else + { + uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) ? 
*m_pDecode_buf_next++ : 0; + m_bit_count += 8; + m_bit_buf |= (c << (32 - m_bit_count)); + } + } - return result; -} + uint32 k = (m_bit_buf >> 16) + 1; + uint32 sym, len; -uint32 symbol_codec::decode(const static_huffman_data_model& model) { - const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; - if (m_bit_count < 24) { - if (m_bit_count < 16) { - uint32 c0 = 0, c1 = 0; - const uint8* p = m_pDecode_buf_next; - if (p < m_pDecode_buf_end) - c0 = *p++; - if (p < m_pDecode_buf_end) - c1 = *p++; - m_pDecode_buf_next = p; - m_bit_count += 16; - uint32 c = (c0 << 8) | c1; - m_bit_buf |= (c << (32 - m_bit_count)); - } else { - uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) ? *m_pDecode_buf_next++ : 0; - m_bit_count += 8; - m_bit_buf |= (c << (32 - m_bit_count)); + CRND_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRND_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for (;;) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); + + if (((uint32)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRND_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; } - } - uint32 k = (m_bit_buf >> 16) + 1; - uint32 sym, len; + uint64 symbol_codec::stop_decoding() + { + return static_cast(m_pDecode_buf_next - m_pDecode_buf); + } +} // namespace crnd + +// File: crnd_dxt.cpp +namespace crnd +{ + const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = { 0U, 3U, 1U, 2U }; + const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = { 0U, 2U, 3U, 1U }; + const uint8 g_etc1_from_linear[cDXT1SelectorValues] = { 3U, 2U, 0U, 1U }; - if (k <= pTables->m_table_max_code) { - uint32 t = 
pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; + const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = { 0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U }; + const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = { 0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U }; - CRND_ASSERT(t != cUINT32_MAX); - sym = t & cUINT16_MAX; - len = t >> 16; + const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 5, 4, 3, 2, 6, 7 }; + const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 7, 6, 5, 4, 3, 2 }; + + uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) + { + uint32 r = color.r; + uint32 g = color.g; + uint32 b = color.b; + + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } - CRND_ASSERT(model.m_code_sizes[sym] == len); - } else { - len = pTables->m_decode_start_code_size; + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); - for (;;) { - if (k <= pTables->m_max_codes[len - 1]) - break; - len++; + return static_cast(b | (g << 5U) | (r << 11U)); } - int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); + uint16 dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) + { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); + } + + color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) + { + uint32 b = packed_color & 31U; + uint32 g = (packed_color >> 5U) & 63U; + uint32 r = (packed_color >> 11U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) | (r >> 2U); + } - if (((uint32)val_ptr >= model.m_total_syms)) { - // corrupted stream, or a bug - CRND_ASSERT(0); - return 0; + return color_quad_u8(r, g, b, alpha); } - sym = pTables->m_sorted_symbol_order[val_ptr]; - } + void dxt1_block::unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled) + { + color_quad_u8 c(unpack_color(packed_color, 
scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } - m_bit_buf <<= len; - m_bit_count -= len; + uint32 dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); - return sym; -} + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + pDst[3].set(0, 0, 0, 0); -uint64 symbol_codec::stop_decoding() { - return static_cast(m_pDecode_buf_next - m_pDecode_buf); -} + return 3; + } -} // namespace crnd + uint32 dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); -// File: crnd_dxt.cpp -namespace crnd { -const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; -const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = {0U, 2U, 3U, 1U}; -const uint8 g_etc1_from_linear[cDXT1SelectorValues] = {3U, 2U, 0U, 1U}; + pDst[0] = c0; + pDst[1] = c1; -const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = {0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U}; -const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = {0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U}; + // 12/14/09 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // Turns out some GPU's round and some don't. Great. 
+ //pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); -const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 5, 4, 3, 2, 6, 7}; -const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 7, 6, 5, 4, 3, 2}; + pDst[2].set((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); -uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) { - uint32 r = color.r; - uint32 g = color.g; - uint32 b = color.b; + return 4; + } - if (scaled) { - r = (r * 31U + bias) / 255U; - g = (g * 63U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } + uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + return get_block_colors4(pDst, color0, color1); + else + return get_block_colors3(pDst, color0, color1); + } - r = math::minimum(r, 31U); - g = math::minimum(g, 63U); - b = math::minimum(b, 31U); + color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) + { + CRND_ASSERT(index < 2); + return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); + } - return static_cast(b | (g << 5U) | (r << 11U)); -} + uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); + } -uint16 dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) { - return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); -} + void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); -color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) { - uint32 b = packed_color & 
31U; - uint32 g = (packed_color >> 5U) & 63U; - uint32 r = (packed_color >> 11U) & 31U; + if (scaled) + { + CRND_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } + else + { + CRND_ASSERT(value <= 0xF); + } - if (scaled) { - b = (b << 3U) | (b >> 2U); - g = (g << 2U) | (g >> 4U); - r = (r << 3U) | (r >> 2U); - } + uint32 ofs = (y << 1U) + (x >> 1U); + uint32 c = m_alpha[ofs]; - return color_quad_u8(r, g, b, alpha); -} + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); -void dxt1_block::unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled) { - color_quad_u8 c(unpack_color(packed_color, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; -} + m_alpha[ofs] = static_cast(c); + } -uint32 dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); + uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); - pDst[0] = c0; - pDst[1] = c1; - pDst[2].set((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); - pDst[3].set(0, 0, 0, 0); + uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + value >>= 4; + value &= 0xF; - return 3; -} + if (scaled) + value = (value << 4U) | value; -uint32 dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) { - color_quad_u8 c0(unpack_color(color0, true)); - color_quad_u8 c1(unpack_color(color1, true)); + return value; + } - pDst[0] = c0; - pDst[1] = c1; + uint32 dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h) / 5); + pDst[3].a = static_cast((l * 3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } - // 12/14/09 - 
Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? - // Turns out some GPU's round and some don't. Great. - //pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); - //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } - pDst[2].set((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); - pDst[3].set((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + uint32 dxt5_block::get_block_values(color_quad_u8* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } - return 4; -} + uint32 dxt5_block::get_block_values6(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; + } -uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) { - if (color0 > color1) - return get_block_colors4(pDst, color0, color1); - else - return get_block_colors3(pDst, color0, color1); -} + uint32 dxt5_block::get_block_values8(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; + } 
-color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) { - CRND_ASSERT(index < 2); - return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); -} + uint32 dxt5_block::unpack_endpoint(uint32 packed, uint32 index) + { + CRND_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; + } -uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) { - CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); - return lo | (hi << 16U); -} + uint32 dxt5_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); + } -void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) { - CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + uint32 dxt5_block::get_block_values(uint32* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } +} // namespace crnd - if (scaled) { - CRND_ASSERT(value <= 0xFF); - value = (value * 15U + 128U) / 255U; - } else { - CRND_ASSERT(value <= 0xF); - } +// File: crnd_decode.cpp - uint32 ofs = (y << 1U) + (x >> 1U); - uint32 c = m_alpha[ofs]; +namespace crnd +{ + class crn_unpacker + { + public: + inline crn_unpacker() : + m_magic(cMagicValue), + m_pData(NULL), + m_data_size(0), + m_pHeader(NULL) + { + } - c &= ~(0xF << ((x & 1U) << 2U)); - c |= (value << ((x & 1U) << 2U)); + inline ~crn_unpacker() + { + m_magic = 0; + } - m_alpha[ofs] = static_cast(c); -} + inline bool is_valid() const + { + return m_magic == cMagicValue; + } -uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const { - CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + bool init(const void* pData, uint32 data_size) + { + m_pHeader = crnd_get_header(pData, data_size); + if (!m_pHeader) + return false; - uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; - if (x & 1) - value >>= 4; - value &= 0xF; + m_pData = static_cast(pData); + m_data_size = 
data_size; - if (scaled) - value = (value << 4U) | value; + if (!init_tables()) + return false; - return value; -} + if (!decode_palettes()) + return false; -uint32 dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) { - pDst[0].a = static_cast(l); - pDst[1].a = static_cast(h); - pDst[2].a = static_cast((l * 4 + h) / 5); - pDst[3].a = static_cast((l * 3 + h * 2) / 5); - pDst[4].a = static_cast((l * 2 + h * 3) / 5); - pDst[5].a = static_cast((l + h * 4) / 5); - pDst[6].a = 0; - pDst[7].a = 255; - return 6; -} + return true; + } -uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) { - pDst[0].a = static_cast(l); - pDst[1].a = static_cast(h); - pDst[2].a = static_cast((l * 6 + h) / 7); - pDst[3].a = static_cast((l * 5 + h * 2) / 7); - pDst[4].a = static_cast((l * 4 + h * 3) / 7); - pDst[5].a = static_cast((l * 3 + h * 4) / 7); - pDst[6].a = static_cast((l * 2 + h * 5) / 7); - pDst[7].a = static_cast((l + h * 6) / 7); - return 8; -} + bool unpack_level( + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index]; -uint32 dxt5_block::get_block_values(color_quad_u8* pDst, uint32 l, uint32 h) { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); -} + uint32 next_level_ofs = m_data_size; + if ((level_index + 1) < (m_pHeader->m_levels)) + next_level_ofs = m_pHeader->m_level_ofs[level_index + 1]; -uint32 dxt5_block::get_block_values6(uint32* pDst, uint32 l, uint32 h) { - pDst[0] = l; - pDst[1] = h; - pDst[2] = (l * 4 + h) / 5; - pDst[3] = (l * 3 + h * 2) / 5; - pDst[4] = (l * 2 + h * 3) / 5; - pDst[5] = (l + h * 4) / 5; - pDst[6] = 0; - pDst[7] = 255; - return 6; -} + CRND_ASSERT(next_level_ofs > cur_level_ofs); -uint32 dxt5_block::get_block_values8(uint32* pDst, uint32 l, uint32 h) { - pDst[0] = l; - pDst[1] = h; - pDst[2] = (l * 6 + h) / 7; - pDst[3] = (l * 5 + h * 2) / 7; - pDst[4] = (l 
* 4 + h * 3) / 7; - pDst[5] = (l * 3 + h * 4) / 7; - pDst[6] = (l * 2 + h * 5) / 7; - pDst[7] = (l + h * 6) / 7; - return 8; -} + return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } -uint32 dxt5_block::unpack_endpoint(uint32 packed, uint32 index) { - CRND_ASSERT(index < 2); - return (packed >> (8 * index)) & 0xFF; -} + bool unpack_level( + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { +#ifdef CRND_BUILD_DEBUG + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + if (!pDst[f]) + return false; +#endif -uint32 dxt5_block::pack_endpoints(uint32 lo, uint32 hi) { - CRND_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); - return lo | (hi << 8U); -} + const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U); + const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U); + const uint32 blocks_x = (width + 3U) >> 2U; + const uint32 blocks_y = (height + 3U) >> 2U; + const uint32 block_size = m_pHeader->m_format == cCRNFmtDXT1 || m_pHeader->m_format == cCRNFmtDXT5A || m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; + + uint32 minimal_row_pitch = block_size * blocks_x; + if (!row_pitch_in_bytes) + row_pitch_in_bytes = minimal_row_pitch; + else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) + return false; + if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) + return false; + + if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) + return false; + + bool status = false; + switch (m_pHeader->m_format) + { + case cCRNFmtDXT1: + case cCRNFmtETC1S: + status = unpack_dxt1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXT5: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + case cCRNFmtETC2AS: + status = unpack_dxt5((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXT5A: + status = unpack_dxt5a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + status = unpack_dxn((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC1: + status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC2: + status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC2A: + status = unpack_etc2a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + default: + return false; + } + if (!status) + return false; -uint32 dxt5_block::get_block_values(uint32* pDst, uint32 l, uint32 h) { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); -} + m_codec.stop_decoding(); + return true; + } -} // namespace crnd + inline const void* get_data() const + { + return m_pData; + } -// File: crnd_decode.cpp + inline uint32 get_data_size() const + { + return m_data_size; + } -namespace crnd { + private: + enum { cMagicValue = 0x1EF9CABD }; -class crn_unpacker { - public: - inline crn_unpacker() - : m_magic(cMagicValue), - 
m_pData(NULL), - m_data_size(0), - m_pHeader(NULL) { - } + uint32 m_magic; - inline ~crn_unpacker() { - m_magic = 0; - } + const uint8* m_pData; + uint32 m_data_size; + const crn_header* m_pHeader; - inline bool is_valid() const { return m_magic == cMagicValue; } + symbol_codec m_codec; - bool init(const void* pData, uint32 data_size) { - m_pHeader = crnd_get_header(pData, data_size); - if (!m_pHeader) - return false; + static_huffman_data_model m_reference_encoding_dm; + static_huffman_data_model m_endpoint_delta_dm[2]; + static_huffman_data_model m_selector_delta_dm[2]; - m_pData = static_cast(pData); - m_data_size = data_size; + crnd::vector m_color_endpoints; + crnd::vector m_color_selectors; - if (!init_tables()) - return false; + crnd::vector m_alpha_endpoints; + crnd::vector m_alpha_selectors; - if (!decode_palettes()) - return false; + struct block_buffer_element + { + uint16 endpoint_reference; + uint16 color_endpoint_index; + uint16 alpha0_endpoint_index; + uint16 alpha1_endpoint_index; + }; - return true; - } + crnd::vector m_block_buffer; - bool unpack_level( - void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index) { - uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index]; + bool init_tables() + { + if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) + return false; - uint32 next_level_ofs = m_data_size; - if ((level_index + 1) < (m_pHeader->m_levels)) - next_level_ofs = m_pHeader->m_level_ofs[level_index + 1]; + if (!m_codec.decode_receive_static_data_model(m_reference_encoding_dm)) + return false; - CRND_ASSERT(next_level_ofs > cur_level_ofs); + if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) + return false; - return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); - } + if (m_pHeader->m_color_endpoints.m_num) + { + if 
(!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) + return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) + return false; + } - bool unpack_level( - const void* pSrc, uint32 src_size_in_bytes, - void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index) { + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) + return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) + return false; + } -#ifdef CRND_BUILD_DEBUG - for (uint32 f = 0; f < m_pHeader->m_faces; f++) - if (!pDst[f]) - return false; -#endif + m_codec.stop_decoding(); - const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U); - const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U); - const uint32 blocks_x = (width + 3U) >> 2U; - const uint32 blocks_y = (height + 3U) >> 2U; - const uint32 block_size = m_pHeader->m_format == cCRNFmtDXT1 || m_pHeader->m_format == cCRNFmtDXT5A || m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; - - uint32 minimal_row_pitch = block_size * blocks_x; - if (!row_pitch_in_bytes) - row_pitch_in_bytes = minimal_row_pitch; - else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) - return false; - if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) - return false; - - if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) - return false; - - bool status = false; - switch (m_pHeader->m_format) { - case cCRNFmtDXT1: - case cCRNFmtETC1S: - status = unpack_dxt1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtDXT5: - case cCRNFmtDXT5_CCxY: - case cCRNFmtDXT5_xGBR: - case cCRNFmtDXT5_AGBR: - case cCRNFmtDXT5_xGxR: - case cCRNFmtETC2AS: - status = unpack_dxt5((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtDXT5A: - status = unpack_dxt5a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtDXN_XY: - case cCRNFmtDXN_YX: - status = unpack_dxn((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtETC1: - status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtETC2: - status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - case cCRNFmtETC2A: - status = unpack_etc2a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); - break; - default: - return false; - } - if (!status) - return false; + return true; + } - m_codec.stop_decoding(); - return true; - } + bool decode_palettes() + { + if (m_pHeader->m_color_endpoints.m_num) + { + if (!decode_color_endpoints()) + return false; + if (!decode_color_selectors()) + return false; + } - inline const void* get_data() const { return m_pData; } - inline uint32 get_data_size() const { return m_data_size; } + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!decode_alpha_endpoints()) + return false; + if (!(m_pHeader->m_format == cCRNFmtETC2AS ? 
decode_alpha_selectors_etcs() : m_pHeader->m_format == cCRNFmtETC2A ? decode_alpha_selectors_etc() : decode_alpha_selectors())) + return false; + } - private: - enum { cMagicValue = 0x1EF9CABD }; - uint32 m_magic; + return true; + } - const uint8* m_pData; - uint32 m_data_size; - const crn_header* m_pHeader; + bool decode_color_endpoints() + { + const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; + const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; + const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; + + if (!m_color_endpoints.resize(num_color_endpoints)) + return false; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) + return false; + + static_huffman_data_model dm[2]; + for (uint32 i = 0; i < (has_etc_color_blocks ? 1 : 2); i++) + if (!m_codec.decode_receive_static_data_model(dm[i])) + return false; + + uint32 a = 0, b = 0, c = 0; + uint32 d = 0, e = 0, f = 0; + + uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; + + for (uint32 i = 0; i < num_color_endpoints; i++) + { + if (has_etc_color_blocks) + { + for (b = 0; b < 32; b += 8) + a += m_codec.decode(dm[0]) << b; + a &= 0x1F1F1F1F; + *pDst++ = has_subblocks ? 
a : (a & 0x07000000) << 5 | (a & 0x07000000) << 2 | 0x02000000 | (a & 0x001F1F1F) << 3; + } + else + { + a = (a + m_codec.decode(dm[0])) & 31; + b = (b + m_codec.decode(dm[1])) & 63; + c = (c + m_codec.decode(dm[0])) & 31; + d = (d + m_codec.decode(dm[0])) & 31; + e = (e + m_codec.decode(dm[1])) & 63; + f = (f + m_codec.decode(dm[0])) & 31; + *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); + } + } - symbol_codec m_codec; + m_codec.stop_decoding(); - static_huffman_data_model m_reference_encoding_dm; - static_huffman_data_model m_endpoint_delta_dm[2]; - static_huffman_data_model m_selector_delta_dm[2]; + return true; + } - crnd::vector m_color_endpoints; - crnd::vector m_color_selectors; + bool decode_color_selectors() + { + const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; + const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; + m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_color_selectors.resize(m_pHeader->m_color_selectors.m_num << (has_subblocks ? 1 : 0)); + for (uint32 s = 0, i = 0; i < m_pHeader->m_color_selectors.m_num; i++) + { + for (uint32 j = 0; j < 32; j += 4) + s ^= m_codec.decode(dm) << j; + if (has_etc_color_blocks) + { + for (uint32 selector = (~s & 0xAAAAAAAA) | (~(s ^ s >> 1) & 0x55555555), t = 8, h = 0; h < 4; h++, t -= 15) + { + for (uint32 w = 0; w < 4; w++, t += 4) + { + if (has_subblocks) + { + uint32 s0 = selector >> (w << 3 | h << 1); + m_color_selectors[i << 1] |= ((s0 >> 1 & 1) | (s0 & 1) << 16) << (t & 15); + } + uint32 s1 = selector >> (h << 3 | w << 1); + m_color_selectors[has_subblocks ? 
i << 1 | 1 : i] |= ((s1 >> 1 & 1) | (s1 & 1) << 16) << (t & 15); + } + } + } + else + { + m_color_selectors[i] = ((s ^ s << 1) & 0xAAAAAAAA) | (s >> 1 & 0x55555555); + } + } + m_codec.stop_decoding(); + return true; + } - crnd::vector m_alpha_endpoints; - crnd::vector m_alpha_selectors; - - struct block_buffer_element { - uint16 endpoint_reference; - uint16 color_endpoint_index; - uint16 alpha0_endpoint_index; - uint16 alpha1_endpoint_index; - }; - crnd::vector m_block_buffer; + bool decode_alpha_endpoints() + { + const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; - bool init_tables() { - if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) - return false; + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) + return false; - if (!m_codec.decode_receive_static_data_model(m_reference_encoding_dm)) - return false; + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; - if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) - return false; + if (!m_alpha_endpoints.resize(num_alpha_endpoints)) + return false; - if (m_pHeader->m_color_endpoints.m_num) { - if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) - return false; - if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) - return false; - } + uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; + uint32 a = 0, b = 0; - if (m_pHeader->m_alpha_endpoints.m_num) { - if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) - return false; - if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) - return false; - } + for (uint32 i = 0; i < num_alpha_endpoints; i++) + { + a = (a + m_codec.decode(dm)) & 255; + b = (b + m_codec.decode(dm)) & 255; + *pDst++ = (uint16)(a | (b << 8)); + } - m_codec.stop_decoding(); + m_codec.stop_decoding(); - return true; - } + return 
true; + } - bool decode_palettes() { - if (m_pHeader->m_color_endpoints.m_num) { - if (!decode_color_endpoints()) - return false; - if (!decode_color_selectors()) - return false; - } + bool decode_alpha_selectors() + { + m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); + uint8 dxt5_from_linear[64]; + for (uint32 i = 0; i < 64; i++) + dxt5_from_linear[i] = g_dxt5_from_linear[i & 7] | g_dxt5_from_linear[i >> 3] << 3; + for (uint32 s0_linear = 0, s1_linear = 0, i = 0; i < m_alpha_selectors.size();) + { + uint32 s0 = 0, s1 = 0; + for (uint32 j = 0; j < 24; s0 |= dxt5_from_linear[s0_linear >> j & 0x3F] << j, j += 6) + s0_linear ^= m_codec.decode(dm) << j; + for (uint32 j = 0; j < 24; s1 |= dxt5_from_linear[s1_linear >> j & 0x3F] << j, j += 6) + s1_linear ^= m_codec.decode(dm) << j; + m_alpha_selectors[i++] = s0; + m_alpha_selectors[i++] = s0 >> 16 | s1 << 8; + m_alpha_selectors[i++] = s1 >> 8; + } + m_codec.stop_decoding(); + return true; + } - if (m_pHeader->m_alpha_endpoints.m_num) { - if (!decode_alpha_endpoints()) - return false; - if (!(m_pHeader->m_format == cCRNFmtETC2AS ? decode_alpha_selectors_etcs() : m_pHeader->m_format == cCRNFmtETC2A ? decode_alpha_selectors_etc() : decode_alpha_selectors())) - return false; - } + bool decode_alpha_selectors_etc() + { + m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 6); + uint8 s_linear[8] = {}; + uint8* data = (uint8*)m_alpha_selectors.begin(); + for (uint i = 0; i < m_alpha_selectors.size(); i += 6, data += 12) + { + for (uint s_group = 0, p = 0; p < 16; p++) + { + s_group = p & 1 ? 
s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); + uint8 s = s_group & 7; + if (s <= 3) + s = 3 - s; + uint8 d = 3 * (p + 1); + uint8 byte_offset = d >> 3; + uint8 bit_offset = d & 7; + data[byte_offset] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[byte_offset - 1] |= s >> bit_offset; + d += 9 * ((p & 3) - (p >> 2)); + byte_offset = d >> 3; + bit_offset = d & 7; + data[byte_offset + 6] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[byte_offset + 5] |= s >> bit_offset; + } + } + m_codec.stop_decoding(); + return true; + } - return true; - } + bool decode_alpha_selectors_etcs() + { + m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); + uint8 s_linear[8] = {}; + uint8* data = (uint8*)m_alpha_selectors.begin(); + for (uint i = 0; i < (m_alpha_selectors.size() << 1); i += 6) + { + for (uint s_group = 0, p = 0; p < 16; p++) + { + s_group = p & 1 ? 
s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); + uint8 s = s_group & 7; + if (s <= 3) + s = 3 - s; + uint8 d = 3 * (p + 1) + 9 * ((p & 3) - (p >> 2)); + uint8 byte_offset = d >> 3; + uint8 bit_offset = d & 7; + data[i + byte_offset] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[i + byte_offset - 1] |= s >> bit_offset; + } + } + m_codec.stop_decoding(); + return true; + } - bool decode_color_endpoints() { - const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; - const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; - const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; + static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) + { + uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); + uint32 Micro = ((y & 6) << 2) << LogBpp; - if (!m_color_endpoints.resize(num_color_endpoints)) - return false; + return Macro + + ((Micro & ~15) << 1) + + (Micro & 15) + + ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); + } - if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) - return false; + static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) + { + uint32 Macro = (x >> 5) << (LogBpp + 7); + uint32 Micro = (x & 7) << LogBpp; + uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); - static_huffman_data_model dm[2]; - for (uint32 i = 0; i < (has_etc_color_blocks ? 
1 : 2); i++) - if (!m_codec.decode_receive_static_data_model(dm[i])) - return false; + return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 63) + + ((y & 16) << 7) + + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); + } - uint32 a = 0, b = 0, c = 0; - uint32 d = 0, e = 0, f = 0; - - uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; - - for (uint32 i = 0; i < num_color_endpoints; i++) { - if (has_etc_color_blocks) { - for (b = 0; b < 32; b += 8) - a += m_codec.decode(dm[0]) << b; - a &= 0x1F1F1F1F; - *pDst++ = has_subblocks ? a : (a & 0x07000000) << 5 | (a & 0x07000000) << 2 | 0x02000000 | (a & 0x001F1F1F) << 3; - } else { - a = (a + m_codec.decode(dm[0])) & 31; - b = (b + m_codec.decode(dm[1])) & 63; - c = (c + m_codec.decode(dm[0])) & 31; - d = (d + m_codec.decode(dm[0])) & 31; - e = (e + m_codec.decode(dm[1])) & 63; - f = (f + m_codec.decode(dm[0])) & 31; - *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); - } - } - - m_codec.stop_decoding(); - - return true; - } - - bool decode_color_selectors() { - const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; - const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; - m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size); - static_huffman_data_model dm; - m_codec.decode_receive_static_data_model(dm); - m_color_selectors.resize(m_pHeader->m_color_selectors.m_num << (has_subblocks ? 
1 : 0)); - for (uint32 s = 0, i = 0; i < m_pHeader->m_color_selectors.m_num; i++) { - for (uint32 j = 0; j < 32; j += 4) - s ^= m_codec.decode(dm) << j; - if (has_etc_color_blocks) { - for (uint32 selector = (~s & 0xAAAAAAAA) | (~(s ^ s >> 1) & 0x55555555), t = 8, h = 0; h < 4; h++, t -= 15) { - for (uint32 w = 0; w < 4; w++, t += 4) { - if (has_subblocks) { - uint32 s0 = selector >> (w << 3 | h << 1); - m_color_selectors[i << 1] |= ((s0 >> 1 & 1) | (s0 & 1) << 16) << (t & 15); + static inline void limit(uint& x, uint n) + { + int v = x - n; + int msk = (v >> 31); + x = (x & msk) | (v & ~msk); + } + + bool unpack_dxt1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 color_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) + { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element& buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + 
buffer.color_endpoint_index = color_endpoint_index; + } + else if (endpoint_reference == 1) + { + buffer.color_endpoint_index = color_endpoint_index; + } + else + { + color_endpoint_index = buffer.color_endpoint_index; + } + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + if (visible) + { + pData[0] = m_color_endpoints[color_endpoint_index]; + pData[1] = m_color_selectors[color_selector_index]; + } + } + } } - uint32 s1 = selector >> (h << 3 | w << 1); - m_color_selectors[has_subblocks ? i << 1 | 1 : i] |= ((s1 >> 1 & 1) | (s1 & 1) << 16) << (t & 15); - } - } - } else { - m_color_selectors[i] = ((s ^ s << 1) & 0xAAAAAAAA) | (s >> 1 & 0x55555555); - } - } - m_codec.stop_decoding(); - return true; - } - - bool decode_alpha_endpoints() { - const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; - - if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) - return false; - - static_huffman_data_model dm; - if (!m_codec.decode_receive_static_data_model(dm)) - return false; - - if (!m_alpha_endpoints.resize(num_alpha_endpoints)) - return false; - - uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; - uint32 a = 0, b = 0; - - for (uint32 i = 0; i < num_alpha_endpoints; i++) { - a = (a + m_codec.decode(dm)) & 255; - b = (b + m_codec.decode(dm)) & 255; - *pDst++ = (uint16)(a | (b << 8)); - } - - m_codec.stop_decoding(); - - return true; - } - - bool decode_alpha_selectors() { - m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); - static_huffman_data_model dm; - m_codec.decode_receive_static_data_model(dm); - m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); - uint8 dxt5_from_linear[64]; - for (uint32 i = 0; i < 64; i++) - dxt5_from_linear[i] = g_dxt5_from_linear[i & 7] | g_dxt5_from_linear[i >> 3] << 3; - for (uint32 s0_linear = 0, s1_linear = 0, i = 0; i < m_alpha_selectors.size();) { - uint32 s0 = 0, s1 = 0; 
- for (uint32 j = 0; j < 24; s0 |= dxt5_from_linear[s0_linear >> j & 0x3F] << j, j += 6) - s0_linear ^= m_codec.decode(dm) << j; - for (uint32 j = 0; j < 24; s1 |= dxt5_from_linear[s1_linear >> j & 0x3F] << j, j += 6) - s1_linear ^= m_codec.decode(dm) << j; - m_alpha_selectors[i++] = s0; - m_alpha_selectors[i++] = s0 >> 16 | s1 << 8; - m_alpha_selectors[i++] = s1 >> 8; - } - m_codec.stop_decoding(); - return true; - } - - bool decode_alpha_selectors_etc() { - m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); - static_huffman_data_model dm; - m_codec.decode_receive_static_data_model(dm); - m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 6); - uint8 s_linear[8] = {}; - uint8* data = (uint8*)m_alpha_selectors.begin(); - for (uint i = 0; i < m_alpha_selectors.size(); i += 6, data += 12) { - for (uint s_group = 0, p = 0; p < 16; p++) { - s_group = p & 1 ? s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); - uint8 s = s_group & 7; - if (s <= 3) - s = 3 - s; - uint8 d = 3 * (p + 1); - uint8 byte_offset = d >> 3; - uint8 bit_offset = d & 7; - data[byte_offset] |= s << (8 - bit_offset); - if (bit_offset < 3) - data[byte_offset - 1] |= s >> bit_offset; - d += 9 * ((p & 3) - (p >> 2)); - byte_offset = d >> 3; - bit_offset = d & 7; - data[byte_offset + 6] |= s << (8 - bit_offset); - if (bit_offset < 3) - data[byte_offset + 5] |= s >> bit_offset; - } - } - m_codec.stop_decoding(); - return true; - } - - bool decode_alpha_selectors_etcs() { - m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); - static_huffman_data_model dm; - m_codec.decode_receive_static_data_model(dm); - m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); - uint8 s_linear[8] = {}; - uint8* data = (uint8*)m_alpha_selectors.begin(); - for (uint i = 0; i < (m_alpha_selectors.size() << 1); i += 6) { - for (uint s_group = 0, p = 0; p < 16; p++) { - s_group = p & 1 ? 
s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); - uint8 s = s_group & 7; - if (s <= 3) - s = 3 - s; - uint8 d = 3 * (p + 1) + 9 * ((p & 3) - (p >> 2)); - uint8 byte_offset = d >> 3; - uint8 bit_offset = d & 7; - data[i + byte_offset] |= s << (8 - bit_offset); - if (bit_offset < 3) - data[i + byte_offset - 1] |= s >> bit_offset; - } - } - m_codec.stop_decoding(); - return true; - } - - static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) { - uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); - uint32 Micro = ((y & 6) << 2) << LogBpp; - - return Macro + - ((Micro & ~15) << 1) + - (Micro & 15) + - ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); - } - - static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) { - uint32 Macro = (x >> 5) << (LogBpp + 7); - uint32 Micro = (x & 7) << LogBpp; - uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); - - return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 63) + - ((y & 16) << 7) + - (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); - } - - static inline void limit(uint& x, uint n) { - int v = x - n; - int msk = (v >> 31); - x = (x & msk) | (v & ~msk); - } - - bool unpack_dxt1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_color_endpoints = m_color_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); - - if (m_block_buffer.size() < width) - m_block_buffer.resize(width); - - uint32 color_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for (uint32 x = 0; x < width; x++, pData += 2) { - visible = visible && x < 
output_width; - if (!(y & 1) && !(x & 1)) - reference_group = m_codec.decode(m_reference_encoding_dm); - block_buffer_element &buffer = m_block_buffer[x]; - uint8 endpoint_reference; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - endpoint_reference = reference_group & 3; - reference_group >>= 2; - buffer.endpoint_reference = reference_group & 3; - reference_group >>= 2; - } - if (!endpoint_reference) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - buffer.color_endpoint_index = color_endpoint_index; - } else if (endpoint_reference == 1) { - buffer.color_endpoint_index = color_endpoint_index; - } else { - color_endpoint_index = buffer.color_endpoint_index; - } - uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); - if (visible) { - pData[0] = m_color_endpoints[color_endpoint_index]; - pData[1] = m_color_selectors[color_selector_index]; - } - } - } - } - return true; - } - - bool unpack_dxt5(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_color_endpoints = m_color_endpoints.size(); - const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); - - if (m_block_buffer.size() < width) - m_block_buffer.resize(width); - - uint32 color_endpoint_index = 0; - uint32 alpha0_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for (uint32 x = 0; x < width; x++, pData += 4) { - visible = visible && x < output_width; - if (!(y & 1) && !(x & 1)) - reference_group = 
m_codec.decode(m_reference_encoding_dm); - block_buffer_element &buffer = m_block_buffer[x]; - uint8 endpoint_reference; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - endpoint_reference = reference_group & 3; - reference_group >>= 2; - buffer.endpoint_reference = reference_group & 3; - reference_group >>= 2; - } - if (!endpoint_reference) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - buffer.color_endpoint_index = color_endpoint_index; - alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); - if (alpha0_endpoint_index >= num_alpha_endpoints) - alpha0_endpoint_index -= num_alpha_endpoints; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else if (endpoint_reference == 1) { - buffer.color_endpoint_index = color_endpoint_index; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else { - color_endpoint_index = buffer.color_endpoint_index; - alpha0_endpoint_index = buffer.alpha0_endpoint_index; - } - uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); - uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); - if (visible) { - const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; - pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); - pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); - pData[2] = m_color_endpoints[color_endpoint_index]; - pData[3] = m_color_selectors[color_selector_index]; - } - } - } - } - return true; - } - - bool unpack_dxn(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); - - if 
(m_block_buffer.size() < width) - m_block_buffer.resize(width); - - uint32 alpha0_endpoint_index = 0; - uint32 alpha1_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for (uint32 x = 0; x < width; x++, pData += 4) { - visible = visible && x < output_width; - if (!(y & 1) && !(x & 1)) - reference_group = m_codec.decode(m_reference_encoding_dm); - block_buffer_element &buffer = m_block_buffer[x]; - uint8 endpoint_reference; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - endpoint_reference = reference_group & 3; - reference_group >>= 2; - buffer.endpoint_reference = reference_group & 3; - reference_group >>= 2; - } - if (!endpoint_reference) { - alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); - if (alpha0_endpoint_index >= num_alpha_endpoints) - alpha0_endpoint_index -= num_alpha_endpoints; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - alpha1_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); - if (alpha1_endpoint_index >= num_alpha_endpoints) - alpha1_endpoint_index -= num_alpha_endpoints; - buffer.alpha1_endpoint_index = alpha1_endpoint_index; - } else if (endpoint_reference == 1) { - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - buffer.alpha1_endpoint_index = alpha1_endpoint_index; - } else { - alpha0_endpoint_index = buffer.alpha0_endpoint_index; - alpha1_endpoint_index = buffer.alpha1_endpoint_index; - } - uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); - uint32 alpha1_selector_index = m_codec.decode(m_selector_delta_dm[1]); - if (visible) { - const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; - const uint16* pAlpha1_selectors = &m_alpha_selectors[alpha1_selector_index * 3]; - pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | 
(pAlpha0_selectors[0] << 16); - pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); - pData[2] = m_alpha_endpoints[alpha1_endpoint_index] | (pAlpha1_selectors[0] << 16); - pData[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); - } - } - } - } - return true; - } - - bool unpack_dxt5a(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 1); - - if (m_block_buffer.size() < width) - m_block_buffer.resize(width); - - uint32 alpha0_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for (uint32 x = 0; x < width; x++, pData += 2) { - visible = visible && x < output_width; - if (!(y & 1) && !(x & 1)) - reference_group = m_codec.decode(m_reference_encoding_dm); - block_buffer_element &buffer = m_block_buffer[x]; - uint8 endpoint_reference; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - endpoint_reference = reference_group & 3; - reference_group >>= 2; - buffer.endpoint_reference = reference_group & 3; - reference_group >>= 2; - } - if (!endpoint_reference) { - alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); - if (alpha0_endpoint_index >= num_alpha_endpoints) - alpha0_endpoint_index -= num_alpha_endpoints; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else if (endpoint_reference == 1) { - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else { - alpha0_endpoint_index = buffer.alpha0_endpoint_index; - } - uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); - if (visible) { - const uint16* pAlpha0_selectors = 
&m_alpha_selectors[alpha0_selector_index * 3]; - pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); - pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); - } - } - } - } - return true; - } - - bool unpack_etc1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_color_endpoints = m_color_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); - - if (m_block_buffer.size() < width << 1) - m_block_buffer.resize(width << 1); - - uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for (uint32 x = 0; x < width; x++, pData += 2) { - visible = visible && x < output_width; - block_buffer_element &buffer = m_block_buffer[x << 1]; - uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - reference_group = m_codec.decode(m_reference_encoding_dm); - endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); - buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); - } - if (!(endpoint_reference & 3)) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - buffer.color_endpoint_index = color_endpoint_index; - } else if ((endpoint_reference & 3) == 1) { - buffer.color_endpoint_index = color_endpoint_index; - } else if ((endpoint_reference & 3) == 3) { - buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; - } else { - 
color_endpoint_index = buffer.color_endpoint_index; - } - endpoint_reference >>= 2; - *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; - uint32 selector_index = m_codec.decode(m_selector_delta_dm[0]); - if (endpoint_reference) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - } - diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; - m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; - *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; - if (visible) { - uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; - for (uint c = 0; diff && c < 3; c++) - diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; - for (uint c = 0; c < 3; c++) - block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; - block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; - pData[0] = *(uint32*)&block_endpoint; - pData[1] = m_color_selectors[selector_index << 1 | flip]; - } - } - } - } - return true; - } - - bool unpack_etc2a(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { - const uint32 num_color_endpoints = m_color_endpoints.size(); - const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const uint32 width = (output_width + 1) & ~1; - const uint32 height = (output_height + 1) & ~1; - const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 2); - - if (m_block_buffer.size() < width << 1) - m_block_buffer.resize(width << 1); - - uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0, alpha0_endpoint_index = 0, diagonal_alpha0_endpoint_index = 0; - uint8 reference_group = 0; - - for (uint32 f = 0; f < m_pHeader->m_faces; f++) { - uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { - bool visible = y < output_height; - for 
(uint32 x = 0; x < width; x++, pData += 4) { - visible = visible && x < output_width; - block_buffer_element &buffer = m_block_buffer[x << 1]; - uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; - if (y & 1) { - endpoint_reference = buffer.endpoint_reference; - } else { - reference_group = m_codec.decode(m_reference_encoding_dm); - endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); - buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); - } - if (!(endpoint_reference & 3)) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); - if (alpha0_endpoint_index >= num_alpha_endpoints) - alpha0_endpoint_index -= num_alpha_endpoints; - buffer.color_endpoint_index = color_endpoint_index; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else if ((endpoint_reference & 3) == 1) { - buffer.color_endpoint_index = color_endpoint_index; - buffer.alpha0_endpoint_index = alpha0_endpoint_index; - } else if ((endpoint_reference & 3) == 3) { - buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; - buffer.alpha0_endpoint_index = alpha0_endpoint_index = diagonal_alpha0_endpoint_index; - } else { - color_endpoint_index = buffer.color_endpoint_index; - alpha0_endpoint_index = buffer.alpha0_endpoint_index; - } - endpoint_reference >>= 2; - *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; - uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); - uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); - if (endpoint_reference) { - color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); - if (color_endpoint_index >= num_color_endpoints) - color_endpoint_index -= num_color_endpoints; - } - *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; - 
diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; - diagonal_alpha0_endpoint_index = m_block_buffer[x << 1 | 1].alpha0_endpoint_index; - m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; - m_block_buffer[x << 1 | 1].alpha0_endpoint_index = alpha0_endpoint_index; - if (visible) { - uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; - for (uint c = 0; diff && c < 3; c++) - diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; - for (uint c = 0; c < 3; c++) - block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; - block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; - const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 6 + (flip ? 3 : 0)]; - pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | pAlpha0_selectors[0] << 16; - pData[1] = pAlpha0_selectors[1] | pAlpha0_selectors[2] << 16; - pData[2] = *(uint32*)&block_endpoint; - pData[3] = m_color_selectors[color_selector_index << 1 | flip]; - } - } - } - } - return true; - } - -}; - -crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) { - if ((!pData) || (data_size < cCRNHeaderMinSize)) - return NULL; - - crn_unpacker* p = crnd_new(); - if (!p) - return NULL; - - if (!p->init(pData, data_size)) { - crnd_delete(p); - return NULL; - } - - return p; -} + return true; + } -bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) { - if (!pContext) - return false; + bool unpack_dxt5(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); + + if (m_block_buffer.size() < width) + 
m_block_buffer.resize(width); + + uint32 color_endpoint_index = 0; + uint32 alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) + { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element& buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else if (endpoint_reference == 1) + { + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else + { + color_endpoint_index = buffer.color_endpoint_index; + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) + { + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + pData[2] = 
m_color_endpoints[color_endpoint_index]; + pData[3] = m_color_selectors[color_selector_index]; + } + } + } + } + return true; + } - crn_unpacker* pUnpacker = static_cast(pContext); + bool unpack_dxn(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 alpha0_endpoint_index = 0; + uint32 alpha1_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) + { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element& buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) + { + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + alpha1_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha1_endpoint_index >= num_alpha_endpoints) + alpha1_endpoint_index -= num_alpha_endpoints; + buffer.alpha1_endpoint_index = alpha1_endpoint_index; + } + else if (endpoint_reference == 1) + { + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + 
buffer.alpha1_endpoint_index = alpha1_endpoint_index; + } + else + { + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + alpha1_endpoint_index = buffer.alpha1_endpoint_index; + } + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + uint32 alpha1_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) + { + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; + const uint16* pAlpha1_selectors = &m_alpha_selectors[alpha1_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + pData[2] = m_alpha_endpoints[alpha1_endpoint_index] | (pAlpha1_selectors[0] << 16); + pData[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); + } + } + } + } + return true; + } - if (!pUnpacker->is_valid()) - return false; + bool unpack_dxt5a(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) + { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element& buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + endpoint_reference = reference_group & 3; + reference_group 
>>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) + { + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else if (endpoint_reference == 1) + { + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else + { + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) + { + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + } + } + } + } + return true; + } - if (ppData) - *ppData = pUnpacker->get_data(); + bool unpack_etc1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width << 1) + m_block_buffer.resize(width << 1); + + uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) + { + visible = visible && x < output_width; + block_buffer_element& buffer = m_block_buffer[x << 1]; + uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + reference_group = 
m_codec.decode(m_reference_encoding_dm); + endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); + buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); + } + if (!(endpoint_reference & 3)) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + } + else if ((endpoint_reference & 3) == 1) + { + buffer.color_endpoint_index = color_endpoint_index; + } + else if ((endpoint_reference & 3) == 3) + { + buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; + } + else + { + color_endpoint_index = buffer.color_endpoint_index; + } + endpoint_reference >>= 2; + *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; + uint32 selector_index = m_codec.decode(m_selector_delta_dm[0]); + if (endpoint_reference) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + } + diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; + m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; + *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; + if (visible) + { + uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; + for (uint c = 0; diff && c < 3; c++) + diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; + for (uint c = 0; c < 3; c++) + block_endpoint[c] = diff ? 
e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; + block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; + pData[0] = *(uint32*)&block_endpoint; + pData[1] = m_color_selectors[selector_index << 1 | flip]; + } + } + } + } + return true; + } - if (pData_size) - *pData_size = pUnpacker->get_data_size(); + bool unpack_etc2a(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) + { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = (output_width + 1) & ~1; + const uint32 height = (output_height + 1) & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 2); + + if (m_block_buffer.size() < width << 1) + m_block_buffer.resize(width << 1); + + uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0, alpha0_endpoint_index = 0, diagonal_alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) + { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) + { + visible = visible && x < output_width; + block_buffer_element& buffer = m_block_buffer[x << 1]; + uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; + if (y & 1) + { + endpoint_reference = buffer.endpoint_reference; + } + else + { + reference_group = m_codec.decode(m_reference_encoding_dm); + endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); + buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); + } + if (!(endpoint_reference & 3)) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if 
(alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else if ((endpoint_reference & 3) == 1) + { + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } + else if ((endpoint_reference & 3) == 3) + { + buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index = diagonal_alpha0_endpoint_index; + } + else + { + color_endpoint_index = buffer.color_endpoint_index; + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + endpoint_reference >>= 2; + *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (endpoint_reference) + { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + } + *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; + diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; + diagonal_alpha0_endpoint_index = m_block_buffer[x << 1 | 1].alpha0_endpoint_index; + m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; + m_block_buffer[x << 1 | 1].alpha0_endpoint_index = alpha0_endpoint_index; + if (visible) + { + uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; + for (uint c = 0; diff && c < 3; c++) + diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; + for (uint c = 0; c < 3; c++) + block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; + block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 6 + (flip ? 
3 : 0)]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | pAlpha0_selectors[0] << 16; + pData[1] = pAlpha0_selectors[1] | pAlpha0_selectors[2] << 16; + pData[2] = *(uint32*)&block_endpoint; + pData[3] = m_color_selectors[color_selector_index << 1 | flip]; + } + } + } + } + return true; + } + }; - return true; -} + crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; -bool crnd_unpack_level( - crnd_unpack_context pContext, - void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index) { - if ((!pContext) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) - return false; + crn_unpacker* p = crnd_new(); + if (!p) + return NULL; - crn_unpacker* pUnpacker = static_cast(pContext); + if (!p->init(pData, data_size)) + { + crnd_delete(p); + return NULL; + } - if (!pUnpacker->is_valid()) - return false; + return p; + } - return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); -} + bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) + { + if (!pContext) + return false; -bool crnd_unpack_level_segmented( - crnd_unpack_context pContext, - const void* pSrc, uint32 src_size_in_bytes, - void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index) { - if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) - return false; + crn_unpacker* pUnpacker = static_cast(pContext); - crn_unpacker* pUnpacker = static_cast(pContext); + if (!pUnpacker->is_valid()) + return false; - if (!pUnpacker->is_valid()) - return false; + if (ppData) + *ppData = pUnpacker->get_data(); - return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); -} + if (pData_size) + *pData_size = pUnpacker->get_data_size(); -bool crnd_unpack_end(crnd_unpack_context pContext) { - 
if (!pContext) - return false; + return true; + } - crn_unpacker* pUnpacker = static_cast(pContext); + bool crnd_unpack_level( + crnd_unpack_context pContext, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; - if (!pUnpacker->is_valid()) - return false; + crn_unpacker* pUnpacker = static_cast(pContext); - crnd_delete(pUnpacker); + if (!pUnpacker->is_valid()) + return false; - return true; -} + return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } -} // namespace crnd + bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; -#endif // CRND_INCLUDE_CRND_H + crn_unpacker* pUnpacker = static_cast(pContext); -//------------------------------------------------------------------------------ -// -// crn_decomp.h uses the ZLIB license: -// http://opensource.org/licenses/Zlib -// -// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC -// Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. 
If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// -// 3. This notice may not be removed or altered from any source distribution. -// -//------------------------------------------------------------------------------ + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool crnd_unpack_end(crnd_unpack_context pContext) + { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + crnd_delete(pUnpacker); + + return true; + } +} // namespace crnd + +#endif // CRND_INCLUDE_CRND_H diff --git a/inc/crn_defs.h b/inc/crn_defs.h index 9c39753..1dfda27 100644 --- a/inc/crn_defs.h +++ b/inc/crn_defs.h @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + #ifndef CRND_INCLUDE_CRN_DEFS_H #define CRND_INCLUDE_CRN_DEFS_H @@ -15,278 +38,309 @@ #endif // CRN decompression API -namespace crnd { -typedef unsigned char uint8; -typedef signed char int8; -typedef unsigned short uint16; -typedef signed short int16; -typedef unsigned int uint32; -typedef uint32 uint32; -typedef unsigned int uint; -typedef signed int int32; +namespace crnd +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint32; + typedef unsigned int uint; + typedef signed int int32; #ifdef __GNUC__ -typedef unsigned long long uint64; -typedef long long int64; + typedef unsigned long long uint64; + typedef long long int64; #else -typedef unsigned __int64 uint64; -typedef signed __int64 int64; + typedef unsigned __int64 uint64; + typedef signed __int64 int64; #endif -// The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment. -const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U; - -// realloc callback: -// Used to allocate, resize, or free memory blocks. -// If p is NULL, the realloc function attempts to allocate a block of at least size bytes. Returns NULL on out of memory. -// *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size. -// If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0. -// If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block: -// If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the -// underlying heap implementation doesn't support in-place resizing. 
Otherwise, the pointer to the original block is returned. -// If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there -// is not enough memory to relocate the block. -// In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not. -typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); - -// msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid. -typedef size_t (*crnd_msize_func)(void* p, void* pUser_data); - -// crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions. -// If any input parameters are NULL, the memory callback functions are reset to the default functions. -// The default functions call malloc(), free(), _msize(), _expand(), etc. -CRN_EXPORT void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data); - -struct crn_file_info { - inline crn_file_info() - : m_struct_size(sizeof(crn_file_info)) {} - - uint32 m_struct_size; - uint32 m_actual_data_size; - uint32 m_header_size; - uint32 m_total_palette_size; - uint32 m_tables_size; - uint32 m_levels; - uint32 m_level_compressed_size[cCRNMaxLevels]; - uint32 m_color_endpoint_palette_entries; - uint32 m_color_selector_palette_entries; - uint32 m_alpha_endpoint_palette_entries; - uint32 m_alpha_selector_palette_entries; -}; - -struct crn_texture_info { - inline crn_texture_info() - : m_struct_size(sizeof(crn_texture_info)) {} - - uint32 m_struct_size; - uint32 m_width; - uint32 m_height; - uint32 m_levels; - uint32 m_faces; - uint32 m_bytes_per_block; - uint32 m_userdata0; - uint32 m_userdata1; - crn_format m_format; -}; - -struct crn_level_info { - inline crn_level_info() - : m_struct_size(sizeof(crn_level_info)) {} - - uint32 m_struct_size; - uint32 
m_width; - uint32 m_height; - uint32 m_faces; - uint32 m_blocks_x; - uint32 m_blocks_y; - uint32 m_bytes_per_block; - crn_format m_format; -}; - -// Returns the FOURCC format code corresponding to the specified CRN format. -CRN_EXPORT uint32 crnd_crn_format_to_fourcc(crn_format fmt); - -// Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format. -CRN_EXPORT crn_format crnd_get_fundamental_dxt_format(crn_format fmt); - -// Returns the size of the crn_format in bits/texel (either 4 or 8). -CRN_EXPORT uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt); - -// Returns the number of bytes per DXTn block (8 or 16). -CRN_EXPORT uint32 crnd_get_bytes_per_dxt_block(crn_format fmt); - -// Validates the entire file by checking the header and data CRC's. -// This is not something you want to be doing much! -// The crn_file_info.m_struct_size field must be set before calling this function. -CRN_EXPORT bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info); - -// Retrieves texture information from the CRN file. -// The crn_texture_info.m_struct_size field must be set before calling this function. -CRN_EXPORT bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info); - -// Retrieves mipmap level specific information from the CRN file. -// The crn_level_info.m_struct_size field must be set before calling this function. -CRN_EXPORT bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info); - -// Transcode/unpack context handle. -typedef void* crnd_unpack_context; - -// crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes. -// Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels. -// Don't call this once per mip level (unless you absolutely must)! 
-// This function allocates enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha). -// Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries. -// pData must point to a buffer holding all of the compressed .CRN file data. -// This buffer must be stable until crnd_unpack_end() is called. -// Returns NULL if out of memory, or if any of the input parameters are invalid. -CRN_EXPORT crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); - -// Returns a pointer to the compressed .CRN data associated with a crnd_unpack_context. -// Returns false if any of the input parameters are invalid. -CRN_EXPORT bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); - -// crnd_unpack_level() - Transcodes the specified mipmap level to a destination buffer in cached or write combined memory. -// pContext - Context created by a call to crnd_unpack_begin(). -// ppDst - A pointer to an array of 1 or 6 destination buffer pointers. Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. -// dst_size_in_bytes - Optional size of each destination buffer. Only used for debugging - OK to set to UINT32_MAX. -// row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. -// level_index - mipmap level index, where 0 is the largest/first level. -// Returns false if any of the input parameters, or the compressed stream, are invalid. -// This function does not allocate any memory. -CRN_EXPORT bool crnd_unpack_level( - crnd_unpack_context pContext, - void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index); - -// crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. -// See the crnd_create_segmented_file() API below. -// Segmented files allow the user to control where the compressed mipmap data is stored. 
-CRN_EXPORT bool crnd_unpack_level_segmented( - crnd_unpack_context pContext, - const void* pSrc, uint32 src_size_in_bytes, - void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, - uint32 level_index); - -// crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified unpack context. -// Returns false if the context is NULL, or if it points to an invalid context. -// This function frees all memory associated with the context. -CRN_EXPORT bool crnd_unpack_end(crnd_unpack_context pContext); - -// The following API's allow the user to create "segmented" CRN files. A segmented file contains multiple pieces: -// - Base data: Header + compression tables -// - Level data: Individual mipmap levels -// This allows mipmap levels from multiple CRN files to be tightly packed together into single files. - -// Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL. -CRN_EXPORT const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize); - -// Returns the compressed size of the texture's header and compression tables (but no levels). -CRN_EXPORT uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size); - -// Creates a "segmented" CRN texture from a normal CRN texture. The new texture will be created at pBase_data, and will be crnd_get_base_data_size() bytes long. -// base_data_size must be >= crnd_get_base_data_size(). -// The base data will contain the CRN header and compression tables, but no mipmap data. -CRN_EXPORT bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size); - -} // namespace crnd + // The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment. + const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U; + + // realloc callback: + // Used to allocate, resize, or free memory blocks. 
+ // If p is NULL, the realloc function attempts to allocate a block of at least size bytes. Returns NULL on out of memory. + // *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size. + // If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0. + // If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block: + // If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the + // underlying heap implementation doesn't support in-place resizing. Otherwise, the pointer to the original block is returned. + // If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there + // is not enough memory to relocate the block. + // In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not. + typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); + + // msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid. + typedef size_t (*crnd_msize_func)(void* p, void* pUser_data); + + // crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions. + // If any input parameters are NULL, the memory callback functions are reset to the default functions. + // The default functions call malloc(), free(), _msize(), _expand(), etc. 
+ CRN_EXPORT void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data); + + struct crn_file_info + { + inline crn_file_info() : + m_struct_size(sizeof(crn_file_info)) + { + } + + uint32 m_struct_size; + uint32 m_actual_data_size; + uint32 m_header_size; + uint32 m_total_palette_size; + uint32 m_tables_size; + uint32 m_levels; + uint32 m_level_compressed_size[cCRNMaxLevels]; + uint32 m_color_endpoint_palette_entries; + uint32 m_color_selector_palette_entries; + uint32 m_alpha_endpoint_palette_entries; + uint32 m_alpha_selector_palette_entries; + }; + + struct crn_texture_info + { + inline crn_texture_info() : + m_struct_size(sizeof(crn_texture_info)) + { + } + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_levels; + uint32 m_faces; + uint32 m_bytes_per_block; + uint32 m_userdata0; + uint32 m_userdata1; + crn_format m_format; + }; + + struct crn_level_info + { + inline crn_level_info() : + m_struct_size(sizeof(crn_level_info)) + { + } + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_faces; + uint32 m_blocks_x; + uint32 m_blocks_y; + uint32 m_bytes_per_block; + crn_format m_format; + }; + + // Returns the FOURCC format code corresponding to the specified CRN format. + CRN_EXPORT uint32 crnd_crn_format_to_fourcc(crn_format fmt); + + // Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format. + CRN_EXPORT crn_format crnd_get_fundamental_dxt_format(crn_format fmt); + + // Returns the size of the crn_format in bits/texel (either 4 or 8). + CRN_EXPORT uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt); + + // Returns the number of bytes per DXTn block (8 or 16). + CRN_EXPORT uint32 crnd_get_bytes_per_dxt_block(crn_format fmt); + + // Validates the entire file by checking the header and data CRC's. + // This is not something you want to be doing much! + // The crn_file_info.m_struct_size field must be set before calling this function. 
+ CRN_EXPORT bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info); + + // Retrieves texture information from the CRN file. + // The crn_texture_info.m_struct_size field must be set before calling this function. + CRN_EXPORT bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info); + + // Retrieves mipmap level specific information from the CRN file. + // The crn_level_info.m_struct_size field must be set before calling this function. + CRN_EXPORT bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info); + + // Transcode/unpack context handle. + typedef void* crnd_unpack_context; + + // crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes. + // Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels. + // Don't call this once per mip level (unless you absolutely must)! + // This function allocates enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha). + // Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries. + // pData must point to a buffer holding all of the compressed .CRN file data. + // This buffer must be stable until crnd_unpack_end() is called. + // Returns NULL if out of memory, or if any of the input parameters are invalid. + CRN_EXPORT crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); + + // Returns a pointer to the compressed .CRN data associated with a crnd_unpack_context. + // Returns false if any of the input parameters are invalid. + CRN_EXPORT bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); + + // crnd_unpack_level() - Transcodes the specified mipmap level to a destination buffer in cached or write combined memory. + // pContext - Context created by a call to crnd_unpack_begin(). 
+ // ppDst - A pointer to an array of 1 or 6 destination buffer pointers. Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. + // dst_size_in_bytes - Optional size of each destination buffer. Only used for debugging - OK to set to UINT32_MAX. + // row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. + // level_index - mipmap level index, where 0 is the largest/first level. + // Returns false if any of the input parameters, or the compressed stream, are invalid. + // This function does not allocate any memory. + CRN_EXPORT bool crnd_unpack_level( + crnd_unpack_context pContext, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. + // See the crnd_create_segmented_file() API below. + // Segmented files allow the user to control where the compressed mipmap data is stored. + CRN_EXPORT bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified unpack context. + // Returns false if the context is NULL, or if it points to an invalid context. + // This function frees all memory associated with the context. + CRN_EXPORT bool crnd_unpack_end(crnd_unpack_context pContext); + + // The following API's allow the user to create "segmented" CRN files. A segmented file contains multiple pieces: + // - Base data: Header + compression tables + // - Level data: Individual mipmap levels + // This allows mipmap levels from multiple CRN files to be tightly packed together into single files. 
+ + // Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL. + CRN_EXPORT const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize); + + // Returns the compressed size of the texture's header and compression tables (but no levels). + CRN_EXPORT uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size); + + // Creates a "segmented" CRN texture from a normal CRN texture. The new texture will be created at pBase_data, and will be crnd_get_base_data_size() bytes long. + // base_data_size must be >= crnd_get_base_data_size(). + // The base data will contain the CRN header and compression tables, but no mipmap data. + CRN_EXPORT bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size); +} // namespace crnd // Low-level CRN file header cracking. -namespace crnd { -template -struct crn_packed_uint { - inline crn_packed_uint() {} - - inline crn_packed_uint(unsigned int val) { *this = val; } - - inline crn_packed_uint(const crn_packed_uint& other) { *this = other; } - - inline crn_packed_uint& operator=(const crn_packed_uint& rhs) { - if (this != &rhs) - memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); - return *this; - } - - inline crn_packed_uint& operator=(unsigned int val) { - //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); - - val <<= (8U * (4U - N)); - - for (unsigned int i = 0; i < N; i++) { - m_buf[i] = static_cast(val >> 24U); - val <<= 8U; - } - - return *this; - } - - inline operator unsigned int() const { - switch (N) { - case 1: - return m_buf[0]; - case 2: - return (m_buf[0] << 8U) | m_buf[1]; - case 3: - return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); - default: - return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); - } - } - - unsigned char m_buf[N]; -}; +namespace crnd +{ + template + struct crn_packed_uint + { + inline crn_packed_uint() + { + 
} + + inline crn_packed_uint(unsigned int val) + { + *this = val; + } + + inline crn_packed_uint(const crn_packed_uint& other) + { + *this = other; + } + + inline crn_packed_uint& operator=(const crn_packed_uint& rhs) + { + if (this != &rhs) + { + memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); + } + return *this; + } + + inline crn_packed_uint& operator=(unsigned int val) + { + //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); + + val <<= (8U * (4U - N)); + + for (unsigned int i = 0; i < N; i++) + { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const + { + switch (N) + { + case 1: + return m_buf[0]; + case 2: + return (m_buf[0] << 8U) | m_buf[1]; + case 3: + return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: + return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; + }; #pragma pack(push) #pragma pack(1) -struct crn_palette { - crn_packed_uint<3> m_ofs; - crn_packed_uint<3> m_size; - crn_packed_uint<2> m_num; -}; + struct crn_palette + { + crn_packed_uint<3> m_ofs; + crn_packed_uint<3> m_size; + crn_packed_uint<2> m_num; + }; -enum crn_header_flags { - // If set, the compressed mipmap level data is not located after the file's base data - it will be separately managed by the user instead. - cCRNHeaderFlagSegmented = 1 -}; + enum crn_header_flags + { + // If set, the compressed mipmap level data is not located after the file's base data - it will be separately managed by the user instead. 
+ cCRNHeaderFlagSegmented = 1 + }; -struct crn_header { - enum { cCRNSigValue = ('H' << 8) | 'x' }; + struct crn_header + { + enum + { + cCRNSigValue = ('H' << 8) | 'x' + }; - crn_packed_uint<2> m_sig; - crn_packed_uint<2> m_header_size; - crn_packed_uint<2> m_header_crc16; + crn_packed_uint<2> m_sig; + crn_packed_uint<2> m_header_size; + crn_packed_uint<2> m_header_crc16; - crn_packed_uint<4> m_data_size; - crn_packed_uint<2> m_data_crc16; + crn_packed_uint<4> m_data_size; + crn_packed_uint<2> m_data_crc16; - crn_packed_uint<2> m_width; - crn_packed_uint<2> m_height; + crn_packed_uint<2> m_width; + crn_packed_uint<2> m_height; - crn_packed_uint<1> m_levels; - crn_packed_uint<1> m_faces; + crn_packed_uint<1> m_levels; + crn_packed_uint<1> m_faces; - crn_packed_uint<1> m_format; - crn_packed_uint<2> m_flags; + crn_packed_uint<1> m_format; + crn_packed_uint<2> m_flags; - crn_packed_uint<4> m_reserved; - crn_packed_uint<4> m_userdata0; - crn_packed_uint<4> m_userdata1; + crn_packed_uint<4> m_reserved; + crn_packed_uint<4> m_userdata0; + crn_packed_uint<4> m_userdata1; - crn_palette m_color_endpoints; - crn_palette m_color_selectors; + crn_palette m_color_endpoints; + crn_palette m_color_selectors; - crn_palette m_alpha_endpoints; - crn_palette m_alpha_selectors; + crn_palette m_alpha_endpoints; + crn_palette m_alpha_selectors; - crn_packed_uint<2> m_tables_size; - crn_packed_uint<3> m_tables_ofs; + crn_packed_uint<2> m_tables_size; + crn_packed_uint<3> m_tables_ofs; - // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] - crn_packed_uint<4> m_level_ofs[1]; -}; + // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] + crn_packed_uint<4> m_level_ofs[1]; + }; -const unsigned int cCRNHeaderMinSize = 62U; + const unsigned int cCRNHeaderMinSize = 62U; #pragma pack(pop) +} // namespace crnd -} // namespace crnd - -#endif // CRND_INCLUDE_CRN_DEFS_H +#endif // CRND_INCLUDE_CRN_DEFS_H diff --git a/inc/crnlib.h b/inc/crnlib.h index 
d79e942..f4483d2 100644 --- a/inc/crnlib.h +++ b/inc/crnlib.h @@ -1,8 +1,26 @@ -// File: crnlib.h - Advanced DXTn texture compression library. -// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC -// Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet -// See copyright notice and license at the end of this file. -// +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // This header file contains the public crnlib declarations for DXTn, // clustered DXTn, and CRN compression/decompression. // @@ -10,15 +28,18 @@ // all you want to do is transcode .CRN files to raw DXTn bits at run-time. // The crn_decomp.h header file library contains all the code necessary for // decompression. 
-// + // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing + #ifndef CRNLIB_H #define CRNLIB_H +#include + #include "crn_export.h" #ifdef _MSC_VER -#pragma warning(disable : 4127) // conditional expression is constant +#pragma warning(disable : 4127) // conditional expression is constant #endif #define CRNLIB_SUPPORT_ATI_COMPRESS 0 @@ -33,7 +54,8 @@ typedef signed int crn_int32; typedef unsigned int crn_bool; // crnlib can compress to these file types. -enum crn_file_type { +enum crn_file_type +{ // .CRN cCRNFileTypeCRN = 0, @@ -46,7 +68,8 @@ enum crn_file_type { // Supported compressed pixel formats. // Basically all the standard DX9 formats, with some swizzled DXT5 formats // (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats. -enum crn_format { +enum crn_format +{ cCRNFmtInvalid = -1, cCRNFmtDXT1 = 0, @@ -59,10 +82,14 @@ enum crn_format { cCRNFmtDXT5, // Various DXT5 derivatives - cCRNFmtDXT5_CCxY, // Luma-chroma - cCRNFmtDXT5_xGxR, // Swizzled 2-component - cCRNFmtDXT5_xGBR, // Swizzled 3-component - cCRNFmtDXT5_AGBR, // Swizzled 4-component + cCRNFmtDXT5_CCxY, + // Luma-chroma + cCRNFmtDXT5_xGxR, + // Swizzled 2-component + cCRNFmtDXT5_xGBR, + // Swizzled 3-component + cCRNFmtDXT5_AGBR, + // Swizzled 4-component // ATI 3DC and X360 DXN cCRNFmtDXN_XY, @@ -83,7 +110,8 @@ enum crn_format { }; // Various library/file format limits. -enum crn_limits { +enum crn_limits +{ // Max. mipmap level resolution on any axis. cCRNMaxLevelResolution = 4096, @@ -101,7 +129,8 @@ enum crn_limits { // CRN/DDS compression flags. // See the m_flags member in the crn_comp_params struct, below. -enum crn_comp_flags { +enum crn_comp_flags +{ // Enables perceptual colorspace distance metrics if set. // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps! // Default: Set @@ -156,7 +185,8 @@ enum crn_comp_flags { }; // Controls DXTn quality vs. 
speed control - only used when compressing to .DDS. -enum crn_dxt_quality { +enum crn_dxt_quality +{ cCRNDXTQualitySuperFast, cCRNDXTQualityFast, cCRNDXTQualityNormal, @@ -169,10 +199,14 @@ enum crn_dxt_quality { }; // Which DXTn compressor to use when compressing to plain (non-clustered) .DDS. -enum crn_dxt_compressor_type { - cCRNDXTCompressorCRN, // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot fasterw with similiar quality to Erricson's) - cCRNDXTCompressorCRNF, // Use crnlib's "fast" DXTc block compressor - cCRNDXTCompressorRYG, // Use RYG's DXTc block compressor (low quality, but very fast) +enum crn_dxt_compressor_type +{ + cCRNDXTCompressorCRN, + // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot fasterw with similiar quality to Erricson's) + cCRNDXTCompressorCRNF, + // Use crnlib's "fast" DXTc block compressor + cCRNDXTCompressorRYG, +// Use RYG's DXTc block compressor (low quality, but very fast) #if CRNLIB_SUPPORT_ATI_COMPRESS cCRNDXTCompressorATI, @@ -191,14 +225,19 @@ enum crn_dxt_compressor_type { // Processing will stop prematurely (and fail) if the callback returns false. // phase_index, total_phases - high level progress // subphase_index, total_subphases - progress within current phase -typedef crn_bool(*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr); +typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr); // CRN/DDS compression parameters struct. -struct crn_comp_params { - inline crn_comp_params() { clear(); } +struct crn_comp_params +{ + inline crn_comp_params() + { + clear(); + } // Clear struct to default parameters. 
- inline void clear() { + inline void clear() + { m_size_of_obj = sizeof(*this); m_file_type = cCRNFileTypeCRN; m_faces = 1; @@ -209,8 +248,12 @@ struct crn_comp_params { m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes; for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + { for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + { m_pImages[f][l] = NULL; + } + } m_target_bitrate = 0.0f; m_quality_level = cCRNMaxQualityLevel; @@ -233,12 +276,14 @@ struct crn_comp_params { m_pProgress_func_data = NULL; } - inline bool operator==(const crn_comp_params& rhs) const { -#define CRNLIB_COMP(x) \ - do { \ - if ((x) != (rhs.x)) \ - return false; \ - } while (0) + inline bool operator==(const crn_comp_params& rhs) const + { +#define CRNLIB_COMP(x) \ + do \ + { \ + if ((x) != (rhs.x)) \ + return false; \ + } while (0) CRNLIB_COMP(m_size_of_obj); CRNLIB_COMP(m_file_type); CRNLIB_COMP(m_faces); @@ -274,7 +319,8 @@ struct crn_comp_params { } // Returns true if the input parameters are reasonable. - inline bool check() const { + inline bool check() const + { if ((m_file_type > cCRNFileTypeDDS) || (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) || (m_dxt1a_alpha_threshold > 255) || @@ -290,32 +336,40 @@ struct crn_comp_params { (m_alpha_component > 3) || (m_num_helper_threads > cCRNMaxHelperThreads) || (m_dxt_quality > cCRNDXTQualityUber) || - (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) { + (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) + { return false; } return true; } // Helper to set/get flags from m_flags member. 
- inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; } - inline void set_flag(crn_comp_flags flag, bool val) { + inline bool get_flag(crn_comp_flags flag) const + { + return (m_flags & flag) != 0; + } + + inline void set_flag(crn_comp_flags flag, bool val) + { m_flags &= ~flag; if (val) + { m_flags |= flag; + } } crn_uint32 m_size_of_obj; - crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS. + crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS. - crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap) - crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK - crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK - crn_uint32 m_levels; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap) + crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK + crn_uint32 m_levels; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK - crn_format m_format; // Output pixel format. + crn_format m_format; // Output pixel format. - crn_uint32 m_flags; // see crn_comp_flags enum + crn_uint32 m_flags; // see crn_comp_flags enum // Array of pointers to 32bpp input images. const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels]; @@ -327,7 +381,7 @@ struct crn_comp_params { // Desired quality level. // Currently, CRN and DDS quality levels are not compatible with eachother from an image quality standpoint. - crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel] + crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel] // DXTn compression parameters. 
crn_uint32 m_dxt1a_alpha_threshold; @@ -341,11 +395,11 @@ struct crn_comp_params { float m_crn_adaptive_tile_color_psnr_derating; float m_crn_adaptive_tile_alpha_psnr_derating; - crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] - crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] + crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] // Number of helper threads to create during compression. 0=no threading. crn_uint32 m_num_helper_threads; @@ -360,11 +414,16 @@ struct crn_comp_params { }; // Mipmap generator's mode. 
-enum crn_mip_mode { - cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps - cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps - cCRNMipModeGenerateMips, // Always generate new mipmaps - cCRNMipModeNoMips, // Output texture has no mipmaps +enum crn_mip_mode +{ + cCRNMipModeUseSourceOrGenerateMips, + // Use source texture's mipmaps if it has any, otherwise generate new mipmaps + cCRNMipModeUseSourceMips, + // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps + cCRNMipModeGenerateMips, + // Always generate new mipmaps + cCRNMipModeNoMips, + // Output texture has no mipmaps cCRNMipModeTotal, @@ -375,12 +434,14 @@ CRN_EXPORT const char* crn_get_mip_mode_desc(crn_mip_mode m); CRN_EXPORT const char* crn_get_mip_mode_name(crn_mip_mode m); // Mipmap generator's filter kernel. -enum crn_mip_filter { +enum crn_mip_filter +{ cCRNMipFilterBox, cCRNMipFilterTent, cCRNMipFilterLanczos4, cCRNMipFilterMitchell, - cCRNMipFilterKaiser, // Kaiser=default mipmap filter + cCRNMipFilterKaiser, + // Kaiser=default mipmap filter cCRNMipFilterTotal, @@ -390,7 +451,8 @@ enum crn_mip_filter { CRN_EXPORT const char* crn_get_mip_filter_name(crn_mip_filter f); // Mipmap generator's scale mode. -enum crn_scale_mode { +enum crn_scale_mode +{ cCRNSMDisabled, cCRNSMAbsolute, cCRNSMRelative, @@ -406,10 +468,15 @@ enum crn_scale_mode { CRN_EXPORT const char* crn_get_scale_mode_desc(crn_scale_mode sm); // Mipmap generator parameters. 
-struct crn_mipmap_params { - inline crn_mipmap_params() { clear(); } +struct crn_mipmap_params +{ + inline crn_mipmap_params() + { + clear(); + } - inline void clear() { + inline void clear() + { m_size_of_obj = sizeof(*this); m_mode = cCRNMipModeUseSourceOrGenerateMips; m_filter = cCRNMipFilterKaiser; @@ -437,14 +504,19 @@ struct crn_mipmap_params { m_clamp_height = 0; } - inline bool check() const { return true; } + inline bool check() const + { + return true; + } - inline bool operator==(const crn_mipmap_params& rhs) const { -#define CRNLIB_COMP(x) \ - do { \ - if ((x) != (rhs.x)) \ - return false; \ - } while (0) + inline bool operator==(const crn_mipmap_params& rhs) const + { +#define CRNLIB_COMP(x) \ + do \ + { \ + if ((x) != (rhs.x)) \ + return false; \ + } while (0) CRNLIB_COMP(m_size_of_obj); CRNLIB_COMP(m_mode); CRNLIB_COMP(m_filter); @@ -469,6 +541,7 @@ struct crn_mipmap_params { return true; #undef CRNLIB_COMP } + crn_uint32 m_size_of_obj; crn_mip_mode m_mode; @@ -509,7 +582,7 @@ struct crn_mipmap_params { // Function to set an optional user provided memory allocation/reallocation/msize routines. // By default, crnlib just uses malloc(), free(), etc. for all allocations. typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); -typedef size_t(*crn_msize_func)(void* p, void* pUser_data); +typedef size_t (*crn_msize_func)(void* p, void* pUser_data); CRN_EXPORT void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); // Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). @@ -549,13 +622,15 @@ CRN_EXPORT void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint3 // Decompresses an entire DDS file in any supported format to uncompressed 32-bit/pixel image(s). // See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats. 
// You are responsible for freeing each image block, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer. -struct crn_texture_desc { +struct crn_texture_desc +{ crn_uint32 m_faces; crn_uint32 m_width; crn_uint32 m_height; crn_uint32 m_levels; - crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format + crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format }; + CRN_EXPORT bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc); // Frees all images allocated by crn_decompress_dds_to_images(). @@ -614,7 +689,6 @@ CRN_EXPORT void crn_free_block_compressor(crn_block_compressor_context_t pContex // Returns false if the crn_fmt is invalid. CRN_EXPORT bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels, crn_format crn_fmt); - #define CRNLIB_VERSION 104 CRN_EXPORT const char* crn_get_version(); @@ -623,32 +697,4 @@ CRN_EXPORT int crn_get_version_major(); CRN_EXPORT int crn_get_version_minor(); CRN_EXPORT int crn_get_version_patch(); -#endif // CRNLIB_H - -//------------------------------------------------------------------------------ -// -// crnlib uses the ZLIB license: -// http://opensource.org/licenses/Zlib -// -// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC -// Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. 
If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// -// 3. This notice may not be removed or altered from any source distribution. -// -//------------------------------------------------------------------------------ +#endif // CRNLIB_H diff --git a/inc/dds_defs.h b/inc/dds_defs.h index e58588c..594c399 100644 --- a/inc/dds_defs.h +++ b/inc/dds_defs.h @@ -1,4 +1,26 @@ -// File: dds_defs.h +/* + * Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC + * Copyright (c) 2020 FrozenStorm Interactive, Yoann Potinet + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation or credits + * is required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + // DX9 .DDS file header definitions. 
#ifndef CRNLIB_DDS_DEFS_H @@ -19,9 +41,12 @@ namespace crnlib PIXEL_FMT_DXT3 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), PIXEL_FMT_DXT4 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), PIXEL_FMT_DXT5 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), - PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX - PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY - PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf + PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), + // DXN_YX + PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), + // DXN_XY + PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), + // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf // Non-standard, crnlib-specific pixel formats (some of these are supported by ATI's Compressonator) PIXEL_FMT_DXT5_CCxY = CRNLIB_PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), @@ -62,7 +87,7 @@ namespace crnlib crn_uint32 dwSize; crn_uint32 dwFlags; crn_uint32 dwFourCC; - crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib will place a FOURCC code here for swizzled/cooked DXTn formats + crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib will place a FOURCC code here for swizzled/cooked DXTn formats crn_uint32 dwRBitMask; crn_uint32 dwGBitMask; crn_uint32 dwBBitMask; @@ -83,10 +108,13 @@ namespace crnlib crn_uint32 dwFlags; crn_uint32 dwHeight; crn_uint32 dwWidth; - union { + + union + { crn_int32 lPitch; crn_uint32 dwLinearSize; }; + crn_uint32 dwBackBufferCount; crn_uint32 dwMipMapCount; crn_uint32 dwAlphaBitDepth; @@ -150,7 +178,6 @@ namespace crnlib const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; const crn_uint32 DDSCAPS2_VOLUME = 0x00200000; - -} // namespace crnlib +} // namespace crnlib #endif // CRNLIB_DDS_DEFS_H From c4bab54193a27facf2358a1dd3fa94a84fd7baf4 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: 
Sun, 21 Feb 2021 17:00:36 -0500 Subject: [PATCH 15/18] Build examples on Windows --- azure-pipelines.yml | 2 +- crnlib/crn_tree_clusterizer.h | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 028c24d..b61bd16 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -54,7 +54,7 @@ stages: displayName: Configure inputs: cmakeArgs: -G "$(GENERATOR)" -A $(GENERATOR_ARCH) -S $(Build.SourcesDirectory) -B . -DCMAKE_BUILD_TYPE=$(BUILD_CONFIG) - -DCRN_BUILD_EXAMPLES=OFF -DCRN_BUILD_SHARED_LIBS=$(BUILD_SHARED) -DCMAKE_INSTALL_PREFIX=$(Build.ArtifactStagingDirectory) + -DCRN_BUILD_EXAMPLES=ON -DCRN_BUILD_SHARED_LIBS=$(BUILD_SHARED) -DCMAKE_INSTALL_PREFIX=$(Build.ArtifactStagingDirectory) - task: CMake@1 displayName: Build diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index bcb5721..f0fa23c 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -5,19 +5,19 @@ * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages * arising from the use of this software. - * + * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: - * + * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation or credits * is required. - * + * * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. - * + * * 3. This notice may not be removed or altered from any source distribution. 
*/ @@ -28,10 +28,9 @@ #include "crn_matrix.h" #include "crn_threading.h" - namespace crnlib { - template + template class tree_clusterizer { public: @@ -56,7 +55,7 @@ namespace crnlib } bool operator<(const NodeInfo& other) const { - return m_index < other.m_index ? m_variance < other.m_variance : !(other.m_variance < m_variance); + return m_index < other.m_index ? m_variance < other.m_variance : other.m_variance >= m_variance; } }; @@ -87,7 +86,6 @@ namespace crnlib m_nodes[pParams->main_node].m_alternative = true; } - void generate_codebook(VectorType* vectors, uint* weights, uint size, uint max_splits, bool generate_node_index_map = false, task_pool* pTask_pool = 0) { m_vectors = vectors; @@ -203,7 +201,7 @@ namespace crnlib struct vq_node { - vq_node(): + vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), @@ -573,7 +571,7 @@ namespace crnlib }; template - void split_vectors(VectorType(&vectors)[64], uint(&weights)[64], uint size, VectorType(&result)[2]) + void split_vectors(VectorType (&vectors)[64], uint (&weights)[64], uint size, VectorType (&result)[2]) { VectorType weightedVectors[64]; double weightedDotProducts[64]; @@ -635,7 +633,9 @@ namespace crnlib for (uint x = 0; x < N; x++) { for (uint y = x; y < N; y++) + { covar[x][y] = covar[x][y] + v[x] * w[y]; + } } } float divider = (float)total_weight; @@ -751,5 +751,4 @@ namespace crnlib result[0] = left_child; result[1] = right_child; } - -} // namespace crnlib +} // namespace crnlib From 3cb7e355c3c3e04e466daa500e82e076f7cbb9a4 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 21 Feb 2021 17:14:15 -0500 Subject: [PATCH 16/18] Print warning when enabling examples on non-Windows platforms. 
--- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d81f53b..82ae64f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,5 +28,8 @@ add_subdirectory(3rdparty/miniz) add_subdirectory(3rdparty/stb) if (CRN_BUILD_EXAMPLES) + if(NOT WIN32) + message(WARNING "Examples aren't supported this platform.") + endif() add_subdirectory(examples) endif(CRN_BUILD_EXAMPLES) From 1fc82aaa5fe064c4d8522662ef1e940b9916f778 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 21 Feb 2021 18:56:58 -0500 Subject: [PATCH 17/18] Format some files --- crnlib/crn_rg_etc1.cpp | 1849 ++++++++++++++++++----------- crnlib/crn_rg_etc1.h | 46 +- crnlib/crn_threading_pthreads.cpp | 2 +- crnlib/crn_threading_pthreads.h | 20 +- 4 files changed, 1199 insertions(+), 718 deletions(-) diff --git a/crnlib/crn_rg_etc1.cpp b/crnlib/crn_rg_etc1.cpp index 9941cbe..ab355cb 100644 --- a/crnlib/crn_rg_etc1.cpp +++ b/crnlib/crn_rg_etc1.cpp @@ -16,7 +16,7 @@ #include #if defined(_MSC_VER) -#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union +#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union #endif #if defined(_DEBUG) || defined(DEBUG) @@ -25,9 +25,10 @@ #define RG_ETC1_ASSERT CRNLIB_ASSERT -namespace crnlib { - - namespace rg_etc1 { +namespace crnlib +{ + namespace rg_etc1 + { typedef unsigned char uint8; typedef unsigned short uint16; typedef unsigned int uint; @@ -36,57 +37,74 @@ namespace crnlib { typedef unsigned long long uint64; const uint32 cUINT32_MAX = 0xFFFFFFFFU; - const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; - template - inline T minimum(T a, T b) { + template + inline T minimum(T a, T b) + { return (a < b) ? 
a : b; } - template - inline T minimum(T a, T b, T c) { + template + inline T minimum(T a, T b, T c) + { return minimum(minimum(a, b), c); } - template - inline T maximum(T a, T b) { + template + inline T maximum(T a, T b) + { return (a > b) ? a : b; } - template - inline T maximum(T a, T b, T c) { + template + inline T maximum(T a, T b, T c) + { return maximum(maximum(a, b), c); } - template - inline T clamp(T value, T low, T high) { + template + inline T clamp(T value, T low, T high) + { return (value < low) ? low : ((value > high) ? high : value); } - template - inline T square(T value) { + template + inline T square(T value) + { return value * value; } - template - inline void zero_object(T& obj) { + template + inline void zero_object(T& obj) + { memset((void*)&obj, 0, sizeof(obj)); } - template - inline void zero_this(T* pObj) { + template + inline void zero_this(T* pObj) + { memset((void*)pObj, 0, sizeof(*pObj)); } - template - T decay_array_to_subtype(T(&a)[N]); + template + T decay_array_to_subtype(T (&a)[N]); #define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) - enum eNoClamp { cNoClamp }; + enum eNoClamp + { + cNoClamp + }; - struct color_quad_u8 { - static inline int clamp(int v) { + struct color_quad_u8 + { + static inline int clamp(int v) + { if (v & 0xFFFFFF00U) + { v = (~(static_cast(v) >> 31)) & 0xFF; + } return v; } - struct component_traits { - enum { + struct component_traits + { + enum + { cSigned = false, cFloat = false, cMin = 0U, @@ -98,9 +116,13 @@ namespace crnlib { typedef unsigned char component_t; typedef int parameter_t; - enum { cNumComps = 4 }; + enum + { + cNumComps = 4 + }; - union { + union + { struct { component_t r; @@ -114,51 +136,62 @@ namespace crnlib { uint32 m_u32; }; - inline color_quad_u8() { + inline color_quad_u8() + { } - inline color_quad_u8(const color_quad_u8& other) - : m_u32(other.m_u32) { + inline color_quad_u8(const color_quad_u8& other) : + m_u32(other.m_u32) + { } - explicit inline 
color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) { + explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) + { set(y, alpha); } - inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { + inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { set(red, green, blue, alpha); } - explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) { + explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) + { set_noclamp_y_alpha(y, alpha); } - inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { + inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { set_noclamp_rgba(red, green, blue, alpha); } - inline void clear() { + inline void clear() + { m_u32 = 0; } - inline color_quad_u8& operator=(const color_quad_u8& other) { + inline color_quad_u8& operator=(const color_quad_u8& other) + { m_u32 = other.m_u32; return *this; } - inline color_quad_u8& set_rgb(const color_quad_u8& other) { + inline color_quad_u8& set_rgb(const color_quad_u8& other) + { r = other.r; g = other.g; b = other.b; return *this; } - inline color_quad_u8& operator=(parameter_t y) { + inline color_quad_u8& operator=(parameter_t y) + { set(y, component_traits::cMax); return *this; } - inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) { + inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) + { y = clamp(y); alpha = clamp(alpha); r = static_cast(y); @@ -168,7 +201,8 @@ namespace crnlib { return *this; } - inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) { + inline 
color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) + { RG_ETC1_ASSERT((y >= component_traits::cMin) && (y <= component_traits::cMax)); RG_ETC1_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); @@ -179,7 +213,8 @@ namespace crnlib { return *this; } - inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { + inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { r = static_cast(clamp(red)); g = static_cast(clamp(green)); b = static_cast(clamp(blue)); @@ -187,7 +222,8 @@ namespace crnlib { return *this; } - inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) { + inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) + { RG_ETC1_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); RG_ETC1_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); RG_ETC1_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); @@ -200,7 +236,8 @@ namespace crnlib { return *this; } - inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) { + inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) + { RG_ETC1_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); RG_ETC1_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); RG_ETC1_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); @@ -211,20 +248,32 @@ namespace crnlib { return *this; } - static inline parameter_t get_min_comp() { return component_traits::cMin; } - static inline parameter_t get_max_comp() { return component_traits::cMax; } - static inline bool get_comps_are_signed() { return 
component_traits::cSigned; } + static inline parameter_t get_min_comp() + { + return component_traits::cMin; + } + static inline parameter_t get_max_comp() + { + return component_traits::cMax; + } + static inline bool get_comps_are_signed() + { + return component_traits::cSigned; + } - inline component_t operator[](uint i) const { + inline component_t operator[](uint i) const + { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } - inline component_t& operator[](uint i) { + inline component_t& operator[](uint i) + { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } - inline color_quad_u8& set_component(uint i, parameter_t f) { + inline color_quad_u8& set_component(uint i, parameter_t f) + { RG_ETC1_ASSERT(i < cNumComps); c[i] = static_cast(clamp(f)); @@ -232,7 +281,8 @@ namespace crnlib { return *this; } - inline color_quad_u8& set_grayscale(parameter_t l) { + inline color_quad_u8& set_grayscale(parameter_t l) + { component_t x = static_cast(clamp(l)); c[0] = x; c[1] = x; @@ -240,103 +290,136 @@ namespace crnlib { return *this; } - inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) { + inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) + { for (uint i = 0; i < cNumComps; i++) + { c[i] = static_cast(rg_etc1::clamp(c[i], l[i], h[i])); + } return *this; } - inline color_quad_u8& clamp(parameter_t l, parameter_t h) { + inline color_quad_u8& clamp(parameter_t l, parameter_t h) + { for (uint i = 0; i < cNumComps; i++) + { c[i] = static_cast(rg_etc1::clamp(c[i], l, h)); + } return *this; } // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). - inline parameter_t get_luma() const { + inline parameter_t get_luma() const + { return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); } // Returns REC 709 luma. 
- inline parameter_t get_luma_rec709() const { + inline parameter_t get_luma_rec709() const + { return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); } - inline uint squared_distance_rgb(const color_quad_u8& c) const { + inline uint squared_distance_rgb(const color_quad_u8& c) const + { return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b); } - inline uint squared_distance_rgba(const color_quad_u8& c) const { + inline uint squared_distance_rgba(const color_quad_u8& c) const + { return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a); } - inline bool rgb_equals(const color_quad_u8& rhs) const { + inline bool rgb_equals(const color_quad_u8& rhs) const + { return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); } - inline bool operator==(const color_quad_u8& rhs) const { + inline bool operator==(const color_quad_u8& rhs) const + { return m_u32 == rhs.m_u32; } - color_quad_u8& operator+=(const color_quad_u8& other) { + color_quad_u8& operator+=(const color_quad_u8& other) + { for (uint i = 0; i < 4; i++) + { c[i] = static_cast(clamp(c[i] + other.c[i])); + } return *this; } - color_quad_u8& operator-=(const color_quad_u8& other) { + color_quad_u8& operator-=(const color_quad_u8& other) + { for (uint i = 0; i < 4; i++) + { c[i] = static_cast(clamp(c[i] - other.c[i])); + } return *this; } - friend color_quad_u8 operator+(const color_quad_u8& lhs, const color_quad_u8& rhs) { + friend color_quad_u8 operator+(const color_quad_u8& lhs, const color_quad_u8& rhs) + { color_quad_u8 result(lhs); result += rhs; return result; } - friend color_quad_u8 operator-(const color_quad_u8& lhs, const color_quad_u8& rhs) { + friend color_quad_u8 operator-(const color_quad_u8& lhs, const color_quad_u8& rhs) + { color_quad_u8 result(lhs); result -= rhs; return result; } - }; // class color_quad_u8 + }; // class color_quad_u8 - struct vec3F { + struct vec3F + { float m_s[3]; - inline 
vec3F() {} - inline vec3F(float s) { + inline vec3F() + { + } + inline vec3F(float s) + { m_s[0] = s; m_s[1] = s; m_s[2] = s; } - inline vec3F(float x, float y, float z) { + inline vec3F(float x, float y, float z) + { m_s[0] = x; m_s[1] = y; m_s[2] = z; } - inline float operator[](uint i) const { + inline float operator[](uint i) const + { RG_ETC1_ASSERT(i < 3); return m_s[i]; } - inline vec3F& operator+=(const vec3F& other) { + inline vec3F& operator+=(const vec3F& other) + { for (uint i = 0; i < 3; i++) + { m_s[i] += other.m_s[i]; + } return *this; } - inline vec3F& operator*=(float s) { + inline vec3F& operator*=(float s) + { for (uint i = 0; i < 3; i++) + { m_s[i] *= s; + } return *this; } }; - enum etc_constants { + enum etc_constants + { cETC1BytesPerBlock = 8U, cETC1SelectorBits = 2U, @@ -396,403 +479,403 @@ namespace crnlib { static uint8 g_quant5_tab[256 + 16]; - static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = - { - {-8, -2, 2, 8}, - {-17, -5, 5, 17}, - {-29, -9, 9, 29}, - {-42, -13, 13, 42}, - {-60, -18, 18, 60}, - {-80, -24, 24, 80}, - {-106, -33, 33, 106}, - {-183, -47, 47, 183} }; + static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { + { -8, -2, 2, 8 }, + { -17, -5, 5, 17 }, + { -29, -9, 9, 29 }, + { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, + { -80, -24, 24, 80 }, + { -106, -33, 33, 106 }, + { -183, -47, 47, 183 } + }; static const uint8 g_etc2_modifier_table[8] = { 3, 6, 11, 16, 23, 32, 41, 64 }; static const int g_etc2a_modifier_table[16][8] = { - { -3, -6, -9, -15, 2, 5, 8, 14}, - { -3, -7, -10, -13, 2, 6, 9, 12}, - { -2, -5, -8, -13, 1, 4, 7, 12}, - { -2, -4, -6, -13, 1, 3, 5, 12}, - { -3, -6, -8, -12, 2, 5, 7, 11}, - { -3, -7, -9, -11, 2, 6, 8, 10}, - { -4, -7, -8, -11, 3, 6, 7, 10}, - { -3, -5, -8, -11, 2, 4, 7, 10}, - { -2, -6, -8, -10, 1, 5, 7, 9}, - { -2, -5, -8, -10, 1, 4, 7, 9}, - { -2, -4, -8, -10, 1, 3, 7, 9}, - { -2, -5, -7, -10, 1, 4, 6, 9}, - { -3, -4, -7, -10, 
2, 3, 6, 9}, - { -1, -2, -3, -10, 0, 1, 2, 9}, - { -4, -6, -8, -9, 3, 5, 7, 8}, - { -3, -5, -7, -9, 2, 4, 6, 8}, + { -3, -6, -9, -15, 2, 5, 8, 14 }, + { -3, -7, -10, -13, 2, 6, 9, 12 }, + { -2, -5, -8, -13, 1, 4, 7, 12 }, + { -2, -4, -6, -13, 1, 3, 5, 12 }, + { -3, -6, -8, -12, 2, 5, 7, 11 }, + { -3, -7, -9, -11, 2, 6, 8, 10 }, + { -4, -7, -8, -11, 3, 6, 7, 10 }, + { -3, -5, -8, -11, 2, 4, 7, 10 }, + { -2, -6, -8, -10, 1, 5, 7, 9 }, + { -2, -5, -8, -10, 1, 4, 7, 9 }, + { -2, -4, -8, -10, 1, 3, 7, 9 }, + { -2, -5, -7, -10, 1, 4, 6, 9 }, + { -3, -4, -7, -10, 2, 3, 6, 9 }, + { -1, -2, -3, -10, 0, 1, 2, 9 }, + { -4, -6, -8, -9, 3, 5, 7, 8 }, + { -3, -5, -7, -9, 2, 4, 6, 8 }, }; static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. - static uint16 g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] + static uint16 g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. 
// To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) - static const uint16 g_color8_to_etc_block_config_0_255[2][33] = - { - {0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, - 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF}, - {0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, - 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF}, + static const uint16 g_color8_to_etc_block_config_0_255[2][33] = { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, }; // Really only [254][11]. 
- static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = - { - {0x021C, 0x0D0D, 0xFFFF}, - {0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF}, - {0x0113, 0x0217, 0xFFFF}, - {0x0116, 0x031E, - 0x0B0E, 0x0405, 0xFFFF}, - {0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF}, - {0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF}, - {0x0303, 0x0215, 0x0607, 0xFFFF}, - {0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF}, - {0x0100, 0x0024, 0x0306, - 0x0025, 0x041B, 0x0E0D, 0xFFFF}, - {0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF}, - {0x0213, 0x0317, 0xFFFF}, - {0x0112, - 0x0505, 0xFFFF}, - {0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF}, - {0x0211, 0x0909, 0xFFFF}, - {0x0110, 0x0315, 0x0707, - 0x0419, 0x180F, 0xFFFF}, - {0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF}, - {0x0032, 0x0202, 0x0033, 0x0125, 0x051B, - 0x0F0D, 0xFFFF}, - {0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF}, - {0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF}, - {0x0605, - 0x0417, 0xFFFF}, - {0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF}, - {0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF}, - {0x0519, 0x190F, 0xFFFF}, - {0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF}, - {0x0130, 0x0214, - 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF}, - {0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF}, - {0x031A, - 0x0D0B, 0x091F, 0xFFFF}, - {0x0413, 0x0705, 0x0517, 0xFFFF}, - {0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF}, - {0x0126, 0x080C, 0x0B09, 0xFFFF}, - {0x0411, 0x0619, 0x1A0F, 0xFFFF}, - {0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, - 0xFFFF}, - {0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF}, - {0x0132, 0x0302, 0x0229, 0x110D, - 0xFFFF}, - {0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF}, - {0x0220, 0x0513, 0x0617, 0xFFFF}, - {0x0135, 0x0805, - 0x0327, 0xFFFF}, - {0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF}, - {0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, - 0xFFFF}, - {0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF}, - {0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF}, - {0x0230, 
0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF}, - {0x0300, 0x0224, 0x0506, 0x0521, - 0x0F0B, 0x0B1F, 0xFFFF}, - {0x041A, 0x0613, 0x0717, 0xFFFF}, - {0x0235, 0x0905, 0xFFFF}, - {0x0312, 0x0134, 0x0523, - 0x0427, 0xFFFF}, - {0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF}, - {0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, - 0xFFFF}, - {0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF}, - {0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, - 0x130D, 0xFFFF}, - {0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF}, - {0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF}, - {0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF}, - {0x0623, 0x0527, 0xFFFF}, - {0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, - 0xFFFF}, - {0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF}, - {0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, - 0xFFFF}, - {0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF}, - {0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, - 0x140D, 0xFFFF}, - {0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF}, - {0x051A, 0x0813, 0x0B05, 0x0917, - 0xFFFF}, - {0x0723, 0x0435, 0x0627, 0xFFFF}, - {0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF}, - {0x0326, 0x0A0C, 0x012E, - 0x0811, 0x0A19, 0x1E0F, 0xFFFF}, - {0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF}, - {0x0410, 0x0901, 0x0633, 0x0725, - 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF}, - {0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF}, - {0x0332, 0x0502, 0x0821, 0x0139, - 0x120B, 0x0E1F, 0xFFFF}, - {0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF}, - {0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF}, - {0x0823, 0x032F, 0xFFFF}, - {0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF}, - {0x0422, 0x0604, 0x090A, - 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF}, - {0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF}, - {0x032A, 0x0825, 0x0437, - 0x0729, 0x0C1B, 0x160D, 0xFFFF}, - {0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF}, - {0x0500, - 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF}, - {0x061A, 0x0635, 0x0D05, 
0xFFFF}, - {0x0923, 0x0827, 0xFFFF}, - {0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF}, - {0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, - 0x072B, 0xFFFF}, - {0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF}, - {0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, - 0xFFFF}, - {0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF}, - {0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF}, - {0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF}, - {0x0520, 0x0A23, 0x0927, 0xFFFF}, - {0x0B11, 0x1209, 0x013B, 0x052F, - 0xFFFF}, - {0x0616, 0x081E, 0x0D19, 0xFFFF}, - {0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, - 0x0F1D, 0xFFFF}, - {0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF}, - {0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF}, - {0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF}, - {0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, - 0x0D17, 0xFFFF}, - {0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF}, - {0x1309, 0x023B, 0x062F, 0xFFFF}, - {0x0612, 0x0434, - 0x013A, 0x0C11, 0x0E19, 0xFFFF}, - {0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF}, - {0x0D01, - 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF}, - {0x0610, 0x0A29, 0x190D, 0xFFFF}, - {0x0718, 0x042C, 0x0C21, - 0x0539, 0x160B, 0x121F, 0xFFFF}, - {0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF}, - {0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, - 0xFFFF}, - {0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF}, - {0x0D11, 0x0F19, 0x1409, 0xFFFF}, - {0x0716, 0x003C, 0x091E, - 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF}, - {0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, - 0xFFFF}, - {0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF}, - {0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF}, - {0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF}, - {0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF}, - {0x081A, 0x0D23, 0x0C27, 0xFFFF}, - {0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF}, - {0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, - 0x1019, 0x0B2B, 0x013D, 0xFFFF}, - {0x0626, 
0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF}, - {0x0C33, - 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF}, - {0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF}, - {0x0818, 0x052C, 0x0F13, 0x180B, - 0x141F, 0xFFFF}, - {0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF}, - {0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF}, - {0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF}, - {0x1119, 0x023D, 0xFFFF}, - {0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, - 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF}, - {0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, - 0xFFFF}, - {0x0F21, 0x0D29, 0x1C0D, 0xFFFF}, - {0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF}, - {0x0730, 0x0814, 0x0536, - 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF}, - {0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF}, - {0x091A, - 0x1709, 0x063B, 0x0A2F, 0xFFFF}, - {0x1011, 0x1219, 0x033D, 0xFFFF}, - {0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, - 0x1507, 0x0D2B, 0xFFFF}, - {0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF}, - {0x0E29, 0x1D0D, 0xFFFF}, - {0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF}, - {0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF}, - {0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF}, - {0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF}, - {0x0820, - 0x1111, 0x1319, 0x1809, 0xFFFF}, - {0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF}, - {0x0916, 0x023C, 0x0B1E, 0x1031, - 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF}, - {0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF}, - {0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF}, - {0x072A, 0x1213, 0x1317, 0xFFFF}, - {0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, - 0x1505, 0xFFFF}, - {0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF}, - {0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, - 0xFFFF}, - {0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF}, - {0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, - 0x161D, 0xFFFF}, - {0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF}, - {0x1221, 0x0B39, 
0x1029, - 0xFFFF}, - {0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF}, - {0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF}, - {0x0832, - 0x0A02, 0x1223, 0x1127, 0xFFFF}, - {0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF}, - {0x0920, 0x1519, 0x063D, - 0xFFFF}, - {0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF}, - {0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, - 0x1225, 0x0E37, 0x161B, 0xFFFF}, - {0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF}, - {0x0C39, 0x1D0B, 0x191F, 0xFFFF}, - {0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF}, - {0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF}, - {0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF}, - {0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF}, - {0x1331, - 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF}, - {0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, - 0x181D, 0xFFFF}, - {0x0926, 0x072E, 0x1229, 0xFFFF}, - {0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF}, - {0x0A10, 0x1513, - 0x1617, 0xFFFF}, - {0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF}, - {0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF}, - {0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF}, - {0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF}, - {0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF}, - {0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF}, - {0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF}, - {0x1613, 0x1717, 0xFFFF}, - {0x092A, 0x1235, 0x1905, - 0xFFFF}, - {0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF}, - {0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, - 0x0C3B, 0x102F, 0xFFFF}, - {0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF}, - {0x1531, 0x1701, 0x1803, 0x122D, - 0x1A1D, 0xFFFF}, - {0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF}, - {0x0A26, 0x003E, 0x082E, 0x1621, - 0x0F39, 0x1429, 0x003F, 0xFFFF}, - {0x1713, 0x1C1F, 0xFFFF}, - {0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF}, - {0x0C18, - 0x092C, 0x1623, 0x1527, 0xFFFF}, - {0x0A32, 0x0C02, 0x1711, 0x1E09, 
0x0D3B, 0x112F, 0xFFFF}, - {0x0A28, 0x0D1C, 0x1919, - 0x0A3D, 0xFFFF}, - {0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF}, - {0x1801, 0x1533, 0x1625, - 0x1237, 0x1A1B, 0xFFFF}, - {0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF}, - {0x0B22, 0x0D04, 0x1039, 0x1D1F, - 0xFFFF}, - {0x1813, 0x1B05, 0x1917, 0xFFFF}, - {0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF}, - {0x0B30, 0x0C14, 0x0936, - 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF}, - {0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF}, - {0x0D1A, - 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF}, - {0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF}, - {0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF}, - {0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF}, - {0x1913, - 0x1A17, 0xFFFF}, - {0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF}, - {0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF}, - {0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF}, - {0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF}, - {0x0C20, - 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF}, - {0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF}, - {0x0D16, 0x063C, - 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF}, - {0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF}, - {0x1635, 0x1D05, 0xFFFF}, - {0x0B2A, 0x1923, 0x1827, 0xFFFF}, - {0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF}, - {0x0D00, 0x0C24, 0x0F06, - 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF}, - {0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF}, - {0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF}, - {0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF}, - {0x0C26, - 0x023E, 0x0A2E, 0x1B13, 0xFFFF}, - {0x1735, 0x1E05, 0x1C17, 0xFFFF}, - {0x0D10, 0x1A23, 0x1927, 0xFFFF}, - {0x0E18, - 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF}, - {0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF}, - {0x0C28, 0x0F1C, 0x1A31, 0x1D03, - 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF}, - {0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF}, - {0x1B21, 0x1929, - 0x053F, 0xFFFF}, 
- {0x0E16, 0x073C, 0x1439, 0xFFFF}, - {0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF}, - {0x1B23, - 0x1835, 0x1A27, 0xFFFF}, - {0x0C2A, 0x123B, 0x162F, 0xFFFF}, - {0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF}, - {0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF}, - {0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, - 0x182D, 0xFFFF}, - {0x1A29, 0x063F, 0xFFFF}, - {0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF}, - {0x0D26, 0x033E, - 0x0B2E, 0x1D13, 0x1E17, 0xFFFF}, - {0x1935, 0x1B27, 0xFFFF}, - {0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF}, - {0x0F18, - 0x0C2C, 0x1D11, 0x1F19, 0xFFFF}, - {0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF}, - {0x0D28, 0x1C31, 0x1E01, - 0x1B33, 0x192D, 0xFFFF}, - {0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF}, - {0x1D21, 0x1639, 0xFFFF}, - {0x0F16, - 0x083C, 0x1E13, 0x1F17, 0xFFFF}, - {0x0E22, 0x1A35, 0xFFFF}, - {0x1D23, 0x1C27, 0xFFFF}, - {0x0D2A, 0x1E11, 0x143B, - 0x182F, 0xFFFF}, - {0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF}, - {0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, - 0x1A2D, 0xFFFF}, - {0x1C33, 0x1D25, 0x1937, 0xFFFF}, - {0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF}, - {0x0F12, 0x0D34, - 0x0A3A, 0x1F13, 0xFFFF}, - {0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF}, - {0x1E23, 0x1D27, 0xFFFF}, - {0x0F10, 0x1F11, - 0x153B, 0x192F, 0xFFFF}, - {0x0D2C, 0x123D, 0xFFFF}, + static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = { + { 0x021C, 0x0D0D, 0xFFFF }, + { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, + { 0x0113, 0x0217, 0xFFFF }, + { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, + { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, + { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, + { 0x0303, 0x0215, 0x0607, 0xFFFF }, + { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, + { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, + { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, + { 0x0213, 0x0317, 0xFFFF }, + { 0x0112, + 0x0505, 0xFFFF }, + { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, + { 0x0211, 
0x0909, 0xFFFF }, + { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, + { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, + { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, + { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, + { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, + { 0x0605, + 0x0417, 0xFFFF }, + { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, + { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF }, + { 0x0519, 0x190F, 0xFFFF }, + { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, + { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, + { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, + { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, + { 0x0413, 0x0705, 0x0517, 0xFFFF }, + { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, + { 0x0126, 0x080C, 0x0B09, 0xFFFF }, + { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, + { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, + { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, + { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, + { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, + { 0x0220, 0x0513, 0x0617, 0xFFFF }, + { 0x0135, 0x0805, + 0x0327, 0xFFFF }, + { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, + { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, + { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, + { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, + { 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, + { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, + { 0x041A, 0x0613, 0x0717, 0xFFFF }, + { 0x0235, 0x0905, 0xFFFF }, + { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, + { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, + { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, + { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, + { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, + { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, + { 0x0228, 0x051C, 
0x0713, 0x100B, 0x0C1F, 0xFFFF }, + { 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, + { 0x0623, 0x0527, 0xFFFF }, + { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, + { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, + { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, + { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, + { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, + { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, + { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, + { 0x0723, 0x0435, 0x0627, 0xFFFF }, + { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, + { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, + { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, + { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, + { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, + { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, + { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, + { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, + { 0x0823, 0x032F, 0xFFFF }, + { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, + { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, + { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, + { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, + { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, + { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, + { 0x061A, 0x0635, 0x0D05, 0xFFFF }, + { 0x0923, 0x0827, 0xFFFF }, + { 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, + { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, + { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, + { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, + { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, + { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, + { 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, + { 0x0520, 0x0A23, 0x0927, 0xFFFF }, + { 
0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, + { 0x0616, 0x081E, 0x0D19, 0xFFFF }, + { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, + { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, + { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, + { 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, + { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, + { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, + { 0x1309, 0x023B, 0x062F, 0xFFFF }, + { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, + { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, + { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, + { 0x0610, 0x0A29, 0x190D, 0xFFFF }, + { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 0xFFFF }, + { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, + { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, + { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, + { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, + { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, + { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, + { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, + { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, + { 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, + { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, + { 0x081A, 0x0D23, 0x0C27, 0xFFFF }, + { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, + { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 0xFFFF }, + { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, + { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, + { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, + { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, + { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, + { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, + { 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, + { 0x1119, 0x023D, 0xFFFF }, + { 0x0816, 0x013C, 
0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, + { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, + { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, + { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, + { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, + { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, + { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, + { 0x1011, 0x1219, 0x033D, 0xFFFF }, + { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, + { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, + { 0x0E29, 0x1D0D, 0xFFFF }, + { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, + { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF }, + { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, + { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, + { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, + { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, + { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, + { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, + { 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, + { 0x072A, 0x1213, 0x1317, 0xFFFF }, + { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, + { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, + { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, + { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, + { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, + { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, + { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, + { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, + { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, + { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, + { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, + { 0x0920, 0x1519, 0x063D, + 0xFFFF }, + { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, + { 0x0A16, 0x033C, 
0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, + { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, + { 0x0C39, 0x1D0B, 0x191F, 0xFFFF }, + { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, + { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, + { 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, + { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, + { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, + { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, + { 0x0926, 0x072E, 0x1229, 0xFFFF }, + { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, + { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, + { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, + { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF }, + { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, + { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, + { 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, + { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, + { 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, + { 0x1613, 0x1717, 0xFFFF }, + { 0x092A, 0x1235, 0x1905, + 0xFFFF }, + { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, + { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, + { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, + { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, + { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, + { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, + { 0x1713, 0x1C1F, 0xFFFF }, + { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, + { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, + { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, + { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, + { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, + { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, + { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, + { 0x0B22, 0x0D04, 
0x1039, 0x1D1F, + 0xFFFF }, + { 0x1813, 0x1B05, 0x1917, 0xFFFF }, + { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, + { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, + { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, + { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, + { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, + { 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, + { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, + { 0x1913, + 0x1A17, 0xFFFF }, + { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, + { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, + { 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, + { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, + { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, + { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, + { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, + { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, + { 0x1635, 0x1D05, 0xFFFF }, + { 0x0B2A, 0x1923, 0x1827, 0xFFFF }, + { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, + { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, + { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, + { 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, + { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, + { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, + { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, + { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, + { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, + { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, + { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, + { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, + { 0x1B21, 0x1929, + 0x053F, 0xFFFF }, + { 0x0E16, 0x073C, 0x1439, 0xFFFF }, + { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, + { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, + { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, + { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 
0x0F3D, 0xFFFF }, + { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, + { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, + { 0x1A29, 0x063F, 0xFFFF }, + { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, + { 0x0D26, 0x033E, + 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, + { 0x1935, 0x1B27, 0xFFFF }, + { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, + { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, + { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, + { 0x0D28, 0x1C31, 0x1E01, + 0x1B33, 0x192D, 0xFFFF }, + { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, + { 0x1D21, 0x1639, 0xFFFF }, + { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, + { 0x0E22, 0x1A35, 0xFFFF }, + { 0x1D23, 0x1C27, 0xFFFF }, + { 0x0D2A, 0x1E11, 0x143B, + 0x182F, 0xFFFF }, + { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, + { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, + { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, + { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, + { 0x0F12, 0x0D34, + 0x0A3A, 0x1F13, 0xFFFF }, + { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, + { 0x1E23, 0x1D27, 0xFFFF }, + { 0x0F10, 0x1F11, + 0x153B, 0x192F, 0xFFFF }, + { 0x0D2C, 0x123D, 0xFFFF }, }; - struct etc1_block { + struct etc1_block + { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 - union { + union + { uint64 m_uint64; uint8 m_bytes[8]; }; @@ -800,14 +883,19 @@ namespace crnlib { uint8 m_low_color[2]; uint8 m_high_color[2]; - enum { cNumSelectorBytes = 4 }; + enum + { + cNumSelectorBytes = 4 + }; uint8 m_selectors[cNumSelectorBytes]; - inline void clear() { + inline void clear() + { zero_this(this); } - inline uint get_byte_bits(uint ofs, uint num) const { + inline uint get_byte_bits(uint ofs, uint num) const + { RG_ETC1_ASSERT((ofs + num) <= 64U); RG_ETC1_ASSERT(num && (num <= 8U)); RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); @@ -816,7 +904,8 @@ namespace crnlib { return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 
<< num) - 1); } - inline void set_byte_bits(uint ofs, uint num, uint bits) { + inline void set_byte_bits(uint ofs, uint num, uint bits) + { RG_ETC1_ASSERT((ofs + num) <= 64U); RG_ETC1_ASSERT(num && (num < 32U)); RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); @@ -830,34 +919,40 @@ namespace crnlib { // false = left/right subblocks // true = upper/lower subblocks - inline bool get_flip_bit() const { + inline bool get_flip_bit() const + { return (m_bytes[3] & 1) != 0; } - inline void set_flip_bit(bool flip) { + inline void set_flip_bit(bool flip) + { m_bytes[3] &= ~1; m_bytes[3] |= static_cast(flip); } - inline bool get_diff_bit() const { + inline bool get_diff_bit() const + { return (m_bytes[3] & 2) != 0; } - inline void set_diff_bit(bool diff) { + inline void set_diff_bit(bool diff) + { m_bytes[3] &= ~2; m_bytes[3] |= (static_cast(diff) << 1); } // Returns intensity modifier table (0-7) used by subblock subblock_id. // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) - inline uint get_inten_table(uint subblock_id) const { + inline uint get_inten_table(uint subblock_id) const + { RG_ETC1_ASSERT(subblock_id < 2); const uint ofs = subblock_id ? 2 : 5; return (m_bytes[3] >> ofs) & 7; } // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) - inline void set_inten_table(uint subblock_id, uint t) { + inline void set_inten_table(uint subblock_id, uint t) + { RG_ETC1_ASSERT(subblock_id < 2); RG_ETC1_ASSERT(t < 8); const uint ofs = subblock_id ? 2 : 5; @@ -866,7 +961,8 @@ namespace crnlib { } // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. - inline uint get_selector(uint x, uint y) const { + inline uint get_selector(uint x, uint y) const + { RG_ETC1_ASSERT((x | y) < 4); const uint bit_index = x * 4 + y; @@ -880,7 +976,8 @@ namespace crnlib { } // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
- inline void set_selector(uint x, uint y, uint val) { + inline void set_selector(uint x, uint y, uint val) + { RG_ETC1_ASSERT((x | y | val) < 4); const uint bit_index = x * 4 + y; @@ -901,27 +998,33 @@ namespace crnlib { p[-2] |= (msb << byte_bit_ofs); } - inline void set_base4_color(uint idx, uint16 c) { - if (idx) { + inline void set_base4_color(uint idx, uint16 c) + { + if (idx) + { set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); } - else { + else + { set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); } } - inline uint16 get_base4_color(uint idx) const { + inline uint16 get_base4_color(uint idx) const + { uint r, g, b; - if (idx) { + if (idx) + { r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); } - else { + else + { r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); @@ -929,26 +1032,30 @@ namespace crnlib { return static_cast(b | (g << 4U) | (r << 8U)); } - inline void set_base5_color(uint16 c) { + inline void set_base5_color(uint16 c) + { set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); } - inline uint16 get_base5_color() const { + inline uint16 get_base5_color() const + { const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); return static_cast(b | (g << 5U) | (r << 10U)); } - void set_delta3_color(uint16 c) { + void set_delta3_color(uint16 c) + { 
set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); } - inline uint16 get_delta3_color() const { + inline uint16 get_delta3_color() const + { const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); @@ -984,13 +1091,16 @@ namespace crnlib { static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); - static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) { - if (color4) { + static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) + { + if (color4) + { dst.r = src.r | (src.r << 4); dst.g = src.g | (src.g << 4); dst.b = src.b | (src.b << 4); } - else { + else + { dst.r = (src.r >> 2) | (src.r << 3); dst.g = (src.g >> 2) | (src.g << 3); dst.b = (src.b >> 2) | (src.b << 3); @@ -1000,22 +1110,27 @@ namespace crnlib { }; // Returns pointer to sorted array. 
- template - T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) { + template + T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) + { RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T))); RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4)); - if (init_indices) { + if (init_indices) + { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; uint i; - for (i = 0; p != q; p += 2, i += 2) { + for (i = 0; p != q; p += 2, i += 2) + { p[0] = static_cast(i); p[1] = static_cast(i + 1); } if (num_indices & 1) + { *p = static_cast(i); + } } uint hist[256 * 4]; @@ -1025,10 +1140,12 @@ namespace crnlib { #define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) #define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) - if (key_size == 4) { + if (key_size == 4) + { T* p = pIndices0; T* q = pIndices0 + num_indices; - for (; p != q; p++) { + for (; p != q; p++) + { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; @@ -1037,10 +1154,12 @@ namespace crnlib { hist[768 + ((key >> 24) & 0xFF)]++; } } - else if (key_size == 3) { + else if (key_size == 3) + { T* p = pIndices0; T* q = pIndices0 + num_indices; - for (; p != q; p++) { + for (; p != q; p++) + { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; @@ -1048,11 +1167,13 @@ namespace crnlib { hist[512 + ((key >> 16) & 0xFF)]++; } } - else if (key_size == 2) { + else if (key_size == 2) + { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; - for (; p != q; p += 2) { + for (; p != q; p += 2) + { const uint key0 = RG_ETC1_GET_KEY(p); const uint key1 = RG_ETC1_GET_KEY(p + 1); @@ -1063,22 +1184,27 @@ namespace crnlib { hist[256 + ((key1 >> 8) & 0xFF)]++; } - if (num_indices & 1) { + if (num_indices & 1) + { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 
8) & 0xFF)]++; } } - else { + else + { RG_ETC1_ASSERT(key_size == 1); if (key_size != 1) + { return nullptr; + } T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; - for (; p != q; p += 2) { + for (; p != q; p += 2) + { const uint key0 = RG_ETC1_GET_KEY(p); const uint key1 = RG_ETC1_GET_KEY(p + 1); @@ -1086,7 +1212,8 @@ namespace crnlib { hist[key1 & 0xFF]++; } - if (num_indices & 1) { + if (num_indices & 1) + { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; @@ -1096,13 +1223,15 @@ namespace crnlib { T* pCur = pIndices0; T* pNew = pIndices1; - for (uint pass = 0; pass < key_size; pass++) { + for (uint pass = 0; pass < key_size; pass++) + { const uint* pHist = &hist[pass << 8]; uint offsets[256]; uint cur_ofs = 0; - for (uint i = 0; i < 256; i += 2) { + for (uint i = 0; i < 256; i += 2) + { offsets[i] = cur_ofs; cur_ofs += pHist[i]; @@ -1115,14 +1244,16 @@ namespace crnlib { T* p = pCur; T* q = pCur + (num_indices >> 1) * 2; - for (; p != q; p += 2) { + for (; p != q; p += 2) + { uint index0 = p[0]; uint index1 = p[1]; uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; - if (c0 == c1) { + if (c0 == c1) + { uint dst_offset0 = offsets[c0]; offsets[c0] = dst_offset0 + 2; @@ -1130,7 +1261,8 @@ namespace crnlib { pNew[dst_offset0] = static_cast(index0); pNew[dst_offset0 + 1] = static_cast(index1); } - else { + else + { uint dst_offset0 = offsets[c0]++; uint dst_offset1 = offsets[c1]++; @@ -1139,7 +1271,8 @@ namespace crnlib { } } - if (num_indices & 1) { + if (num_indices & 1) + { uint index = *p; uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; @@ -1160,12 +1293,15 @@ namespace crnlib { #undef RG_ETC1_GET_KEY #undef RG_ETC1_GET_KEY_FROM_INDEX - uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) { + uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) + { return pack_color5(color.r, 
color.g, color.b, scaled, bias); } - uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) { - if (scaled) { + uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { r = (r * 31U + bias) / 255U; g = (g * 31U + bias) / 255U; b = (b * 31U + bias) / 255U; @@ -1178,12 +1314,14 @@ namespace crnlib { return static_cast(b | (g << 5U) | (r << 10U)); } - color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) { + color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) + { uint b = packed_color5 & 31U; uint g = (packed_color5 >> 5U) & 31U; uint r = (packed_color5 >> 10U) & 31U; - if (scaled) { + if (scaled) + { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); @@ -1192,14 +1330,16 @@ namespace crnlib { return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); } - void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) { + void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) + { color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); r = c.r; g = c.g; b = c.b; } - bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { + bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { int dc_r, dc_g, dc_b; unpack_delta3(dc_r, dc_g, dc_b, packed_delta3); @@ -1208,14 +1348,16 @@ namespace crnlib { int r = ((packed_color5 >> 10U) & 31U) + dc_r; bool success = true; - if (static_cast(r | g | b) > 31U) { + if (static_cast(r | g | b) > 31U) + { success = false; r = rg_etc1::clamp(r, 0, 31); g = rg_etc1::clamp(g, 0, 31); b = rg_etc1::clamp(b, 0, 31); } - if (scaled) { + if (scaled) + { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); @@ -1225,7 +1367,8 @@ namespace 
crnlib { return success; } - bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { + bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { color_quad_u8 result; const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); r = result.r; @@ -1234,37 +1377,54 @@ namespace crnlib { return success; } - uint16 etc1_block::pack_delta3(int r, int g, int b) { + uint16 etc1_block::pack_delta3(int r, int g, int b) + { RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); if (r < 0) + { r += 8; + } if (g < 0) + { g += 8; + } if (b < 0) + { b += 8; + } return static_cast(b | (g << 3) | (r << 6)); } - void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) { + void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) + { r = (packed_delta3 >> 6) & 7; g = (packed_delta3 >> 3) & 7; b = packed_delta3 & 7; if (r >= 4) + { r -= 8; + } if (g >= 4) + { g -= 8; + } if (b >= 4) + { b -= 8; + } } - uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) { + uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) + { return pack_color4(color.r, color.g, color.b, scaled, bias); } - uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) { - if (scaled) { + uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { r = (r * 15U + bias) / 255U; g = (g * 15U + bias) / 255U; b = (b * 15U + bias) / 255U; @@ -1277,12 +1437,14 @@ namespace crnlib { return static_cast(b | (g << 4U) | (r << 8U)); } - color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) { + color_quad_u8 
etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) + { uint b = packed_color4 & 15U; uint g = (packed_color4 >> 4U) & 15U; uint r = (packed_color4 >> 8U) & 15U; - if (scaled) { + if (scaled) + { b = (b << 4U) | b; g = (g << 4U) | g; r = (r << 4U) | r; @@ -1291,14 +1453,16 @@ namespace crnlib { return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); } - void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) { + void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) + { color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); r = c.r; g = c.g; b = c.b; } - void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) { + void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) + { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; @@ -1320,7 +1484,8 @@ namespace crnlib { pDst[3].set(ir + y3, ig + y3, ib + y3); } - bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) { + bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) + { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; @@ -1344,7 +1509,8 @@ namespace crnlib { return success; } - void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) { + void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) + { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; @@ -1366,7 +1532,8 @@ namespace crnlib { pDst[3].set(ir + y3, ig + y3, ib + y3); } - bool unpack_etc1_block(const void* pETC1_block, 
unsigned int* pDst_pixels_rgba, bool preserve_alpha) { + bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha) + { color_quad_u8* pDst = reinterpret_cast(pDst_pixels_rgba); const etc1_block& block = *static_cast(pETC1_block); @@ -1379,15 +1546,19 @@ namespace crnlib { color_quad_u8 subblock_colors1[4]; bool success = true; - if (diff_flag) { + if (diff_flag) + { const uint16 base_color5 = block.get_base5_color(); const uint16 delta_color3 = block.get_delta3_color(); etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + { success = false; + } } - else { + else + { const uint16 base_color4_0 = block.get_base4_color(0); etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); @@ -1395,9 +1566,12 @@ namespace crnlib { etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); } - if (preserve_alpha) { - if (flip_flag) { - for (uint y = 0; y < 2; y++) { + if (preserve_alpha) + { + if (flip_flag) + { + for (uint y = 0; y < 2; y++) + { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); @@ -1405,7 +1579,8 @@ namespace crnlib { pDst += 4; } - for (uint y = 2; y < 4; y++) { + for (uint y = 2; y < 4; y++) + { pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); @@ -1413,8 +1588,10 @@ namespace crnlib { pDst += 4; } } - else { - for (uint y = 0; y < 4; y++) { + else + { + for (uint y = 0; y < 4; y++) + { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); @@ 
-1423,13 +1600,16 @@ namespace crnlib { } } } - else { - if (flip_flag) { + else + { + if (flip_flag) + { // 0000 // 0000 // 1111 // 1111 - for (uint y = 0; y < 2; y++) { + for (uint y = 0; y < 2; y++) + { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors0[block.get_selector(2, y)]; @@ -1437,7 +1617,8 @@ namespace crnlib { pDst += 4; } - for (uint y = 2; y < 4; y++) { + for (uint y = 2; y < 4; y++) + { pDst[0] = subblock_colors1[block.get_selector(0, y)]; pDst[1] = subblock_colors1[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; @@ -1445,12 +1626,14 @@ namespace crnlib { pDst += 4; } } - else { + else + { // 0011 // 0011 // 0011 // 0011 - for (uint y = 0; y < 4; y++) { + for (uint y = 0; y < 4; y++) + { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; @@ -1463,9 +1646,12 @@ namespace crnlib { return success; } - bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, bool preserve_alpha) { + bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, bool preserve_alpha) + { if (unpack_etc1_block(pBlock, pDst_pixels_rgba, preserve_alpha)) + { return true; + } color_quad_u8* pDst = reinterpret_cast(pDst_pixels_rgba); const etc1_block& block = *static_cast(pBlock); @@ -1473,30 +1659,37 @@ namespace crnlib { const bool rOverflow = ((int8(B[0] << 5) >> 5) + (B[0] >> 3)) & 0x20; const bool gOverflow = ((int8(B[1] << 5) >> 5) + (B[1] >> 3)) & 0x20; - if (rOverflow || gOverflow) { + if (rOverflow || gOverflow) + { color_quad_u8 block_colors[4]; uint8 unpacked[3]; - if (rOverflow) { + if (rOverflow) + { unpacked[0] = (B[0] & 0x3) | (B[0] >> 1 & 0xC) | (B[2] & 0xF0); unpacked[1] = B[1] >> 4 | B[2] << 4; unpacked[2] = (B[1] & 0xF) | (B[3] & 0xF0); uint8 delta = g_etc2_modifier_table[(B[3] & 1) | (B[3] >> 1 & 6)]; - 
for (uint c = 0; c < 3; c++) { + for (uint c = 0; c < 3; c++) + { block_colors[2][c] = unpacked[c] << 4 | (unpacked[c] & 0xF); block_colors[1][c] = unpacked[c] >> 4 | (unpacked[c] & 0xF0); block_colors[0][c] = math::maximum(0, block_colors[1][c] - delta); block_colors[3][c] = math::minimum(255, block_colors[1][c] + delta); } } - else { + else + { unpacked[0] = (B[0] >> 3 & 0xF) | (B[2] << 1 & 0xF0); unpacked[1] = (B[1] >> 4 & 0x1) | (B[0] << 1 & 0xE) | (B[3] >> 3 & 0x10) | B[2] << 5; unpacked[2] = B[2] >> 7 | (B[1] << 1 & 0x6) | (B[1] & 0x8) | (B[3] << 1 & 0xF0); uint8 modifier = (B[3] & 4) | (B[3] << 1 & 2) | 1; for (int d = 0, c = 0; !d && c < 3; c++, modifier &= d < 0 ? 6 : 7) + { d = (unpacked[c] & 0xF) - (unpacked[c] >> 4); + } uint8 delta = g_etc2_modifier_table[modifier]; - for (uint c = 0; c < 3; c++) { + for (uint c = 0; c < 3; c++) + { uint8 c0 = unpacked[c] << 4 | (unpacked[c] & 0xF); uint8 c1 = unpacked[c] >> 4 | (unpacked[c] & 0xF0); block_colors[0][c] = math::maximum(0, c1 - delta); @@ -1505,15 +1698,20 @@ namespace crnlib { block_colors[3][c] = math::maximum(0, c0 - delta); } } - for (uint i = 0; i < 4; i++) { - for (uint j = 0; j < 4; j++, pDst++) { + for (uint i = 0; i < 4; i++) + { + for (uint j = 0; j < 4; j++, pDst++) + { pDst->set_rgb(block_colors[block.get_selector(j, i)]); if (!preserve_alpha) + { pDst->a = 255; + } } } } - else { + else + { int16 base[3], dj[3], di[3], color[3]; base[0] = (B[0] << 1 & 0xFC) | (B[0] >> 5 & 3); base[1] = (B[0] << 7 & 0x80) | (B[1] & 0x7E) | (B[0] & 1); @@ -1525,85 +1723,112 @@ namespace crnlib { dj[1] = ((B[4] & 0xFE) | B[4] >> 7) - base[1]; dj[2] = ((B[4] << 7 & 0x80) | (B[5] >> 1 & 0x7C) | (B[4] << 1 & 0x2) | B[5] >> 7) - base[2]; for (uint c = 0; c < 3; c++) + { base[c] = (base[c] << 2) + 2; - for (uint i = 0; i < 4; i++) { + } + for (uint i = 0; i < 4; i++) + { for (uint c = 0; c < 3; base[c] += di[c], c++) + { color[c] = base[c]; - for (uint j = 0; j < 4; j++, pDst++) { + } + for (uint j = 0; j < 4; j++, 
pDst++) + { for (uint c = 0; c < 3; color[c] += dj[c], c++) + { pDst->c[c] = math::clamp(color[c], 0, 1020) >> 2; + } if (!preserve_alpha) + { pDst->a = 255; + } } } } return true; } - bool unpack_etc2_alpha(const void* pBlock, unsigned int* pDst_pixels_rgba, int comp_index) { + bool unpack_etc2_alpha(const void* pBlock, unsigned int* pDst_pixels_rgba, int comp_index) + { color_quad_u8* pDst = (color_quad_u8*)pDst_pixels_rgba; const uint8* B = (const uint8*)pBlock; const int* modifier = g_etc2a_modifier_table[B[1] & 0xF]; uint8 values[8]; for (int base_codeword = B[0], multiplier = B[1] >> 4, i = 0; i < 8; i++) + { values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); - for (uint d0 = 3, i = 0; i < 4; i++, d0 += 3) { - for (uint d = d0, j = 0; j < 4; j++, pDst++, d += 12) { + } + for (uint d0 = 3, i = 0; i < 4; i++, d0 += 3) + { + for (uint d = d0, j = 0; j < 4; j++, pDst++, d += 12) + { int byte_offset = 2 + (d >> 3); int bit_offset = d & 7; int s = B[byte_offset] >> (8 - bit_offset) & 7; if (bit_offset < 3) + { s |= B[byte_offset - 1] << bit_offset & 7; + } pDst->c[comp_index] = values[s]; } } return true; } - struct etc1_solution_coordinates { - inline etc1_solution_coordinates() - : m_unscaled_color(0, 0, 0, 0), + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), m_inten_table(0), - m_color4(false) { + m_color4(false) + { } - inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) - : m_unscaled_color(r, g, b, 255), + inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : + m_unscaled_color(r, g, b, 255), m_inten_table(inten_table), - m_color4(color4) { + m_color4(color4) + { } - inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) - : m_unscaled_color(c), + inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : + m_unscaled_color(c), 
m_inten_table(inten_table), - m_color4(color4) { + m_color4(color4) + { } - inline etc1_solution_coordinates(const etc1_solution_coordinates& other) { + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { *this = other; } - inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) { + inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) + { m_unscaled_color = rhs.m_unscaled_color; m_inten_table = rhs.m_inten_table; m_color4 = rhs.m_color4; return *this; } - inline void clear() { + inline void clear() + { m_unscaled_color.clear(); m_inten_table = 0; m_color4 = false; } - inline color_quad_u8 get_scaled_color() const { + inline color_quad_u8 get_scaled_color() const + { int br, bg, bb; - if (m_color4) { + if (m_color4) + { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } - else { + else + { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); @@ -1611,14 +1836,17 @@ namespace crnlib { return color_quad_u8(br, bg, bb); } - inline void get_block_colors(color_quad_u8* pBlock_colors) { + inline void get_block_colors(color_quad_u8* pBlock_colors) + { int br, bg, bb; - if (m_color4) { + if (m_color4) + { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } - else { + else + { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); @@ -1635,38 +1863,46 @@ namespace crnlib { bool m_color4; }; - class etc1_optimizer { + class etc1_optimizer + { etc1_optimizer(const etc1_optimizer&); etc1_optimizer& operator=(const etc1_optimizer&); 
public: - etc1_optimizer() { + etc1_optimizer() + { clear(); } - void clear() { + void clear() + { m_pParams = nullptr; m_pResult = nullptr; m_pSorted_luma = nullptr; m_pSorted_luma_indices = nullptr; } - struct params : etc1_pack_params { - params() { + struct params : etc1_pack_params + { + params() + { clear(); } - params(const etc1_pack_params& base_params) - : etc1_pack_params(base_params) { + params(const etc1_pack_params& base_params) : + etc1_pack_params(base_params) + { clear_optimizer_params(); } - void clear() { + void clear() + { etc1_pack_params::clear(); clear_optimizer_params(); } - void clear_optimizer_params() { + void clear_optimizer_params() + { m_num_src_pixels = 0; m_pSrc_pixels = 0; @@ -1690,7 +1926,8 @@ namespace crnlib { bool m_constrain_against_base_color5; }; - struct results { + struct results + { uint64 m_error; color_quad_u8 m_block_color_unscaled; uint m_block_inten_table; @@ -1698,7 +1935,8 @@ namespace crnlib { uint8* m_pSelectors; bool m_block_color4; - inline results& operator=(const results& rhs) { + inline results& operator=(const results& rhs) + { m_block_color_unscaled = rhs.m_block_color_unscaled; m_block_color4 = rhs.m_block_color4; m_block_inten_table = rhs.m_block_inten_table; @@ -1713,9 +1951,11 @@ namespace crnlib { bool compute(); private: - struct potential_solution { - potential_solution() - : m_coords(), m_error(cUINT64_MAX), m_valid(false) { + struct potential_solution + { + potential_solution() : + m_coords(), m_error(cUINT64_MAX), m_valid(false) + { } etc1_solution_coordinates m_coords; @@ -1723,7 +1963,8 @@ namespace crnlib { uint64 m_error; bool m_valid; - void clear() { + void clear() + { m_coords.clear(); m_error = cUINT64_MAX; m_valid = false; @@ -1753,44 +1994,66 @@ namespace crnlib { bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); }; - bool etc1_optimizer::compute() { + bool etc1_optimizer::compute() + { const uint 
n = m_pParams->m_num_src_pixels; const int scan_delta_size = m_pParams->m_scan_delta_size; // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. - for (int zdi = 0; zdi < scan_delta_size; zdi++) { + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { const int zd = m_pParams->m_pScan_deltas[zdi]; const int mbb = m_bb + zd; if (mbb < 0) + { continue; + } else if (mbb > m_limit) + { break; + } - for (int ydi = 0; ydi < scan_delta_size; ydi++) { + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { const int yd = m_pParams->m_pScan_deltas[ydi]; const int mbg = m_bg + yd; if (mbg < 0) + { continue; + } else if (mbg > m_limit) + { break; + } - for (int xdi = 0; xdi < scan_delta_size; xdi++) { + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { const int xd = m_pParams->m_pScan_deltas[xdi]; const int mbr = m_br + xd; if (mbr < 0) + { continue; + } else if (mbr > m_limit) + { break; + } etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cHighQuality) { + if (m_pParams->m_quality == cHighQuality) + { if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + { continue; + } } - else { + else + { if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) + { continue; + } } // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. @@ -1811,13 +2074,15 @@ namespace crnlib { // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); - for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) { + for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { const uint8* pSelectors = m_best_solution.m_selectors; const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); - for (uint r = 0; r < n; r++) { + for (uint r = 0; r < n; r++) + { const uint s = *pSelectors++; const int yd = pInten_table[s]; // Compute actual delta being applied to each pixel, taking into account clamping. @@ -1826,7 +2091,9 @@ namespace crnlib { delta_sum_b += rg_etc1::clamp(base_color.b + yd, 0, 255) - base_color.b; } if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + { break; + } const float avg_delta_r_f = static_cast(delta_sum_r) / n; const float avg_delta_g_f = static_cast(delta_sum_g) / n; const float avg_delta_b_f = static_cast(delta_sum_b) / n; @@ -1837,32 +2104,47 @@ namespace crnlib { bool skip = false; if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) + { skip = true; + } else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) + { skip = true; + } else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) + { skip = true; + } if (skip) + { break; + } etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cHighQuality) { + if (m_pParams->m_quality == cHighQuality) + { if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) + { break; + } } - else { + else + { if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) + { break; + } } - } // refinement_trial + } // refinement_trial - } // xdi - } // ydi - } // zdi + } // xdi + } // ydi + } // zdi - if 
(!m_best_solution.m_valid) { + if (!m_best_solution.m_valid) + { m_pResult->m_error = cUINT32_MAX; return false; } @@ -1895,7 +2177,8 @@ namespace crnlib { return true; } - void etc1_optimizer::init(const params& p, results& r) { + void etc1_optimizer::init(const params& p, results& r) + { // This version is hardcoded for 8 pixel subblocks. RG_ETC1_ASSERT(p.m_num_src_pixels == 8); @@ -1908,7 +2191,8 @@ namespace crnlib { vec3F avg_color(0.0f); - for (uint i = 0; i < n; i++) { + for (uint i = 0; i < n; i++) + { const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; const vec3F fc(c.r, c.g, c.b); @@ -1924,14 +2208,19 @@ namespace crnlib { m_bg = rg_etc1::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); m_bb = rg_etc1::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); - if (m_pParams->m_quality <= cMediumQuality) { + if (m_pParams->m_quality <= cMediumQuality) + { m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false); m_pSorted_luma = m_sorted_luma[0]; if (m_pSorted_luma_indices == m_sorted_luma[0]) + { m_pSorted_luma = m_sorted_luma[1]; + } for (uint i = 0; i < n; i++) + { m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } } m_best_solution.m_coords.clear(); @@ -1939,16 +2228,20 @@ namespace crnlib { m_best_solution.m_error = cUINT64_MAX; } - bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { + bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { trial_solution.m_valid = false; - if (m_pParams->m_constrain_against_base_color5) { + if (m_pParams->m_constrain_against_base_color5) + { const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; const int db = 
coords.m_unscaled_color.b - m_pParams->m_base_color5.b; if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { return false; + } } const color_quad_u8 base_color(coords.get_scaled_color()); @@ -1957,11 +2250,13 @@ namespace crnlib { trial_solution.m_error = cUINT64_MAX; - for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) { + for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { const int* pInten_table = g_etc1_inten_tables[inten_table]; color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) { + for (uint s = 0; s < 4; s++) + { const int yd = pInten_table[s]; block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); } @@ -1969,26 +2264,30 @@ namespace crnlib { uint64 total_error = 0; const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - for (uint c = 0; c < n; c++) { + for (uint c = 0; c < n; c++) + { const color_quad_u8& src_pixel = *pSrc_pixels++; uint best_selector_index = 0; uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b); uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b); - if (trial_error < best_error) { + if (trial_error < best_error) + { best_error = trial_error; best_selector_index = 1; } trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b); - if (trial_error < best_error) { + if (trial_error < best_error) + { best_error = trial_error; best_selector_index = 2; } trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b); - if (trial_error < 
best_error) { + if (trial_error < best_error) + { best_error = trial_error; best_selector_index = 3; } @@ -1997,10 +2296,13 @@ namespace crnlib { total_error += best_error; if (total_error >= trial_solution.m_error) + { break; + } } - if (total_error < trial_solution.m_error) { + if (total_error < trial_solution.m_error) + { trial_solution.m_error = total_error; trial_solution.m_coords.m_inten_table = inten_table; memcpy(trial_solution.m_selectors, m_temp_selectors, 8); @@ -2011,8 +2313,10 @@ namespace crnlib { trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; bool success = false; - if (pBest_solution) { - if (trial_solution.m_error < pBest_solution->m_error) { + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { *pBest_solution = trial_solution; success = true; } @@ -2021,13 +2325,16 @@ namespace crnlib { return success; } - bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { - if (m_pParams->m_constrain_against_base_color5) { + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_constrain_against_base_color5) + { const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; - if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) { + if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { trial_solution.m_valid = false; return false; } @@ -2039,12 +2346,14 @@ namespace crnlib { trial_solution.m_error = cUINT64_MAX; - for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { + for (int inten_table 
= cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { const int* pInten_table = g_etc1_inten_tables[inten_table]; uint block_inten[4]; color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) { + for (uint s = 0; s < 4; s++) + { const int yd = pInten_table[s]; color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); block_colors[s] = block_color; @@ -2059,43 +2368,62 @@ namespace crnlib { uint64 total_error = 0; const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) { - if (block_inten[0] > m_pSorted_luma[n - 1]) { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { const uint min_error = block_inten[0] - m_pSorted_luma[n - 1]; if (min_error >= trial_solution.m_error) + { continue; + } } memset(&m_temp_selectors[0], 0, n); for (uint c = 0; c < n; c++) + { total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]); + } } - else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) { - if (m_pSorted_luma[0] > block_inten[3]) { + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { const uint min_error = m_pSorted_luma[0] - block_inten[3]; if (min_error >= trial_solution.m_error) + { continue; + } } memset(&m_temp_selectors[0], 3, n); for (uint c = 0; c < n; c++) + { total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]); + } } - else { + else + { uint cur_selector = 0, c; - for (c = 0; c < n; c++) { + for (c = 0; c < n; c++) + { const uint y = m_pSorted_luma[c]; while ((y * 2) >= block_inten_midpoints[cur_selector]) + { if (++cur_selector > 2) + { goto done; + } + } const uint sorted_pixel_index = m_pSorted_luma_indices[c]; m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); } done: - while (c < 
n) { + while (c < n) + { const uint sorted_pixel_index = m_pSorted_luma_indices[c]; m_temp_selectors[sorted_pixel_index] = 3; total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); @@ -2103,21 +2431,26 @@ namespace crnlib { } } - if (total_error < trial_solution.m_error) { + if (total_error < trial_solution.m_error) + { trial_solution.m_error = total_error; trial_solution.m_coords.m_inten_table = inten_table; memcpy(trial_solution.m_selectors, m_temp_selectors, n); trial_solution.m_valid = true; if (!total_error) + { break; + } } } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; bool success = false; - if (pBest_solution) { - if (trial_solution.m_error < pBest_solution->m_error) { + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { *pBest_solution = trial_solution; success = true; } @@ -2126,40 +2459,55 @@ namespace crnlib { return success; } - static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) { + static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) + { RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < (diff ? 32 : 16))); int c; if (diff) + { c = (packed_c >> 2) | (packed_c << 3); + } else + { c = packed_c | (packed_c << 4); + } c += g_etc1_inten_tables[inten][selector]; c = rg_etc1::clamp(c, 0, 255); return c; } - static inline int mul_8bit(int a, int b) { + static inline int mul_8bit(int a, int b) + { int t = a * b + 128; return (t + (t >> 8)) >> 8; } - void pack_etc1_block_init() { - for (uint diff = 0; diff < 2; diff++) { + void pack_etc1_block_init() + { + for (uint diff = 0; diff < 2; diff++) + { const uint limit = diff ? 
32 : 16; - for (uint inten = 0; inten < 8; inten++) { - for (uint selector = 0; selector < 4; selector++) { + for (uint inten = 0; inten < 8; inten++) + { + for (uint selector = 0; selector < 4; selector++) + { const uint inverse_table_index = diff + (inten << 1) + (selector << 4); - for (int color = 0; color < 256; color++) { + for (int color = 0; color < 256; color++) + { uint best_error = cUINT32_MAX, best_packed_c = 0; - for (uint packed_c = 0; packed_c < limit; packed_c++) { + for (uint packed_c = 0; packed_c < limit; packed_c++) + { int v = etc1_decode_value(diff, inten, selector, packed_c); uint err = labs(v - color); - if (err < best_error) { + if (err < best_error) + { best_error = err; best_packed_c = packed_c; if (!best_error) + { break; + } } } RG_ETC1_ASSERT(best_error <= 255); @@ -2171,9 +2519,12 @@ namespace crnlib { uint expand5[32]; for (int i = 0; i < 32; i++) + { expand5[i] = (i << 3) | (i >> 2); + } - for (int i = 0; i < 256 + 16; i++) { + for (int i = 0; i < 256 + 16; i++) + { int v = clamp(i - 8, 0, 255); g_quant5_tab[i] = static_cast(expand5[mul_8bit(v, 31)]); } @@ -2181,7 +2532,8 @@ namespace crnlib { // Packs solid color blocks efficiently using a set of small precomputed tables. // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. - static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor) { + static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor) + { RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); static uint s_next_comp[4] = { 1, 2, 0, 1 }; @@ -2190,22 +2542,31 @@ namespace crnlib { int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
- for (uint i = 0; i < 3; i++) { + for (uint i = 0; i < 3; i++) + { const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) { + for (int delta = -delta_range; delta <= delta_range; delta++) + { const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); const uint16* pTable; if (!c_plus_delta) + { pTable = g_color8_to_etc_block_config_0_255[0]; + } else if (c_plus_delta == 255) + { pTable = g_color8_to_etc_block_config_0_255[1]; + } else + { pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + } - do { + do + { const uint x = *pTable++; #ifdef RG_ETC1_BUILD_DEBUG @@ -2220,14 +2581,17 @@ namespace crnlib { uint16 p1 = pInverse_table[c1]; uint16 p2 = pInverse_table[c2]; const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); - if (trial_error < best_error) { + if (trial_error < best_error) + { best_error = trial_error; best_x = x; best_packed_c1 = p1 & 0xFF; best_packed_c2 = p2 & 0xFF; best_i = i; if (!best_error) + { goto found_perfect_match; + } } } while (*pTable != 0xFFFF); } @@ -2244,12 +2608,14 @@ namespace crnlib { *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 
0xFFFF : 0; const uint best_packed_c0 = (best_x >> 8) & 255; - if (diff) { + if (diff) + { block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); } - else { + else + { block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); @@ -2262,7 +2628,8 @@ namespace crnlib { etc1_optimizer::results& results, uint num_colors, const uint8* pColor, bool use_diff, - const color_quad_u8* pBase_color5_unscaled) { + const color_quad_u8* pBase_color5_unscaled) + { RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); static uint s_next_comp[4] = { 1, 2, 0, 1 }; @@ -2271,36 +2638,52 @@ namespace crnlib { int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
- for (uint i = 0; i < 3; i++) { + for (uint i = 0; i < 3; i++) + { const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) { + for (int delta = -delta_range; delta <= delta_range; delta++) + { const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); const uint16* pTable; if (!c_plus_delta) + { pTable = g_color8_to_etc_block_config_0_255[0]; + } else if (c_plus_delta == 255) + { pTable = g_color8_to_etc_block_config_0_255[1]; + } else + { pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + } - do { + do + { const uint x = *pTable++; const uint diff = x & 1; - if (static_cast(use_diff) != diff) { + if (static_cast(use_diff) != diff) + { if (*pTable == 0xFFFF) + { break; + } continue; } - if ((diff) && (pBase_color5_unscaled)) { + if ((diff) && (pBase_color5_unscaled)) + { const int p0 = (x >> 8) & 255; int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); - if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) { + if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) + { if (*pTable == 0xFFFF) + { break; + } continue; } } @@ -2318,25 +2701,32 @@ namespace crnlib { uint16 p1 = pInverse_table[c1]; uint16 p2 = pInverse_table[c2]; - if ((diff) && (pBase_color5_unscaled)) { + if ((diff) && (pBase_color5_unscaled)) + { int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); - if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) { + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { if (*pTable == 0xFFFF) + { break; + } continue; } } const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + 
rg_etc1::square(p2 >> 8); - if (trial_error < best_error) { + if (trial_error < best_error) + { best_error = trial_error; best_x = x; best_packed_c1 = p1 & 0xFF; best_packed_c2 = p2 & 0xFF; best_i = i; if (!best_error) + { goto found_perfect_match; + } } } while (*pTable != 0xFFFF); } @@ -2344,7 +2734,9 @@ namespace crnlib { found_perfect_match: if (best_error == cUINT32_MAX) + { return best_error; + } best_error *= num_colors; @@ -2363,14 +2755,16 @@ namespace crnlib { } // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. - static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) { - int err[8], * ep1 = err, * ep2 = err + 4; + static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) + { + int err[8], *ep1 = err, *ep2 = err + 4; uint8* quant = g_quant5_tab + 8; memset(dest, 0xFF, sizeof(color_quad_u8) * 16); // process channels seperately - for (int ch = 0; ch < 3; ch++) { + for (int ch = 0; ch < 3; ch++) + { uint8* bp = (uint8*)block; uint8* dp = (uint8*)dest; @@ -2378,7 +2772,8 @@ namespace crnlib { dp += ch; memset(err, 0, sizeof(err)); - for (int y = 0; y < 4; y++) { + for (int y = 0; y < 4; y++) + { // pixel 0 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; ep1[0] = bp[0] - dp[0]; @@ -2405,7 +2800,8 @@ namespace crnlib { } } - unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) { + unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) + { const color_quad_u8* pSrc_pixels = reinterpret_cast(pSrc_pixels_rgba); etc1_block& dst_block = *static_cast(pETC1_block); @@ -2415,13 +2811,21 @@ namespace crnlib { const uint32 first_pixel_u32 = pSrc_pixels->m_u32; int r; for (r = 15; r >= 1; --r) + { if (pSrc_pixels[r].m_u32 != first_pixel_u32) + { break; + } + } + if (!r) + { return static_cast(16 * pack_etc1_block_solid_color(dst_block, 
&pSrc_pixels[0].r)); + } color_quad_u8 dithered_pixels[16]; - if (pack_params.m_dithering) { + if (pack_params.m_dithering) + { dither_block_555(dithered_pixels, pSrc_pixels); pSrc_pixels = dithered_pixels; } @@ -2433,7 +2837,8 @@ namespace crnlib { uint8 best_selectors[2][8]; etc1_optimizer::results best_results[2]; - for (uint i = 0; i < 2; i++) { + for (uint i = 0; i < 2; i++) + { best_results[i].m_n = 8; best_results[i].m_pSelectors = best_selectors[i]; } @@ -2441,7 +2846,8 @@ namespace crnlib { uint8 selectors[3][8]; etc1_optimizer::results results[3]; - for (uint i = 0; i < 3; i++) { + for (uint i = 0; i < 3; i++) + { results[i].m_n = 8; results[i].m_pSelectors = selectors[i]; } @@ -2452,15 +2858,21 @@ namespace crnlib { params.m_num_src_pixels = 8; params.m_pSrc_pixels = subblock_pixels; - for (uint flip = 0; flip < 2; flip++) { - for (uint use_color4 = 0; use_color4 < 2; use_color4++) { + for (uint flip = 0; flip < 2; flip++) + { + for (uint use_color4 = 0; use_color4 < 2; use_color4++) + { uint64 trial_error = 0; uint subblock; - for (subblock = 0; subblock < 2; subblock++) { + for (subblock = 0; subblock < 2; subblock++) + { if (flip) + { memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); - else { + } + else + { const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; @@ -2473,12 +2885,18 @@ namespace crnlib { } results[2].m_error = cUINT64_MAX; - if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) { + if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) + { const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32; for (r = 7; r >= 1; --r) + { if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) + { break; - if (!r) { + } + } + if (!r) + { pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, !use_color4, (subblock && !use_color4) ? 
&results[0].m_block_color_unscaled : nullptr); } } @@ -2486,22 +2904,26 @@ namespace crnlib { params.m_use_color4 = (use_color4 != 0); params.m_constrain_against_base_color5 = false; - if ((!use_color4) && (subblock)) { + if ((!use_color4) && (subblock)) + { params.m_constrain_against_base_color5 = true; params.m_base_color5 = results[0].m_block_color_unscaled; } - if (params.m_quality == cHighQuality) { + if (params.m_quality == cHighQuality) + { static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4); params.m_pScan_deltas = s_scan_delta_0_to_4; } - else if (params.m_quality == cMediumQuality) { + else if (params.m_quality == cMediumQuality) + { static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1); params.m_pScan_deltas = s_scan_delta_0_to_1; } - else { + else + { static const int s_scan_delta_0[] = { 0 }; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0); params.m_pScan_deltas = s_scan_delta_0; @@ -2509,46 +2931,62 @@ namespace crnlib { optimizer.init(params, results[subblock]); if (!optimizer.compute()) + { break; + } - if (params.m_quality >= cMediumQuality) { + if (params.m_quality >= cMediumQuality) + { // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
const uint refinement_error_thresh0 = 3000; const uint refinement_error_thresh1 = 6000; - if (results[subblock].m_error > refinement_error_thresh0) { - if (params.m_quality == cMediumQuality) { + if (results[subblock].m_error > refinement_error_thresh0) + { + if (params.m_quality == cMediumQuality) + { static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3); params.m_pScan_deltas = s_scan_delta_2_to_3; } - else { + else + { static const int s_scan_delta_5_to_5[] = { -5, 5 }; static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; - if (results[subblock].m_error > refinement_error_thresh1) { + if (results[subblock].m_error > refinement_error_thresh1) + { params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8); params.m_pScan_deltas = s_scan_delta_5_to_8; } - else { + else + { params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5); params.m_pScan_deltas = s_scan_delta_5_to_5; } } if (!optimizer.compute()) + { break; + } } if (results[2].m_error < results[subblock].m_error) + { results[subblock] = results[2]; + } } trial_error += results[subblock].m_error; if (trial_error >= best_error) + { break; + } } if (subblock < 2) + { continue; + } best_error = trial_error; best_results[0] = results[0]; @@ -2556,36 +2994,45 @@ namespace crnlib { best_flip = flip; best_use_color4 = use_color4; - } // use_color4 + } // use_color4 - } // flip + } // flip int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; RG_ETC1_ASSERT(best_use_color4 || (rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)); - if (best_use_color4) { + if (best_use_color4) + { dst_block.m_bytes[0] = 
static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); } - else { + else + { if (dr < 0) + { dr += 8; + } dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); if (dg < 0) + { dg += 8; + } dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); if (db < 0) + { db += 8; + } dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); } dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip); uint selector0 = 0, selector1 = 0; - if (best_flip) { + if (best_flip) + { // flipped: // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } @@ -2594,7 +3041,8 @@ namespace crnlib { // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } const uint8* pSelectors0 = best_results[0].m_pSelectors; const uint8* pSelectors1 = best_results[1].m_pSelectors; - for (int x = 3; x >= 0; --x) { + for (int x = 3; x >= 0; --x) + { uint b; b = g_selector_index_to_etc1[pSelectors1[4 + x]]; selector0 = (selector0 << 1) | (b & 1); @@ -2613,16 +3061,19 @@ namespace crnlib { selector1 = (selector1 << 1) | (b >> 1); } } - else { + else + { // non-flipped: // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } // // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } - for (int subblock = 1; subblock >= 0; --subblock) { + for (int subblock = 1; subblock >= 0; --subblock) + { const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; - for (uint i = 0; i < 2; i++) { + for (uint i = 0; i < 2; i++) + { uint b; b = 
g_selector_index_to_etc1[pSelectors[3]]; selector0 = (selector0 << 1) | (b & 1); @@ -2653,7 +3104,8 @@ namespace crnlib { return static_cast(best_error); } - unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, etc2a_pack_params& pack_params) { + unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, etc2a_pack_params& pack_params) + { crnlib::color_quad_u8* pixels = (crnlib::color_quad_u8*)pSrc_pixels_rgba; dxt5_endpoint_optimizer dxt5_optimizer; @@ -2665,29 +3117,37 @@ namespace crnlib { params.m_pPixels = pixels; params.m_num_pixels = 16; params.m_comp_index = pack_params.comp_index; - params.m_quality = pack_params.m_quality == cHighQuality ? cCRNDXTQualityUber : pack_params.m_quality == cMediumQuality ? cCRNDXTQualityNormal : cCRNDXTQualityFast; + params.m_quality = pack_params.m_quality == cHighQuality ? cCRNDXTQualityUber : pack_params.m_quality == cMediumQuality ? cCRNDXTQualityNormal + : cCRNDXTQualityFast; params.m_use_both_block_types = false; dxt5_optimizer.compute(params, results); uint base_codeword = (results.m_first_endpoint + results.m_second_endpoint + 1) >> 1; uint best_error = cUINT32_MAX; - for (int modifier_index = 0; modifier_index < 16; modifier_index++) { + for (int modifier_index = 0; modifier_index < 16; modifier_index++) + { const int* modifier = g_etc2a_modifier_table[modifier_index]; int multiplier = math::clamp((results.m_first_endpoint - results.m_second_endpoint + modifier[7] + (modifier[7] >> 1)) / (modifier[7] << 1), 1, 15); uint8 data[8] = { (uint8)base_codeword, (uint8)(multiplier << 4 | modifier_index) }, values[8]; for (int i = 0; i < 8; i++) + { values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); + } uint error = 0; - for (uint d0 = 3, t = 0, i = 0; i < 4; i++, d0 += 3) { - for (uint d = d0, j = 0; j < 4; j++, t++, d += 12) { + for (uint d0 = 3, t = 0, i = 0; i < 4; i++, d0 += 3) + { + for (uint d = d0, j = 0; j < 4; j++, t++, d += 12) + { int a = 
pixels[t].a; uint byte_offset = 2 + (d >> 3); uint bit_offset = d & 7; uint best_s = 0; uint best_delta = cUINT32_MAX; - for (uint s = 0; s < 8; s++) { + for (uint s = 0; s < 8; s++) + { uint delta = abs(a - values[s]); - if (delta < best_delta) { + if (delta < best_delta) + { best_s = s; best_delta = delta; } @@ -2695,10 +3155,13 @@ namespace crnlib { error += best_delta * best_delta; data[byte_offset] |= best_s << (8 - bit_offset); if (bit_offset < 3) + { data[byte_offset - 1] |= best_s >> bit_offset; + } } } - if (error < best_error) { + if (error < best_error) + { memcpy(pBlock, data, 8); best_error = error; } @@ -2707,6 +3170,6 @@ namespace crnlib { return best_error; } - } // namespace rg_etc1 + } // namespace rg_etc1 -} // namespace crnlib +} // namespace crnlib diff --git a/crnlib/crn_rg_etc1.h b/crnlib/crn_rg_etc1.h index 4976f2e..fdf4e4a 100644 --- a/crnlib/crn_rg_etc1.h +++ b/crnlib/crn_rg_etc1.h @@ -4,48 +4,58 @@ #include "crn_export.h" -namespace crnlib { - - namespace rg_etc1 { +namespace crnlib +{ + namespace rg_etc1 + { // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels. // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping. // This function is thread safe, and does not dynamically allocate any memory. // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255. 
- CRN_EXPORT bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); - CRN_EXPORT bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); + CRN_EXPORT bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, + bool preserve_alpha = false); + CRN_EXPORT bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, + bool preserve_alpha = false); CRN_EXPORT bool unpack_etc2_alpha(const void* pBlock, unsigned int* pDst_pixels_rgba, int comp_index = 3); // Quality setting = the higher the quality, the slower. // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality). - enum etc1_quality { + enum etc1_quality + { cLowQuality, cMediumQuality, cHighQuality, }; - struct etc1_pack_params { + struct etc1_pack_params + { etc1_quality m_quality; bool m_dithering; - inline etc1_pack_params() { + inline etc1_pack_params() + { clear(); } - void clear() { + void clear() + { m_quality = cHighQuality; m_dithering = false; } }; - struct etc2a_pack_params { + struct etc2a_pack_params + { etc1_quality m_quality; int comp_index; - inline etc2a_pack_params() { + inline etc2a_pack_params() + { clear(); } - void clear() { + void clear() + { m_quality = cHighQuality; comp_index = 3; } @@ -59,12 +69,12 @@ namespace crnlib { // Returns squared error of result. // This function is thread safe, and does not dynamically allocate any memory. // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. 
- CRN_EXPORT unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params); - CRN_EXPORT unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, etc2a_pack_params& pack_params); - - } // namespace rg_etc1 - -} // namespace crnlib + CRN_EXPORT unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, + etc1_pack_params& pack_params); + CRN_EXPORT unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, + etc2a_pack_params& pack_params); + } // namespace rg_etc1 +} // namespace crnlib //------------------------------------------------------------------------------ // diff --git a/crnlib/crn_threading_pthreads.cpp b/crnlib/crn_threading_pthreads.cpp index 904a5b3..7e18cb8 100644 --- a/crnlib/crn_threading_pthreads.cpp +++ b/crnlib/crn_threading_pthreads.cpp @@ -506,6 +506,6 @@ namespace crnlib return nullptr; } -} // namespace crnlib +} // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API diff --git a/crnlib/crn_threading_pthreads.h b/crnlib/crn_threading_pthreads.h index feb2703..2662293 100644 --- a/crnlib/crn_threading_pthreads.h +++ b/crnlib/crn_threading_pthreads.h @@ -86,6 +86,7 @@ namespace crnlib { m_mutex.lock(); } + inline ~scoped_mutex() { m_mutex.unlock(); @@ -139,6 +140,7 @@ namespace crnlib { m_lock.lock(); } + inline ~scoped_spinlock() { m_lock.unlock(); @@ -208,6 +210,7 @@ namespace crnlib ~task_pool(); enum { cMaxThreads = 16 }; + bool init(uint num_threads); void deinit(); @@ -215,6 +218,7 @@ namespace crnlib { return m_num_threads; } + inline uint32 get_num_outstanding_tasks() const { return m_total_submitted_tasks - m_total_completed_tasks; @@ -237,7 +241,8 @@ namespace crnlib inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = nullptr); template - inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = 
nullptr); + inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, + void* pData_ptr = nullptr); void join(); @@ -255,7 +260,8 @@ namespace crnlib uint64 m_data; void* m_pData_ptr; - union { + union + { task_callback_func m_callback; executable_task* m_pObj; }; @@ -328,6 +334,7 @@ namespace crnlib { return m_pObject; } + object_method_ptr get_method() const { return m_pMethod; @@ -354,7 +361,7 @@ namespace crnlib template inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) { - object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + object_task* pTask = crnlib_new>(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!pTask) { return false; @@ -363,7 +370,8 @@ namespace crnlib } template - inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) + inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, + void* pData_ptr) { CRNLIB_ASSERT(pObject); CRNLIB_ASSERT(num_tasks); @@ -379,7 +387,7 @@ namespace crnlib { task tsk; - tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); + tsk.m_pObj = crnlib_new>(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!tsk.m_pObj) { status = false; @@ -408,6 +416,6 @@ namespace crnlib return status; } -} // namespace crnlib +} // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API From df658adade84bd19ef74baa39722776249bc3d39 Mon Sep 17 00:00:00 2001 From: Yoann Potinet Date: Sun, 21 Feb 2021 18:58:51 -0500 Subject: [PATCH 18/18] Ignore .idea directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7afced5..92bb7f3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /out CMakeSettings.json test_package/build/ +.idea