Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SZ 3.2.0 #60

Merged
merged 16 commits into from
Aug 16, 2024
23 changes: 15 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
cmake_minimum_required(VERSION 3.18)
project(SZ3 VERSION 3.1.8)
project(SZ3 VERSION 3.2.0)

# The data version identifies the version of the compressed data format.
# It is not always equal to the program version (e.g., SZ3 v3.1.0 and SZ3 v3.1.1 may both use data version v3.1.0).
# Only update the data version when a new program version changes the compressed data format.
set(SZ3_DATA_VERSION 3.2.0)

include(GNUInstallDirs)
include(CTest)

Expand All @@ -19,8 +25,9 @@ endif ()

find_package(PkgConfig)


configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/SZ3/version.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/include/SZ3/version.hpp.in
${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(
Expand All @@ -31,7 +38,7 @@ target_include_directories(
)
target_compile_features(${PROJECT_NAME}
INTERFACE cxx_std_17
)
)

find_package(OpenMP)
if (OpenMP_FOUND)
Expand Down Expand Up @@ -84,18 +91,18 @@ install(TARGETS ${PROJECT_NAME}
EXPORT SZ3Targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/SZ3/"
)
)
install(EXPORT SZ3Targets NAMESPACE SZ3:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3)
include(CMakePackageConfigHelpers)
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/SZ3Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/SZ3Config.cmake"
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3
)
)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/SZ3ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
Expand All @@ -105,6 +112,6 @@ install(FILES
${CMAKE_CURRENT_BINARY_DIR}/SZ3Config.cmake
${CMAKE_CURRENT_BINARY_DIR}/SZ3ConfigVersion.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3
)
)
#export sz3 target for external use
export(TARGETS SZ3 FILE SZ3.cmake)
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,20 @@ Scripts without parameters below should work fine by replacing SZ2 with SZ3.
* Visit [this Github repository](https://github.com/ofmla/sz3_simple_example) for details

#### H5Z-SZ3
* Located in 'tools/H5Z-SZ3'
* Please add "-DBUILD_H5Z_FILTER=ON" to enable this function for CMake.
* sz3ToHDF5 and HDF5ToSz3 are provided for testing.

* Use examples/print_h5repack_args.c to construct the cd_values parameters based on the specified error configuration.

* Compression example:
`h5repack -f UD=32024,0,5,0,981668463,0,0,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`

* Decompression example:
`h5repack -f NONE -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.out.h5`

* Alternatively, the error bound information can also be given through sz3.config (when there are no cd_values for h5repack). Example (You need to put sz3.config in the current local directory so that it will read sz3.config to get error bounds):
`h5repack -f UD=32024,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`
[//]: # (* Use examples/print_h5repack_args.c to construct the cd_values parameters based on the specified error configuration.)
[//]: # ()
[//]: # (* Compression example: )
[//]: # (`h5repack -f UD=32024,0,5,0,981668463,0,0,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`)
[//]: # ()
[//]: # (* Decompression example:)
[//]: # (`h5repack -f NONE -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.out.h5`)
[//]: # ()
[//]: # (* Alternatively, the error bound information can also be given through sz3.config (when there are no cd_values for h5repack). Example (You need to put sz3.config in the current local directory so that it will read sz3.config to get error bounds):)
[//]: # (`h5repack -f UD=32024,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`)



Expand All @@ -91,6 +94,8 @@ Version New features
* SZ 3.1.6 Support C API and Python API.
* SZ 3.1.7 Initial MDZ(https://github.com/szcompressor/SZ3/tree/master/tools/mdz) support.
* SZ 3.1.8 namespace changed from SZ to SZ3. H5Z-SZ3 supports configuration file now.
* SZ 3.2.0 API reconstructed for FZ. H5Z-SZ3 rewrite. Compression version checking.

## Citations

**Kindly note**: If you mention SZ in your paper, the most appropriate citation is to include these three references (**TBD22, ICDE21, Bigdata18**), because they cover the design and implementation of the latest version of SZ.
Expand Down
42 changes: 42 additions & 0 deletions include/SZ3/api/impl/SZAlgo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef SZ3_SZALGO_HPP
#define SZ3_SZALGO_HPP

#include "SZ3/compressor/SZGenericCompressor.hpp"
#include "SZ3/decomposition/NoPredictionDecomposition.hpp"
#include "SZ3/quantizer/IntegerQuantizer.hpp"
#include "SZ3/lossless/Lossless_zstd.hpp"
#include "SZ3/encoder/HuffmanEncoder.hpp"
#include "SZ3/utils/Config.hpp"

namespace SZ3 {
// Compresses `data` with a pipeline assembled by make_compressor_sz_generic
// from three stages: a no-prediction decomposition (built around a
// LinearQuantizer), a HuffmanEncoder<int>, and Lossless_zstd.
//
// Template parameters:
//   T - element type of the input data.
//   N - dimensionality; asserted to equal conf.N.
//
// Parameters:
//   conf    - compression configuration. Taken by non-const reference and
//             passed to calAbsErrorBound() before the pipeline is built
//             (presumably so conf.absErrorBound is resolved first -- TODO confirm).
//   data    - input values; passed to calAbsErrorBound() and to compress().
//   cmpData - output buffer forwarded to the compressor.
//   cmpCap  - forwarded to the compressor alongside cmpData; presumably the
//             capacity of cmpData in bytes -- TODO confirm.
//
// Returns: the value returned by the compressor's compress() call
//          (presumably the compressed size in bytes -- TODO confirm).
template<class T, uint N>
size_t SZ_compress_nopred(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(N == conf.N);
// NOTE(review): this asserts ALGO_INTERP even though the function builds a
// no-prediction pipeline; it looks like it may have been carried over from
// the interpolation variant. Confirm which algorithm id callers set here.
assert(conf.cmprAlgo == ALGO_INTERP);
calAbsErrorBound(conf, data);

// The quantizer is constructed from the absolute error bound and half of the
// configured quantization bin count.
auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_noprediction<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
return sz->compress(conf, data, cmpData, cmpCap);
}


// Decompresses a buffer using the same pipeline shape as SZ_compress_nopred:
// make_compressor_sz_generic over a no-prediction decomposition (with a
// LinearQuantizer), a HuffmanEncoder<int>, and Lossless_zstd.
//
// Template parameters:
//   T - element type of the decompressed data.
//   N - dimensionality used to instantiate the pipeline.
//
// Parameters:
//   conf    - configuration (read-only here); supplies absErrorBound and
//             quantbinCnt for the quantizer and is forwarded to decompress().
//   cmpData - compressed input buffer.
//   cmpSize - forwarded to decompress() with the buffer; presumably its size
//             in bytes -- TODO confirm.
//   decData - destination for the decompressed values; forwarded to decompress().
template<class T, uint N>
void SZ_decompress_nopred(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) {
// NOTE(review): this asserts ALGO_INTERP even though the function builds a
// no-prediction pipeline; confirm which algorithm id is expected here.
assert(conf.cmprAlgo == ALGO_INTERP);
// Local copy of the input pointer that is handed to decompress()
// (presumably because decompress() may take or advance it by reference -- TODO confirm).
auto cmpDataPos = cmpData;
auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_noprediction<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
sz->decompress(conf, cmpDataPos, cmpSize, decData);
}


}
#endif
Original file line number Diff line number Diff line change
@@ -1,91 +1,89 @@
#ifndef SZ3_SZINTERP_HPP
#define SZ3_SZINTERP_HPP
#ifndef SZ3_SZALGOINTERP_HPP
#define SZ3_SZALGOINTERP_HPP

#include "SZ3/compressor/SZInterpolationCompressor.hpp"
#include "SZ3/compressor/deprecated/SZBlockInterpolationCompressor.hpp"
#include "SZ3/decomposition/InterpolationDecomposition.hpp"
#include "SZ3/compressor/specialized/SZBlockInterpolationCompressor.hpp"
#include "SZ3/quantizer/IntegerQuantizer.hpp"
#include "SZ3/lossless/Lossless_zstd.hpp"
#include "SZ3/utils/Iterator.hpp"
#include "SZ3/utils/Statistic.hpp"
#include "SZ3/utils/Extraction.hpp"
#include "SZ3/utils/QuantOptimizatioin.hpp"
#include "SZ3/utils/Config.hpp"
#include "SZ3/api/impl/SZLorenzoReg.hpp"
#include "SZ3/api/impl/SZAlgoLorenzoReg.hpp"
#include <cmath>
#include <memory>

namespace SZ3 {
template<class T, uint N>
char *SZ_compress_Interp(Config &conf, T *data, size_t &outSize) {


size_t SZ_compress_Interp(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(N == conf.N);
assert(conf.cmprAlgo == ALGO_INTERP);
calAbsErrorBound(conf, data);

auto sz = SZInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2),
HuffmanEncoder<int>(),
Lossless_zstd());
char *cmpData = (char *) sz.compress(conf, data, outSize);
return cmpData;

auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_interpolation<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
return sz->compress(conf, data, cmpData, cmpCap);
// return cmpData;
}



template<class T, uint N>
void SZ_decompress_Interp(const Config &conf, char *cmpData, size_t cmpSize, T *decData) {
void SZ_decompress_Interp(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) {
assert(conf.cmprAlgo == ALGO_INTERP);
uchar const *cmpDataPos = (uchar *) cmpData;
auto sz = SZInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(),
HuffmanEncoder<int>(),
Lossless_zstd());
sz.decompress(cmpDataPos, cmpSize, decData);
auto cmpDataPos = cmpData;
auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_interpolation<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
sz->decompress(conf, cmpDataPos, cmpSize, decData);
}



template<class T, uint N>
double do_not_use_this_interp_compress_block_test(T *data, std::vector<size_t> dims, size_t num,
double eb, int interp_op, int direction_op, int block_size) {

double eb, int interp_op, int direction_op, int block_size, uchar* buffer, size_t bufferCap) {
std::vector<T> data1(data, data + num);
size_t outSize = 0;


Config conf;
conf.absErrorBound = eb;
conf.setDims(dims.begin(), dims.end());
conf.blockSize = block_size;
conf.interpAlgo = interp_op;
conf.interpDirection = direction_op;
auto sz = SZBlockInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(eb),
HuffmanEncoder<int>(),
Lossless_zstd());
char *cmpData = (char *) sz.compress(conf, data1.data(), outSize);
delete[]cmpData;
LinearQuantizer<T>(eb),
HuffmanEncoder<int>(),
Lossless_zstd());

size_t outSize = sz.compress(conf, data1.data(), buffer, bufferCap);

auto compression_ratio = num * sizeof(T) * 1.0 / outSize;
return compression_ratio;
}

template<class T, uint N>
char *SZ_compress_Interp_lorenzo(Config &conf, T *data, size_t &outSize) {
size_t SZ_compress_Interp_lorenzo(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(conf.cmprAlgo == ALGO_INTERP_LORENZO);

Timer timer(true);

// Timer timer(true);
calAbsErrorBound(conf, data);

size_t sampling_num, sampling_block;
std::vector<size_t> sample_dims(N);
std::vector<T> sampling_data = sampling<T, N>(data, conf.dims, sampling_num, sample_dims, sampling_block);
if (sampling_num == conf.num) {
conf.cmprAlgo = ALGO_INTERP;
return SZ_compress_Interp<T, N>(conf, data, outSize);
return SZ_compress_Interp<T, N>(conf, data, cmpData, cmpCap);
}

double best_lorenzo_ratio = 0, best_interp_ratio = 0, ratio;
size_t sampleOutSize;
char *cmprData;
size_t bufferCap = conf.num * sizeof(T);
auto buffer = (uchar *) malloc(bufferCap);
Config lorenzo_config = conf;
{
//test lorenzo
Expand All @@ -99,61 +97,59 @@ namespace SZ3 {
lorenzo_config.blockSize = 5;
// lorenzo_config.quantbinCnt = 65536 * 2;
std::vector<T> data1(sampling_data);
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, data1.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, data1.data(), buffer, bufferCap);
// delete[]cmprData;
// printf("Lorenzo ratio = %.2f\n", ratio);
best_lorenzo_ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
}

{
//tune interp
for (auto &interp_op: {INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC}) {
for (auto &interp_op : {INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC}) {
ratio = do_not_use_this_interp_compress_block_test<T, N>(sampling_data.data(), sample_dims, sampling_num, conf.absErrorBound,
interp_op, conf.interpDirection, sampling_block);
interp_op, conf.interpDirection, sampling_block, buffer, bufferCap);
if (ratio > best_interp_ratio) {
best_interp_ratio = ratio;
conf.interpAlgo = interp_op;
}
}

int direction_op = factorial(N) - 1;
ratio = do_not_use_this_interp_compress_block_test<T, N>(sampling_data.data(), sample_dims, sampling_num, conf.absErrorBound,
conf.interpAlgo, direction_op, sampling_block);
conf.interpAlgo, direction_op, sampling_block, buffer, bufferCap);
if (ratio > best_interp_ratio * 1.02) {
best_interp_ratio = ratio;
conf.interpDirection = direction_op;
}
}

bool useInterp = !(best_lorenzo_ratio > best_interp_ratio && best_lorenzo_ratio < 80 && best_interp_ratio < 80);

size_t cmpSize = 0;
if (useInterp) {
conf.cmprAlgo = ALGO_INTERP;
double tuning_time = timer.stop();
return SZ_compress_Interp<T, N>(conf, data, outSize);
cmpSize = SZ_compress_Interp<T, N>(conf, data, cmpData, cmpCap);
} else {
//further tune lorenzo
if (N == 3) {
float pred_freq, mean_freq;
T mean_guess;
lorenzo_config.quantbinCnt = optimize_quant_invl_3d<T>(data, conf.dims[0], conf.dims[1], conf.dims[2],
conf.absErrorBound, pred_freq, mean_freq, mean_guess);
conf.absErrorBound, pred_freq, mean_freq, mean_guess);
lorenzo_config.pred_dim = 2;
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), buffer, bufferCap);
ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
if (ratio > best_lorenzo_ratio * 1.02) {
best_lorenzo_ratio = ratio;
} else {
lorenzo_config.pred_dim = 3;
}
}

if (conf.relErrorBound < 1.01e-6 && best_lorenzo_ratio > 5 && lorenzo_config.quantbinCnt != 16384) {
auto quant_num = lorenzo_config.quantbinCnt;
lorenzo_config.quantbinCnt = 16384;
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), buffer, bufferCap);
// delete[]cmprData;
ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
if (ratio > best_lorenzo_ratio * 1.02) {
best_lorenzo_ratio = ratio;
Expand All @@ -163,11 +159,12 @@ namespace SZ3 {
}
lorenzo_config.setDims(conf.dims.begin(), conf.dims.end());
conf = lorenzo_config;
double tuning_time = timer.stop();
return SZ_compress_LorenzoReg<T, N>(conf, data, outSize);
// double tuning_time = timer.stop();
cmpSize = SZ_compress_LorenzoReg<T, N>(conf, data, cmpData, cmpCap);
}



free(buffer);
return cmpSize;
}
}
#endif
Loading
Loading