Skip to content

Commit

Permalink
change namespace from SZ to SZ3, to avoid conflict with SZ2
Browse files Browse the repository at this point in the history
  • Loading branch information
ayzk committed Oct 31, 2023
1 parent bdc4b50 commit e1af6be
Show file tree
Hide file tree
Showing 55 changed files with 485 additions and 480 deletions.
68 changes: 34 additions & 34 deletions include/SZ3/api/impl/SZDispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,43 @@
#include "SZ3/api/impl/SZLorenzoReg.hpp"
#include <cmath>


template<class T, SZ::uint N>
char *SZ_compress_dispatcher(SZ::Config &conf, T *data, size_t &outSize) {

assert(N == conf.N);
SZ::calAbsErrorBound(conf, data);

char *cmpData;
if (conf.absErrorBound == 0) {
auto zstd = SZ::Lossless_zstd();
cmpData = (char *) zstd.compress((SZ::uchar *) data, conf.num * sizeof(T), outSize);
} else if (conf.cmprAlgo == SZ::ALGO_LORENZO_REG) {
cmpData = (char *) SZ_compress_LorenzoReg<T, N>(conf, data, outSize);
} else if (conf.cmprAlgo == SZ::ALGO_INTERP) {
cmpData = (char *) SZ_compress_Interp<T, N>(conf, data, outSize);
} else if (conf.cmprAlgo == SZ::ALGO_INTERP_LORENZO) {
cmpData = (char *) SZ_compress_Interp_lorenzo<T, N>(conf, data, outSize);
namespace SZ3 {
/**
 * Compress `data` with the method selected by `conf`.
 *
 * calAbsErrorBound(conf, data) is invoked first. If conf.absErrorBound is
 * exactly 0 afterwards, the raw bytes of `data` (conf.num * sizeof(T)) are
 * handed to Lossless_zstd; otherwise conf.cmprAlgo selects the
 * Lorenzo/regression, interpolation, or interpolation+Lorenzo compressor.
 *
 * @tparam T       element type of the input array
 * @tparam N       dimension count; asserted to equal conf.N
 * @param conf     compression settings, passed by non-const reference to
 *                 calAbsErrorBound and to the selected compressor
 * @param data     input array; the lossless path reads conf.num elements
 * @param outSize  forwarded by reference to the selected compressor
 *                 (presumably receives the compressed size -- confirm
 *                 against the callees)
 * @return the pointer returned by the selected compressor
 *
 * An unrecognised conf.cmprAlgo terminates the process with a failure
 * status instead of returning an uninitialised pointer.
 */
template<class T, uint N>
char *SZ_compress_dispatcher(Config &conf, T *data, size_t &outSize) {

    assert(N == conf.N);
    calAbsErrorBound(conf, data);

    // A zero error bound means the data must be reproduced exactly, so the
    // lossy compressors are bypassed.
    if (conf.absErrorBound == 0) {
        auto zstd = Lossless_zstd();
        return (char *) zstd.compress((uchar *) data, conf.num * sizeof(T), outSize);
    }
    if (conf.cmprAlgo == ALGO_LORENZO_REG) {
        return (char *) SZ_compress_LorenzoReg<T, N>(conf, data, outSize);
    }
    if (conf.cmprAlgo == ALGO_INTERP) {
        return (char *) SZ_compress_Interp<T, N>(conf, data, outSize);
    }
    if (conf.cmprAlgo == ALGO_INTERP_LORENZO) {
        return (char *) SZ_compress_Interp_lorenzo<T, N>(conf, data, outSize);
    }

    // No branch matched: fail loudly, in the same style as the decompression
    // dispatcher, rather than hand back an indeterminate pointer.
    fprintf(stderr, "SZ_compress_dispatcher, Method not supported\n");
    exit(EXIT_FAILURE);
}
return cmpData;
}


template<class T, SZ::uint N>
void SZ_decompress_dispatcher(SZ::Config &conf, char *cmpData, size_t cmpSize, T *decData) {
if (conf.absErrorBound == 0) {
auto zstd = SZ::Lossless_zstd();
auto zstdOut = zstd.decompress((SZ::uchar *) cmpData, cmpSize);
memcpy(decData, zstdOut, conf.num * sizeof(T));
} else if (conf.cmprAlgo == SZ::ALGO_LORENZO_REG) {
SZ_decompress_LorenzoReg<T, N>(conf, cmpData, cmpSize, decData);
} else if (conf.cmprAlgo == SZ::ALGO_INTERP) {
SZ_decompress_Interp<T, N>(conf, cmpData, cmpSize, decData);
} else {
printf("SZ_decompress_dispatcher, Method not supported\n");
exit(0);
}
/**
 * Decompress `cmpData` into `decData` with the method selected by `conf`.
 *
 * If conf.absErrorBound is exactly 0 the buffer is decoded with
 * Lossless_zstd and conf.num * sizeof(T) bytes are copied into `decData`.
 * Otherwise conf.cmprAlgo selects the Lorenzo/regression or interpolation
 * decompressor.
 *
 * @tparam T       element type of the output array
 * @tparam N       dimension count, forwarded to the selected decompressor
 * @param conf     settings describing how the buffer was compressed
 * @param cmpData  compressed input buffer
 * @param cmpSize  size of `cmpData`, forwarded to the selected decompressor
 * @param decData  destination; the lossless path writes conf.num elements
 *
 * Any other conf.cmprAlgo is reported on stderr and terminates the process
 * with a failure status.
 */
template<class T, uint N>
void SZ_decompress_dispatcher(Config &conf, char *cmpData, size_t cmpSize, T *decData) {
    if (conf.absErrorBound == 0) {
        auto zstd = Lossless_zstd();
        auto zstdOut = zstd.decompress((uchar *) cmpData, cmpSize);
        // NOTE(review): zstdOut is not released anywhere in this function --
        // confirm whether Lossless_zstd::decompress transfers ownership.
        memcpy(decData, zstdOut, conf.num * sizeof(T));
    } else if (conf.cmprAlgo == ALGO_LORENZO_REG) {
        SZ_decompress_LorenzoReg<T, N>(conf, cmpData, cmpSize, decData);
    } else if (conf.cmprAlgo == ALGO_INTERP) {
        SZ_decompress_Interp<T, N>(conf, cmpData, cmpSize, decData);
    } else {
        // This is an error path: write to stderr and exit non-zero so that
        // callers and scripts do not mistake it for success.
        fprintf(stderr, "SZ_decompress_dispatcher, Method not supported\n");
        exit(EXIT_FAILURE);
    }

}
}

#endif
37 changes: 19 additions & 18 deletions include/SZ3/api/impl/SZImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,34 @@
#include "SZ3/api/impl/SZImplOMP.hpp"
#include <cmath>

template<class T, SZ::uint N>
char *SZ_compress_impl(SZ::Config &conf, const T *data, size_t &outSize) {
namespace SZ3 {
// Top-level compression entry: routes to the OpenMP implementation when
// conf.openmp is set, otherwise to the single-threaded dispatcher.
//
// NOTE: this listing is a diff view, so changed statements appear twice
// (pre-commit form first, post-commit form second).
//
// T        element type of the input array
// N        dimension count, forwarded as a template argument to the callee
// conf     settings; conf.openmp is forced to false when _OPENMP is undefined
// data     read-only input; conf.num elements are copied on the serial path
// outSize  forwarded by reference to the selected implementation
// returns  the pointer returned by the selected implementation
template<class T, uint N>
char *SZ_compress_impl(Config &conf, const T *data, size_t &outSize) {
// Without OpenMP support at compile time the parallel path is disabled.
#ifndef _OPENMP
conf.openmp=false;
conf.openmp=false;
#endif
if (conf.openmp) {
//dataCopy for openMP is handled by each thread
return SZ_compress_OMP<T, N>(conf, data, outSize);
} else {
std::vector<T> dataCopy(data, data + conf.num);
return SZ_compress_dispatcher<T, N>(conf, dataCopy.data(), outSize);
if (conf.openmp) {
//dataCopy for openMP is handled by each thread
return SZ_compress_OMP<T, N>(conf, data, outSize);
} else {
// Serial path: the dispatcher takes a non-const T*, so it is given a
// private copy of the const input.
std::vector<T> dataCopy(data, data + conf.num);
return SZ_compress_dispatcher<T, N>(conf, dataCopy.data(), outSize);
}
}
}


template<class T, SZ::uint N>
void SZ_decompress_impl(SZ::Config &conf, char *cmpData, size_t cmpSize, T *decData) {
// Top-level decompression entry: routes to the OpenMP implementation when
// conf.openmp is set, otherwise to the single-threaded dispatcher.
//
// NOTE: this listing is a diff view, so changed statements appear twice
// (pre-commit form first, post-commit form second).
//
// T        element type of the output array
// N        dimension count, forwarded as a template argument to the callee
// conf     settings; conf.openmp is forced to false when _OPENMP is undefined
// cmpData  compressed input buffer, forwarded unchanged
// cmpSize  size of cmpData, forwarded unchanged
// decData  destination array, forwarded unchanged
template<class T, uint N>
void SZ_decompress_impl(Config &conf, char *cmpData, size_t cmpSize, T *decData) {


// Without OpenMP support at compile time the parallel path is disabled.
#ifndef _OPENMP
conf.openmp=false;
conf.openmp=false;
#endif
if (conf.openmp) {
SZ_decompress_OMP<T, N>(conf, cmpData, cmpSize, decData);
} else {
SZ_decompress_dispatcher<T, N>(conf, cmpData, cmpSize, decData);
if (conf.openmp) {
SZ_decompress_OMP<T, N>(conf, cmpData, cmpSize, decData);
} else {
SZ_decompress_dispatcher<T, N>(conf, cmpData, cmpSize, decData);
}
}
}

#endif
191 changes: 96 additions & 95 deletions include/SZ3/api/impl/SZImplOMP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,144 +5,145 @@
#include <cmath>
#include <memory>


#ifdef _OPENMP
#include <omp.h>
#endif

template<class T, SZ::uint N>
char *SZ_compress_OMP(SZ::Config &conf, const T *data, size_t &outSize) {
unsigned char *buffer, *buffer_pos;
namespace SZ3 {
// Multi-threaded compression. The array is split along its first dimension
// (conf.dims[0]) into one slab per thread; every slab is compressed with
// SZ_compress_dispatcher and the results are concatenated into one buffer
// allocated with new[].
//
// Output layout, in write order: thread count (int), one saved Config per
// thread, the per-thread compressed sizes, then the compressed slabs
// (this assumes write()/save() advance buffer_pos -- confirm).
//
// NOTE: this listing is a diff view, so changed statements appear twice
// (pre-commit form first, post-commit form second).
//
// T        element type of the input array
// N        dimension count; asserted to equal conf.N
// conf     settings; copied per thread with dims[0] replaced by the slab height
// data     read-only input; each thread copies its own slab
// outSize  set to header bytes written plus total compressed slab bytes
// returns  the assembled buffer, cast to char*
//
// NOTE(review): when _OPENMP is undefined the body is compiled out, so
// `buffer` is returned uninitialised and outSize is left untouched -- confirm
// that callers never reach this function in that configuration.
template<class T, uint N>
char *SZ_compress_OMP(Config &conf, const T *data, size_t &outSize) {
unsigned char *buffer, *buffer_pos;

#ifdef _OPENMP

assert(N == conf.N);
assert(N == conf.N);

std::vector<char *> compressed_t;
std::vector<size_t> cmp_size_t, cmp_start_t;
std::vector<T> min_t, max_t;
std::vector<SZ::Config> conf_t;
// SZ::Timer timer(true);
int nThreads = 1;
double eb;
// Per-thread bookkeeping, indexed by thread id: compressed slab pointer,
// compressed size, start offset, slab min/max and slab Config.
std::vector<char *> compressed_t;
std::vector<size_t> cmp_size_t, cmp_start_t;
std::vector<T> min_t, max_t;
std::vector<Config> conf_t;
// Timer timer(true);
int nThreads = 1;
// NOTE(review): eb is not referenced again in the visible code.
double eb;
#pragma omp parallel
{
#pragma omp single
{
nThreads = omp_get_num_threads();
if (conf.dims[0] < nThreads) {
nThreads = conf.dims[0];
// One thread sizes the shared vectors; nThreads is capped at dims[0] so
// that no slab is empty.
// NOTE(review): the cap only changes nThreads, not the size of the running
// team. Threads whose id is >= nThreads would index past the vectors
// below -- confirm how that case is prevented.
#pragma omp single
{
nThreads = omp_get_num_threads();
if (conf.dims[0] < nThreads) {
nThreads = conf.dims[0];
}
//printf("OpenMP threads = %d\n", nThreads);
compressed_t.resize(nThreads);
cmp_size_t.resize(nThreads + 1);
cmp_start_t.resize(nThreads + 1);
conf_t.resize(nThreads);
min_t.resize(nThreads);
max_t.resize(nThreads);
}
//printf("OpenMP threads = %d\n", nThreads);
compressed_t.resize(nThreads);
cmp_size_t.resize(nThreads + 1);
cmp_start_t.resize(nThreads + 1);
conf_t.resize(nThreads);
min_t.resize(nThreads);
max_t.resize(nThreads);
}


int tid = omp_get_thread_num();
int tid = omp_get_thread_num();

auto dims_t = conf.dims;
int lo = tid * conf.dims[0] / nThreads;
int hi = (tid + 1) * conf.dims[0] / nThreads;
dims_t[0] = hi - lo;
auto it = dims_t.begin();
size_t num_t_base = std::accumulate(++it, dims_t.end(), (size_t) 1, std::multiplies<size_t>());
size_t num_t = dims_t[0] * num_t_base;
// This thread's slab covers rows [lo, hi) of the first dimension.
// num_t_base is the element count of one such row (product of the remaining
// dimensions); num_t is the element count of the whole slab.
auto dims_t = conf.dims;
int lo = tid * conf.dims[0] / nThreads;
int hi = (tid + 1) * conf.dims[0] / nThreads;
dims_t[0] = hi - lo;
auto it = dims_t.begin();
size_t num_t_base = std::accumulate(++it, dims_t.end(), (size_t) 1, std::multiplies<size_t>());
size_t num_t = dims_t[0] * num_t_base;

// T *data_t = data + lo * num_t_base;
std::vector<T> data_t(data + lo * num_t_base, data + lo * num_t_base + num_t);
if (conf.errorBoundMode != SZ::EB_ABS) {
auto minmax = std::minmax_element(data_t.begin(), data_t.end());
min_t[tid] = *minmax.first;
max_t[tid] = *minmax.second;
// Private copy of the slab; the input itself is const.
std::vector<T> data_t(data + lo * num_t_base, data + lo * num_t_base + num_t);
// For any mode other than EB_ABS, the value range of the whole array is
// gathered from the per-slab min/max and passed to calAbsErrorBound once.
if (conf.errorBoundMode != EB_ABS) {
auto minmax = std::minmax_element(data_t.begin(), data_t.end());
min_t[tid] = *minmax.first;
max_t[tid] = *minmax.second;
// Wait until every thread has published its slab min/max.
#pragma omp barrier
#pragma omp single
{
T range = *std::max_element(max_t.begin(), max_t.end()) - *std::min_element(min_t.begin(), min_t.end());
SZ::calAbsErrorBound<T>(conf, data, range);
{
T range = *std::max_element(max_t.begin(), max_t.end()) - *std::min_element(min_t.begin(), min_t.end());
calAbsErrorBound<T>(conf, data, range);
// timer.stop("OMP init");
// timer.start();
}
}
}

conf_t[tid] = conf;
conf_t[tid].setDims(dims_t.begin(), dims_t.end());
compressed_t[tid] = SZ_compress_dispatcher<T, N>(conf_t[tid], data_t.data(), cmp_size_t[tid]);
// Each thread compresses its slab with its own Config, which carries the
// slab dimensions.
conf_t[tid] = conf;
conf_t[tid].setDims(dims_t.begin(), dims_t.end());
compressed_t[tid] = SZ_compress_dispatcher<T, N>(conf_t[tid], data_t.data(), cmp_size_t[tid]);

// All slabs must be compressed before offsets and the output buffer are set up.
#pragma omp barrier
#pragma omp single
{
{
// timer.stop("OMP compression");
// timer.start();
cmp_start_t[0] = 0;
for (int i = 1; i <= nThreads; i++) {
cmp_start_t[i] = cmp_start_t[i - 1] + cmp_size_t[i - 1];
// Prefix sum of the compressed sizes: cmp_start_t[i] is the offset of slab i,
// and cmp_start_t[nThreads] is the total compressed payload size.
cmp_start_t[0] = 0;
for (int i = 1; i <= nThreads; i++) {
cmp_start_t[i] = cmp_start_t[i - 1] + cmp_size_t[i - 1];
}
// Header: thread count, per-thread Configs, per-thread compressed sizes.
size_t bufferSize = sizeof(int) + (nThreads + 1) * Config::size_est() + cmp_start_t[nThreads];
buffer = new uchar[bufferSize];
buffer_pos = buffer;
write(nThreads, buffer_pos);
for (int i = 0; i < nThreads; i++) {
conf_t[i].save(buffer_pos);
}
write(cmp_size_t.data(), nThreads, buffer_pos);
}
size_t bufferSize = sizeof(int) + (nThreads + 1) * SZ::Config::size_est() + cmp_start_t[nThreads];
buffer = new SZ::uchar[bufferSize];
buffer_pos = buffer;
SZ::write(nThreads, buffer_pos);
for (int i = 0; i < nThreads; i++) {
conf_t[i].save(buffer_pos);
}
SZ::write(cmp_size_t.data(), nThreads, buffer_pos);
}

memcpy(buffer_pos + cmp_start_t[tid], compressed_t[tid], cmp_size_t[tid]);
delete[] compressed_t[tid];
}
// Every thread copies its own slab behind the header, then frees the
// per-slab buffer.
memcpy(buffer_pos + cmp_start_t[tid], compressed_t[tid], cmp_size_t[tid]);
delete[] compressed_t[tid];
}

outSize = buffer_pos - buffer + cmp_start_t[nThreads];
// Total size = header bytes written so far + all compressed slabs.
outSize = buffer_pos - buffer + cmp_start_t[nThreads];
// timer.stop("OMP memcpy");

#endif
return (char *) buffer;
}
return (char *) buffer;
}


template<class T, SZ::uint N>
void SZ_decompress_OMP(const SZ::Config &conf, char *cmpData, size_t cmpSize, T *decData) {
// Multi-threaded decompression. Reads the thread count, the per-thread
// Configs and the per-thread compressed sizes from the head of cmpData
// (the same order in which SZ_compress_OMP in this file writes them), then
// every thread decompresses its own slab into the matching slice of decData.
//
// NOTE: this listing is a diff view, so changed statements appear twice
// (pre-commit form first, post-commit form second).
//
// T        element type of the output array
// N        dimension count, forwarded to SZ_decompress_dispatcher
// conf     settings; only conf.dims is read here, to locate each slab
// cmpData  compressed input buffer
// cmpSize  NOTE(review): not referenced in the visible body -- confirm
// decData  destination array covering all slabs
//
// The whole body is compiled only when _OPENMP is defined; otherwise the
// function does nothing.
template<class T, uint N>
void SZ_decompress_OMP(const Config &conf, char *cmpData, size_t cmpSize, T *decData) {
#ifdef _OPENMP

const unsigned char *cmpr_data_pos = (unsigned char *) cmpData;
int nThreads = 1;
SZ::read(nThreads, cmpr_data_pos);
omp_set_num_threads(nThreads);
// The thread count stored at compression time is requested again for
// decompression.
// NOTE(review): the slab loop below has one slab per thread id, so it relies
// on the parallel region really running with nThreads threads -- confirm.
const unsigned char *cmpr_data_pos = (unsigned char *) cmpData;
int nThreads = 1;
read(nThreads, cmpr_data_pos);
omp_set_num_threads(nThreads);
// printf("OpenMP threads = %d\n", nThreads);

std::vector<SZ::Config> conf_t(nThreads);
for (int i = 0; i < nThreads; i++) {
conf_t[i].load(cmpr_data_pos);
}
// One Config per slab, loaded in slab order.
std::vector<Config> conf_t(nThreads);
for (int i = 0; i < nThreads; i++) {
conf_t[i].load(cmpr_data_pos);
}

std::vector<size_t> cmp_start_t, cmp_size_t;
cmp_size_t.resize(nThreads);
SZ::read(cmp_size_t.data(), nThreads, cmpr_data_pos);
char *cmpr_data_p = cmpData + (cmpr_data_pos - (unsigned char *) cmpData);
std::vector<size_t> cmp_start_t, cmp_size_t;
cmp_size_t.resize(nThreads);
read(cmp_size_t.data(), nThreads, cmpr_data_pos);
// Non-const char* view of the current read position, i.e. the first byte
// after the header.
char *cmpr_data_p = cmpData + (cmpr_data_pos - (unsigned char *) cmpData);

cmp_start_t.resize(nThreads + 1);
cmp_start_t[0] = 0;
for (int i = 1; i <= nThreads; i++) {
cmp_start_t[i] = cmp_start_t[i - 1] + cmp_size_t[i - 1];
}
// Prefix sum of the compressed sizes gives each slab's offset.
cmp_start_t.resize(nThreads + 1);
cmp_start_t[0] = 0;
for (int i = 1; i <= nThreads; i++) {
cmp_start_t[i] = cmp_start_t[i - 1] + cmp_size_t[i - 1];
}

#pragma omp parallel
{
int tid = omp_get_thread_num();
auto dims_t = conf.dims;
int lo = tid * conf.dims[0] / nThreads;
int hi = (tid + 1) * conf.dims[0] / nThreads;
dims_t[0] = hi - lo;
auto it = dims_t.begin();
size_t num_t_base = std::accumulate(++it, dims_t.end(), (size_t) 1, std::multiplies<size_t>());

SZ_decompress_dispatcher<T, N>(conf_t[tid], cmpr_data_p + cmp_start_t[tid], cmp_size_t[tid], decData + lo * num_t_base);
}
{
// Same row partition as in compression: slab tid starts at row lo of the
// first dimension, and num_t_base is the element count of one row.
int tid = omp_get_thread_num();
auto dims_t = conf.dims;
int lo = tid * conf.dims[0] / nThreads;
int hi = (tid + 1) * conf.dims[0] / nThreads;
dims_t[0] = hi - lo;
auto it = dims_t.begin();
size_t num_t_base = std::accumulate(++it, dims_t.end(), (size_t) 1, std::multiplies<size_t>());

SZ_decompress_dispatcher<T, N>(conf_t[tid], cmpr_data_p + cmp_start_t[tid], cmp_size_t[tid], decData + lo * num_t_base);
}
#endif
}
}


#endif
Loading

0 comments on commit e1af6be

Please sign in to comment.