diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index d515f04d..a1de6308 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -1,5 +1,5 @@ -#ifndef _SZ_HUFFMAN_ENCODER_HPP -#define _SZ_HUFFMAN_ENCODER_HPP +#ifndef _SZ_HUFFMAN_ENCODER_LZ_HPP +#define _SZ_HUFFMAN_ENCODER_LZ_HPP #include "SZ3/def.hpp" #include "SZ3/encoder/Encoder.hpp" @@ -7,642 +7,663 @@ #include "SZ3/utils/MemoryUtil.hpp" #include "SZ3/utils/Timer.hpp" #include "SZ3/utils/ska_hash/unordered_map.hpp" -#include -#include -#include -#include #include +#include #include +#include #include #include -#include +#include +#include + +namespace SZ3{ + + template + class HuffmanEncoder:public concepts::EncoderInterface{ + private: -namespace SZ3 { + class Node{ + public: - template - class HuffmanEncoder : public concepts::EncoderInterface { + Node(T c_=0,Node *lp=nullptr,Node *rp=nullptr){ - public: + c=c_; + p[0]=lp; + p[1]=rp; + } - typedef struct node_t { - struct node_t *left, *right; - size_t freq; - char t; //in_node:0; otherwise:1 T c; - } *node; - - typedef struct HuffmanTree { - unsigned int stateNum; - unsigned int allNodes; - struct node_t *pool; - node *qqq, *qq; //the root node of the HuffmanTree is qq[1] - int n_nodes; //n_nodes is for compression - int qend; - uint64_t **code; - unsigned char *cout; - int n_inode; //n_inode is for decompression - int maxBitCount; - } HuffmanTree; - - - HuffmanEncoder() { - int x = 1; - char *y = (char *) &x; - if (*y == 1) - sysEndianType = 0; - else //=0 - sysEndianType = 1; - } + Node *p[2]; - ~HuffmanEncoder() { - SZ_FreeHuffman(); - } + inline uchar isLeaf(){ - //build huffman tree - HuffmanTree *createHuffmanTree(int stateNum) { - HuffmanTree *huffmanTree = (HuffmanTree *) malloc(sizeof(HuffmanTree)); - memset(huffmanTree, 0, sizeof(HuffmanTree)); - huffmanTree->stateNum = stateNum; - huffmanTree->allNodes = 2 * stateNum; - - huffmanTree->pool = (struct node_t *) malloc(huffmanTree->allNodes * 2 * sizeof(struct node_t)); - huffmanTree->qqq = (node *) malloc(huffmanTree->allNodes * 2 * sizeof(node)); - huffmanTree->code = (uint64_t **) malloc(huffmanTree->stateNum * sizeof(uint64_t *)); - huffmanTree->cout = (unsigned char *) malloc(huffmanTree->stateNum * sizeof(unsigned char)); - - memset(huffmanTree->pool, 0, huffmanTree->allNodes * 2 * sizeof(struct node_t)); - memset(huffmanTree->qqq, 0, huffmanTree->allNodes * 2 * sizeof(node)); - memset(huffmanTree->code, 0, huffmanTree->stateNum * sizeof(uint64_t *)); - memset(huffmanTree->cout, 0, huffmanTree->stateNum * sizeof(unsigned char)); - huffmanTree->qq = huffmanTree->qqq - 1; - huffmanTree->n_nodes = 0; - huffmanTree->n_inode = 0; - huffmanTree->qend = 1; - - return huffmanTree; - } + return p[0]==nullptr; + } + }; - /** - * build huffman tree using bins - * @param bins - * @param stateNum - */ - void preprocess_encode(const std::vector &bins, int stateNum) { - preprocess_encode(bins.data(), bins.size(), stateNum); - } + class HuffmanTree{ + + private: + + uchar _constructed=0; - /** - * build huffman tree using bins - * @param bins - * @param num_bin - * @param stateNum - */ - void preprocess_encode(const T *bins, size_t num_bin, int stateNum) { - nodeCount = 0; - if (num_bin == 0) { - printf("Huffman bins should not be empty\n"); - exit(0); + uchar len=0; + int vec=0; + + void dfs(Node* u){ + + if(u->isLeaf()){ + + mplen[u->c]=len; + mpcode[u->c]=vec; + + limit=std::max(limit,len); + + return; + } + + ++len; + dfs(u->p[0]); + --len; + + vec^=1<p[1]); + vec^=1<<--len; } - init(bins, num_bin); - for (int i = 0; i < huffmanTree->stateNum; i++) - if (huffmanTree->code[i]) nodeCount++; - nodeCount = nodeCount * 2 - 1; - } - //save the huffman Tree in the compressed data - void save(uchar *&c) { - auto cc = c; - write(offset, c); - int32ToBytes_bigEndian(c, nodeCount); - c += sizeof(int); - int32ToBytes_bigEndian(c, huffmanTree->stateNum / 2); - c += sizeof(int); - uint totalSize = 0;// = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - // std::cout << "nodeCount = " << nodeCount << std::endl; - if (nodeCount <= 256) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else if (nodeCount <= 65536) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - c += totalSize; -// return c - cc; - } + class cmp{ + public: + bool operator()(const std::pair& u, const std::pair& v) { + return u.second==v.second?u.first>v.first:u.second>v.second; + } + }; - size_t size_est() { - size_t b = (nodeCount <= 256) ? sizeof(unsigned char) : ((nodeCount <= 65536) ? sizeof(unsigned short) : sizeof(unsigned int)); - return 1 + 2 * nodeCount * b + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T) + sizeof(int) + sizeof(int) + sizeof(T); - } + public: - //perform encoding - size_t encode(const std::vector &bins, uchar *&bytes) { - return encode(bins.data(), bins.size(), bytes); - } + std::vector mplen; + std::vector mpcode; - //perform encoding - size_t encode(const T *bins, size_t num_bin, uchar *&bytes) { - size_t outSize = 0; - size_t i = 0; - unsigned char bitSize = 0, byteSize, byteSizep; - int state; - uchar *p = bytes + sizeof(size_t); - int lackBits = 0; - //int64_t totalBitSize = 0, maxBitSize = 0, bitSize21 = 0, bitSize32 = 0; - for (i = 0; i < num_bin; i++) { - state = bins[i] - offset; - bitSize = huffmanTree->cout[state]; - - if (lackBits == 0) { - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + - 1; //it's equal to the number of bytes involved (for *outSize) - byteSizep = bitSize / 8; //it's used to move the pointer p for next data - if (byteSize <= 8) { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += byteSizep; - } else //byteSize>8 - { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += 8; - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[1]); - p += (byteSizep - 8); - } - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - lackBits)); - if (lackBits < bitSize) { - p++; - - int64_t newCode = (huffmanTree->code[state])[0] << lackBits; - int64ToBytes_bigEndian(p, newCode); - - if (bitSize <= 64) { - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else //bitSize > 64 - { - byteSizep = 7; //must be 7 bytes, because lackBits!=0 - p += byteSizep; - outSize += byteSize; - - bitSize -= 64; - if (lackBits < bitSize) { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - lackBits)); - p++; - newCode = (huffmanTree->code[state])[1] << lackBits; - int64ToBytes_bigEndian(p, newCode); - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else //lackBits >= bitSize - { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - bitSize)); - lackBits -= bitSize; - } - } - } else //lackBits >= bitSize - { - lackBits -= bitSize; - if (lackBits == 0) - p++; - } + T offset; + // minimum bits for T + uchar mbft; + uchar limit; + + void init(){ + + _constructed=0; + ht.clear(); + mplen.clear(); + mpcode.clear(); + freq.clear(); + + offset=0; + mbft=0; + root=0; + n=0; + maxval=0; + limit=0; + } + + HuffmanTree(){ + + init(); + } + + int root; + int n; + int maxval; + std::vector ht; + std::vector freq; + + void addElement(T c,size_t freqc){ + + assert(!_constructed); + + ht.push_back(Node(c)); + freq[c]=freqc; + ++n; + } + + void constructHuffmanTree(){ + + assert(!_constructed); + assert(ht.size()>1); + + if(maxval==1){ + + mbft=1; + ht.push_back(Node(0,&ht[0],nullptr)); + mplen[0]=1; + mpcode[0]=0; + limit=1; + setConstructed(); + return; } + + Timer timer(true); + + mbft=1; + while((1<,std::vector>,cmp> q; + + for(int i=0;i1){ + + int u=q.top().first; + size_t freq_u=q.top().second; + q.pop(); + int v=q.top().first; + size_t freq_v=q.top().second; + q.pop(); + + ht.push_back(Node(0,&ht[u],&ht[v])); + + q.push({ht.size()-1,freq_u+freq_v}); + } + + root=ht.size()-1; + + dfs(&ht[root]); + + setConstructed(); + + timer.stop("construct huffman tree"); } - *reinterpret_cast(bytes) = outSize; - bytes += sizeof(size_t) + outSize; - return outSize; - } - void postprocess_encode() { - SZ_FreeHuffman(); - } + uchar isConstructed(){ + + return _constructed; + } - void preprocess_decode() {}; - - //perform decoding - std::vector decode(const uchar *&bytes, size_t targetLength) { - node t = treeRoot; - std::vector out(targetLength); - size_t i = 0, byteIndex = 0, count = 0; - int r; - node n = treeRoot; - size_t encodedLength = *reinterpret_cast(bytes); - bytes += sizeof(size_t); - if (n->t) //root->t==1 means that all state values are the same (constant) - { - for (count = 0; count < targetLength; count++) - out[count] = n->c + offset; - return out; + void setConstructed(){ + + _constructed=1; } + }; + + HuffmanTree tree; + + public: - for (i = 0; count < targetLength; i++) { - byteIndex = i >> 3; //i/8 - r = i % 8; - if (((bytes[byteIndex] >> (7 - r)) & 0x01) == 0) - n = n->left; - else - n = n->right; - - if (n->t) { - out[count] = n->c + offset; - n = t; - count++; + void preprocess_encode(const T *const bins,size_t num_bin,int stateNum){ + + Timer timer(true); + + tree.init(); + + T __minval,__maxval; + + if(stateNum==0){ + + __minval=*bins; + __maxval=*bins; + for(int i=1;inew_node2(C[0], t[0]); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else if (nodeCount <= 65536) { - unsigned short *L = (unsigned short *) malloc(nodeCount * sizeof(unsigned short)); - memset(L, 0, nodeCount * sizeof(unsigned short)); - unsigned short *R = (unsigned short *) malloc(nodeCount * sizeof(unsigned short)); - memset(R, 0, nodeCount * sizeof(unsigned short)); - T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else //nodeCount>65536 - { - unsigned int *L = (unsigned int *) malloc(nodeCount * sizeof(unsigned int)); - memset(L, 0, nodeCount * sizeof(unsigned int)); - unsigned int *R = (unsigned int *) malloc(nodeCount * sizeof(unsigned int)); - memset(R, 0, nodeCount * sizeof(unsigned int)); - T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; + // ska::unordered_map freq; + std::vector freq(tree.maxval); + // freq.reserve(4*stateNum); + + for(int i=0;ipool + huffmanTree->n_nodes++; - if (freq) { - n->c = c; - n->freq = freq; - n->t = 1; - } else { - n->left = a; - n->right = b; - n->freq = a->freq + b->freq; - n->t = 0; - //n->c = 0; + tree.ht.reserve(freq.size()<<1); + + for(int i=0;ipool[huffmanTree->n_nodes].c = c; - huffmanTree->pool[huffmanTree->n_nodes].t = t; - return huffmanTree->pool + huffmanTree->n_nodes++; + // printf("begins to construct huffman tree\n"); + + tree.constructHuffmanTree(); + + timer.stop("preprocess_encode"); } - /* priority queue */ - void qinsert(node n) { - int j, i = huffmanTree->qend++; - while ((j = (i >> 1))) //j=i/2 - { - if (huffmanTree->qq[j]->freq <= n->freq) break; - huffmanTree->qq[i] = huffmanTree->qq[j], i = j; - } - huffmanTree->qq[i] = n; + void preprocess_encode(const std::vector &bins,int stateNum){ + + preprocess_encode(bins.data(),bins.size(),stateNum); } - node qremove() { - int i, l; - node n = huffmanTree->qq[i = 1]; - node p; - if (huffmanTree->qend < 2) return 0; - huffmanTree->qend--; - huffmanTree->qq[i] = huffmanTree->qq[huffmanTree->qend]; - - while ((l = (i << 1)) < huffmanTree->qend) { //l=(i*2) - if (l + 1 < huffmanTree->qend && huffmanTree->qq[l + 1]->freq < huffmanTree->qq[l]->freq) l++; - if (huffmanTree->qq[i]->freq > huffmanTree->qq[l]->freq) { - p = huffmanTree->qq[i]; - huffmanTree->qq[i] = huffmanTree->qq[l]; - huffmanTree->qq[l] = p; - i = l; - } else { - break; - } + void saveAsCode(uchar *&c){ + + Timer timer(true); + + uchar *head=c; + + // whether the tree is full binary tree + + uchar& limit=tree.limit; + + std::vector> mp(limit+1); + + for(int i=0;it) { - huffmanTree->code[n->c] = (uint64_t *) malloc(2 * sizeof(uint64_t)); - if (len <= 64) { - (huffmanTree->code[n->c])[0] = out1 << (64 - len); - (huffmanTree->code[n->c])[1] = out2; - } else { - (huffmanTree->code[n->c])[0] = out1; - (huffmanTree->code[n->c])[1] = out2 << (128 - len); + uchar mask=0; + uchar index=0; + + assert(sizeof(T)<=8); + + if(mp[limit].size()==tree.n){ + + // 00 XXXXXX (mbft) + if(tree.maxval>1) writeBytesByte(c,tree.mbft); + else writeBytesByte(c,0x80|tree.mbft); + + writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + + writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + + int32ToBytes_bigEndian(c,tree.n); + c+=4; + + int cnt=mp[limit].size(); + + uchar logcnt=0; + while(logcnt<32&&(1<cout[n->c] = (unsigned char) len; + + writeBytesClearMask(c,mask,index); + return; } - int index = len >> 6; //=len/64 - if (index == 0) { - out1 = out1 << 1; - out1 = out1 | 0; - build_code(n->left, len + 1, out1, 0); - out1 = out1 | 1; - build_code(n->right, len + 1, out1, 0); - } else { - if (len % 64 != 0) - out2 = out2 << 1; - out2 = out2 | 0; - build_code(n->left, len + 1, out1, out2); - out2 = out2 | 1; - build_code(n->right, len + 1, out1, out2); + + writeBytesByte(c,0x40|tree.mbft); + + writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + + writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + + int32ToBytes_bigEndian(c,tree.maxval); + c+=4; + + for(uchar len=1;len<=limit;len++){ + + int cnt=mp[len].size(); + + writeBytes(c,cnt,len,mask,index); + + if(cnt){ + + for(const T& it:mp[len]){ + + writeBytes(c,it,tree.mbft,mask,index); + + const int code=tree.mpcode[it]; + + writeBytes(c,code,len,mask,index); + } + } } + + writeBytesClearMask(c,mask,index); + + timer.stop("saveAsCode"); + + // printf("huffman tree size = %d\n",(int)(c-head)); + + // Lossless_zstd zstd; + // size_t compressed_tree_size; + + // // uchar *compressed_tree = zstd.compress(head,c-head,compressed_tree_size); + // delete[] zstd.compress(head,c-head,compressed_tree_size); + + // printf("compressed huffman tree size = %d\n",(int)compressed_tree_size); + + return; } - /** - * Compute the frequency of the data and build the Huffman tree - * @param HuffmanTree* huffmanTree (output) - * @param int *s (input) - * @param size_t length (input) - * */ - void init(const T *s, size_t length) { - T max = s[0]; - offset = s[0]; //offset is min - - ska::unordered_map frequency; - for (size_t i = 0; i < length; i++) { - frequency[s[i]]++; + void loadAsCode(const uchar *&bytes,size_t &remaining_length){ + + Timer timer(true); + + tree.init(); + + uchar feature=(*bytes)>>6; + tree.mbft=(*bytes)&0x3f; + ++bytes; + + uchar szT=((*bytes)>>5)+1; + tree.limit=((*bytes)&0x1f)+1; + ++bytes; + + assert(szT==sizeof(T)); + + for(int i=0;i max) { - max = k; - } - if (k < offset) { - offset = k; + tree.maxval=bytesToInt32_bigEndian(bytes); + bytes+=4; + + tree.ht.reserve(tree.maxval<<1); + tree.freq.resize(tree.maxval); + tree.mplen.resize(tree.maxval); + tree.mpcode.resize(tree.maxval); + + tree.ht.push_back(Node()); + + if(feature==0x00||feature==0x02){ + + int i=0; + tree.n=1<p[e]==nullptr){ + + tree.ht.push_back(Node()); + u->p[e]=&tree.ht[tree.ht.size()-1]; + } + + u=u->p[e]; + } + + u->c=c; + assert(c>3; + + return; } - int stateNum = max - offset + 2; - huffmanTree = createHuffmanTree(stateNum); + tree.n=0; + + int i=0; + + for(uchar len=1;len<=tree.limit;len++){ - for (const auto &f: frequency) { - qinsert(new_node(f.second, f.first - offset, 0, 0)); + int cnt=0; + + for(uchar j=0;jp[e]==nullptr){ + + tree.ht.push_back(Node()); + u->p[e]=&tree.ht[tree.ht.size()-1]; + } + + u=u->p[e]; + } + + u->c=c; + ++tree.n; + tree.mplen[c]=len; + tree.mpcode[c]=vec; + } } - while (huffmanTree->qend > 2) - qinsert(new_node(0, 0, qremove(), qremove())); + bytes+=(i+7)>>3; - build_code(huffmanTree->qq[1], 0, 0, 0); - treeRoot = huffmanTree->qq[1]; + timer.stop("loadAsCode"); + tree.setConstructed(); } - template - void pad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - C[i] = root->c; - t[i] = root->t; - node lroot = root->left; - if (lroot != 0) { - huffmanTree->n_inode++; - L[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, lroot); + size_t encode(const T *bins, size_t num_bin, uchar *&bytes){ + + if(tree.maxval==1){ + + int32ToBytes_bigEndian(bytes,num_bin^0x1234abcd); + bytes+=4; + return 4; } - node rroot = root->right; - if (rroot != 0) { - huffmanTree->n_inode++; - R[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, rroot); + + Timer timer(true); + + assert(tree.isConstructed()); + + uchar *head=bytes; + bytes+=4; + + int len=0; + + uchar mask=0; + uchar index=0; + + for(int i=0;i - void unpad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - //root->c = C[i]; - if (root->t == 0) { - T1 l, r; - l = L[i]; - if (l != 0) { - node lroot = new_node2(C[l], t[l]); - root->left = lroot; - unpad_tree(L, R, C, t, l, lroot); + size_t encode(const std::vector &bins, uchar *&bytes){ + + return encode(bins.data(),bins.size(),bytes); + } + + void postprocess_encode(){ + + } + + void preprocess_decode(){ + + } + + std::vector decode(const uchar *&bytes, size_t targetLength){ + + if(tree.maxval==1){ + + int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; + bytes+=4; + assert(len==targetLength); + return std::vector(len,tree.offset); + } + + Timer timer(true); + + assert(tree.isConstructed()); + + assert(targetLength>4); + + Node *u=&tree.ht[tree.root]; + + int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; + bytes+=4; + + std::vector a(targetLength); + int sza=0; + // a.reserve(targetLength); + + // for(int i=0;ip[readBit(bytes,i++)]; + + // if(u->isLeaf()){ + + // a[sza++]=u->c+tree.offset; + // u=&tree.ht[tree.root]; + // } + // } + + // use unroll loops to optimize the above code + + int byteIndex=0; + int i=0; + uchar b; + for(;i+8p[b&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>1)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>2)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>3)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } - r = R[i]; - if (r != 0) { - node rroot = new_node2(C[r], t[r]); - root->right = rroot; - unpad_tree(L, R, C, t, r, rroot); + u=u->p[(b>>4)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>5)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>6)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>7)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } } - } - template - unsigned int convert_HuffTree_to_bytes_anyStates(unsigned int nodeCount, unsigned char *out) { - T1 *L = (T1 *) malloc(nodeCount * sizeof(T1)); - memset(L, 0, nodeCount * sizeof(T1)); - T1 *R = (T1 *) malloc(nodeCount * sizeof(T1)); - memset(R, 0, nodeCount * sizeof(T1)); - T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - pad_tree(L, R, C, t, 0, huffmanTree->qq[1]); - - unsigned int totalSize = - 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - //*out = (unsigned char*)malloc(totalSize); - out[0] = (unsigned char) sysEndianType; - memcpy(out + 1, L, nodeCount * sizeof(T1)); - memcpy(out + 1 + nodeCount * sizeof(T1), R, nodeCount * sizeof(T1)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1), C, nodeCount * sizeof(T)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(T), t, nodeCount * sizeof(unsigned char)); - - free(L); - free(R); - free(C); - free(t); - return totalSize; - } + b=bytes[byteIndex]; + + for(int j=0;jpool); - huffmanTree->pool = NULL; - free(huffmanTree->qqq); - huffmanTree->qqq = NULL; - for (i = 0; i < huffmanTree->stateNum; i++) { - if (huffmanTree->code[i] != NULL) - free(huffmanTree->code[i]); + u=u->p[(b>>j)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } - free(huffmanTree->code); - huffmanTree->code = NULL; - free(huffmanTree->cout); - huffmanTree->cout = NULL; - free(huffmanTree); - huffmanTree = NULL; } + + bytes+=(len+7)>>3; + + timer.stop("decode"); + + return a; + } + + void postprocess_decode(){ + + } + + void save(uchar *&c){ + + saveAsCode(c); + } + + void load(const uchar *&c,size_t &remaining_length){ + + loadAsCode(c,remaining_length); } }; + } -#endif +#endif \ No newline at end of file diff --git a/include/SZ3/utils/ByteUtil.hpp b/include/SZ3/utils/ByteUtil.hpp index f408fd67..b0e21eec 100644 --- a/include/SZ3/utils/ByteUtil.hpp +++ b/include/SZ3/utils/ByteUtil.hpp @@ -7,7 +7,6 @@ #include "SZ3/def.hpp" #include -#include namespace SZ3 { @@ -230,5 +229,67 @@ namespace SZ3 { return lfBuf_cur.value; } + inline void writeBytesBit(uchar *&c, uchar val, uchar &mask, uchar &index) { + + assert(val == 0 || val == 1); + + mask |= val << index++; + if (index == 8) { + *c++ = mask; + mask = index = 0; + } + } + + template + inline void writeBytes(uchar *&c, T val, uchar len, uchar &mask, uchar &index) { + + assert(len >= 1 && len <= sizeof(T) * 8); + + if (len + index >= 8) { + + mask |= (val & ((1 << (8 - index)) - 1)) << index; + val >>= 8 - index; + len -= 8 - index; + *c++ = mask; + mask = index = 0; + + while (len >= 8) { + + *c++ = val & (1 << 8) - 1; + val >>= 8; + len -= 8; + } + } + + mask |= (val & (1 << len) - 1) << index; + index += len; + + // for(int i=0;i>=1; + // } + } + + inline void writeBytesByte(uchar *&c, uchar val) { + *c++ = val; + } + + inline void writeBytesClearMask(uchar *&c, uchar &mask, uchar &index) { + + if (index > 0) { + *c++ = mask; + // mask=i=0; + } + } + + inline uchar readBit(const uchar *const &c, int i) { + + return ((*(c + (i >> 3))) >> (i & 7)) & 1; + } + }; #endif //SZ3_BYTEUTIL_HPP