Skip to content

Commit

Permalink
update zstd to 1.4.4
Browse files Browse the repository at this point in the history
  • Loading branch information
Genwald committed Dec 23, 2019
1 parent ec97ed4 commit 2850724
Show file tree
Hide file tree
Showing 6 changed files with 897 additions and 59 deletions.
147 changes: 147 additions & 0 deletions libs/include/cover.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"

/**
* COVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
*
* All of the methods except COVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
*/
typedef struct COVER_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;

/**
* A segment is a range in the source as well as the score of the segment.
*/
typedef struct {
U32 begin;
U32 end;
U32 score;
} COVER_segment_t;

/**
*Number of epochs and size of each epoch.
*/
typedef struct {
U32 num;
U32 size;
} COVER_epoch_info_t;

/**
* Struct used for the dictionary selection function.
*/
typedef struct COVER_dictSelection {
BYTE* dictContent;
size_t dictSize;
size_t totalCompressedSize;
} COVER_dictSelection_t;

/**
* Computes the number of epochs and the size of each epoch.
* We will make sure that each epoch gets at least 10 * k bytes.
*
* The COVER algorithms divide the data up into epochs of equal size and
* select one segment from each epoch.
*
* @param maxDictSize The maximum allowed dictionary size.
* @param nbDmers The number of dmers we are training on.
* @param k The parameter k (segment size).
* @param passes The target number of passes over the dmer corpus.
* More passes means a better dictionary.
*/
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
U32 k, U32 passes);

/**
* Warns the user when their corpus is too small.
*/
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);

/**
* Checks total compressed size of a dictionary
*/
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
const size_t *samplesSizes, const BYTE *samples,
size_t *offsets,
size_t nbTrainSamples, size_t nbSamples,
BYTE *const dict, size_t dictBufferCapacity);

/**
* Returns the sum of the sample sizes.
*/
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;

/**
* Initialize the `COVER_best_t`.
*/
void COVER_best_init(COVER_best_t *best);

/**
* Wait until liveJobs == 0.
*/
void COVER_best_wait(COVER_best_t *best);

/**
* Call COVER_best_wait() and then destroy the COVER_best_t.
*/
void COVER_best_destroy(COVER_best_t *best);

/**
* Called when a thread is about to be launched.
* Increments liveJobs.
*/
void COVER_best_start(COVER_best_t *best);

/**
* Called when a thread finishes executing, both on error or success.
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection);
/**
* Error function for COVER_selectDict function. Checks if the return
* value is an error.
*/
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);

/**
* Error function for COVER_selectDict function. Returns a struct where
* return.totalCompressedSize is a ZSTD error.
*/
COVER_dictSelection_t COVER_dictSelectionError(size_t error);

/**
* Always call after selectDict is called to free up used memory from
* newly created dictionary.
*/
void COVER_dictSelectionFree(COVER_dictSelection_t selection);

/**
* Called to finalize the dictionary and select one based on whether or not
* the shrink-dict flag was enabled. If enabled the dictionary used is the
* smallest dictionary within a specified regression of the compressed size
* from the largest dictionary.
*/
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
214 changes: 214 additions & 0 deletions libs/include/zbuff.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/* ***************************************************************
* NOTES/WARNINGS
******************************************************************/
/* The streaming API defined here is deprecated.
* Consider migrating towards ZSTD_compressStream() API in `zstd.h`
* See 'lib/README.md'.
*****************************************************************/


#if defined (__cplusplus)
extern "C" {
#endif

#ifndef ZSTD_BUFFERED_H_23987
#define ZSTD_BUFFERED_H_23987

/* *************************************
* Dependencies
***************************************/
#include <stddef.h> /* size_t */
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */


/* ***************************************************************
* Compiler specifics
*****************************************************************/
/* Deprecation warnings */
/* Should these warnings be a problem,
* it is generally possible to disable them,
* typically with -Wno-deprecated-declarations for gcc
* or _CRT_SECURE_NO_WARNINGS in Visual.
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
*/
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
# elif defined(_MSC_VER)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler")
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API
# endif
#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */


/* *************************************
* Streaming functions
***************************************/
/* This is the easier "buffered" streaming API,
* using an internal buffer to lift all restrictions on user-provided buffers
* which can be any size, any place, for both input and output.
* ZBUFF and ZSTD are 100% interoperable,
* frames created by one can be decoded by the other one */

typedef ZSTD_CStream ZBUFF_CCtx;
ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void);
ZBUFF_DEPRECATED("use ZSTD_freeCStream") size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);

ZBUFF_DEPRECATED("use ZSTD_initCStream") size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);

ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
ZBUFF_DEPRECATED("use ZSTD_flushStream") size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
ZBUFF_DEPRECATED("use ZSTD_endStream") size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);

/*-*************************************************
* Streaming compression - howto
*
* A ZBUFF_CCtx object is required to track streaming operation.
* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
* ZBUFF_CCtx objects can be reused multiple times.
*
* Start by initializing ZBUF_CCtx.
* Use ZBUFF_compressInit() to start a new compression operation.
* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary.
*
* Use ZBUFF_compressContinue() repetitively to consume input stream.
* *srcSizePtr and *dstCapacityPtr can be any size.
* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr.
* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data.
* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency)
* or an error code, which can be tested using ZBUFF_isError().
*
* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush().
* The nb of bytes written into `dst` will be reported into *dstCapacityPtr.
* Note that the function cannot output more than *dstCapacityPtr,
* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small.
* @return : nb of bytes still present into internal buffer (0 if it's empty)
* or an error code, which can be tested using ZBUFF_isError().
*
* ZBUFF_compressEnd() instructs to finish a frame.
* It will perform a flush and write frame epilogue.
* The epilogue is required for decoders to consider a frame completed.
* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
* In which case, call again ZBUFF_compressFlush() to complete the flush.
* @return : nb of bytes still present into internal buffer (0 if it's empty)
* or an error code, which can be tested using ZBUFF_isError().
*
* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize()
* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency)
* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
* **************************************************/


typedef ZSTD_DStream ZBUFF_DCtx;
ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void);
ZBUFF_DEPRECATED("use ZSTD_freeDStream") size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);

ZBUFF_DEPRECATED("use ZSTD_initDStream") size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);

ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
void* dst, size_t* dstCapacityPtr,
const void* src, size_t* srcSizePtr);

/*-***************************************************************************
* Streaming decompression howto
*
* A ZBUFF_DCtx object is required to track streaming operations.
* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
* Use ZBUFF_decompressInit() to start a new decompression operation,
* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
* Note that ZBUFF_DCtx objects can be re-init multiple times.
*
* Use ZBUFF_decompressContinue() repetitively to consume your input.
* *srcSizePtr and *dstCapacityPtr can be any size.
* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
* @return : 0 when a frame is completely decoded and fully flushed,
* 1 when there is still some data left within internal buffer to flush,
* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency),
* or an error code, which can be tested using ZBUFF_isError().
*
* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
* input : ZBUFF_recommendedDInSize == 128KB + 3;
* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
* *******************************************************************************/


/* *************************************
* Tool functions
***************************************/
ZBUFF_DEPRECATED("use ZSTD_isError") unsigned ZBUFF_isError(size_t errorCode);
ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode);

/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
* These sizes are just hints, they tend to offer better latency */
ZBUFF_DEPRECATED("use ZSTD_CStreamInSize") size_t ZBUFF_recommendedCInSize(void);
ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void);
ZBUFF_DEPRECATED("use ZSTD_DStreamInSize") size_t ZBUFF_recommendedDInSize(void);
ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void);

#endif /* ZSTD_BUFFERED_H_23987 */


#ifdef ZBUFF_STATIC_LINKING_ONLY
#ifndef ZBUFF_STATIC_H_30298098432
#define ZBUFF_STATIC_H_30298098432

/* ====================================================================================
* The definitions in this section are considered experimental.
* They should never be used in association with a dynamic library, as they may change in the future.
* They are provided for advanced usages.
* Use them only in association with static linking.
* ==================================================================================== */

/*--- Dependency ---*/
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
#include "zstd.h"


/*--- Custom memory allocator ---*/
/*! ZBUFF_createCCtx_advanced() :
* Create a ZBUFF compression context using external alloc and free functions */
ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);

/*! ZBUFF_createDCtx_advanced() :
* Create a ZBUFF decompression context using external alloc and free functions */
ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);


/*--- Advanced Streaming Initialization ---*/
ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize);


#endif /* ZBUFF_STATIC_H_30298098432 */
#endif /* ZBUFF_STATIC_LINKING_ONLY */


#if defined (__cplusplus)
}
#endif
Loading

0 comments on commit 2850724

Please sign in to comment.