From c8d43b7d8365f784990b7e6271b045f5e32962ba Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 12:35:33 +0200 Subject: [PATCH 1/8] apply formatting --- source/Lib/vvdec/vfgs_fw.c | 874 +++++++++++++++++---------------- source/Lib/vvdec/vfgs_fw.h | 36 +- source/Lib/vvdec/vfgs_hw.c | 465 +++++++++--------- source/Lib/vvdec/vfgs_hw.h | 33 +- source/Lib/vvdec/vvdecimpl.cpp | 170 +++---- 5 files changed, 794 insertions(+), 784 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index e944cd59..cd424a61 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -59,139 +59,140 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#define min(a,b) ((a)<(b)?(a):(b)) -#define round(a,s) (((a)+(1<<((s)-1)))>>(s)) -#define clip(x,lo,hi) ((x)>(hi)?hi:(x)<(lo)?(lo):(x)) +#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) +#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) +#define clip( x, lo, hi ) ( ( x ) > ( hi ) ? hi : ( x ) < ( lo ) ? 
( lo ) : ( x ) ) +// clang-format off static const int8 Gaussian_LUT[2048] = {}; static const uint32 Seed_LUT[256] = { @@ -299,337 +300,342 @@ static const uint32 Seed_LUT[256] = { static const int8 DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); +// clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32 prng(uint32 x) +static uint32 prng( uint32 x ) { -#if 1 // same as HW (bit-reversed RDD-5) - uint32 s = ((x << 30) ^ (x << 2)) & 0x80000000; - x = s | (x >> 1); -#else // RDD-5 - uint32 s = ((x >> 30) ^ (x >> 2)) & 1; - x = (x << 1) | s; +#if 1 // same as HW (bit-reversed RDD-5) + uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); +#else // RDD-5 + uint32 s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; + x = ( x << 1 ) | s; #endif - return x; + return x; } /** Apply iDCT2 to block B[64][64] + clipping */ -static void idct2_64(int8 B[][64]) +static void idct2_64( int8 B[][64] ) { - int16 X[64][64]; - int i,j,k; - int32 acc; - - /* 1st pass (DCT2_64'*B) = vertical */ - for (j=0; j<64; j++) - for (i=0; i<64; i++) - { - acc = 256; - for (k=0; k<64; k++) - acc += (int32)DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) - - X[j][i] = (acc >> 9); - } - - /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ - for (j=0; j<64; j++) - for (i=0; i<64; i++) - { - acc = 256; - for (k=0; k<64; k++) - acc += (int32)X[j][k] * DCT2_64[k][i]; - - acc >>= 9; - if (acc > 127) acc = 127; - if (acc < -127) acc = -127; - B[j][i] = acc; - } + int16 X[64][64]; + int i, j, k; + int32 acc; + + /* 1st pass (DCT2_64'*B) = vertical */ + for( j = 0; j < 64; j++ ) + for( i = 0; i < 64; i++ ) + { + acc = 256; + for( k = 0; k < 64; k++ ) + acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are 
vertical (transpose of DCT2_64) + + X[j][i] = ( acc >> 9 ); + } + + /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ + for( j = 0; j < 64; j++ ) + for( i = 0; i < 64; i++ ) + { + acc = 256; + for( k = 0; k < 64; k++ ) + acc += (int32) X[j][k] * DCT2_64[k][i]; + + acc >>= 9; + if( acc > 127 ) + acc = 127; + if( acc < -127 ) + acc = -127; + B[j][i] = acc; + } } /** Apply iDCT2 to block B[32][32] + clipping */ -static void idct2_32(int8 B[][32]) +static void idct2_32( int8 B[][32] ) { - int16 X[32][32]; - int i,j,k; - int32 acc; - - /* 1st pass (R32'*B) = vertical */ - for (j=0; j<32; j++) - for (i=0; i<32; i++) - { - acc = 128; - for (k=0; k<32; k++) - acc += (int32)DCT2_64[k*2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) - - X[j][i] = (acc >> 8); - } - - /* 2nd pass (...)*R32 = horizontal + clipping */ - for (j=0; j<32; j++) - for (i=0; i<32; i++) - { - acc = 256; - for (k=0; k<32; k++) - acc += (int32)X[j][k] * DCT2_64[k*2][i]; - - acc >>= 9; - if (acc > 127) acc = 127; - if (acc < -127) acc = -127; - B[j][i] = acc; - } + int16 X[32][32]; + int i, j, k; + int32 acc; + + /* 1st pass (R32'*B) = vertical */ + for( j = 0; j < 32; j++ ) + for( i = 0; i < 32; i++ ) + { + acc = 128; + for( k = 0; k < 32; k++ ) + acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + + X[j][i] = ( acc >> 8 ); + } + + /* 2nd pass (...)*R32 = horizontal + clipping */ + for( j = 0; j < 32; j++ ) + for( i = 0; i < 32; i++ ) + { + acc = 256; + for( k = 0; k < 32; k++ ) + acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + + acc >>= 9; + if( acc > 127 ) + acc = 127; + if( acc < -127 ) + acc = -127; + B[j][i] = acc; + } } -static void vfgs_make_sei_ff_pattern64(int8 B[][64], int fh, int fv) +static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) { - int k, l; - uint32 n; - fh = 4*(fh+1); - fv = 4*(fv+1); - - n = Seed_LUT[0]; - memset(B, 0, 64*64*sizeof(int8)); - for (l=0; l<64; l++) - for (k=0; k<64; k+=4) - { - if 
(k1) ? 44 : 82; - height = (suby>1) ? 38 : 73; - - switch (nb_coef) - { - case 6: - // SEI.AR mode - coef[3][2] = ar_coef[1]; // left - coef[2][3] = (ar_coef[1] * ar_coef[4]) >> scale; // top - coef[2][2] = (ar_coef[3] * ar_coef[4]) >> scale; // top-left - coef[2][4] = (ar_coef[3] * ar_coef[4]) >> scale; // top-right - coef[3][1] = ar_coef[5]; // left-left - coef[1][3] = ((int32)ar_coef[5] * ar_coef[4] * ar_coef[4]) >> (2*scale) ; // top-top - L = 2; - break; - - default: - assert(0); - } - if (nb_coef != 6) - for (k=0, j=-L; j<=0; j++) - for (i=-L; i<=L && (i<0 || j<0); i++, k++) - coef[3+j][3+i] = ar_coef[k]; - - memset(buf, 0, width*height); // debug (not needed) - for (y=0; y=3 && y=3 && x 1 ) ? 44 : 82; + height = ( suby > 1 ) ? 38 : 73; + + switch( nb_coef ) + { + case 6: + // SEI.AR mode + coef[3][2] = ar_coef[1]; // left + coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top + coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left + coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right + coef[3][1] = ar_coef[5]; // left-left + coef[1][3] = ( (int32) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top + + L = 2; + break; + + default: + assert( 0 ); + } + if( nb_coef != 6 ) + for( k = 0, j = -L; j <= 0; j++ ) + for( i = -L; i <= L && ( i < 0 || j < 0 ); i++, k++ ) + coef[3 + j][3 + i] = ar_coef[k]; + + memset( buf, 0, width * height ); // debug (not needed) + for( y = 0; y < height; y++ ) + for( x = 0; x < width; x++ ) + { + // Filter + g = 0; + if( y >= 3 && y < height && x >= 3 && x < width - 3 ) + { + for( j = -3; j <= 0; j++ ) + for( i = -3; i <= 3 && ( i < 0 || j < 0 ); i++ ) + g += (int) coef[3 + j][3 + i] * buf[width * ( y + j ) + x + i]; + + g = round( g, scale ); + } + + // Add random noise + g += round( Gaussian_LUT[rnd & 2047], shift ); + rnd = prng( rnd ); + + buf[width * y + x] = clip( g, -127, 127 ); + } + + // Copy cropped area to output + memset( P, 0, size * size ); + for( y = 0; y < 64 / suby; y++ ) + 
for( x = 0; x < 64 / subx; x++ ) + P[size * y + x] = buf[width * ( 3 + 6 / suby + y ) + ( 3 + 6 / subx + x )]; } -int same_pattern(fgs_sei* cfg, int32 a, int32 b) +int same_pattern( fgs_sei* cfg, int32 a, int32 b ) { - int16* coef_a = &cfg->comp_model_value[0][0][0] + a; - int16* coef_b = &cfg->comp_model_value[0][0][0] + b; + int16* coef_a = &cfg->comp_model_value[0][0][0] + a; + int16* coef_b = &cfg->comp_model_value[0][0][0] + b; - for (int i=1; icomp_model_present_flag[c]) - { - for (k=0; knum_intensity_intervals[c]; k++) - { - a = cfg->intensity_interval_lower_bound[c][k]; - uint32 id = SEI_MAX_MODEL_VALUES*(k + 256*c); - - for (i=0; i0; i--) - { - if (intensities[i-1] > a) - { - intensities[i] = intensities[i-1]; - patterns[i] = patterns[i-1]; - } - else - break; - } - intensities[i] = a; - patterns[i] = id; - np ++; - } - } - } - if (c==0 || c==2) - { - // 2. Register the patterns (with correct order) - for (i=0; icomp_model_value[0][0][0] + patterns[i]; - - if (c==0) - { - if (cfg->model_id) - vfgs_make_ar_pattern(Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0]); - else - vfgs_make_sei_ff_pattern64((int8 (*)[64])P, coef[1], coef[2]); - - vfgs_set_luma_pattern(i, P); - } - else if (c==2) - { - if (cfg->model_id) - vfgs_make_ar_pattern(Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1]); - else - vfgs_make_sei_ff_pattern32((int8 (*)[32])P, coef[1], coef[2]); - - vfgs_set_chroma_pattern(i, P); - } - } - // 3. Fill up LUTs - for (int cc=min(c,1); cc<=c; cc++) - { - if (cfg->comp_model_present_flag[cc]) - { - memset(plut, 255, sizeof(plut)); - // 3a. Fill valid patterns - for (k=0; knum_intensity_intervals[cc]; k++) - { - a = cfg->intensity_interval_lower_bound[cc][k]; - b = cfg->intensity_interval_upper_bound[cc][k]; - uint32 id = SEI_MAX_MODEL_VALUES*(k + 256*cc); - - for (i=0; icomp_model_value[cc][k][0]; - if (ilog2_scale_factor - (cfg->model_id ? 
1 : 0)); // -1 for grain shift in pattern generation (see above) + int8 P[64 * 64]; + int8 Lbuf[73 * 82]; + int8 Cbuf[38 * 44]; + uint8 slut[256]; + uint8 plut[256]; + uint8 intensities[VFGS_MAX_PATTERNS]; + uint32 patterns[VFGS_MAX_PATTERNS]; + uint8 np = 0; // number of patterns + uint8 a, b, i; + int c, k; + + for( c = 0; c < 3; c++ ) + { + memset( slut, 0, sizeof( slut ) ); + if( c < 2 ) + { + np = 0; + memset( intensities, 0, sizeof( intensities ) ); + memset( patterns, ~0, sizeof( patterns ) ); + } + // 1. Look for different patterns, up to max supported number + if( cfg->comp_model_present_flag[c] ) + { + for( k = 0; k < cfg->num_intensity_intervals[c]; k++ ) + { + a = cfg->intensity_interval_lower_bound[c][k]; + uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); + + for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + if( same_pattern( cfg, patterns[i], id ) ) + break; + + if( i == VFGS_MAX_PATTERNS && np < VFGS_MAX_PATTERNS ) // can add it + { + // keep them sorted (by intensity). The goal of this sort is + // to enable meaningful pattern interpolation + for( i = np; i > 0; i-- ) + { + if( intensities[i - 1] > a ) + { + intensities[i] = intensities[i - 1]; + patterns[i] = patterns[i - 1]; + } + else + break; + } + intensities[i] = a; + patterns[i] = id; + np++; + } + } + } + if( c == 0 || c == 2 ) + { + // 2. Register the patterns (with correct order) + for( i = 0; i < np; i++ ) + { + int16* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; + + if( c == 0 ) + { + if( cfg->model_id ) + vfgs_make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + else + vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + + vfgs_set_luma_pattern( i, P ); + } + else if( c == 2 ) + { + if( cfg->model_id ) + vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + else + vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + + vfgs_set_chroma_pattern( i, P ); + } + } + // 3. 
Fill up LUTs + for( int cc = min( c, 1 ); cc <= c; cc++ ) + { + if( cfg->comp_model_present_flag[cc] ) + { + memset( plut, 255, sizeof( plut ) ); + // 3a. Fill valid patterns + for( k = 0; k < cfg->num_intensity_intervals[cc]; k++ ) + { + a = cfg->intensity_interval_lower_bound[cc][k]; + b = cfg->intensity_interval_upper_bound[cc][k]; + uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); + + for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + if( same_pattern( cfg, patterns[i], id ) ) + break; + // Note: if not found, could try to find interpolation value + + for( int l = a; l <= b; l++ ) + { + slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; + if( i < VFGS_MAX_PATTERNS ) + plut[l] = i << 4; + } + } + // 3b. Fill holes (no interp. yet, just repeat last) + i = 0; + for( k = 0; k < 256; k++ ) + { + if( plut[k] == 255 ) + plut[k] = i; + else + i = plut[k]; + } + } + else + { + memset( plut, 0, sizeof( plut ) ); + } + // 3c. Register LUTs + vfgs_set_scale_lut( cc, slut ); + vfgs_set_pattern_lut( cc, plut ); + } + } + } + + vfgs_set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } - diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/vvdec/vfgs_fw.h index cfcad9ea..6100a2e5 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/vvdec/vfgs_fw.h @@ -58,28 +58,28 @@ POSSIBILITY OF SUCH DAMAGE. 
#define _VFGS_FW_H_ #ifndef int32 -#define int32 signed int -#define uint32 unsigned int -#define int16 signed short -#define uint16 unsigned short -#define int8 signed char -#define uint8 unsigned char +# define int32 signed int +# define uint32 unsigned int +# define int16 signed short +# define uint16 unsigned short +# define int8 signed char +# define uint8 unsigned char #endif #define SEI_MAX_MODEL_VALUES 6 -typedef struct fgs_sei_s { - uint8 model_id; - uint8 log2_scale_factor; - uint8 comp_model_present_flag[3]; - uint16 num_intensity_intervals[3]; - uint8 num_model_values[3]; - uint8 intensity_interval_lower_bound[3][256]; - uint8 intensity_interval_upper_bound[3][256]; - int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; +typedef struct fgs_sei_s +{ + uint8 model_id; + uint8 log2_scale_factor; + uint8 comp_model_present_flag[3]; + uint16 num_intensity_intervals[3]; + uint8 num_model_values[3]; + uint8 intensity_interval_lower_bound[3][256]; + uint8 intensity_interval_upper_bound[3][256]; + int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; } fgs_sei; -void vfgs_init_sei(fgs_sei* cfg); - -#endif // _VFGS_FW_H_ +void vfgs_init_sei( fgs_sei* cfg ); +#endif // _VFGS_FW_H_ diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 1cbf434e..9d34ce87 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/vvdec/vfgs_hw.c @@ -55,47 +55,52 @@ POSSIBILITY OF SUCH DAMAGE. */ #include "vfgs_hw.h" -#include // memcpy +#include // memcpy #include -#define min(a,b) ((a)<(b)?(a):(b)) -#define max(a,b) ((a)>(b)?(a):(b)) -#define round(a,s) (((a)+(1<<((s)-1)))>>(s)) +#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) +#define max( a, b ) ( ( a ) > ( b ) ? ( a ) : ( b ) ) +#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) #define PATTERN_INTERPOLATION 0 // Note: declarations optimized for code readability; e.g. 
pattern storage in // actual hardware implementation would differ significantly -static int8 pattern[2][VFGS_MAX_PATTERNS+1][64][64] = {0, }; // +1 to simplify interpolation code -static uint8 sLUT[3][256] = {0, }; -static uint8 pLUT[3][256] = {0, }; -static uint32 rnd = 0xdeadbeef; -static uint32 rnd_up = 0xdeadbeef; -static uint32 line_rnd = 0xdeadbeef; +static int8 pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { + 0, +}; // +1 to simplify interpolation code +static uint8 sLUT[3][256] = { + 0, +}; +static uint8 pLUT[3][256] = { + 0, +}; +static uint32 rnd = 0xdeadbeef; +static uint32 rnd_up = 0xdeadbeef; +static uint32 line_rnd = 0xdeadbeef; static uint32 line_rnd_up = 0xdeadbeef; -static uint8 scale_shift = 5+6; -static uint8 bs = 0; // bitshift = bitdepth - 8 -static uint8 Y_min = 0; -static uint8 Y_max = 255; -static uint8 C_min = 0; -static uint8 C_max = 255; -static int csubx = 2; -static int csuby = 2; - +static uint8 scale_shift = 5 + 6; +static uint8 bs = 0; // bitshift = bitdepth - 8 +static uint8 Y_min = 0; +static uint8 Y_max = 255; +static uint8 C_min = 0; +static uint8 C_max = 255; +static int csubx = 2; +static int csuby = 2; // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) +static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) static uint8 scale[3][32]; /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32 prng(uint32 x) +static uint32 prng( uint32 x ) { - uint32 s = ((x << 30) ^ (x << 2)) & 0x80000000; - x = s | (x >> 1); - return x; + uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); + return x; } /** Derive Y x/y offsets from (random) number @@ -116,276 +121,276 @@ static uint32 prng(uint32 x) * Note: to fully support 
cross-component correlation within patterns, we would * need to align luma/chroma offsets. */ -static void get_offset_y(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 31) & 1) ? -1 : 1; + *s = ( ( val >> 31 ) & 1 ) ? -1 : 1; - bf = (val >> 0) & 0x3ff; - *x = ((bf * 13) >> 10) * 4; // 13 = 8 + 4 + 1 (two adders) + bf = ( val >> 0 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * 4; // 13 = 8 + 4 + 1 (two adders) - bf = (val >> 14) & 0x3ff; - *y = ((bf * 12) >> 10) * 4; // 12 = 8 + 4 (one adder) - // Note: could shift 9 and * 2, to make a multiple of 2 and make use of all - // pattern samples (when using overlap). + bf = ( val >> 14 ) & 0x3ff; + *y = ( ( bf * 12 ) >> 10 ) * 4; // 12 = 8 + 4 (one adder) + // Note: could shift 9 and * 2, to make a multiple of 2 and make use of all + // pattern samples (when using overlap). } -static void get_offset_u(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 2) & 1) ? -1 : 1; + *s = ( ( val >> 2 ) & 1 ) ? -1 : 1; - bf = (val >> 10) & 0x3ff; - *x = ((bf * 13) >> 10) * (4/csubx); + bf = ( val >> 10 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); - bf = ((val >> 24) & 0x0ff) | ((val << 8) & 0x300); - *y = ((bf * 12) >> 10) * (4/csuby); + bf = ( ( val >> 24 ) & 0x0ff ) | ( ( val << 8 ) & 0x300 ); + *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 15) & 1) ? -1 : 1; + *s = ( ( val >> 15 ) & 1 ) ? 
-1 : 1; - bf = (val >> 20) & 0x3ff; - *x = ((bf * 13) >> 10) * (4/csubx); + bf = ( val >> 20 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); - bf = (val >> 4) & 0x3ff; - *y = ((bf * 12) >> 10) * (4/csuby); + bf = ( val >> 4 ) & 0x3ff; + *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void add_grain_block(void* I, int c, int x, int y, int width) +static void add_grain_block( void* I, int c, int x, int y, int width ) { - uint8 *I8 = (uint8*)I; - uint16 *I16 = (uint16*)I; - - int s, s_up; // random sign flip (current + upper row) - uint8 ox, oy; // random offset (current) - uint8 ox_up, oy_up; // random offset (upper row) - uint8 oc1, oc2; // overlapping coefficients - uint8 pi; // pattern index integer part - int i, j; - int P; // Pattern sample (from current pattern index) + uint8* I8 = (uint8*) I; + uint16* I16 = (uint16*) I; + + int s, s_up; // random sign flip (current + upper row) + uint8 ox, oy; // random offset (current) + uint8 ox_up, oy_up; // random offset (upper row) + uint8 oc1, oc2; // overlapping coefficients + uint8 pi; // pattern index integer part + int i, j; + int P; // Pattern sample (from current pattern index) #if PATTERN_INTERPOLATION - int Pn; // Next-pattern sample (from pattern index+1) - uint8 pf; // pattern index fractional part + int Pn; // Next-pattern sample (from pattern index+1) + uint8 pf; // pattern index fractional part #endif - uint8 intensity; - int flush = 0; - int subx = c ? csubx : 1; - int suby = c ? csuby : 1; - uint8 I_min = c ? C_min : Y_min; - uint8 I_max = c ? C_max : Y_max; - - if ((y & 1) && suby > 1) - return; - - assert(!(x & 15)); - assert(width > 128); - assert(bs == 0 || bs == 2); - assert(scale_shift + bs >= 8 && scale_shift + bs <= 13); - // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc - - j = y & 0xf; - - if (y > 15 && j == 0) // first line of overlap - { - oc1 = (suby > 1) ? 20 : 12; // current - oc2 = (suby > 1) ? 
20 : 24; // upper - } - else if (y > 15 && j == 1) // second line of overlap - { - oc1 = 24; - oc2 = 12; - } - else - { - oc1 = oc2 = 0; - } - - // Derive block offsets + sign - if (c==0) - get_offset_y(rnd, &s, &ox, &oy); - else if (c==1) - get_offset_u(rnd, &s, &ox, &oy); - else - get_offset_v(rnd, &s, &ox, &oy); - oy += j/suby; - - // Same for upper block (overlap) - if (c==0) - get_offset_y(rnd_up, &s_up, &ox_up, &oy_up); - else if (c==1) - get_offset_u(rnd_up, &s_up, &ox_up, &oy_up); - else - get_offset_v(rnd_up, &s_up, &ox_up, &oy_up); - oy_up += (16 + j)/suby; - - // Make grain pattern - for (i=0; i<16/subx; i++) - { - intensity = bs ? I16[x/subx+i] >> bs : I8[x/subx+i]; - pi = pLUT[c][intensity] >> 4; // pattern index (integer part) + uint8 intensity; + int flush = 0; + int subx = c ? csubx : 1; + int suby = c ? csuby : 1; + uint8 I_min = c ? C_min : Y_min; + uint8 I_max = c ? C_max : Y_max; + + if( ( y & 1 ) && suby > 1 ) + return; + + assert( !( x & 15 ) ); + assert( width > 128 ); + assert( bs == 0 || bs == 2 ); + assert( scale_shift + bs >= 8 && scale_shift + bs <= 13 ); + // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc + + j = y & 0xf; + + if( y > 15 && j == 0 ) // first line of overlap + { + oc1 = ( suby > 1 ) ? 20 : 12; // current + oc2 = ( suby > 1 ) ? 20 : 24; // upper + } + else if( y > 15 && j == 1 ) // second line of overlap + { + oc1 = 24; + oc2 = 12; + } + else + { + oc1 = oc2 = 0; + } + + // Derive block offsets + sign + if( c == 0 ) + get_offset_y( rnd, &s, &ox, &oy ); + else if( c == 1 ) + get_offset_u( rnd, &s, &ox, &oy ); + else + get_offset_v( rnd, &s, &ox, &oy ); + oy += j / suby; + + // Same for upper block (overlap) + if( c == 0 ) + get_offset_y( rnd_up, &s_up, &ox_up, &oy_up ); + else if( c == 1 ) + get_offset_u( rnd_up, &s_up, &ox_up, &oy_up ); + else + get_offset_v( rnd_up, &s_up, &ox_up, &oy_up ); + oy_up += ( 16 + j ) / suby; + + // Make grain pattern + for( i = 0; i < 16 / subx; i++ ) + { + intensity = bs ? 
I16[x / subx + i] >> bs : I8[x / subx + i]; + pi = pLUT[c][intensity] >> 4; // pattern index (integer part) #if PATTERN_INTERPOLATION - pf = pLUT[c][intensity] & 15; // fractional part (interpolate with next) -- could restrict to less bits (e.g. 2) + pf = pLUT[c][intensity] & 15; // fractional part (interpolate with next) -- could restrict to less bits (e.g. 2) #endif - // Pattern - P = pattern[c?1:0][pi ][oy][ox + i] * s; // We could consider just XORing the sign bit + // Pattern + P = pattern[c ? 1 : 0][pi][oy][ox + i] * s; // We could consider just XORing the sign bit #if PATTERN_INTERPOLATION - Pn = pattern[c?1:0][pi+1][oy][ox + i] * s; // But there are equivalent hw tricks, e.g. storing values as sign + amplitude instead of two's complement + Pn = + pattern[c ? 1 : 0][pi + 1][oy][ox + i] * s; // But there are equivalent hw tricks, e.g. storing values as sign + amplitude instead of two's complement #endif - if (oc1) // overlap - { - P = round(P * oc1 + pattern[c?1:0][pi ][oy_up][ox_up + i] * oc2 * s_up, 5); + if( oc1 ) // overlap + { + P = round( P * oc1 + pattern[c ? 1 : 0][pi][oy_up][ox_up + i] * oc2 * s_up, 5 ); #if PATTERN_INTERPOLATION - Pn = round(Pn * oc1 + pattern[c?1:0][pi+1][oy_up][ox_up + i] * oc2 * s_up, 5); + Pn = round( Pn * oc1 + pattern[c ? 
1 : 0][pi + 1][oy_up][ox_up + i] * oc2 * s_up, 5 ); #endif - } + } #if PATTERN_INTERPOLATION - // Pattern interpolation: P is current, Pn is next, pf is interpolation coefficient - grain[c][16/subx+i] = round(P * (16-pf) + Pn * pf, 4); + // Pattern interpolation: P is current, Pn is next, pf is interpolation coefficient + grain[c][16 / subx + i] = round( P * ( 16 - pf ) + Pn * pf, 4 ); #else - grain[c][16/subx+i] = P; + grain[c][16 / subx + i] = P; #endif - // Scale sign already integrated above because of overlap - scale[c][16/subx+i] = sLUT[c][intensity]; - } - - // Scale & output - do - { - if (x > 0) - { - int32 g; - int16 l1, l0, r0, r1; - - if (!flush) - { - // Horizontal deblock (across previous block) - l1 = grain[c][16/subx -2]; - l0 = grain[c][16/subx -1]; - r0 = grain[c][16/subx +0]; - r1 = grain[c][16/subx +1]; - grain[c][16/subx -1] = round(l1 + 3*l0 + r0, 2); - grain[c][16/subx +0] = round(l0 + 3*r0 + r1, 2); - } - for (i=0; i<16/subx; i++) - { - // Output previous block (or flush current) - g = round(scale[c][i] * (int16)grain[c][i], scale_shift); - if (bs) - I16[(x-16)/subx+i] = max(I_min<= width) - { - flush ++; - x += 16; - } - } while (flush == 1); + // Scale sign already integrated above because of overlap + scale[c][16 / subx + i] = sLUT[c][intensity]; + } + + // Scale & output + do + { + if( x > 0 ) + { + int32 g; + int16 l1, l0, r0, r1; + + if( !flush ) + { + // Horizontal deblock (across previous block) + l1 = grain[c][16 / subx - 2]; + l0 = grain[c][16 / subx - 1]; + r0 = grain[c][16 / subx + 0]; + r1 = grain[c][16 / subx + 1]; + grain[c][16 / subx - 1] = round( l1 + 3 * l0 + r0, 2 ); + grain[c][16 / subx + 0] = round( l0 + 3 * r0 + r1, 2 ); + } + for( i = 0; i < 16 / subx; i++ ) + { + // Output previous block (or flush current) + g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); + if( bs ) + I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + else + I8[( x - 16 ) / subx + i] = 
max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + } + } + + // Shift pipeline + for( i = 0; i < 16 / subx && !flush; i++ ) + { + grain[c][i] = grain[c][i + 16 / subx]; + scale[c][i] = scale[c][i + 16 / subx]; + } + + if( x + 16 >= width ) + { + flush++; + x += 16; + } + } while( flush == 1 ); } /* Public interface ***********************************************************/ -void vfgs_add_grain_line(void* Y, void* U, void* V, int y, int width) +void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) { - // Generate / backup / restore per-line random seeds (needed to make multi-line blocks) - if (y && (y & 0x0f) == 0) - { - // new line of blocks --> backup + copy current to upper - line_rnd_up = line_rnd; - line_rnd = rnd; - } - rnd_up = line_rnd_up; - rnd = line_rnd; - - // Process line - for (int x=0; x backup + copy current to upper + line_rnd_up = line_rnd; + line_rnd = rnd; + } + rnd_up = line_rnd_up; + rnd = line_rnd; + + // Process line + for( int x = 0; x < width; x += 16 ) + { + // Process pixels for each color component + add_grain_block( Y, 0, x, y, width ); + add_grain_block( U, 1, x, y, width ); + add_grain_block( V, 2, x, y, width ); + + // Crank random generator + rnd = prng( rnd ); + rnd_up = prng( rnd_up ); // upper block (overlapping) + } } -void vfgs_set_luma_pattern(int index, int8* P) +void vfgs_set_luma_pattern( int index, int8* P ) { - assert(index >= 0 && index < 8); - memcpy(pattern[0][index], P, 64*64); + assert( index >= 0 && index < 8 ); + memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern(int index, int8 *P) +void vfgs_set_chroma_pattern( int index, int8* P ) { - assert(index >= 0 && index < 8); - for (int i=0; i<64/csuby; i++) - memcpy(pattern[1][index][i], P + (64/csuby)*i, 64/csubx); + assert( index >= 0 && index < 8 ); + for( int i = 0; i < 64 / csuby; i++ ) + memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); } -void vfgs_set_scale_lut(int c, uint8 lut[]) +void 
vfgs_set_scale_lut( int c, uint8 lut[] ) { - assert(c>=0 && c<3); - memcpy(sLUT[c], lut, 256); + assert( c >= 0 && c < 3 ); + memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut(int c, uint8 lut[]) +void vfgs_set_pattern_lut( int c, uint8 lut[] ) { - assert(c>=0 && c<3); - memcpy(pLUT[c], lut, 256); + assert( c >= 0 && c < 3 ); + memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed(uint32 seed) +void vfgs_set_seed( uint32 seed ) { - // Note: shift left the seed as the LFSR loops on the 31 MSBs, so - // the LFSR register LSB has no effect on random sequence initialization - rnd = rnd_up = line_rnd = line_rnd_up = (seed << 1); + // Note: shift left the seed as the LFSR loops on the 31 MSBs, so + // the LFSR register LSB has no effect on random sequence initialization + rnd = rnd_up = line_rnd = line_rnd_up = ( seed << 1 ); } -void vfgs_set_scale_shift(int shift) +void vfgs_set_scale_shift( int shift ) { - assert(shift >= 2 && shift < 8); - scale_shift = shift + 6 - bs; + assert( shift >= 2 && shift < 8 ); + scale_shift = shift + 6 - bs; } -void vfgs_set_depth(int depth) +void vfgs_set_depth( int depth ) { - assert(depth==8 || depth==10); + assert( depth == 8 || depth == 10 ); - if (bs==0 && depth>8) - scale_shift -= 2; - if (bs==2 && depth==8) - scale_shift += 2; + if( bs == 0 && depth > 8 ) + scale_shift -= 2; + if( bs == 2 && depth == 8 ) + scale_shift += 2; - bs = depth - 8; + bs = depth - 8; } -void vfgs_set_chroma_subsampling(int subx, int suby) +void vfgs_set_chroma_subsampling( int subx, int suby ) { - assert(subx==1 || subx==2); - assert(suby==1 || suby==2); - csubx = subx; - csuby = suby; + assert( subx == 1 || subx == 2 ); + assert( suby == 1 || suby == 2 ); + csubx = subx; + csuby = suby; } - diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/vvdec/vfgs_hw.h index 4e81ab4e..0bdb8d05 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/vvdec/vfgs_hw.h @@ -58,27 +58,26 @@ POSSIBILITY OF SUCH DAMAGE. 
#define _VFGS_HW_H_ #ifndef int32 -#define int32 signed int -#define uint32 unsigned int -#define int16 signed short -#define uint16 unsigned short -#define int8 signed char -#define uint8 unsigned char +# define int32 signed int +# define uint32 unsigned int +# define int16 signed short +# define uint16 unsigned short +# define int8 signed char +# define uint8 unsigned char #endif #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern(int index, int8* P); -void vfgs_set_chroma_pattern(int index, int8 *P); -void vfgs_set_scale_lut(int c, uint8 lut[]); -void vfgs_set_pattern_lut(int c, uint8 lut[]); +void vfgs_set_luma_pattern( int index, int8* P ); +void vfgs_set_chroma_pattern( int index, int8* P ); +void vfgs_set_scale_lut( int c, uint8 lut[] ); +void vfgs_set_pattern_lut( int c, uint8 lut[] ); -void vfgs_set_seed(uint32 seed); -void vfgs_set_scale_shift(int shift); -void vfgs_set_depth(int depth); -void vfgs_set_chroma_subsampling(int subx, int suby); +void vfgs_set_seed( uint32 seed ); +void vfgs_set_scale_shift( int shift ); +void vfgs_set_depth( int depth ); +void vfgs_set_chroma_subsampling( int subx, int suby ); -void vfgs_add_grain_line(void* Y, void* U, void* V, int y, int width); - -#endif // _VFGS_HW_H_ +void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ); +#endif // _VFGS_HW_H_ diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 2bf94f62..45238681 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -827,98 +827,98 @@ int VVDecImpl::copyComp( const unsigned char* pucSrc, unsigned char* pucDest, un } #if ENABLE_FILM_GRAIN -int VVDecImpl::xUpdateFGC( vvdecSEI *s ) +int VVDecImpl::xUpdateFGC( vvdecSEI* s ) { - vvdecSEIFilmGrainCharacteristics* sei =(vvdecSEIFilmGrainCharacteristics*)s->payload; + vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; - if (!sei->filmGrainCharacteristicsCancelFlag) + if( 
!sei->filmGrainCharacteristicsCancelFlag ) + { + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? + fgs.model_id = sei->filmGrainModelId; + fgs.log2_scale_factor = sei->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) + { + vvdecCompModel& cm = sei->compModel[c]; + if( cm.presentFlag ) + { + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + { + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + if( fgs.num_model_values[c] < 2 ) fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + } + } + } + + vfgs_set_depth( 10 ); + vfgs_set_chroma_subsampling( 2, 2 ); + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) { - fgs_sei fgs; // TODO: maybe make it a member ? 
(idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? - fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for (int c=0; c<3; c++) - { - vvdecCompModel &cm = sei->compModel[c]; - if (cm.presentFlag) - { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for (int i=0; i>= 1; - } - } - vfgs_init_sei(&fgs); - - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32 seed); - - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - } - else - { - m_eFgs = 0; - } - - return VVDEC_OK; + for( int c = 1; c < 3; c++ ) + if( fgs.comp_model_present_flag[c] ) + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) + { + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; + } + } + vfgs_init_sei( &fgs ); + + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32 seed); + + m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 
2 : 1; + } + else + { + m_eFgs = 0; + } + + return VVDEC_OK; } -int VVDecImpl::xAddGrain( vvdecFrame *frame ) +int VVDecImpl::xAddGrain( vvdecFrame* frame ) { - if (m_eFgs) - { - uint8 *Y = (uint8*)frame->planes[0].ptr; - uint8 *U = (uint8*)frame->planes[1].ptr; - uint8 *V = (uint8*)frame->planes[2].ptr; - - CHECK(frame->bitDepth != 10, "Bitdepth is not 10"); - - for (int y=0; yplanes[0].height; y++) - { - vfgs_add_grain_line(Y, U, V, y, frame->planes[0].width); - Y += frame->planes[0].stride; - if ((y & 1) || (frame->planes[0].height == frame->planes[1].height)) - { - U += frame->planes[1].stride; - V += frame->planes[1].stride; - } - } - - if (m_eFgs < 2) // Not persistent - m_eFgs = 0; + if( m_eFgs ) + { + uint8* Y = (uint8*) frame->planes[0].ptr; + uint8* U = (uint8*) frame->planes[1].ptr; + uint8* V = (uint8*) frame->planes[2].ptr; + + CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); + + for( int y = 0; y < frame->planes[0].height; y++ ) + { + vfgs_add_grain_line( Y, U, V, y, frame->planes[0].width ); + Y += frame->planes[0].stride; + if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) + { + U += frame->planes[1].stride; + V += frame->planes[1].stride; + } } - return VVDEC_OK; + + if( m_eFgs < 2 ) // Not persistent + m_eFgs = 0; + } + return VVDEC_OK; } #endif // ENABLE_FILM_GRAIN From 026a89b9d5c54fcbe1f87c4798d37bed64b1d0e2 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 13:21:49 +0200 Subject: [PATCH 2/8] clang-format InsertBraces --- source/Lib/vvdec/vfgs_fw.c | 72 ++++++++++++++++++++++++++++++++++ source/Lib/vvdec/vfgs_hw.c | 24 ++++++++++++ source/Lib/vvdec/vvdecimpl.cpp | 18 ++++++--- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index cd424a61..993b1e77 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -324,30 +324,42 @@ static void idct2_64( int8 B[][64] ) /* 1st pass (DCT2_64'*B) = vertical */ for( j = 0; j 
< 64; j++ ) + { for( i = 0; i < 64; i++ ) { acc = 256; for( k = 0; k < 64; k++ ) + { acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) + } X[j][i] = ( acc >> 9 ); } + } /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ for( j = 0; j < 64; j++ ) + { for( i = 0; i < 64; i++ ) { acc = 256; for( k = 0; k < 64; k++ ) + { acc += (int32) X[j][k] * DCT2_64[k][i]; + } acc >>= 9; if( acc > 127 ) + { acc = 127; + } if( acc < -127 ) + { acc = -127; + } B[j][i] = acc; } + } } /** Apply iDCT2 to block B[32][32] + clipping */ @@ -359,30 +371,42 @@ static void idct2_32( int8 B[][32] ) /* 1st pass (R32'*B) = vertical */ for( j = 0; j < 32; j++ ) + { for( i = 0; i < 32; i++ ) { acc = 128; for( k = 0; k < 32; k++ ) + { acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + } X[j][i] = ( acc >> 8 ); } + } /* 2nd pass (...)*R32 = horizontal + clipping */ for( j = 0; j < 32; j++ ) + { for( i = 0; i < 32; i++ ) { acc = 256; for( k = 0; k < 32; k++ ) + { acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + } acc >>= 9; if( acc > 127 ) + { acc = 127; + } if( acc < -127 ) + { acc = -127; + } B[j][i] = acc; } + } } static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) @@ -395,6 +419,7 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) n = Seed_LUT[0]; memset( B, 0, 64 * 64 * sizeof( int8 ) ); for( l = 0; l < 64; l++ ) + { for( k = 0; k < 64; k += 4 ) { if( k < fh && l < fv ) @@ -406,6 +431,7 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) } n = prng( n ); } + } B[0][0] = 0; idct2_64( B ); } @@ -420,6 +446,7 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) n = Seed_LUT[1]; memset( B, 0, 32 * 32 * sizeof( int8 ) ); for( l = 0; l < 32; l++ ) + { for( k = 0; k < 32; k += 2 ) { if( k < fh && l < fv ) @@ -429,6 +456,7 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) } n = prng( n ); } + } B[0][0] = 0; 
idct2_32( B ); } @@ -465,12 +493,19 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar assert( 0 ); } if( nb_coef != 6 ) + { for( k = 0, j = -L; j <= 0; j++ ) + { for( i = -L; i <= L && ( i < 0 || j < 0 ); i++, k++ ) + { coef[3 + j][3 + i] = ar_coef[k]; + } + } + } memset( buf, 0, width * height ); // debug (not needed) for( y = 0; y < height; y++ ) + { for( x = 0; x < width; x++ ) { // Filter @@ -478,8 +513,12 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar if( y >= 3 && y < height && x >= 3 && x < width - 3 ) { for( j = -3; j <= 0; j++ ) + { for( i = -3; i <= 3 && ( i < 0 || j < 0 ); i++ ) + { g += (int) coef[3 + j][3 + i] * buf[width * ( y + j ) + x + i]; + } + } g = round( g, scale ); } @@ -490,12 +529,17 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar buf[width * y + x] = clip( g, -127, 127 ); } + } // Copy cropped area to output memset( P, 0, size * size ); for( y = 0; y < 64 / suby; y++ ) + { for( x = 0; x < 64 / subx; x++ ) + { P[size * y + x] = buf[width * ( 3 + 6 / suby + y ) + ( 3 + 6 / subx + x )]; + } + } } int same_pattern( fgs_sei* cfg, int32 a, int32 b ) @@ -504,8 +548,12 @@ int same_pattern( fgs_sei* cfg, int32 a, int32 b ) int16* coef_b = &cfg->comp_model_value[0][0][0] + b; for( int i = 1; i < SEI_MAX_MODEL_VALUES; i++ ) + { if( coef_a[i] != coef_b[i] ) + { return 0; + } + } return 1; } @@ -542,8 +590,12 @@ void vfgs_init_sei( fgs_sei* cfg ) uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + { if( same_pattern( cfg, patterns[i], id ) ) + { break; + } + } if( i == VFGS_MAX_PATTERNS && np < VFGS_MAX_PATTERNS ) // can add it { @@ -557,7 +609,9 @@ void vfgs_init_sei( fgs_sei* cfg ) patterns[i] = patterns[i - 1]; } else + { break; + } } intensities[i] = a; patterns[i] = id; @@ -575,18 +629,26 @@ void vfgs_init_sei( fgs_sei* cfg ) if( c == 0 ) { if( cfg->model_id ) + { vfgs_make_ar_pattern( Lbuf, P, 64, 
coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + } else + { vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + } vfgs_set_luma_pattern( i, P ); } else if( c == 2 ) { if( cfg->model_id ) + { vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + } else + { vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + } vfgs_set_chroma_pattern( i, P ); } @@ -605,15 +667,21 @@ void vfgs_init_sei( fgs_sei* cfg ) uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + { if( same_pattern( cfg, patterns[i], id ) ) + { break; + } + } // Note: if not found, could try to find interpolation value for( int l = a; l <= b; l++ ) { slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; if( i < VFGS_MAX_PATTERNS ) + { plut[l] = i << 4; + } } } // 3b. Fill holes (no interp. yet, just repeat last) @@ -621,9 +689,13 @@ void vfgs_init_sei( fgs_sei* cfg ) for( k = 0; k < 256; k++ ) { if( plut[k] == 255 ) + { plut[k] = i; + } else + { i = plut[k]; + } } } else diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 9d34ce87..791a958d 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/vvdec/vfgs_hw.c @@ -187,7 +187,9 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) uint8 I_max = c ? 
C_max : Y_max; if( ( y & 1 ) && suby > 1 ) + { return; + } assert( !( x & 15 ) ); assert( width > 128 ); @@ -214,20 +216,32 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) // Derive block offsets + sign if( c == 0 ) + { get_offset_y( rnd, &s, &ox, &oy ); + } else if( c == 1 ) + { get_offset_u( rnd, &s, &ox, &oy ); + } else + { get_offset_v( rnd, &s, &ox, &oy ); + } oy += j / suby; // Same for upper block (overlap) if( c == 0 ) + { get_offset_y( rnd_up, &s_up, &ox_up, &oy_up ); + } else if( c == 1 ) + { get_offset_u( rnd_up, &s_up, &ox_up, &oy_up ); + } else + { get_offset_v( rnd_up, &s_up, &ox_up, &oy_up ); + } oy_up += ( 16 + j ) / suby; // Make grain pattern @@ -288,9 +302,13 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) // Output previous block (or flush current) g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); if( bs ) + { I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + } else + { I8[( x - 16 ) / subx + i] = max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + } } } @@ -347,7 +365,9 @@ void vfgs_set_chroma_pattern( int index, int8* P ) { assert( index >= 0 && index < 8 ); for( int i = 0; i < 64 / csuby; i++ ) + { memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); + } } void vfgs_set_scale_lut( int c, uint8 lut[] ) @@ -380,9 +400,13 @@ void vfgs_set_depth( int depth ) assert( depth == 8 || depth == 10 ); if( bs == 0 && depth > 8 ) + { scale_shift -= 2; + } if( bs == 2 && depth == 8 ) + { scale_shift += 2; + } bs = depth - 8; } diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 45238681..fb7e1fa4 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -852,13 +852,15 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; 
for( int v = 0; v < fgs.num_model_values[c]; v++ ) + { fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + } // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 
0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) } } } @@ -869,13 +871,17 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) if( fgs.model_id == 0 ) { for( int c = 1; c < 3; c++ ) + { if( fgs.comp_model_present_flag[c] ) + { for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) { fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency fgs.comp_model_value[c][k][0] >>= 1; } + } + } } vfgs_init_sei( &fgs ); @@ -916,7 +922,9 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) } if( m_eFgs < 2 ) // Not persistent + { m_eFgs = 0; + } } return VVDEC_OK; } From 5815d01bd17aa718a3e36d1736c20549b537742e Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 13:27:24 +0200 Subject: [PATCH 3/8] use standard int types --- source/Lib/vvdec/vfgs_fw.c | 126 ++++++++++++++++----------------- source/Lib/vvdec/vfgs_fw.h | 25 +++---- source/Lib/vvdec/vfgs_hw.c | 104 +++++++++++++-------------- source/Lib/vvdec/vfgs_hw.h | 19 ++--- source/Lib/vvdec/vvdecimpl.cpp | 8 +-- 5 files changed, 134 insertions(+), 148 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index 993b1e77..5b9af023 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -64,7 +64,7 @@ POSSIBILITY OF SUCH DAMAGE. #define clip( x, lo, hi ) ( ( x ) > ( hi ) ? hi : ( x ) < ( lo ) ? 
( lo ) : ( x ) ) // clang-format off -static const int8 Gaussian_LUT[2048] = { +static const int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, @@ -195,7 +195,7 @@ static const int8 Gaussian_LUT[2048] = { 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 }; -static const uint32 Seed_LUT[256] = { +static const uint32_t Seed_LUT[256] = { 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, @@ -298,29 +298,29 @@ static const uint32 Seed_LUT[256] = { { ck, -cj, ci, -ch, cg, -cf, ce, -cd, cc, -cb, ca, -bz, by, -bx, bw, -bv, bu, -bt, bs, -br, bq, -bp, bo, -bn, bm, -bl, bk, -bj, bi, -bh, bg, -bf, bf, -bg, bh, -bi, bj, -bk, bl, -bm, bn, -bo, bp, -bq, br, -bs, bt, -bu, bv, -bw, bx, -by, bz, -ca, cb, -cc, cd, -ce, cf, -cg, ch, -ci, cj, -ck }, \ } -static const int8 DCT2_64[64][64] = \ +static const int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32 prng( uint32 x ) +static uint32_t prng( uint32_t x ) { #if 1 // same as HW (bit-reversed RDD-5) - uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); #else // RDD-5 - uint32 s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; - x = ( x << 1 ) | s; + uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; 
+ x = ( x << 1 ) | s; #endif return x; } /** Apply iDCT2 to block B[64][64] + clipping */ -static void idct2_64( int8 B[][64] ) +static void idct2_64( int8_t B[][64] ) { - int16 X[64][64]; - int i, j, k; - int32 acc; + int16_t X[64][64]; + int i, j, k; + int32_t acc; /* 1st pass (DCT2_64'*B) = vertical */ for( j = 0; j < 64; j++ ) @@ -330,7 +330,7 @@ static void idct2_64( int8 B[][64] ) acc = 256; for( k = 0; k < 64; k++ ) { - acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) + acc += (int32_t) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) } X[j][i] = ( acc >> 9 ); @@ -345,7 +345,7 @@ static void idct2_64( int8 B[][64] ) acc = 256; for( k = 0; k < 64; k++ ) { - acc += (int32) X[j][k] * DCT2_64[k][i]; + acc += (int32_t) X[j][k] * DCT2_64[k][i]; } acc >>= 9; @@ -363,11 +363,11 @@ static void idct2_64( int8 B[][64] ) } /** Apply iDCT2 to block B[32][32] + clipping */ -static void idct2_32( int8 B[][32] ) +static void idct2_32( int8_t B[][32] ) { - int16 X[32][32]; - int i, j, k; - int32 acc; + int16_t X[32][32]; + int i, j, k; + int32_t acc; /* 1st pass (R32'*B) = vertical */ for( j = 0; j < 32; j++ ) @@ -377,7 +377,7 @@ static void idct2_32( int8 B[][32] ) acc = 128; for( k = 0; k < 32; k++ ) { - acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + acc += (int32_t) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) } X[j][i] = ( acc >> 8 ); @@ -392,7 +392,7 @@ static void idct2_32( int8 B[][32] ) acc = 256; for( k = 0; k < 32; k++ ) { - acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + acc += (int32_t) X[j][k] * DCT2_64[k * 2][i]; } acc >>= 9; @@ -409,15 +409,15 @@ static void idct2_32( int8 B[][32] ) } } -static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) +static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) { - int k, l; - uint32 n; + int k, l; + uint32_t n; fh = 4 * ( fh + 1 ); fv = 
4 * ( fv + 1 ); n = Seed_LUT[0]; - memset( B, 0, 64 * 64 * sizeof( int8 ) ); + memset( B, 0, 64 * 64 * sizeof( int8_t ) ); for( l = 0; l < 64; l++ ) { for( k = 0; k < 64; k += 4 ) @@ -436,15 +436,15 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) idct2_64( B ); } -static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) +static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) { - int k, l; - uint32 n; + int k, l; + uint32_t n; fh = 2 * ( fh + 1 ); fv = 2 * ( fv + 1 ); n = Seed_LUT[1]; - memset( B, 0, 32 * 32 * sizeof( int8 ) ); + memset( B, 0, 32 * 32 * sizeof( int8_t ) ); for( l = 0; l < 32; l++ ) { for( k = 0; k < 32; k += 2 ) @@ -461,14 +461,14 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) idct2_32( B ); } -static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar_coef[], int nb_coef, int shift, int scale, uint32 seed ) +static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) { - int16 coef[4][7]; - int L = 0; - int x, y, i, j, k; - int g; - int subx, suby, width, height; - uint32 rnd = seed; + int16_t coef[4][7]; + int L = 0; + int x, y, i, j, k; + int g; + int subx, suby, width, height; + uint32_t rnd = seed; memset( coef, 0, sizeof( coef ) ); subx = suby = ( size == 32 ) ? 
2 : 1; @@ -479,12 +479,12 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar { case 6: // SEI.AR mode - coef[3][2] = ar_coef[1]; // left - coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top - coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left - coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right - coef[3][1] = ar_coef[5]; // left-left - coef[1][3] = ( (int32) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top + coef[3][2] = ar_coef[1]; // left + coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top + coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left + coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right + coef[3][1] = ar_coef[5]; // left-left + coef[1][3] = ( (int32_t) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top L = 2; break; @@ -542,10 +542,10 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar } } -int same_pattern( fgs_sei* cfg, int32 a, int32 b ) +int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) { - int16* coef_a = &cfg->comp_model_value[0][0][0] + a; - int16* coef_b = &cfg->comp_model_value[0][0][0] + b; + int16_t* coef_a = &cfg->comp_model_value[0][0][0] + a; + int16_t* coef_b = &cfg->comp_model_value[0][0][0] + b; for( int i = 1; i < SEI_MAX_MODEL_VALUES; i++ ) { @@ -561,16 +561,16 @@ int same_pattern( fgs_sei* cfg, int32 a, int32 b ) /** Initialize "hardware" interface from FGS SEI parameters */ void vfgs_init_sei( fgs_sei* cfg ) { - int8 P[64 * 64]; - int8 Lbuf[73 * 82]; - int8 Cbuf[38 * 44]; - uint8 slut[256]; - uint8 plut[256]; - uint8 intensities[VFGS_MAX_PATTERNS]; - uint32 patterns[VFGS_MAX_PATTERNS]; - uint8 np = 0; // number of patterns - uint8 a, b, i; - int c, k; + int8_t P[64 * 64]; + int8_t Lbuf[73 * 82]; + int8_t Cbuf[38 * 44]; + uint8_t slut[256]; + uint8_t plut[256]; + uint8_t intensities[VFGS_MAX_PATTERNS]; + uint32_t patterns[VFGS_MAX_PATTERNS]; + uint8_t np = 0; // number 
of patterns + uint8_t a, b, i; + int c, k; for( c = 0; c < 3; c++ ) { @@ -586,8 +586,8 @@ void vfgs_init_sei( fgs_sei* cfg ) { for( k = 0; k < cfg->num_intensity_intervals[c]; k++ ) { - a = cfg->intensity_interval_lower_bound[c][k]; - uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); + a = cfg->intensity_interval_lower_bound[c][k]; + uint32_t id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) { @@ -624,7 +624,7 @@ void vfgs_init_sei( fgs_sei* cfg ) // 2. Register the patterns (with correct order) for( i = 0; i < np; i++ ) { - int16* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; + int16_t* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; if( c == 0 ) { @@ -634,7 +634,7 @@ void vfgs_init_sei( fgs_sei* cfg ) } else { - vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + vfgs_make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); } vfgs_set_luma_pattern( i, P ); @@ -647,7 +647,7 @@ void vfgs_init_sei( fgs_sei* cfg ) } else { - vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + vfgs_make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); } vfgs_set_chroma_pattern( i, P ); @@ -662,9 +662,9 @@ void vfgs_init_sei( fgs_sei* cfg ) // 3a. 
Fill valid patterns for( k = 0; k < cfg->num_intensity_intervals[cc]; k++ ) { - a = cfg->intensity_interval_lower_bound[cc][k]; - b = cfg->intensity_interval_upper_bound[cc][k]; - uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); + a = cfg->intensity_interval_lower_bound[cc][k]; + b = cfg->intensity_interval_upper_bound[cc][k]; + uint32_t id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) { @@ -677,7 +677,7 @@ void vfgs_init_sei( fgs_sei* cfg ) for( int l = a; l <= b; l++ ) { - slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; + slut[l] = (uint8_t) cfg->comp_model_value[cc][k][0]; if( i < VFGS_MAX_PATTERNS ) { plut[l] = i << 4; diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/vvdec/vfgs_fw.h index 6100a2e5..fc3b4fe7 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/vvdec/vfgs_fw.h @@ -57,27 +57,20 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef _VFGS_FW_H_ #define _VFGS_FW_H_ -#ifndef int32 -# define int32 signed int -# define uint32 unsigned int -# define int16 signed short -# define uint16 unsigned short -# define int8 signed char -# define uint8 unsigned char -#endif +#include #define SEI_MAX_MODEL_VALUES 6 typedef struct fgs_sei_s { - uint8 model_id; - uint8 log2_scale_factor; - uint8 comp_model_present_flag[3]; - uint16 num_intensity_intervals[3]; - uint8 num_model_values[3]; - uint8 intensity_interval_lower_bound[3][256]; - uint8 intensity_interval_upper_bound[3][256]; - int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; + uint8_t model_id; + uint8_t log2_scale_factor; + uint8_t comp_model_present_flag[3]; + uint16_t num_intensity_intervals[3]; + uint8_t num_model_values[3]; + uint8_t intensity_interval_lower_bound[3][256]; + uint8_t intensity_interval_upper_bound[3][256]; + int16_t comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; } fgs_sei; void vfgs_init_sei( fgs_sei* cfg ); diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 791a958d..a3d88ed6 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ 
b/source/Lib/vvdec/vfgs_hw.c @@ -66,40 +66,40 @@ POSSIBILITY OF SUCH DAMAGE. // Note: declarations optimized for code readability; e.g. pattern storage in // actual hardware implementation would differ significantly -static int8 pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { +static int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code -static uint8 sLUT[3][256] = { +static uint8_t sLUT[3][256] = { 0, }; -static uint8 pLUT[3][256] = { +static uint8_t pLUT[3][256] = { 0, }; -static uint32 rnd = 0xdeadbeef; -static uint32 rnd_up = 0xdeadbeef; -static uint32 line_rnd = 0xdeadbeef; -static uint32 line_rnd_up = 0xdeadbeef; -static uint8 scale_shift = 5 + 6; -static uint8 bs = 0; // bitshift = bitdepth - 8 -static uint8 Y_min = 0; -static uint8 Y_max = 255; -static uint8 C_min = 0; -static uint8 C_max = 255; -static int csubx = 2; -static int csuby = 2; +static uint32_t rnd = 0xdeadbeef; +static uint32_t rnd_up = 0xdeadbeef; +static uint32_t line_rnd = 0xdeadbeef; +static uint32_t line_rnd_up = 0xdeadbeef; +static uint8_t scale_shift = 5 + 6; +static uint8_t bs = 0; // bitshift = bitdepth - 8 +static uint8_t Y_min = 0; +static uint8_t Y_max = 255; +static uint8_t C_min = 0; +static uint8_t C_max = 255; +static int csubx = 2; +static int csuby = 2; // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) -static uint8 scale[3][32]; +static int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) +static uint8_t scale[3][32]; /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32 prng( uint32 x ) +static uint32_t prng( uint32_t x ) { - uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 
0x80000000; + x = s | ( x >> 1 ); return x; } @@ -121,9 +121,9 @@ static uint32 prng( uint32 x ) * Note: to fully support cross-component correlation within patterns, we would * need to align luma/chroma offsets. */ -static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_y( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 31 ) & 1 ) ? -1 : 1; @@ -136,9 +136,9 @@ static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) // pattern samples (when using overlap). } -static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 2 ) & 1 ) ? -1 : 1; @@ -149,9 +149,9 @@ static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 15 ) & 1 ) ? 
-1 : 1; @@ -164,27 +164,27 @@ static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) static void add_grain_block( void* I, int c, int x, int y, int width ) { - uint8* I8 = (uint8*) I; - uint16* I16 = (uint16*) I; - - int s, s_up; // random sign flip (current + upper row) - uint8 ox, oy; // random offset (current) - uint8 ox_up, oy_up; // random offset (upper row) - uint8 oc1, oc2; // overlapping coefficients - uint8 pi; // pattern index integer part - int i, j; - int P; // Pattern sample (from current pattern index) + uint8_t* I8 = (uint8_t*) I; + uint16_t* I16 = (uint16_t*) I; + + int s, s_up; // random sign flip (current + upper row) + uint8_t ox, oy; // random offset (current) + uint8_t ox_up, oy_up; // random offset (upper row) + uint8_t oc1, oc2; // overlapping coefficients + uint8_t pi; // pattern index integer part + int i, j; + int P; // Pattern sample (from current pattern index) #if PATTERN_INTERPOLATION - int Pn; // Next-pattern sample (from pattern index+1) - uint8 pf; // pattern index fractional part + int Pn; // Next-pattern sample (from pattern index+1) + uint8_t pf; // pattern index fractional part #endif - uint8 intensity; - int flush = 0; - int subx = c ? csubx : 1; - int suby = c ? csuby : 1; - uint8 I_min = c ? C_min : Y_min; - uint8 I_max = c ? C_max : Y_max; + uint8_t intensity; + int flush = 0; + int subx = c ? csubx : 1; + int suby = c ? csuby : 1; + uint8_t I_min = c ? C_min : Y_min; + uint8_t I_max = c ? 
C_max : Y_max; if( ( y & 1 ) && suby > 1 ) { @@ -284,8 +284,8 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) { if( x > 0 ) { - int32 g; - int16 l1, l0, r0, r1; + int32_t g; + int16_t l1, l0, r0, r1; if( !flush ) { @@ -300,7 +300,7 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) for( i = 0; i < 16 / subx; i++ ) { // Output previous block (or flush current) - g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); + g = round( scale[c][i] * (int16_t) grain[c][i], scale_shift ); if( bs ) { I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); @@ -355,13 +355,13 @@ void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) } } -void vfgs_set_luma_pattern( int index, int8* P ) +void vfgs_set_luma_pattern( int index, int8_t* P ) { assert( index >= 0 && index < 8 ); memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern( int index, int8* P ) +void vfgs_set_chroma_pattern( int index, int8_t* P ) { assert( index >= 0 && index < 8 ); for( int i = 0; i < 64 / csuby; i++ ) @@ -370,19 +370,19 @@ void vfgs_set_chroma_pattern( int index, int8* P ) } } -void vfgs_set_scale_lut( int c, uint8 lut[] ) +void vfgs_set_scale_lut( int c, uint8_t lut[] ) { assert( c >= 0 && c < 3 ); memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut( int c, uint8 lut[] ) +void vfgs_set_pattern_lut( int c, uint8_t lut[] ) { assert( c >= 0 && c < 3 ); memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed( uint32 seed ) +void vfgs_set_seed( uint32_t seed ) { // Note: shift left the seed as the LFSR loops on the 31 MSBs, so // the LFSR register LSB has no effect on random sequence initialization diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/vvdec/vfgs_hw.h index 0bdb8d05..d4df7576 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/vvdec/vfgs_hw.h @@ -57,23 +57,16 @@ POSSIBILITY OF SUCH DAMAGE. 
#ifndef _VFGS_HW_H_ #define _VFGS_HW_H_ -#ifndef int32 -# define int32 signed int -# define uint32 unsigned int -# define int16 signed short -# define uint16 unsigned short -# define int8 signed char -# define uint8 unsigned char -#endif +#include #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern( int index, int8* P ); -void vfgs_set_chroma_pattern( int index, int8* P ); -void vfgs_set_scale_lut( int c, uint8 lut[] ); -void vfgs_set_pattern_lut( int c, uint8 lut[] ); +void vfgs_set_luma_pattern( int index, int8_t* P ); +void vfgs_set_chroma_pattern( int index, int8_t* P ); +void vfgs_set_scale_lut( int c, uint8_t lut[] ); +void vfgs_set_pattern_lut( int c, uint8_t lut[] ); -void vfgs_set_seed( uint32 seed ); +void vfgs_set_seed( uint32_t seed ); void vfgs_set_scale_shift( int shift ); void vfgs_set_depth( int depth ); void vfgs_set_chroma_subsampling( int subx, int suby ); diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index fb7e1fa4..f3b6e1b4 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -888,7 +888,7 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) // if (!m_bFgs) // // TODO: get something random // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32 seed); + // vfgs_set_seed(uint32_t seed); m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 
2 : 1; } @@ -904,9 +904,9 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) { if( m_eFgs ) { - uint8* Y = (uint8*) frame->planes[0].ptr; - uint8* U = (uint8*) frame->planes[1].ptr; - uint8* V = (uint8*) frame->planes[2].ptr; + uint8_t* Y = (uint8_t*) frame->planes[0].ptr; + uint8_t* U = (uint8_t*) frame->planes[1].ptr; + uint8_t* V = (uint8_t*) frame->planes[2].ptr; CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); From ca70edf0267c7341ee9903097f25bc8b493c3162 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 14:07:36 +0200 Subject: [PATCH 4/8] convert vfgs film grain synthesis to c++ class and turn static global variables into class members --- .../vfgs_fw.c => FilmGrain/FilmGrain.cpp} | 132 ++++++++++-------- .../vfgs_fw.h => FilmGrain/FilmGrain.h} | 26 +++- .../vfgs_hw.c => FilmGrain/FilmGrainImpl.cpp} | 107 ++++++-------- .../vfgs_hw.h => FilmGrain/FilmGrainImpl.h} | 68 ++++++--- source/Lib/vvdec/CMakeLists.txt | 9 +- source/Lib/vvdec/vvdecimpl.cpp | 127 +++++++++-------- source/Lib/vvdec/vvdecimpl.h | 9 +- 7 files changed, 263 insertions(+), 215 deletions(-) rename source/Lib/{vvdec/vfgs_fw.c => FilmGrain/FilmGrain.cpp} (90%) rename source/Lib/{vvdec/vfgs_fw.h => FilmGrain/FilmGrain.h} (90%) rename source/Lib/{vvdec/vfgs_hw.c => FilmGrain/FilmGrainImpl.cpp} (78%) rename source/Lib/{vvdec/vfgs_hw.h => FilmGrain/FilmGrainImpl.h} (61%) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/FilmGrain/FilmGrain.cpp similarity index 90% rename from source/Lib/vvdec/vfgs_fw.c rename to source/Lib/FilmGrain/FilmGrain.cpp index 5b9af023..28c6e0e1 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -54,17 +54,25 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#include "vfgs_fw.h" -#include "vfgs_hw.h" -#include -#include +#include "FilmGrain.h" -#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) -#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) -#define clip( x, lo, hi ) ( ( x ) > ( hi ) ? 
hi : ( x ) < ( lo ) ? ( lo ) : ( x ) ) +#include +#include +#include + +#include "CommonDef.h" + +namespace vvdec +{ + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} // clang-format off -static const int8_t Gaussian_LUT[2048] = { +static constexpr int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, @@ -195,39 +203,39 @@ static const int8_t Gaussian_LUT[2048] = { 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 }; -static const uint32_t Seed_LUT[256] = { - 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, - 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, - 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, - 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, - 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, - 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, 168102420, - 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, - 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, - 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, - 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, - 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, - 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, - 67112961, 1005669825, 903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, - 2074373264, 305357524, 1049387408, 
1171033360, 1686114305, 2147468765, 1941195985, 117709841, - 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, - 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, - 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, - 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, - 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, - 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, - 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, - 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, - 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, - 1758494976, 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, 1750138880, - 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, - 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, - 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, - 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, - 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, - 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, - 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, - 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 +static constexpr uint32_t Seed_LUT[256] = { + 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, + 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, + 2023850500, 
76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, + 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, + 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, + 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, 168102420, + 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, + 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, + 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, + 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, + 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, + 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, + 67112961, 1005669825, 903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, + 2074373264, 305357524, 1049387408, 1171033360, 1686114305, 2147468765, 1941195985, 117709841, + 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, + 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, + 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, + 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, + 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, + 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, + 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, + 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, + 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, + 1758494976, 1506656568, 1557150160, 
608221521, 1073840384, 217672017, 684818688, 1750138880, + 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, + 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, + 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, + 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, + 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, + 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, + 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, + 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 }; #define DEFINE_DCT2_P64_MATRIX(aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl, bm, bn, bo, bp, bq, br, bs, bt, bu, bv, bw, bx, by, bz, ca, cb, cc, cd, ce, cf, cg, ch, ci, cj, ck) \ @@ -296,14 +304,14 @@ static const uint32_t Seed_LUT[256] = { { cj, -cg, cd, -ca, bx, -bu, br, -bo, bl, -bi, bf, -bh, bk, -bn, bq, -bt, bw, -bz, cc, -cf, ci, ck, -ch, ce, -cb, by, -bv, bs, -bp, bm, -bj, bg, -bg, bj, -bm, bp, -bs, bv, -by, cb, -ce, ch, -ck, -ci, cf, -cc, bz, -bw, bt, -bq, bn, -bk, bh, -bf, bi, -bl, bo, -br, bu, -bx, ca, -cd, cg, -cj }, \ { be, -bd, bc, -bb, ba, -az, ay, -ax, aw, -av, au, -at, as, -ar, aq, -ap, ap, -aq, ar, -as, at, -au, av, -aw, ax, -ay, az, -ba, bb, -bc, bd, -be, -be, bd, -bc, bb, -ba, az, -ay, ax, -aw, av, -au, at, -as, ar, -aq, ap, -ap, aq, -ar, as, -at, au, -av, aw, -ax, ay, -az, ba, -bb, bc, -bd, be }, \ { ck, -cj, ci, -ch, cg, -cf, ce, -cd, cc, -cb, ca, -bz, by, -bx, bw, -bv, bu, -bt, bs, -br, bq, -bp, bo, -bn, bm, -bl, bk, -bj, bi, -bh, bg, -bf, bf, -bg, bh, -bi, bj, -bk, bl, -bm, bn, -bo, bp, -bq, br, -bs, bt, -bu, bv, -bw, bx, -by, bz, -ca, cb, 
-cc, cd, -ce, cf, -cg, ch, -ci, cj, -ck }, \ - } +} -static const int8_t DCT2_64[64][64] = \ +static constexpr int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32_t prng( uint32_t x ) +static inline uint32_t prng( uint32_t x ) { #if 1 // same as HW (bit-reversed RDD-5) uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; @@ -409,7 +417,7 @@ static void idct2_32( int8_t B[][32] ) } } -static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) +static void make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) { int k, l; uint32_t n; @@ -436,7 +444,7 @@ static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) idct2_64( B ); } -static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) +static void make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) { int k, l; uint32_t n; @@ -461,7 +469,7 @@ static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) idct2_32( B ); } -static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) +static void make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) { int16_t coef[4][7]; int L = 0; @@ -490,7 +498,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 break; default: - assert( 0 ); + THROW_FATAL( "nb_coef != 6 not implemented" ); } if( nb_coef != 6 ) { @@ -527,7 +535,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 g += round( Gaussian_LUT[rnd & 2047], shift ); rnd = prng( rnd ); - buf[width * y + x] = clip( g, 
-127, 127 ); + buf[width * y + x] = Clip3( -127, 127, g ); } } @@ -542,7 +550,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 } } -int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) +static int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) { int16_t* coef_a = &cfg->comp_model_value[0][0][0] + a; int16_t* coef_b = &cfg->comp_model_value[0][0][0] + b; @@ -559,7 +567,7 @@ int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) } /** Initialize "hardware" interface from FGS SEI parameters */ -void vfgs_init_sei( fgs_sei* cfg ) +void FilmGrain::init_sei( fgs_sei* cfg ) { int8_t P[64 * 64]; int8_t Lbuf[73 * 82]; @@ -630,31 +638,31 @@ void vfgs_init_sei( fgs_sei* cfg ) { if( cfg->model_id ) { - vfgs_make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); } else { - vfgs_make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); + make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); } - vfgs_set_luma_pattern( i, P ); + set_luma_pattern( i, P ); } else if( c == 2 ) { if( cfg->model_id ) { - vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); } else { - vfgs_make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); + make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); } - vfgs_set_chroma_pattern( i, P ); + set_chroma_pattern( i, P ); } } // 3. Fill up LUTs - for( int cc = min( c, 1 ); cc <= c; cc++ ) + for( int cc = std::min( c, 1 ); cc <= c; cc++ ) { if( cfg->comp_model_present_flag[cc] ) { @@ -703,11 +711,13 @@ void vfgs_init_sei( fgs_sei* cfg ) memset( plut, 0, sizeof( plut ) ); } // 3c. 
Register LUTs - vfgs_set_scale_lut( cc, slut ); - vfgs_set_pattern_lut( cc, plut ); + set_scale_lut( cc, slut ); + set_pattern_lut( cc, plut ); } } } - vfgs_set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) + set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } + +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/FilmGrain/FilmGrain.h similarity index 90% rename from source/Lib/vvdec/vfgs_fw.h rename to source/Lib/FilmGrain/FilmGrain.h index fc3b4fe7..45b892dd 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/FilmGrain/FilmGrain.h @@ -54,14 +54,16 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#ifndef _VFGS_FW_H_ -#define _VFGS_FW_H_ +#pragma once -#include +#include "FilmGrainImpl.h" + +namespace vvdec +{ #define SEI_MAX_MODEL_VALUES 6 -typedef struct fgs_sei_s +struct fgs_sei { uint8_t model_id; uint8_t log2_scale_factor; @@ -71,8 +73,18 @@ typedef struct fgs_sei_s uint8_t intensity_interval_lower_bound[3][256]; uint8_t intensity_interval_upper_bound[3][256]; int16_t comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; -} fgs_sei; +}; + +class FilmGrain : public FilmGrainImpl +{ +public: + FilmGrain( int depth, int chromaSubsampling ) + { + set_depth( depth ); + set_chroma_subsampling( chromaSubsampling, chromaSubsampling ); + } -void vfgs_init_sei( fgs_sei* cfg ); + void init_sei( fgs_sei* cfg ); +}; -#endif // _VFGS_FW_H_ +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/FilmGrain/FilmGrainImpl.cpp similarity index 78% rename from source/Lib/vvdec/vfgs_hw.c rename to source/Lib/FilmGrain/FilmGrainImpl.cpp index a3d88ed6..b198334c 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -54,49 +54,29 @@ POSSIBILITY OF SUCH DAMAGE. * message). 
*/ -#include "vfgs_hw.h" -#include // memcpy -#include +#include "FilmGrainImpl.h" -#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) -#define max( a, b ) ( ( a ) > ( b ) ? ( a ) : ( b ) ) -#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) +#include // memcpy +#include + +#include #define PATTERN_INTERPOLATION 0 -// Note: declarations optimized for code readability; e.g. pattern storage in -// actual hardware implementation would differ significantly -static int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { - 0, -}; // +1 to simplify interpolation code -static uint8_t sLUT[3][256] = { - 0, -}; -static uint8_t pLUT[3][256] = { - 0, -}; -static uint32_t rnd = 0xdeadbeef; -static uint32_t rnd_up = 0xdeadbeef; -static uint32_t line_rnd = 0xdeadbeef; -static uint32_t line_rnd_up = 0xdeadbeef; -static uint8_t scale_shift = 5 + 6; -static uint8_t bs = 0; // bitshift = bitdepth - 8 -static uint8_t Y_min = 0; -static uint8_t Y_max = 255; -static uint8_t C_min = 0; -static uint8_t C_max = 255; -static int csubx = 2; -static int csuby = 2; - -// Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) -static uint8_t scale[3][32]; +namespace vvdec +{ + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32_t prng( uint32_t x ) +static inline uint32_t prng( uint32_t x ) { uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; x = s | ( x >> 1 ); @@ -136,7 +116,7 @@ static void get_offset_y( uint32_t val, int* s, uint8_t* x, uint8_t* y ) // pattern samples (when using overlap). 
} -static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) +void FilmGrainImpl::get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { uint32_t bf; // bit field @@ -149,7 +129,7 @@ static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) +void FilmGrainImpl::get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { uint32_t bf; // bit field @@ -162,7 +142,7 @@ static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void add_grain_block( void* I, int c, int x, int y, int width ) +void FilmGrainImpl::add_grain_block( void* I, int c, int x, int y, int width ) { uint8_t* I8 = (uint8_t*) I; uint16_t* I16 = (uint16_t*) I; @@ -191,10 +171,11 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) return; } - assert( !( x & 15 ) ); - assert( width > 128 ); - assert( bs == 0 || bs == 2 ); - assert( scale_shift + bs >= 8 && scale_shift + bs <= 13 ); + CHECK( x & 15, "x not a multiple of 16" ); + CHECK( width <= 128, "wrong width" ); + CHECK( bs != 0 && bs != 2, "wrong bs" ); + CHECK( scale_shift + bs < 8 || scale_shift + bs > 13, "wrong scale_shift" ); + // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc j = y & 0xf; @@ -303,11 +284,11 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) g = round( scale[c][i] * (int16_t) grain[c][i], scale_shift ); if( bs ) { - I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + I16[( x - 16 ) / subx + i] = std::max( I_min << bs, std::min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); } else { - I8[( x - 16 ) / subx + i] = max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + I8[( x - 16 ) / subx + i] = std::max( I_min, std::min( I_max, I8[( x - 16 ) / subx + i] + g ) ); } } } @@ -329,7 +310,7 @@ 
static void add_grain_block( void* I, int c, int x, int y, int width ) /* Public interface ***********************************************************/ -void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) +void FilmGrainImpl::add_grain_line( void* Y, void* U, void* V, int y, int width ) { // Generate / backup / restore per-line random seeds (needed to make multi-line blocks) if( y && ( y & 0x0f ) == 0 ) @@ -355,49 +336,47 @@ void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) } } -void vfgs_set_luma_pattern( int index, int8_t* P ) +void FilmGrainImpl::set_luma_pattern( int index, int8_t* P ) { - assert( index >= 0 && index < 8 ); + CHECK( index < 0 || index >= 8, "luma pattern index out of bounds" ); memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern( int index, int8_t* P ) +void FilmGrainImpl::set_chroma_pattern( int index, int8_t* P ) { - assert( index >= 0 && index < 8 ); + CHECK( index < 0 || index >= 8, "chroma pattern index out of bounds" ); for( int i = 0; i < 64 / csuby; i++ ) { memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); } } -void vfgs_set_scale_lut( int c, uint8_t lut[] ) +void FilmGrainImpl::set_scale_lut( int c, uint8_t lut[] ) { - assert( c >= 0 && c < 3 ); + CHECK( c < 0 || c >= 3, "scale lut idx out of bounds" ); memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut( int c, uint8_t lut[] ) +void FilmGrainImpl::set_pattern_lut( int c, uint8_t lut[] ) { - assert( c >= 0 && c < 3 ); + CHECK( c < 0 || c >= 3, "pattern lut idx out of bounds" ); memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed( uint32_t seed ) +void FilmGrainImpl::set_seed( uint32_t seed ) { - // Note: shift left the seed as the LFSR loops on the 31 MSBs, so - // the LFSR register LSB has no effect on random sequence initialization - rnd = rnd_up = line_rnd = line_rnd_up = ( seed << 1 ); + rnd = rnd_up = line_rnd = line_rnd_up = seed; } -void vfgs_set_scale_shift( int shift ) +void 
FilmGrainImpl::set_scale_shift( int shift ) { - assert( shift >= 2 && shift < 8 ); + CHECK( shift < 2 || shift >= 8, "scale shift out of range" ); scale_shift = shift + 6 - bs; } -void vfgs_set_depth( int depth ) +void FilmGrainImpl::set_depth( int depth ) { - assert( depth == 8 || depth == 10 ); + CHECK( depth != 8 && depth != 10, "only bit depth 8 and 10 supported." ) if( bs == 0 && depth > 8 ) { @@ -411,10 +390,12 @@ void vfgs_set_depth( int depth ) bs = depth - 8; } -void vfgs_set_chroma_subsampling( int subx, int suby ) +void FilmGrainImpl::set_chroma_subsampling( int subx, int suby ) { - assert( subx == 1 || subx == 2 ); - assert( suby == 1 || suby == 2 ); + CHECK( subx != 1 && subx != 2, "chroma subsampling should be 1 or 2" ); + CHECK( suby != 1 && suby != 2, "chroma subsampling should be 1 or 2" ); csubx = subx; csuby = suby; } + +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/FilmGrain/FilmGrainImpl.h similarity index 61% rename from source/Lib/vvdec/vfgs_hw.h rename to source/Lib/FilmGrain/FilmGrainImpl.h index d4df7576..0fbb85d9 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -54,23 +54,59 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#ifndef _VFGS_HW_H_ -#define _VFGS_HW_H_ +#pragma once -#include +#include #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern( int index, int8_t* P ); -void vfgs_set_chroma_pattern( int index, int8_t* P ); -void vfgs_set_scale_lut( int c, uint8_t lut[] ); -void vfgs_set_pattern_lut( int c, uint8_t lut[] ); - -void vfgs_set_seed( uint32_t seed ); -void vfgs_set_scale_shift( int shift ); -void vfgs_set_depth( int depth ); -void vfgs_set_chroma_subsampling( int subx, int suby ); - -void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ); - -#endif // _VFGS_HW_H_ +namespace vvdec +{ + +class FilmGrainImpl +{ + // Note: declarations optimized for code readability; e.g. 
pattern storage in + // actual hardware implementation would differ significantly + int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code + uint8_t sLUT[3][256] = { 0, }; + uint8_t pLUT[3][256] = { 0, }; + + uint32_t rnd = 0xdeadbeef; + uint32_t rnd_up = 0xdeadbeef; + uint32_t line_rnd = 0xdeadbeef; + uint32_t line_rnd_up = 0xdeadbeef; + uint8_t scale_shift = 5 + 6; + uint8_t bs = 0; // bitshift = bitdepth - 8 + int csubx = 2; + int csuby = 2; + + constexpr static uint8_t Y_min = 0; + constexpr static uint8_t Y_max = 255; + constexpr static uint8_t C_min = 0; + constexpr static uint8_t C_max = 255; + + // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) + int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) + uint8_t scale[3][32]; + + void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ); + void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ); + void add_grain_block( void* I, int c, int x, int y, int width ); + +protected: + void set_luma_pattern( int index, int8_t* P ); + void set_chroma_pattern( int index, int8_t* P ); + void set_scale_lut( int c, uint8_t lut[] ); + void set_pattern_lut( int c, uint8_t lut[] ); + + void set_seed( uint32_t seed ); + void set_scale_shift( int shift ); + +public: + void set_depth( int depth ); + void set_chroma_subsampling( int subx, int suby ); + + void add_grain_line( void* Y, void* U, void* V, int y, int width ); +}; + +} // namespace vvdec diff --git a/source/Lib/vvdec/CMakeLists.txt b/source/Lib/vvdec/CMakeLists.txt index 88d052ff..d33af357 100644 --- a/source/Lib/vvdec/CMakeLists.txt +++ b/source/Lib/vvdec/CMakeLists.txt @@ -46,8 +46,8 @@ file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" ) file( GLOB MD5_INC_FILES "../libmd5/*.h" ) if( VVDEC_ENABLE_FILM_GRAIN ) - file( GLOB FGS_SRC_FILES "vfgs_*.c" ) - file( GLOB FGS_INC_FILES "vfgs_*.h" ) + file( GLOB FGS_SRC_FILES "../FilmGrain/*.cpp" ) + file( GLOB 
FGS_INC_FILES "../FilmGrain/*.h" ) set_property( SOURCE vvdec.cpp vvdecimpl.cpp APPEND PROPERTY COMPILE_DEFINITIONS ENABLE_FILM_GRAIN ) endif() @@ -94,10 +94,12 @@ if( VVDEC_ENABLE_X86_SIMD ) #set_property( SOURCE ${X86_SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) # set needed compile flags if( MSVC ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) elseif( UNIX OR MINGW ) include( vvdecCompilerSupport ) @@ -114,10 +116,11 @@ if( VVDEC_ENABLE_X86_SIMD ) #set_property( SOURCE ${X86_SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_msse42}" ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx}" ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx2}" ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx2}" ) endif() #add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_SSE42_SRC_FILES} ${X86_AVX_SRC_FILES} ${X86_AVX2_SRC_FILES} ) - add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_AVX2_SRC_FILES} ) + add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_AVX2_SRC_FILES} ${X86_AVX2_C_FILES} ) target_link_libraries( ${LIB_NAME}_x86_simd ${INTEL_ITT_LINK_TARGET} ) # disble LTO for the files compiled with special architecture flags diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index f3b6e1b4..51a76f77 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -55,16 +55,15 
@@ POSSIBILITY OF SUCH DAMAGE. #if ENABLE_FILM_GRAIN # include "vvdec/sei.h" -extern "C" -{ -# include "vfgs_fw.h" -# include "vfgs_hw.h" -} +# include "FilmGrain/FilmGrain.h" #endif // ENABLE_FILM_GRAIN namespace vvdec { +VVDecImpl::VVDecImpl() = default; +VVDecImpl::~VVDecImpl() = default; + int VVDecImpl::init( const vvdecParams& params, vvdecCreateBufferCallback createBufCallback, vvdecUnrefBufferCallback unrefBufCallback ) { if( m_bInitialized ){ return VVDEC_ERR_INITIALIZE; } @@ -108,7 +107,7 @@ int VVDecImpl::init( const vvdecParams& params, vvdecCreateBufferCallback create m_cUserAllocator = UserAllocator(); } - m_cDecLib.reset( new DecLib() ); + m_cDecLib = std::make_unique(); initROM(); @@ -176,6 +175,10 @@ int VVDecImpl::uninit() m_bInitialized = false; m_eState = INTERNAL_STATE_UNINITIALIZED; +#if ENABLE_FILM_GRAIN + m_filmGrainSynth.reset(); +#endif + return VVDEC_OK; } @@ -831,71 +834,75 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) { vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; - if( !sei->filmGrainCharacteristicsCancelFlag ) + if( sei->filmGrainCharacteristicsCancelFlag ) + { + m_eFgs = 0; + return VVDEC_OK; + } + + if( !m_filmGrainSynth ) + { + m_filmGrainSynth = std::make_unique( 10, 2 ); + } + + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? + fgs.model_id = sei->filmGrainModelId; + fgs.log2_scale_factor = sei->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) { - fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? 
- fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for( int c = 0; c < 3; c++ ) + vvdecCompModel& cm = sei->compModel[c]; + if( cm.presentFlag ) { - vvdecCompModel& cm = sei->compModel[c]; - if( cm.presentFlag ) + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) { - vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; - fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; - fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; - for( int v = 0; v < fgs.num_model_values[c]; v++ ) - { - fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; - } - // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 
0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; } + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) } } + } - vfgs_set_depth( 10 ); - vfgs_set_chroma_subsampling( 2, 2 ); - // Conversion of component model values for 4:2:0 chroma format - if( fgs.model_id == 0 ) + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) + { + for( int c = 1; c < 3; c++ ) { - for( int c = 1; c < 3; c++ ) + if( fgs.comp_model_present_flag[c] ) { - if( fgs.comp_model_present_flag[c] ) + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) { - for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) - { - fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency - fgs.comp_model_value[c][k][2] = std::max( 2, 
std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency - fgs.comp_model_value[c][k][0] >>= 1; - } + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; } } } - vfgs_init_sei( &fgs ); + } + m_filmGrainSynth->set_depth( 10 ); + m_filmGrainSynth->set_chroma_subsampling( 2, 2 ); + m_filmGrainSynth->init_sei( &fgs ); - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32_t seed); + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32_t seed); - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - } - else - { - m_eFgs = 0; - } + m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 
2 : 1; return VVDEC_OK; } @@ -912,7 +919,7 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) for( int y = 0; y < frame->planes[0].height; y++ ) { - vfgs_add_grain_line( Y, U, V, y, frame->planes[0].width ); + m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); Y += frame->planes[0].stride; if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) { @@ -969,12 +976,12 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) #if ENABLE_FILM_GRAIN // find FGC SEI - for( auto& s : pcPic->seiMessageList ) + for( auto& sei: pcPic->seiMessageList ) { - if( s->payloadType == VVDEC_FILM_GRAIN_CHARACTERISTICS ) + if( sei->payloadType == VVDEC_FILM_GRAIN_CHARACTERISTICS ) { - xUpdateFGC( s ); - msg( INFO, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); + xUpdateFGC( sei ); + msg( DETAILS, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); } } bCreateStorage = bCreateStorage || m_eFgs; diff --git a/source/Lib/vvdec/vvdecimpl.h b/source/Lib/vvdec/vvdecimpl.h index cc2ff401..d0bd5390 100644 --- a/source/Lib/vvdec/vvdecimpl.h +++ b/source/Lib/vvdec/vvdecimpl.h @@ -47,6 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. 
namespace vvdec { +class FilmGrain; static const char * const vvdecNalTypeNames[] = { "NAL_UNIT_CODED_SLICE_TRAIL", "NAL_UNIT_CODED_SLICE_STSA", "NAL_UNIT_CODED_SLICE_RADL", "NAL_UNIT_CODED_SLICE_RASL", "NAL_UNIT_RESERVED_VCL_4", "NAL_UNIT_RESERVED_VCL_5", "NAL_UNIT_RESERVED_VCL_6", @@ -98,11 +99,8 @@ class VVDecImpl public: - /// Constructor - VVDecImpl() = default; - - /// Destructor - ~VVDecImpl() = default; + VVDecImpl(); + ~VVDecImpl(); class FrameStorage { @@ -220,6 +218,7 @@ class VVDecImpl uint64_t m_uiSeqNumOutput = 0; #if ENABLE_FILM_GRAIN int m_eFgs = 0; + std::unique_ptr m_filmGrainSynth; #endif // ENABLE_FILM_GRAIN }; From 279c3de6ecf7cd528943824612e61a111cd507ce Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Thu, 23 May 2024 16:58:36 +0200 Subject: [PATCH 5/8] small cleanup --- source/Lib/vvdec/vvdecimpl.cpp | 71 ++++++++++++++++++---------------- source/Lib/vvdec/vvdecimpl.h | 11 ++++-- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 51a76f77..60e4d88b 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -232,7 +232,7 @@ int VVDecImpl::reset() #endif #if ENABLE_FILM_GRAIN - m_eFgs = 0; + m_filmGrainCharacteristicsState = FgcNone; #endif // ENABLE_FILM_GRAIN m_uiSeqNumber = 0; m_uiSeqNumOutput = 0; @@ -830,19 +830,19 @@ int VVDecImpl::copyComp( const unsigned char* pucSrc, unsigned char* pucDest, un } #if ENABLE_FILM_GRAIN -int VVDecImpl::xUpdateFGC( vvdecSEI* s ) +void VVDecImpl::xUpdateFGC( vvdecSEI* s ) { vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; if( sei->filmGrainCharacteristicsCancelFlag ) { - m_eFgs = 0; - return VVDEC_OK; + m_filmGrainCharacteristicsState = FgcNone; + return; } if( !m_filmGrainSynth ) { - m_filmGrainSynth = std::make_unique( 10, 2 ); + m_filmGrainSynth = std::make_unique( 10, 2 ); // TODO: (GH) set correct bit depth and color format, and apply changes } fgs_sei 
fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) @@ -868,11 +868,17 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; } // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) + switch( fgs.num_model_values[c] ) + { + // clang-format off + case 0: + case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + // clang-format on + } } } } @@ -902,38 +908,37 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) // // TODO: make seed also impact the pattern gen // vfgs_set_seed(uint32_t seed); - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - - return VVDEC_OK; + m_filmGrainCharacteristicsState = sei->filmGrainCharacteristicsPersistenceFlag ? 
FgcPersist : FgcDontPersist; } -int VVDecImpl::xAddGrain( vvdecFrame* frame ) +void VVDecImpl::xAddGrain( vvdecFrame* frame ) { - if( m_eFgs ) + if( m_filmGrainCharacteristicsState == FgcNone ) { - uint8_t* Y = (uint8_t*) frame->planes[0].ptr; - uint8_t* U = (uint8_t*) frame->planes[1].ptr; - uint8_t* V = (uint8_t*) frame->planes[2].ptr; + return; + } - CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); + uint8_t* Y = (uint8_t*) frame->planes[0].ptr; + uint8_t* U = (uint8_t*) frame->planes[1].ptr; + uint8_t* V = (uint8_t*) frame->planes[2].ptr; - for( int y = 0; y < frame->planes[0].height; y++ ) - { - m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); - Y += frame->planes[0].stride; - if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) - { - U += frame->planes[1].stride; - V += frame->planes[1].stride; - } - } + CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); - if( m_eFgs < 2 ) // Not persistent + for( int y = 0; y < frame->planes[0].height; y++ ) + { + m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); + Y += frame->planes[0].stride; + if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) { - m_eFgs = 0; + U += frame->planes[1].stride; + V += frame->planes[2].stride; } } - return VVDEC_OK; + + if( m_filmGrainCharacteristicsState != FgcPersist ) // Not persistent + { + m_filmGrainCharacteristicsState = FgcNone; + } } #endif // ENABLE_FILM_GRAIN @@ -984,7 +989,7 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) msg( DETAILS, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); } } - bCreateStorage = bCreateStorage || m_eFgs; + bCreateStorage = bCreateStorage || m_filmGrainCharacteristicsState; #endif // ENABLE_FILM_GRAIN // create a brand new picture object diff --git a/source/Lib/vvdec/vvdecimpl.h b/source/Lib/vvdec/vvdecimpl.h index d0bd5390..5399ef89 100644 --- a/source/Lib/vvdec/vvdecimpl.h +++ b/source/Lib/vvdec/vvdecimpl.h @@ -176,8 +176,8 @@ class VVDecImpl int 
xAddPicture ( Picture* pcPic ); int xCreateFrame ( vvdecFrame& frame, const CPelUnitBuf& rcPicBuf, uint32_t uiWidth, uint32_t uiHeight, const BitDepths& rcBitDepths, bool bCreateStorage ); - int xUpdateFGC ( vvdecSEI *sei ); - int xAddGrain ( vvdecFrame *frame ); + void xUpdateFGC ( vvdecSEI *sei ); + void xAddGrain ( vvdecFrame *frame ); static int xRetrieveNalStartCode ( unsigned char *pB, int iZerosInStartcode ); static int xConvertPayloadToRBSP ( const uint8_t* payload, size_t payloadLen, InputBitstream* bitstream, bool isVclNalUnit ); @@ -217,7 +217,12 @@ class VVDecImpl uint64_t m_uiSeqNumber = 0; uint64_t m_uiSeqNumOutput = 0; #if ENABLE_FILM_GRAIN - int m_eFgs = 0; + enum + { + FgcNone = 0, + FgcDontPersist = 1, + FgcPersist = 2 + } m_filmGrainCharacteristicsState = FgcNone; std::unique_ptr m_filmGrainSynth; #endif // ENABLE_FILM_GRAIN }; From a61e8ffb694bc5d8798700b9847f3b3701919435 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Fri, 24 May 2024 10:08:07 +0200 Subject: [PATCH 6/8] move updateFGC() to FilmGrain class --- source/Lib/FilmGrain/FilmGrain.cpp | 65 ++++++++++++++++++++++++++++++ source/Lib/FilmGrain/FilmGrain.h | 8 ++++ source/Lib/vvdec/vvdecimpl.cpp | 64 +---------------------------- 3 files changed, 74 insertions(+), 63 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrain.cpp b/source/Lib/FilmGrain/FilmGrain.cpp index 28c6e0e1..b644204b 100644 --- a/source/Lib/FilmGrain/FilmGrain.cpp +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -720,4 +720,69 @@ void FilmGrain::init_sei( fgs_sei* cfg ) set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } +void FilmGrain::updateFGC( vvdecSEIFilmGrainCharacteristics* fgc ) +{ + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? 
+ fgs.model_id = fgc->filmGrainModelId; + fgs.log2_scale_factor = fgc->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) + { + vvdecCompModel& cm = fgc->compModel[c]; + if( cm.presentFlag ) + { + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + { + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) + { + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + } + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + switch( fgs.num_model_values[c] ) + { + // clang-format off + case 0: + case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 
0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + // clang-format on + } + } + } + } + + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) + { + for( int c = 1; c < 3; c++ ) + { + if( fgs.comp_model_present_flag[c] ) + { + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) + { + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; + } + } + } + } + + init_sei( &fgs ); + + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32_t seed); +} + } // namespace vvdec diff --git a/source/Lib/FilmGrain/FilmGrain.h b/source/Lib/FilmGrain/FilmGrain.h index 45b892dd..73303d92 100644 --- a/source/Lib/FilmGrain/FilmGrain.h +++ b/source/Lib/FilmGrain/FilmGrain.h @@ -58,6 +58,10 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "FilmGrainImpl.h" +#include + +#include "vvdec/sei.h" + namespace vvdec { @@ -65,6 +69,8 @@ namespace vvdec struct fgs_sei { + fgs_sei() { memset( this, 0, sizeof( *this ) ); } + uint8_t model_id; uint8_t log2_scale_factor; uint8_t comp_model_present_flag[3]; @@ -83,7 +89,9 @@ class FilmGrain : public FilmGrainImpl set_depth( depth ); set_chroma_subsampling( chromaSubsampling, chromaSubsampling ); } + void updateFGC( vvdecSEIFilmGrainCharacteristics* fgc ); +private: void init_sei( fgs_sei* cfg ); }; diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 60e4d88b..89423a5a 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -845,69 +845,7 @@ void VVDecImpl::xUpdateFGC( vvdecSEI* s ) m_filmGrainSynth = std::make_unique( 10, 2 ); // TODO: (GH) set correct bit depth and color format, and apply changes } - fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? 
- fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for( int c = 0; c < 3; c++ ) - { - vvdecCompModel& cm = sei->compModel[c]; - if( cm.presentFlag ) - { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) - { - vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; - fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; - fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; - for( int v = 0; v < fgs.num_model_values[c]; v++ ) - { - fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; - } - // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - switch( fgs.num_model_values[c] ) - { - // clang-format off - case 0: - case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) - case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 
0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr - case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) - case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio - fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) - // clang-format on - } - } - } - } - - // Conversion of component model values for 4:2:0 chroma format - if( fgs.model_id == 0 ) - { - for( int c = 1; c < 3; c++ ) - { - if( fgs.comp_model_present_flag[c] ) - { - for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) - { - fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency - fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency - fgs.comp_model_value[c][k][0] >>= 1; - } - } - } - } - m_filmGrainSynth->set_depth( 10 ); - m_filmGrainSynth->set_chroma_subsampling( 2, 2 ); - m_filmGrainSynth->init_sei( &fgs ); - - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32_t seed); - + m_filmGrainSynth->updateFGC( sei ); m_filmGrainCharacteristicsState = sei->filmGrainCharacteristicsPersistenceFlag ? 
FgcPersist : FgcDontPersist; } From c0bce238a0abd2f9bb74f1b2afd9dd5170d6eee7 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 15:52:00 +0200 Subject: [PATCH 7/8] move duplicate implementations of prng() and round() --- source/Lib/FilmGrain/FilmGrain.cpp | 19 ------------------- source/Lib/FilmGrain/FilmGrainImpl.cpp | 17 ----------------- source/Lib/FilmGrain/FilmGrainImpl.h | 22 ++++++++++++++++++++++ 3 files changed, 22 insertions(+), 36 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrain.cpp b/source/Lib/FilmGrain/FilmGrain.cpp index b644204b..dacc3ff4 100644 --- a/source/Lib/FilmGrain/FilmGrain.cpp +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -65,12 +65,6 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { -template -constexpr inline auto round( T a, uint8_t s ) -{ - return ( a + ( 1 << ( s - 1 ) ) ) >> s; -} - // clang-format off static constexpr int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, @@ -310,19 +304,6 @@ static constexpr int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on -/** Pseudo-random number generator (32-bit) */ -static inline uint32_t prng( uint32_t x ) -{ -#if 1 // same as HW (bit-reversed RDD-5) - uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); -#else // RDD-5 - uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; - x = ( x << 1 ) | s; -#endif - return x; -} - /** Apply iDCT2 to block B[64][64] + clipping */ static void idct2_64( int8_t B[][64] ) { diff --git a/source/Lib/FilmGrain/FilmGrainImpl.cpp b/source/Lib/FilmGrain/FilmGrainImpl.cpp index b198334c..b2d8a616 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.cpp +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -66,23 +66,6 @@ 
POSSIBILITY OF SUCH DAMAGE. namespace vvdec { -template -constexpr inline auto round( T a, uint8_t s ) -{ - return ( a + ( 1 << ( s - 1 ) ) ) >> s; -} - -/** Pseudo-random number generator - * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register - * (the register LSB has basically no effect since it is never fed back) - */ -static inline uint32_t prng( uint32_t x ) -{ - uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); - return x; -} - /** Derive Y x/y offsets from (random) number * * Bit fields are designed to minimize overlaps across color channels, to diff --git a/source/Lib/FilmGrain/FilmGrainImpl.h b/source/Lib/FilmGrain/FilmGrainImpl.h index 0fbb85d9..24056326 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -63,6 +63,28 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { +/** Pseudo-random number generator (32-bit) + * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register + * (the register LSB has basically no effect since it is never fed back) + */ +static inline uint32_t prng( uint32_t x ) +{ +#if 1 // same as HW (bit-reversed RDD-5) + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); +#else // RDD-5 + uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; + x = ( x << 1 ) | s; +#endif + return x; +} + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} + class FilmGrainImpl { // Note: declarations optimized for code readability; e.g. 
pattern storage in From d36438971ed040888c9c92e77c1782d9590ec669 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 16:24:56 +0200 Subject: [PATCH 8/8] fix clang build and utf-8 encoding --- source/Lib/FilmGrain/FilmGrainImpl.cpp | 11 ++++++++++- source/Lib/FilmGrain/FilmGrainImpl.h | 8 +++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrainImpl.cpp b/source/Lib/FilmGrain/FilmGrainImpl.cpp index b2d8a616..85858487 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.cpp +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -6,7 +6,7 @@ the Software are granted under this license. The Clear BSD License -Copyright (c) 2018-2024, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. +Copyright (c) 2018-2024, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -381,4 +381,13 @@ void FilmGrainImpl::set_chroma_subsampling( int subx, int suby ) csuby = suby; } +FilmGrainImpl::FilmGrainImpl() +{ + memset( pattern, 0, sizeof( pattern ) ); + memset( sLUT, 0, sizeof( sLUT ) ); + memset( pLUT, 0, sizeof( pLUT ) ); + memset( grain, 0, sizeof( grain ) ); + memset( scale, 0, sizeof( scale ) ); +} + } // namespace vvdec diff --git a/source/Lib/FilmGrain/FilmGrainImpl.h b/source/Lib/FilmGrain/FilmGrainImpl.h index 24056326..41150271 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -89,9 +89,9 @@ class FilmGrainImpl { // Note: declarations optimized for code readability; e.g. 
pattern storage in // actual hardware implementation would differ significantly - int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code - uint8_t sLUT[3][256] = { 0, }; - uint8_t pLUT[3][256] = { 0, }; + int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64]; // +1 to simplify interpolation code + uint8_t sLUT[3][256]; + uint8_t pLUT[3][256]; uint32_t rnd = 0xdeadbeef; uint32_t rnd_up = 0xdeadbeef; @@ -116,6 +116,8 @@ class FilmGrainImpl void add_grain_block( void* I, int c, int x, int y, int width ); protected: + FilmGrainImpl(); + void set_luma_pattern( int index, int8_t* P ); void set_chroma_pattern( int index, int8_t* P ); void set_scale_lut( int c, uint8_t lut[] );