From 2d18502b389debe4e3ef5c2377ef8d336be62ddb Mon Sep 17 00:00:00 2001 From: MallikarjunKamble <108510690+MallikarjunKamble@users.noreply.github.com> Date: Mon, 22 Apr 2024 22:50:10 +0530 Subject: [PATCH] Multi-threading Implementation for Float and Integer (#15) Co-authored-by: Priyanka-885 Co-authored-by: Mallikarjun Kamble --- Makefile | 4 - .../funque/arm64/integer_funque_motion_neon.c | 55 ++++ .../funque/arm64/integer_funque_motion_neon.h | 21 +- .../funque/arm64/integer_funque_strred_neon.c | 79 +++-- .../funque/arm64/integer_funque_strred_neon.h | 20 +- .../feature/third_party/funque/float_funque.c | 285 ++++++++++++++-- .../funque/funque_global_options.h | 5 + .../third_party/funque/funque_motion.c | 30 +- .../third_party/funque/funque_motion.h | 6 +- .../third_party/funque/funque_picture_copy.c | 25 +- .../third_party/funque/funque_picture_copy.h | 4 + .../feature/third_party/funque/funque_ssim.h | 2 +- .../third_party/funque/funque_strred.c | 96 +++--- .../third_party/funque/funque_strred.h | 14 +- .../third_party/funque/integer_funque.c | 307 +++++++++++++++--- .../funque/integer_funque_filters.h | 25 +- .../funque/integer_funque_motion.c | 33 +- .../funque/integer_funque_motion.h | 7 +- .../funque/integer_funque_strred.c | 92 +++--- .../funque/integer_funque_strred.h | 41 +-- .../third_party/funque/integer_picture_copy.c | 25 ++ .../third_party/funque/integer_picture_copy.h | 4 + .../src/feature/third_party/funque/resizer.h | 2 + .../funque/x86/integer_funque_filters_avx2.h | 3 +- .../funque/x86/integer_funque_motion_avx2.c | 58 +++- .../funque/x86/integer_funque_motion_avx2.h | 3 +- .../funque/x86/integer_funque_motion_avx512.c | 134 ++++++++ .../funque/x86/integer_funque_motion_avx512.h | 26 ++ .../funque/x86/integer_funque_strred_avx2.c | 79 +++-- .../funque/x86/integer_funque_strred_avx2.h | 20 +- .../funque/x86/integer_funque_strred_avx512.c | 81 +++-- .../funque/x86/integer_funque_strred_avx512.h | 20 +- libvmaf/src/meson.build | 3 + 
libvmaf/src/output.c | 103 +++++- libvmaf/src/output.h | 2 + model/funque_float.json | 17 +- model/funque_integer.json | 14 +- 37 files changed, 1404 insertions(+), 341 deletions(-) create mode 100644 libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.c create mode 100644 libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.h diff --git a/Makefile b/Makefile index 09725115f..402a15cb7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD VENV=.venv .PHONY: all install clean distclean deps diff --git a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.c b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.c index da984c37e..cd85d44c5 100644 --- a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.c +++ b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.c @@ -18,6 +18,8 @@ #include #include +#include "../integer_funque_filters.h" +#include "../integer_funque_motion.h" double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor) { @@ -98,3 +100,56 @@ double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *i double d_accum = (double)accum / pending_div_factor; return (d_accum / (width * height)); } + +int integer_compute_motion_funque_neon(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score) +{ + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (prev_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: prev_stride %% sizeof(dwt2_dtype) != 0, prev_stride = %d, sizeof(dwt2_dtype) = %zu.\n", prev_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (curr_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: curr_stride %% sizeof(dwt2_dtype) 
!= 0, curr_stride = %d, sizeof(dwt2_dtype) = %zu.\n", curr_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_neon(prev, curr, w, h, prev_stride / sizeof(dwt2_dtype), curr_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + +fail: + return 1; +} + +int integer_compute_mad_funque_neon(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score) +{ + + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (ref_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: ref_stride %% sizeof(dwt2_dtype) != 0, ref_stride = %d, sizeof(dwt2_dtype) = %zu.\n", ref_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (dis_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: dis_stride %% sizeof(dwt2_dtype) != 0, dis_stride = %d, sizeof(dwt2_dtype) = %zu.\n", dis_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_neon(ref, dis, w, h, ref_stride / sizeof(dwt2_dtype), dis_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + +fail: + return 1; +} \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.h b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.h index 16a150511..ac6e75a58 100644 --- a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.h +++ b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_motion_neon.h @@ -1,3 +1,22 @@ +/** + * + * Copyright 2016-2020 Netflix, Inc. + * + * Licensed under the BSD+Patent License (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSDplusPatent + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + #include "../integer_funque_filters.h" -double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor); \ No newline at end of file +int integer_compute_motion_funque_neon(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score); +int integer_compute_mad_funque_neon(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score); diff --git a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.c b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.c index 52d4ad74f..0e69229d4 100644 --- a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.c +++ b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.c @@ -215,29 +215,21 @@ float integer_rred_entropies_and_scales_neon(const dwt2_dtype *x_t, const dwt2_d return agg_abs_accum; } -int integer_compute_strred_funque_neon( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val_arg, double sigma_nsq_t, uint8_t check_enable_spatial_csf) +int integer_compute_srred_funque_neon(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t 
width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf) { int ret; UNUSED(block_size); size_t total_subbands = DEFAULT_STRRED_SUBBANDS; size_t subband; - float spat_values[DEFAULT_STRRED_SUBBANDS], temp_values[DEFAULT_STRRED_SUBBANDS]; - float fspat_val[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + float spat_values[DEFAULT_STRRED_SUBBANDS], fspat_val[DEFAULT_STRRED_SUBBANDS]; uint8_t enable_temp = 0; int32_t shift_val; - /* amount of reflecting */ - int x_reflect = (int) ((STRRED_WINDOW_SIZE - 1) / 2); - size_t r_width = width + (2 * x_reflect); - size_t r_height = height + (2 * x_reflect); - - float *scales_spat_x = (float *) calloc(r_width * r_height, sizeof(float)); - float *scales_spat_y = (float *) calloc(r_width * r_height, sizeof(float)); - for(subband = 1; subband < total_subbands; subband++) { enable_temp = 0; spat_values[subband] = 0; @@ -249,8 +241,48 @@ int integer_compute_strred_funque_neon( } spat_values[subband] = integer_rred_entropies_and_scales_neon( ref->bands[subband], dist->bands[subband], width, height, log_lut, sigma_nsq_t, - shift_val, enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + shift_val, enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); fspat_val[subband] = spat_values[subband] / (width * height); + } + + strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; + + // Add equations to compute S-RRED using norm factors + int norm_factor = 1, num_level; + for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; + + strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; + + strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; + + ret = 0; + return ret; +} + +int 
integer_compute_strred_funque_neon(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf) +{ + int ret; + UNUSED(block_size); + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + size_t subband; + float temp_values[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + uint8_t enable_temp = 0; + int32_t shift_val; + + for(subband = 1; subband < total_subbands; subband++) { + if(check_enable_spatial_csf == 1) + shift_val = 2 * shift_val_arg; + else { + shift_val = 2 * i_nadenau_pending_div_factors[level][subband]; + } if(prev_ref != NULL && prev_dist != NULL) { enable_temp = 1; @@ -258,19 +290,19 @@ int integer_compute_strred_funque_neon( dwt2_dtype *dist_temporal = (dwt2_dtype *) calloc(width * height, sizeof(dwt2_dtype)); temp_values[subband] = 0; - integer_subract_subbands_neon(ref->bands[subband], prev_ref->bands[subband], - ref_temporal, dist->bands[subband], - prev_dist->bands[subband], dist_temporal, width, height); + integer_subract_subbands_neon(ref->bands[subband], prev_ref->bands[subband], ref_temporal, + dist->bands[subband], prev_dist->bands[subband], + dist_temporal, width, height); temp_values[subband] = integer_rred_entropies_and_scales_neon( ref_temporal, dist_temporal, width, height, log_lut, sigma_nsq_t, shift_val, - enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); ftemp_val[subband] = temp_values[subband] / (width * height); free(ref_temporal); free(dist_temporal); } } - strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; strred_scores->temp_vals[level] = 
(ftemp_val[1] + ftemp_val[2] + ftemp_val[3]) / 3; strred_scores->spat_temp_vals[level] = strred_scores->spat_vals[level] * strred_scores->temp_vals[level]; @@ -279,17 +311,12 @@ int integer_compute_strred_funque_neon( int norm_factor = 1, num_level; for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; - strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level]; strred_scores->spat_temp_vals_cumsum += strred_scores->spat_temp_vals[level]; - strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor; strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor; - free(scales_spat_x); - free(scales_spat_y); - ret = 0; return ret; } \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.h b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.h index 57f227293..af71c6752 100644 --- a/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.h +++ b/libvmaf/src/feature/third_party/funque/arm64/integer_funque_strred_neon.h @@ -21,11 +21,21 @@ #include "../common/macros.h" #include "../funque_global_options.h" -int integer_compute_strred_funque_neon( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf); +int integer_compute_srred_funque_neon(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t 
shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf); + +int integer_compute_strred_funque_neon(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf); void integer_subract_subbands_neon(const dwt2_dtype *ref_src, const dwt2_dtype *ref_prev_src, dwt2_dtype *ref_dst, const dwt2_dtype *dist_src, diff --git a/libvmaf/src/feature/third_party/funque/float_funque.c b/libvmaf/src/feature/third_party/funque/float_funque.c index 7f3c85546..b516a6a7d 100644 --- a/libvmaf/src/feature/third_party/funque/float_funque.c +++ b/libvmaf/src/feature/third_party/funque/float_funque.c @@ -22,6 +22,7 @@ // #include "config.h" #include "dict.h" +#include "framesync.h" #include "feature_collector.h" #include "feature_extractor.h" #include "feature_name.h" @@ -34,7 +35,7 @@ #include "funque_adm.h" #include "funque_adm_options.h" #include "funque_ssim_options.h" -//#include "funque_motion.h" +#include "funque_motion.h" #include "funque_picture_copy.h" #include "funque_ssim.h" #include "resizer.h" @@ -65,6 +66,9 @@ typedef struct FunqueState { dwt2buffers dist_dwt2out[4]; strredbuffers prev_ref[4]; strredbuffers prev_dist[4]; + dwt2buffers shared_ref[4]; + dwt2buffers shared_dist[4]; + strred_results strred_scores; // funque configurable parameters @@ -81,6 +85,8 @@ typedef struct FunqueState { double norm_view_dist; int ref_display_height; int strred_levels; + int motion_levels; + int mad_levels; int process_ref_width; int process_ref_height; int process_dist_width; @@ -97,6 +103,7 @@ typedef struct FunqueState { VmafDictionary *feature_name_dict; ResizerState resize_module; MsSsimScore *score; + FrameBufLen frame_buf_len; } FunqueState; @@ 
-252,7 +259,26 @@ static const VmafOption options[] = { .min = MIN_LEVELS, .max = MAX_LEVELS, }, - + { + .name = "motion_levels", + .alias = "motion", + .help = "Number of levels in MOTION", + .offset = offsetof(FunqueState, motion_levels), + .type = VMAF_OPT_TYPE_INT, + .default_val.i = DEFAULT_MOTION_LEVELS, + .min = MIN_LEVELS, + .max = MAX_LEVELS, + }, + { + .name = "mad_levels", + .alias = "mad", + .help = "Number of levels in Mean absolute difference", + .offset = offsetof(FunqueState, mad_levels), + .type = VMAF_OPT_TYPE_INT, + .default_val.i = DEFAULT_MAD_LEVELS, + .min = MIN_LEVELS, + .max = MAX_LEVELS, + }, { 0 } }; @@ -324,6 +350,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, (void)bpc; FunqueState *s = fex->priv; + s->frame_buf_len.total_buf_size = 0; + s->feature_name_dict = vmaf_feature_name_dict_from_provided_features(fex->provided_features, fex->options, s); @@ -335,8 +363,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, h = (h+1)>>1; } - s->needed_dwt_levels = MAX5(s->vif_levels, s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels); - s->needed_full_dwt_levels = MAX4(s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels); + s->needed_dwt_levels = MAX7(s->vif_levels, s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels, s->motion_levels, s->mad_levels); + s->needed_full_dwt_levels = MAX6(s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels, s->motion_levels, s->mad_levels); int ref_process_width, ref_process_height, dist_process_width, dist_process_height, process_wh_div_factor; @@ -475,16 +503,25 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, tdist_width = (dist_process_width + (process_wh_div_factor * 3/4)) / process_wh_div_factor; tdist_height = (dist_process_height + (process_wh_div_factor * 3/4)) / process_wh_div_factor; + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + 
s->frame_buf_len.buf_size[level][subband] = tref_width * tref_height; + s->frame_buf_len.total_buf_size += s->frame_buf_len.buf_size[level][subband]; + } + err |= alloc_dwt2buffers(&s->ref_dwt2out[level], tref_width, tref_height); err |= alloc_dwt2buffers(&s->dist_dwt2out[level], tdist_width, tdist_height); - s->prev_ref[level].bands[0] = NULL; - s->prev_dist[level].bands[0] = NULL; - - for(int subband = 1; subband < 4; subband++) { - s->prev_ref[level].bands[subband] = (float*) calloc(tref_width * tref_height, sizeof(float)); - s->prev_dist[level].bands[subband] = (float*) calloc(tref_width * tref_height, sizeof(float)); + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->prev_ref[level].bands[subband] = NULL; + s->prev_dist[level].bands[subband] = NULL; } + s->prev_ref[level].width = s->ref_dwt2out[level].width; + s->prev_ref[level].height = s->ref_dwt2out[level].height; + s->prev_ref[level].stride = s->ref_dwt2out[level].stride; + + s->prev_dist[level].width = s->dist_dwt2out[level].width; + s->prev_dist[level].height = s->dist_dwt2out[level].height; + s->prev_dist[level].stride = s->dist_dwt2out[level].stride; last_w = (int) (last_w + 1) / 2; last_h = (int) (last_h + 1) / 2; @@ -518,6 +555,7 @@ static int extract(VmafFeatureExtractor *fex, { FunqueState *s = fex->priv; int err = 0; + int mt_err = 0; (void) ref_pic_90; (void) dist_pic_90; @@ -525,6 +563,8 @@ static int extract(VmafFeatureExtractor *fex, VmafPicture *res_ref_pic = &s->res_ref_pic; VmafPicture *res_dist_pic = &s->res_dist_pic; + VmafFrameSyncContext *framesync = fex->framesync; + if(s->enable_resize) { res_ref_pic->bpc = ref_pic->bpc; @@ -635,6 +675,9 @@ static int extract(VmafFeatureExtractor *fex, } #endif SsimScore ssim_score[MAX_LEVELS]; + double motion_score[MAX_LEVELS]; + double mad_score[MAX_LEVELS]; + MsSsimScore ms_ssim_score[MAX_LEVELS]; s->score = ms_ssim_score; double adm_score[MAX_LEVELS], adm_score_num[MAX_LEVELS], adm_score_den[MAX_LEVELS]; @@ -658,6 +701,44 @@ static 
int extract(VmafFeatureExtractor *fex, s->strred_scores.temp_vals_cumsum = 0; s->strred_scores.spat_temp_vals_cumsum = 0; + float *spat_scales_ref[DEFAULT_STRRED_LEVELS][DEFAULT_STRRED_SUBBANDS]; + float *spat_scales_dist[DEFAULT_STRRED_LEVELS][DEFAULT_STRRED_SUBBANDS]; + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + + if((s->strred_levels != 0) && (index != 0)) { + for(int level = 0; level <= s->strred_levels - 1; level++) { + for(size_t subband = 1; subband < total_subbands; subband++) { + size_t x_reflect = (size_t) ((STRRED_WINDOW_SIZE - 1) / 2); + size_t r_width = s->ref_dwt2out[level].width + (2 * x_reflect); + size_t r_height = s->ref_dwt2out[level].height + (2 * x_reflect); + + spat_scales_ref[level][subband] = + (float *) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + spat_scales_dist[level][subband] = + (float *) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + } + } + } + + float *shared_buf, *shared_buf_temp; + // Total_buf_size is multiplied by 2 for ref and dist + mt_err = + vmaf_framesync_acquire_new_buf(framesync, (void **) &shared_buf, + s->frame_buf_len.total_buf_size * 2 * sizeof(float), index); + if(mt_err) + return mt_err; + + shared_buf_temp = shared_buf; + // Distibute the big buffer to smaller ones for each levels and bands + for(int level = 0; level < s->needed_dwt_levels; level++) { + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->shared_ref[level].bands[subband] = shared_buf; + s->shared_dist[level].bands[subband] = shared_buf + s->frame_buf_len.total_buf_size; + + shared_buf += s->frame_buf_len.buf_size[level][subband]; + } + } + for (int level = 0; level < s->needed_dwt_levels; level++) { // pre-compute the next level of DWT if (level+1 < s->needed_dwt_levels) { @@ -674,7 +755,7 @@ static int extract(VmafFeatureExtractor *fex, } if (!s->enable_spatial_csf) { - if (level < s->adm_levels || level < s->ssim_levels || level < s->ms_ssim_levels || level < s->strred_levels) { + if (level < s->adm_levels 
|| level < s->ssim_levels || level < s->ms_ssim_levels || level < s->strred_levels || level < s->motion_levels || level < s->mad_levels) { // we need full CSF on all bands funque_dwt2_inplace_csf(&s->ref_dwt2out[level], s->csf_factors[level], 0, 3); funque_dwt2_inplace_csf(&s->dist_dwt2out[level], s->csf_factors[level], 0, 3); @@ -685,6 +766,17 @@ static int extract(VmafFeatureExtractor *fex, } } + // Function to copy all bands from ref_dwt2out, dist_dwt2out (2 copies) + err |= copy_frame_funque(&s->ref_dwt2out[level], &s->dist_dwt2out[level], + &s->shared_ref[level], &s->shared_dist[level], + s->ref_dwt2out[level].width, s->ref_dwt2out[level].height); + } + + mt_err = vmaf_framesync_submit_filled_data(framesync, shared_buf_temp, index); + if(mt_err) + return mt_err; + + for(int level = 0; level < s->needed_dwt_levels; level++) { if ((s->adm_levels != 0) && (level <= s->adm_levels - 1)) { err |= compute_adm_funque(s->ref_dwt2out[level], s->dist_dwt2out[level], &adm_score[level], &adm_score_num[level], &adm_score_den[level], ADM_BORDER_FACTOR); adm_num += adm_score_num[level]; @@ -721,29 +813,72 @@ static int extract(VmafFeatureExtractor *fex, vif_den += vif_score_den[level]; } - if((s->strred_levels != 0) && (level <= s->strred_levels - 1)) { - if(index == 0) { - err |= copy_prev_frame_strred_funque( - &s->ref_dwt2out[level], &s->dist_dwt2out[level], &s->prev_ref[level], - &s->prev_dist[level], s->ref_dwt2out[level].width, - s->ref_dwt2out[level].height); + if((s->strred_levels != 0) && (level <= s->strred_levels - 1) && (index != 0)) { + err |= compute_srred_funque(&s->ref_dwt2out[level], &s->dist_dwt2out[level], + s->ref_dwt2out[level].width, s->ref_dwt2out[level].height, + spat_scales_ref[level], spat_scales_dist[level], + &s->strred_scores, BLOCK_SIZE, level); + } + + if((s->mad_levels != 0) && (level <= s->mad_levels - 1)) { + err |= compute_mad_funque(s->ref_dwt2out[level].bands[0], s->dist_dwt2out[level].bands[0], + s->ref_dwt2out[level].width, 
s->ref_dwt2out[level].height, + s->prev_ref[level].stride, s->ref_dwt2out[level].stride, &mad_score[level]); + } + + if(err) + return err; + } + + float *dependent_buf, *dependent_buf_temp; + if(index != 0) { + mt_err = + vmaf_framesync_retrieve_filled_data(framesync, (void **) &dependent_buf, (index - 1)); + if(mt_err) + return mt_err; + + dependent_buf_temp = dependent_buf; + + // Distribute buffers + for(int level = 0; level < s->needed_dwt_levels; level++) { + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->prev_ref[level].bands[subband] = dependent_buf; + s->prev_dist[level].bands[subband] = + dependent_buf + s->frame_buf_len.total_buf_size; + + dependent_buf += s->frame_buf_len.buf_size[level][subband]; } - else { + } + } + + for(int level = 0; level < s->needed_dwt_levels; level++) { + if((s->strred_levels != 0) && (level <= s->strred_levels - 1)) { + if(index != 0) { err |= compute_strred_funque( &s->ref_dwt2out[level], &s->dist_dwt2out[level], &s->prev_ref[level], &s->prev_dist[level], s->ref_dwt2out[level].width, s->ref_dwt2out[level].height, - &s->strred_scores, BLOCK_SIZE, level); + spat_scales_ref[level], spat_scales_dist[level], &s->strred_scores, BLOCK_SIZE, + level); + } + } - err |= copy_prev_frame_strred_funque( - &s->ref_dwt2out[level], &s->dist_dwt2out[level], &s->prev_ref[level], - &s->prev_dist[level], s->ref_dwt2out[level].width, - s->ref_dwt2out[level].height); + if((s->motion_levels != 0) && (level <= s->motion_levels - 1)) { + if(index != 0) { + err |= compute_motion_funque(s->prev_ref[level].bands[0], s->ref_dwt2out[level].bands[0], + s->ref_dwt2out[level].width, s->ref_dwt2out[level].height, + s->prev_ref[level].stride, s->ref_dwt2out[level].stride, &motion_score[level]); } } if (err) return err; } + if(index != 0) { + mt_err = vmaf_framesync_release_buf(framesync, dependent_buf_temp, (index - 1)); + if(mt_err) + return mt_err; + } + if(s->ms_ssim_levels != 0) { err |= compute_ms_ssim_mean_scales(ms_ssim_score, 
s->ms_ssim_levels); } @@ -835,13 +970,87 @@ if (s->ssim_levels > 0) { ssim_score[2].mink3, index); if (s->ssim_levels > 3) { + err |= vmaf_feature_collector_append_with_dict(feature_collector, + s->feature_name_dict, "FUNQUE_feature_ssim_mean_scale3_score", + ssim_score[3].mean, index); + + err |= vmaf_feature_collector_append_with_dict(feature_collector, + s->feature_name_dict, "FUNQUE_feature_ssim_mink3_scale3_score", + ssim_score[3].mink3, index); + } + } + } +} + +if(s->motion_levels > 0) { + if(index == 0) { + err |= + vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale0_score", 0, index); + + if(s->motion_levels > 1) { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale1_score", 0, + index); + + if(s->motion_levels > 2) { err |= vmaf_feature_collector_append_with_dict( - feature_collector, s->feature_name_dict, - "FUNQUE_feature_ssim_mean_scale3_score", ssim_score[3].mean, index); + feature_collector, s->feature_name_dict, "FUNQUE_feature_motion_scale2_score", + 0, index); + if(s->motion_levels > 3) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale3_score", 0, index); + } + } + } + } else { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale0_score", + motion_score[0], index); + + if(s->motion_levels > 1) { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale1_score", + motion_score[1], index); + + if(s->motion_levels > 2) { err |= vmaf_feature_collector_append_with_dict( - feature_collector, s->feature_name_dict, - "FUNQUE_feature_ssim_mink3_scale3_score", ssim_score[3].mink3, index); + feature_collector, s->feature_name_dict, "FUNQUE_feature_motion_scale2_score", + motion_score[2], index); + + if(s->motion_levels > 3) { + 
err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_feature_motion_scale3_score", motion_score[3], index); + } + } + } + } +} + +if(s->mad_levels > 0){ + { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_mad_scale0_score", + mad_score[0], index); + + if(s->mad_levels > 1) { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_feature_mad_scale1_score", + mad_score[1], index); + + if(s->mad_levels > 2) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, "FUNQUE_feature_mad_scale2_score", + mad_score[2], index); + + if(s->mad_levels > 3) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_feature_mad_scale3_score", mad_score[3], index); + } } } } @@ -940,6 +1149,15 @@ if(s->ms_ssim_levels > 0) { free(var_y_cum); free(cov_xy_cum); + if((s->strred_levels != 0) && (index != 0)) { + for(int level = 0; level <= s->strred_levels - 1; level++) { + for(size_t subband = 1; subband < total_subbands; subband++) { + free(spat_scales_ref[level][subband]); + free(spat_scales_dist[level][subband]); + } + } + } + return err; } @@ -964,11 +1182,6 @@ static int close(VmafFeatureExtractor *fex) if (s->ref_dwt2out[level].bands[i]) aligned_free(s->ref_dwt2out[level].bands[i]); if (s->dist_dwt2out[level].bands[i]) aligned_free(s->dist_dwt2out[level].bands[i]); } - for(unsigned i=1; i<4; i++) - { - if (s->prev_ref[level].bands[i]) free(s->prev_ref[level].bands[i]); - if (s->prev_dist[level].bands[i]) free(s->prev_dist[level].bands[i]); - } } vmaf_dictionary_free(&s->feature_name_dict); @@ -988,6 +1201,13 @@ static const char *provided_features[] = { "FUNQUE_feature_ssim_mean_scale2_score", "FUNQUE_feature_ssim_mean_scale3_score", "FUNQUE_feature_ssim_mink3_scale0_score", "FUNQUE_feature_ssim_mink3_scale1_score", 
"FUNQUE_feature_ssim_mink3_scale2_score", "FUNQUE_feature_ssim_mink3_scale3_score", + + "FUNQUE_feature_motion_scale0_score", "FUNQUE_feature_motion_scale1_score", + "FUNQUE_feature_motion_scale2_score", "FUNQUE_feature_motion_scale3_score", + + "FUNQUE_feature_mad_scale0_score", "FUNQUE_feature_mad_scale1_score", + "FUNQUE_feature_mad_scale2_score", "FUNQUE_feature_mad_scale3_score", + "FUNQUE_feature_strred_scale0_score", "FUNQUE_feature_strred_scale1_score", "FUNQUE_feature_strred_scale2_score", "FUNQUE_feature_strred_scale3_score", @@ -1007,4 +1227,5 @@ VmafFeatureExtractor vmaf_fex_float_funque = { .close = close, .priv_size = sizeof(FunqueState), .provided_features = provided_features, + .flags = VMAF_FEATURE_FRAME_SYNC, }; \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/funque_global_options.h b/libvmaf/src/feature/third_party/funque/funque_global_options.h index bd94d8349..be79a03ca 100644 --- a/libvmaf/src/feature/third_party/funque/funque_global_options.h +++ b/libvmaf/src/feature/third_party/funque/funque_global_options.h @@ -13,6 +13,11 @@ #define MAX_LEVELS 4 #define MIN_LEVELS 0 +#define DEFAULT_BANDS 4 +typedef struct FrameBufLen { + int buf_size[4][4]; + int total_buf_size; +} FrameBufLen; #endif //VMAF_FUNQUE_GLOBAL_OPTIONS_H diff --git a/libvmaf/src/feature/third_party/funque/funque_motion.c b/libvmaf/src/feature/third_party/funque/funque_motion.c index dd9582811..8b489ab3e 100644 --- a/libvmaf/src/feature/third_party/funque/funque_motion.c +++ b/libvmaf/src/feature/third_party/funque/funque_motion.c @@ -33,14 +33,14 @@ float funque_image_mad_c(const float *img1, const float *img2, int width, int he float accum = (float)0.0; for (int i = 0; i < height; ++i) { - float accum_line = (float)0.0; + float accum_line = (float)0.0; for (int j = 0; j < width; ++j) { float img1px = img1[i * img1_stride + j]; float img2px = img2[i * img2_stride + j]; accum_line += fabs(img1px - img2px); } - accum += accum_line; + accum += 
accum_line; } return (float) (accum / (width * height)); @@ -49,7 +49,31 @@ float funque_image_mad_c(const float *img1, const float *img2, int width, int he /** * Note: ref_stride and dis_stride are in terms of bytes */ -int compute_motion_funque(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score) +int compute_motion_funque(const float *prev, const float *curr, int w, int h, int prev_stride, int curr_stride, double *score) +{ + + if (prev_stride % sizeof(float) != 0) + { + printf("error: prev_stride %% sizeof(float) != 0, prev_stride = %d, sizeof(float) = %zu.\n", prev_stride, sizeof(float)); + fflush(stdout); + goto fail; + } + if (curr_stride % sizeof(float) != 0) + { + printf("error: curr_stride %% sizeof(float) != 0, curr_stride = %d, sizeof(float) = %zu.\n", curr_stride, sizeof(float)); + fflush(stdout); + goto fail; + } + // stride for funque_image_mad_c is in terms of (sizeof(float) bytes) + *score = funque_image_mad_c(prev, curr, w, h, prev_stride / sizeof(float), curr_stride / sizeof(float)); + + return 0; + +fail: + return 1; +} + +int compute_mad_funque(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score) { if (ref_stride % sizeof(float) != 0) diff --git a/libvmaf/src/feature/third_party/funque/funque_motion.h b/libvmaf/src/feature/third_party/funque/funque_motion.h index 9c3676e40..0a9216455 100644 --- a/libvmaf/src/feature/third_party/funque/funque_motion.h +++ b/libvmaf/src/feature/third_party/funque/funque_motion.h @@ -15,5 +15,9 @@ * limitations under the License. 
* */ +#define DEFAULT_MOTION_LEVELS 4 +#define DEFAULT_MAD_LEVELS 4 -int compute_motion_funque(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score); +int compute_motion_funque(const float *prev, const float *curr, int w, int h, int ref_stride, int dis_stride, double *score); + +int compute_mad_funque(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score); diff --git a/libvmaf/src/feature/third_party/funque/funque_picture_copy.c b/libvmaf/src/feature/third_party/funque/funque_picture_copy.c index 3e44ba4fb..8c674ce0f 100644 --- a/libvmaf/src/feature/third_party/funque/funque_picture_copy.c +++ b/libvmaf/src/feature/third_party/funque/funque_picture_copy.c @@ -17,10 +17,11 @@ */ #include - +#include #include #include "funque_filters.h" +#include "funque_global_options.h" void funque_picture_copy_hbd(float *dst, ptrdiff_t dst_stride, VmafPicture *src, int offset, int width, int height) @@ -57,3 +58,25 @@ void funque_picture_copy(float *dst, ptrdiff_t dst_stride, return; } + +int copy_frame_funque(const struct dwt2buffers *ref, const struct dwt2buffers *dist, + struct dwt2buffers *shared_ref, struct dwt2buffers *shared_dist, size_t width, + size_t height) +{ + int subband; + int total_subbands = DEFAULT_BANDS; + + for(subband = 0; subband < total_subbands; subband++) { + memcpy(shared_ref->bands[subband], ref->bands[subband], width * height * sizeof(float)); + memcpy(shared_dist->bands[subband], dist->bands[subband], width * height * sizeof(float)); + } + shared_ref->width = ref->width; + shared_ref->height = ref->height; + shared_ref->stride = ref->stride; + + shared_dist->width = dist->width; + shared_dist->height = dist->height; + shared_dist->stride = dist->stride; + + return 0; +} diff --git a/libvmaf/src/feature/third_party/funque/funque_picture_copy.h b/libvmaf/src/feature/third_party/funque/funque_picture_copy.h index 4eb0d54dd..31425c28b 100644 --- 
a/libvmaf/src/feature/third_party/funque/funque_picture_copy.h +++ b/libvmaf/src/feature/third_party/funque/funque_picture_copy.h @@ -19,3 +19,7 @@ void funque_picture_copy(float *dst, ptrdiff_t dst_stride, VmafPicture *src, int offset, unsigned bpc, int width, int height); + +int copy_frame_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, + struct dwt2buffers* shared_ref, struct dwt2buffers* shared_dist, size_t width, + size_t height); diff --git a/libvmaf/src/feature/third_party/funque/funque_ssim.h b/libvmaf/src/feature/third_party/funque/funque_ssim.h index 3be1e618b..5eae2e748 100644 --- a/libvmaf/src/feature/third_party/funque/funque_ssim.h +++ b/libvmaf/src/feature/third_party/funque/funque_ssim.h @@ -20,4 +20,4 @@ int compute_ssim_funque(dwt2buffers *ref, dwt2buffers *dist, SsimScore *score, i int compute_ms_ssim_funque(dwt2buffers *ref, dwt2buffers *dist, MsSsimScore *score, int max_val, float K1, float K2, int n_levels); int mean_2x2_ms_ssim_funque(float *var_x_cum, float *var_y_cum, float *cov_xy_cum, int width, int height, int level); -int compute_ms_ssim_mean_scales(MsSsimScore *score, int n_levels); \ No newline at end of file +int compute_ms_ssim_mean_scales(MsSsimScore *score, int n_levels); diff --git a/libvmaf/src/feature/third_party/funque/funque_strred.c b/libvmaf/src/feature/third_party/funque/funque_strred.c index e63b503b5..bc8002ba4 100644 --- a/libvmaf/src/feature/third_party/funque/funque_strred.c +++ b/libvmaf/src/feature/third_party/funque/funque_strred.c @@ -171,36 +171,13 @@ void subract_subbands(const float* ref_src, const float* ref_prev_src, float* re } } -int copy_prev_frame_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, - struct strredbuffers* prev_ref, struct strredbuffers* prev_dist, - size_t width, size_t height) -{ - int subband; - int total_subbands = DEFAULT_STRRED_SUBBANDS; - - for(subband = 1; subband < total_subbands; subband++) { - memcpy(prev_ref->bands[subband], 
ref->bands[subband], width * height * sizeof(float)); - memcpy(prev_dist->bands[subband], dist->bands[subband], width * height * sizeof(float)); - } - prev_ref->width = ref->width; - prev_ref->height = ref->height; - prev_ref->stride = ref->stride; - - prev_dist->width = dist->width; - prev_dist->height = dist->height; - prev_dist->stride = dist->stride; - - return 0; -} - -int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, - struct strredbuffers* prev_ref, struct strredbuffers* prev_dist, - size_t width, size_t height, struct strred_results* strred_scores, - int block_size, int level) +int compute_srred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, + size_t width, size_t height, float** spat_scales_ref, + float** spat_scales_dist, struct strred_results* strred_scores, + int block_size, int level) { size_t subband; float spat_abs, spat_values[DEFAULT_STRRED_SUBBANDS]; - float temp_abs, temp_values[DEFAULT_STRRED_SUBBANDS]; size_t total_subbands = DEFAULT_STRRED_SUBBANDS; size_t x_reflect = (size_t) ((STRRED_WINDOW_SIZE - 1) / 2); @@ -209,10 +186,6 @@ int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffer float* entropies_ref = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); float* entropies_dist = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); - float* spat_scales_ref = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); - float* spat_scales_dist = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); - float* temp_scales_ref = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); - float* temp_scales_dist = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); float* spat_aggregate = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); for(subband = 1; subband < total_subbands; subband++) { @@ -220,15 +193,15 @@ int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffer 
spat_abs = 0; rred_entropies_and_scales(ref->bands[subband], block_size, width, height, entropies_ref, - spat_scales_ref); + spat_scales_ref[subband]); rred_entropies_and_scales(dist->bands[subband], block_size, width, height, entropies_dist, - spat_scales_dist); + spat_scales_dist[subband]); for(i = 0; i < r_height; i++) { for(j = 0; j < r_width; j++) { spat_aggregate[i * r_width + j] = - entropies_ref[i * r_width + j] * spat_scales_ref[i * r_width + j] - - entropies_dist[i * r_width + j] * spat_scales_dist[i * r_width + j]; + entropies_ref[i * r_width + j] * spat_scales_ref[subband][i * r_width + j] - + entropies_dist[i * r_width + j] * spat_scales_dist[subband][i * r_width + j]; } } @@ -238,6 +211,47 @@ int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffer } } spat_values[subband] = spat_abs / (height * width); + } + + strred_scores->spat_vals[level] = (spat_values[1] + spat_values[2] + spat_values[3]) / 3; + + // Add equations to compute S-RRED using norm factors + int norm_factor = 1, num_level; + for(num_level = 0; num_level <= level; num_level++) + norm_factor = num_level + 1; + + strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; + + strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; + + free(entropies_ref); + free(entropies_dist); + free(spat_aggregate); + + return 0; +} + +int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, + struct strredbuffers* prev_ref, struct strredbuffers* prev_dist, + size_t width, size_t height, float** spat_scales_ref, + float** spat_scales_dist, struct strred_results* strred_scores, + int block_size, int level) +{ + size_t subband; + float temp_abs, temp_values[DEFAULT_STRRED_SUBBANDS]; + + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + size_t x_reflect = (size_t) ((STRRED_WINDOW_SIZE - 1) / 2); + size_t r_width = width + (2 * x_reflect); + size_t r_height = height + (2 * x_reflect); + + float* entropies_ref = 
(float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + float* entropies_dist = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + float* temp_scales_ref = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + float* temp_scales_dist = (float*) calloc((r_width + 1) * (r_height + 1), sizeof(float)); + + for(subband = 1; subband < total_subbands; subband++) { + size_t i, j; if(prev_ref != NULL && prev_dist != NULL) { float* ref_temporal = (float*) calloc((width) * (height), sizeof(float)); @@ -258,9 +272,9 @@ int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffer for(i = 0; i < r_height; i++) { for(j = 0; j < r_width; j++) { temp_aggregate[i * r_width + j] = - entropies_ref[i * r_width + j] * spat_scales_ref[i * r_width + j] * + entropies_ref[i * r_width + j] * spat_scales_ref[subband][i * r_width + j] * temp_scales_ref[i * r_width + j] - - entropies_dist[i * r_width + j] * spat_scales_dist[i * r_width + j] * + entropies_dist[i * r_width + j] * spat_scales_dist[subband][i * r_width + j] * temp_scales_dist[i * r_width + j]; } } @@ -281,31 +295,25 @@ int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffer } } - strred_scores->spat_vals[level] = (spat_values[1] + spat_values[2] + spat_values[3]) / 3; strred_scores->temp_vals[level] = (temp_values[1] + temp_values[2] + temp_values[3]) / 3; strred_scores->spat_temp_vals[level] = strred_scores->spat_vals[level] * strred_scores->temp_vals[level]; // Add equations to compute ST-RRED using norm factors - int norm_factor, num_level; + int norm_factor = 1, num_level; for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; - strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level]; strred_scores->spat_temp_vals_cumsum += strred_scores->spat_temp_vals[level]; - strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; 
strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor; strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor; free(entropies_ref); free(entropies_dist); - free(spat_scales_ref); - free(spat_scales_dist); free(temp_scales_ref); free(temp_scales_dist); - free(spat_aggregate); return 0; } \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/funque_strred.h b/libvmaf/src/feature/third_party/funque/funque_strred.h index 4bf0d8fb9..3b76a2b8c 100644 --- a/libvmaf/src/feature/third_party/funque/funque_strred.h +++ b/libvmaf/src/feature/third_party/funque/funque_strred.h @@ -37,11 +37,13 @@ typedef struct strred_results { } strred_results; +int compute_srred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, + size_t width, size_t height, float** spat_scales_ref, + float** spat_scales_dist, struct strred_results* strred_scores, + int block_size, int level); + int compute_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, struct strredbuffers* prev_ref, struct strredbuffers* prev_dist, - size_t width, size_t height, struct strred_results* strred_scores, - int block_size, int level); - -int copy_prev_frame_strred_funque(const struct dwt2buffers* ref, const struct dwt2buffers* dist, - struct strredbuffers* prev_ref, struct strredbuffers* prev_dist, - size_t width, size_t height); \ No newline at end of file + size_t width, size_t height, float** spat_scales_ref, + float** spat_scales_dist, struct strred_results* strred_scores, + int block_size, int level); \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/integer_funque.c b/libvmaf/src/feature/third_party/funque/integer_funque.c index 243e47747..cbd976255 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque.c +++ b/libvmaf/src/feature/third_party/funque/integer_funque.c @@ -23,6 +23,7 @@ // #include "config.h" #include "dict.h" +#include "framesync.h" 
#include "feature_collector.h" #include "feature_extractor.h" #include "feature_name.h" @@ -70,9 +71,9 @@ #include "x86/resizer_avx512.h" #include "x86/integer_funque_ssim_avx512.h" #include "x86/integer_funque_adm_avx512.h" +#include "x86/integer_funque_motion_avx512.h" #include "x86/integer_funque_vif_avx512.h" #include "x86/integer_funque_strred_avx512.h" - #endif #endif @@ -111,6 +112,8 @@ typedef struct IntFunqueState i_dwt2buffers i_dist_dwt2out[4]; i_dwt2buffers i_prev_ref[4]; i_dwt2buffers i_prev_dist[4]; + i_dwt2buffers i_shared_ref[4]; + i_dwt2buffers i_shared_dist[4]; // funque configurable parameters bool enable_resize; @@ -120,9 +123,10 @@ typedef struct IntFunqueState int needed_dwt_levels; int needed_full_dwt_levels; int ssim_levels; - int ms_ssim_levels; int strred_levels; + int motion_levels; + int mad_levels; double norm_view_dist; int ref_display_height; int i_process_ref_width; @@ -146,6 +150,7 @@ typedef struct IntFunqueState ResizerState resize_module; strred_results strred_scores; MsSsimScore_int *score; + FrameBufLen frame_buf_len; } IntFunqueState; @@ -312,6 +317,26 @@ static const VmafOption options[] = { .min = MIN_LEVELS, .max = MAX_LEVELS, }, + { + .name = "motion_levels", + .alias = "motion", + .help = "Number of levels in MOTION", + .offset = offsetof(IntFunqueState, motion_levels), + .type = VMAF_OPT_TYPE_INT, + .default_val.i = DEFAULT_MOTION_LEVELS, + .min = MIN_LEVELS, + .max = MAX_LEVELS, + }, + { + .name = "mad_levels", + .alias = "mad", + .help = "Number of levels in Mean absolute difference", + .offset = offsetof(IntFunqueState, mad_levels), + .type = VMAF_OPT_TYPE_INT, + .default_val.i = DEFAULT_MAD_LEVELS, + .min = MIN_LEVELS, + .max = MAX_LEVELS, + }, {0}}; @@ -380,6 +405,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, (void)bpc; IntFunqueState *s = fex->priv; + s->frame_buf_len.total_buf_size = 0; + s->feature_name_dict = vmaf_feature_name_dict_from_provided_features(fex->provided_features, 
fex->options, s); @@ -393,8 +420,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, } s->needed_dwt_levels = - MAX5(s->vif_levels, s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels); - s->needed_full_dwt_levels = MAX4(s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels); + MAX7(s->vif_levels, s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels, s->motion_levels, s->mad_levels); + s->needed_full_dwt_levels = MAX6(s->adm_levels, s->ssim_levels, s->ms_ssim_levels, s->strred_levels, s->motion_levels, s->mad_levels); int ref_process_width, ref_process_height, dist_process_width, dist_process_height, process_wh_div_factor; @@ -605,14 +632,14 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, err |= integer_alloc_dwt2buffers(&s->i_ref_dwt2out[level], tref_width, tref_height); err |= integer_alloc_dwt2buffers(&s->i_dist_dwt2out[level], tdist_width, tdist_height); - s->i_prev_ref[level].bands[0] = NULL; - s->i_prev_dist[level].bands[0] = NULL; + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->frame_buf_len.buf_size[level][subband] = tref_width * tref_height; + s->frame_buf_len.total_buf_size += s->frame_buf_len.buf_size[level][subband]; + } - for(int subband = 1; subband < 4; subband++) { - s->i_prev_ref[level].bands[subband] = - calloc(tref_width * tref_height, sizeof(dwt2_dtype)); - s->i_prev_dist[level].bands[subband] = - calloc(tref_width * tref_height, sizeof(dwt2_dtype)); + for(int subband = 0; subband < 4; subband++) { + s->i_prev_ref[level].bands[subband] = NULL; + s->i_prev_dist[level].bands[subband] = NULL; } s->i_prev_ref[level].width = s->i_ref_dwt2out[level].width; @@ -639,7 +666,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_compute_ms_ssim_funque = integer_compute_ms_ssim_funque_c; s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_c; 
s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_c; - s->modules.integer_compute_motion_funque = integer_compute_motion_funque; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_c; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_c; s->modules.integer_funque_adm_decouple = integer_adm_decouple_c; s->modules.integer_adm_integralimg_numscore = integer_adm_integralimg_numscore_c; s->modules.integer_compute_vif_funque = integer_compute_vif_funque_c; @@ -647,6 +675,7 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->resize_module.hbd_resizer_step = hbd_step; s->modules.integer_funque_vifdwt2_band0 = integer_funque_vifdwt2_band0; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_c; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_c; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; @@ -667,6 +696,8 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_compute_ms_ssim_funque = integer_compute_ms_ssim_funque_c; s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_neon; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_neon; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_neon; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_neon; s->modules.integer_funque_adm_decouple = integer_adm_decouple_neon; s->modules.integer_compute_vif_funque = integer_compute_vif_funque_neon; //Commenting this since C was performing better @@ -674,6 +705,7 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, // s->modules.integer_funque_image_mad = integer_funque_image_mad_neon; // s->modules.integer_adm_integralimg_numscore = integer_adm_integralimg_numscore_neon; + s->modules.integer_compute_srred_funque = 
integer_compute_srred_funque_neon; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_neon; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; #else @@ -693,8 +725,11 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_c; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_c; s->modules.integer_funque_adm_decouple = integer_adm_decouple_c; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_c; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_c; s->resize_module.resizer_step = step; s->resize_module.hbd_resizer_step = hbd_step; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_c; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_c; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; #endif @@ -726,8 +761,12 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_compute_ms_ssim_funque = integer_compute_ms_ssim_funque_avx2; s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_avx2; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_avx2; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_avx2; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_avx2; + s->modules.integer_funque_adm_decouple = integer_adm_decouple_avx2; s->resize_module.resizer_step = step_avx2; s->resize_module.hbd_resizer_step = hbd_step_avx2; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_avx2; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_avx2; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; #else @@ -747,9 +786,11 @@ static int 
init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_c; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_c; s->modules.integer_funque_adm_decouple = integer_adm_decouple_c; - s->modules.integer_compute_motion_funque = integer_compute_motion_funque; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_c; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_c; s->resize_module.resizer_step = step; s->resize_module.hbd_resizer_step = hbd_step; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_c; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_c; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; #endif @@ -772,9 +813,12 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, s->modules.integer_compute_ms_ssim_funque = integer_compute_ms_ssim_funque_avx512; s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_avx512; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_avx512; - s->modules.integer_compute_motion_funque = integer_compute_motion_funque; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_avx512; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_avx512; + s->modules.integer_funque_adm_decouple = integer_adm_decouple_avx512; s->resize_module.resizer_step = step_avx512; s->resize_module.hbd_resizer_step = hbd_step_avx512; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_avx512; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_avx512; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; @@ -795,9 +839,11 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, 
s->modules.integer_mean_2x2_ms_ssim_funque = integer_mean_2x2_ms_ssim_funque_c; s->modules.integer_ms_ssim_shift_cum_buffer_funque = integer_ms_ssim_shift_cum_buffer_funque_c; s->modules.integer_funque_adm_decouple = integer_adm_decouple_c; - s->modules.integer_compute_motion_funque = integer_compute_motion_funque; + s->modules.integer_compute_motion_funque = integer_compute_motion_funque_c; + s->modules.integer_compute_mad_funque = integer_compute_mad_funque_c; s->resize_module.resizer_step = step; s->resize_module.hbd_resizer_step = hbd_step; + s->modules.integer_compute_srred_funque = integer_compute_srred_funque_c; s->modules.integer_compute_strred_funque = integer_compute_strred_funque_c; s->modules.integer_copy_prev_frame_strred_funque = integer_copy_prev_frame_strred_funque_c; #endif @@ -832,10 +878,6 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt, aligned_free(s->i_ref_dwt2out[level].bands[i]); if(s->i_dist_dwt2out[level].bands[i]) aligned_free(s->i_dist_dwt2out[level].bands[i]); - if(s->i_prev_ref[level].bands[i]) - aligned_free(s->i_prev_ref[level].bands[i]); - if(s->i_prev_dist[level].bands[i]) - aligned_free(s->i_prev_dist[level].bands[i]); } } vmaf_dictionary_free(&s->feature_name_dict); @@ -850,6 +892,9 @@ static int extract(VmafFeatureExtractor *fex, { IntFunqueState *s = fex->priv; int err = 0; + int mt_err = 0; + + VmafFrameSyncContext *framesync = fex->framesync; (void)ref_pic_90; (void)dist_pic_90; @@ -1003,7 +1048,8 @@ static int extract(VmafFeatureExtractor *fex, SsimScore_int ssim_score[MAX_LEVELS]; MsSsimScore_int ms_ssim_score[MAX_LEVELS]; - // s->score = &ms_ssim_score; + double motion_score[MAX_LEVELS]; + double mad_score[MAX_LEVELS]; s->score = ms_ssim_score; double adm_score[MAX_LEVELS], adm_score_num[MAX_LEVELS], adm_score_den[MAX_LEVELS]; double vif_score[MAX_LEVELS], vif_score_num[MAX_LEVELS], vif_score_den[MAX_LEVELS]; @@ -1031,9 +1077,44 @@ static int extract(VmafFeatureExtractor *fex, 
s->strred_scores.temp_vals_cumsum = 0; s->strred_scores.spat_temp_vals_cumsum = 0; - for(int level = 0; level < s->needed_dwt_levels; - level++) // For ST-RRED Debugging level set to 0 - { + float *spat_scales_ref[DEFAULT_STRRED_LEVELS][DEFAULT_STRRED_SUBBANDS]; + float *spat_scales_dist[DEFAULT_STRRED_LEVELS][DEFAULT_STRRED_SUBBANDS]; + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + + if((s->strred_levels != 0) && (index != 0)) { + for(int level = 0; level <= s->strred_levels - 1; level++) { + for(size_t subband = 1; subband < total_subbands; subband++) { + size_t x_reflect = (size_t) ((STRRED_WINDOW_SIZE - 1) / 2); + size_t r_width = s->i_ref_dwt2out[level].width + (2 * x_reflect); + size_t r_height = s->i_ref_dwt2out[level].height + (2 * x_reflect); + + spat_scales_ref[level][subband] = + (float *) calloc(r_width * r_height, sizeof(float)); + spat_scales_dist[level][subband] = + (float *) calloc(r_width * r_height, sizeof(float)); + } + } + } + + dwt2_dtype *shared_buf, *shared_buf_temp; + // Total_buf_size is multiplied by 2 for ref and dist + mt_err = vmaf_framesync_acquire_new_buf( + framesync, (void **) &shared_buf, s->frame_buf_len.total_buf_size * 2 * sizeof(dwt2_dtype), index); + if(mt_err) + return mt_err; + + shared_buf_temp = shared_buf; + // Distibute the big buffer to smaller ones for each levels and bands + for(int level = 0; level < s->needed_dwt_levels; level++) { + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->i_shared_ref[level].bands[subband] = shared_buf; + s->i_shared_dist[level].bands[subband] = shared_buf + s->frame_buf_len.total_buf_size; + + shared_buf += s->frame_buf_len.buf_size[level][subband]; + } + } + + for(int level = 0; level < s->needed_dwt_levels; level++) { if(level + 1 < s->needed_dwt_levels) { if(level + 1 > s->needed_full_dwt_levels - 1) { // from here on out we only need approx band for VIF @@ -1062,7 +1143,7 @@ static int extract(VmafFeatureExtractor *fex, if(!s->enable_spatial_csf) { if(level < 
s->adm_levels || level < s->ssim_levels || level < s->ms_ssim_levels - || level < s->strred_levels) { + || level < s->strred_levels || level < s->motion_levels || level < s->mad_levels) { // we need full CSF on all bands s->modules.integer_funque_dwt2_inplace_csf( &s->i_ref_dwt2out[level], s->csf_factors[level], 0, 3, @@ -1081,6 +1162,19 @@ static int extract(VmafFeatureExtractor *fex, } } + // Function to copy all bands from i_ref_dwt2out, i_dist_dwt2out (2 copies) + err |= integer_copy_frame_funque(&s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], + &s->i_shared_ref[level], &s->i_shared_dist[level], + s->i_ref_dwt2out[level].width, + s->i_ref_dwt2out[level].height); + } + + mt_err = vmaf_framesync_submit_filled_data(framesync, shared_buf_temp, index); + if(mt_err) + return mt_err; + + for(int level = 0; level < s->needed_dwt_levels; level++) { + // TODO: Need to modify for crop width and height if((s->adm_levels != 0) && (level <= s->adm_levels - 1)) { float adm_pending_div = (float) (((int) pending_div_factor) >> (level)); if(!s->enable_spatial_csf) @@ -1097,7 +1191,6 @@ static int extract(VmafFeatureExtractor *fex, if(err) return err; } - if((s->ms_ssim_levels != 0) && (level < s->ms_ssim_levels)) { int pending_div_c1 = (int) pending_div_factor >> (level); int pending_div_c2 = (int) pending_div_factor >> (level); @@ -1113,7 +1206,6 @@ static int extract(VmafFeatureExtractor *fex, &s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], &ms_ssim_score[level], 1, 0.01, 0.03, pending_div_c1, pending_div_c2, pending_div_offset, pending_div_halfround, s->adm_div_lookup, (level + 1), (int) (s->enable_spatial_csf == false)); - err = s->modules.integer_mean_2x2_ms_ssim_funque(var_x_cum, var_y_cum, cov_xy_cum, s->i_ref_dwt2out[level].width, s->i_ref_dwt2out[level].height, level); @@ -1175,29 +1267,87 @@ static int extract(VmafFeatureExtractor *fex, return err; } + if((s->strred_levels != 0) && (level <= s->strred_levels - 1) && (index != 0)) { + int32_t 
strred_pending_div = spatfilter_shifts + dwt_shifts - level; + + err |= s->modules.integer_compute_srred_funque( + &s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], s->i_ref_dwt2out[level].width, + s->i_ref_dwt2out[level].height, spat_scales_ref[level], spat_scales_dist[level], + &s->strred_scores, BLOCK_SIZE, level, s->log_lut, strred_pending_div, + (double) 0.1, s->enable_spatial_csf, s->csf_pending_div[level]); + + if(err) + return err; + } + + if((s->mad_levels != 0) && (level <= s->mad_levels - 1)) { + int mad_pending_div = (s->enable_spatial_csf) ? (spatfilter_shifts + dwt_shifts - level) : + s->csf_pending_div[level][0]; + + err |= s->modules.integer_compute_mad_funque(s->i_ref_dwt2out[level].bands[0], s->i_dist_dwt2out[level].bands[0], + s->i_ref_dwt2out[level].width, s->i_ref_dwt2out[level].height, + s->i_prev_ref[level].stride, s->i_ref_dwt2out[level].stride, mad_pending_div, &mad_score[level]); + if(err) + return err; + } + } + + dwt2_dtype *dependent_buf, *dependent_buf_temp; + dependent_buf_temp = NULL; + if(index != 0) { + mt_err = + vmaf_framesync_retrieve_filled_data(framesync, (void **) &dependent_buf, (index - 1)); + if(mt_err) + return mt_err; + + dependent_buf_temp = dependent_buf; + + // Distribute buffers + for(int level = 0; level < s->needed_dwt_levels; level++) { + for(int subband = 0; subband < DEFAULT_BANDS; subband++) { + s->i_prev_ref[level].bands[subband] = dependent_buf; + s->i_prev_dist[level].bands[subband] = + dependent_buf + s->frame_buf_len.total_buf_size; + + dependent_buf += s->frame_buf_len.buf_size[level][subband]; + } + } + } + + for(int level = 0; level < s->needed_dwt_levels; level++) { if((s->strred_levels != 0) && (level <= s->strred_levels - 1)) { int32_t strred_pending_div = spatfilter_shifts + dwt_shifts - level; - if(index == 0) { - err |= s->modules.integer_copy_prev_frame_strred_funque( - &s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], &s->i_prev_ref[level], - &s->i_prev_dist[level], 
s->i_ref_dwt2out[level].width, - s->i_ref_dwt2out[level].height); - } else { + if(index != 0) { err |= s->modules.integer_compute_strred_funque( &s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], &s->i_prev_ref[level], &s->i_prev_dist[level], s->i_ref_dwt2out[level].width, - s->i_ref_dwt2out[level].height, &s->strred_scores, BLOCK_SIZE, level, - s->log_lut, strred_pending_div, (double) 0.1, s->enable_spatial_csf, s->csf_pending_div[level]); - - err |= s->modules.integer_copy_prev_frame_strred_funque( - &s->i_ref_dwt2out[level], &s->i_dist_dwt2out[level], &s->i_prev_ref[level], - &s->i_prev_dist[level], s->i_ref_dwt2out[level].width, - s->i_ref_dwt2out[level].height); + s->i_ref_dwt2out[level].height, spat_scales_ref[level], spat_scales_dist[level], + &s->strred_scores, BLOCK_SIZE, level, s->log_lut, strred_pending_div, + (double) 0.1, s->enable_spatial_csf, s->csf_pending_div[level]); } if(err) return err; } + + if((s->motion_levels != 0) && (level <= s->motion_levels - 1)) { + int motion_pending_div = (s->enable_spatial_csf) ? 
(spatfilter_shifts + dwt_shifts - level) : + s->csf_pending_div[level][0]; + + if(index != 0) { + err |= s->modules.integer_compute_motion_funque(s->i_prev_ref[level].bands[0], s->i_ref_dwt2out[level].bands[0], + s->i_ref_dwt2out[level].width, s->i_ref_dwt2out[level].height, + s->i_prev_ref[level].stride, s->i_ref_dwt2out[level].stride, motion_pending_div, &motion_score[level]); + } + else + motion_score[level] = 0; + } + } + + if(index != 0) { + mt_err = vmaf_framesync_release_buf(framesync, dependent_buf_temp, (index - 1)); + if(mt_err) + return mt_err; } if(s->ms_ssim_levels != 0) { @@ -1302,6 +1452,61 @@ static int extract(VmafFeatureExtractor *fex, } } + if(s->motion_levels > 0) { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_motion_scale0_score", + motion_score[0], index); + if(s->motion_levels > 1) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_motion_scale1_score", motion_score[1], + index); + + if(s->motion_levels > 2) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_motion_scale2_score", motion_score[2], + index); + + if(s->motion_levels > 3) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_motion_scale3_score", + motion_score[3], index); + } + } + } + + } + + if(s->mad_levels > 0){ + { + err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_mad_scale0_score", + mad_score[0], index); + if(s->mad_levels > 1) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_mad_scale1_score", mad_score[1], + index); + + if(s->mad_levels > 2) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + 
"FUNQUE_integer_feature_mad_scale2_score", mad_score[2], + index); + + if(s->mad_levels > 3) { + err |= vmaf_feature_collector_append_with_dict( + feature_collector, s->feature_name_dict, + "FUNQUE_integer_feature_mad_scale3_score", + mad_score[3], index); + } + } + } + } + } + if(s->strred_levels > 0) { err |= vmaf_feature_collector_append_with_dict(feature_collector, s->feature_name_dict, "FUNQUE_integer_feature_strred_scale0_score", @@ -1378,6 +1583,15 @@ static int extract(VmafFeatureExtractor *fex, free(var_y_cum); free(cov_xy_cum); + if((s->strred_levels != 0) && (index != 0)) { + for(int level = 0; level <= s->strred_levels - 1; level++) { + for(size_t subband = 1; subband < total_subbands; subband++) { + free(spat_scales_ref[level][subband]); + free(spat_scales_dist[level][subband]); + } + } + } + return err; } @@ -1399,10 +1613,6 @@ static int close(VmafFeatureExtractor *fex) aligned_free(s->i_ref_dwt2out[level].bands[i]); if(s->i_dist_dwt2out[level].bands[i]) aligned_free(s->i_dist_dwt2out[level].bands[i]); - if(s->i_prev_ref[level].bands[i]) - aligned_free(s->i_prev_ref[level].bands[i]); - if(s->i_prev_dist[level].bands[i]) - aligned_free(s->i_prev_dist[level].bands[i]); } } vmaf_dictionary_free(&s->feature_name_dict); @@ -1436,6 +1646,16 @@ static const char *provided_features[] = { "FUNQUE_integer_feature_strred_scale2_score", "FUNQUE_integer_feature_strred_scale3_score", + "FUNQUE_integer_feature_motion_scale0_score", + "FUNQUE_integer_feature_motion_scale1_score", + "FUNQUE_integer_feature_motion_scale2_score", + "FUNQUE_integer_feature_motion_scale3_score", + + "FUNQUE_integer_feature_mad_scale0_score", + "FUNQUE_integer_feature_mad_scale1_score", + "FUNQUE_integer_feature_mad_scale2_score", + "FUNQUE_integer_feature_mad_scale3_score", + "FUNQUE_integer_feature_ms_ssim_mean_scale0_score", "FUNQUE_integer_feature_ms_ssim_mean_scale1_score", "FUNQUE_integer_feature_ms_ssim_mean_scale2_score", @@ -1456,4 +1676,5 @@ VmafFeatureExtractor 
vmaf_fex_integer_funque = { .close = close, .priv_size = sizeof(IntFunqueState), .provided_features = provided_features, + .flags = VMAF_FEATURE_FRAME_SYNC, }; \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/integer_funque_filters.h b/libvmaf/src/feature/third_party/funque/integer_funque_filters.h index 7f2e9817a..5668edf9d 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque_filters.h +++ b/libvmaf/src/feature/third_party/funque/integer_funque_filters.h @@ -157,7 +157,8 @@ typedef struct ModuleFunqueState int32_t *cov_xy_cum, int width, int height, int level); int (*integer_ms_ssim_shift_cum_buffer_funque)(int32_t *var_x_cum, int32_t *var_y_cum, int32_t *cov_xy_cum, int width, int height, int level, uint8_t csf_pending_div[4], uint8_t csf_pending_div_lp1[4]); - double (*integer_compute_motion_funque)(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, float pending_div_factor, double *score); + int (*integer_compute_motion_funque)(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor, double *score); + int (*integer_compute_mad_funque)(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor, double *score); void (*integer_funque_adm_decouple)(i_dwt2buffers ref, i_dwt2buffers dist, i_dwt2buffers i_dlm_rest, int32_t *i_dlm_add, int32_t *adm_div_lookup, float border_size, double *adm_score_den, float adm_pending_div); void (*integer_adm_integralimg_numscore)(i_dwt2buffers pyr_1, int32_t *x_pad, int k, @@ -179,14 +180,18 @@ typedef struct ModuleFunqueState #endif // void (*resizer_step)(const unsigned char *_src, unsigned char *_dst, const int *xofs, const int *yofs, const short *_alpha, const short *_beta, int iwidth, int iheight, int dwidth, int dheight, int channels, int ksize, int start, int end, int xmin, int xmax); - int 
(*integer_compute_strred_funque)(const struct i_dwt2buffers *ref, - const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, - struct i_dwt2buffers *prev_dist, size_t width, - size_t height, struct strred_results *strred_scores, - int block_size, int level, - uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, - uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); + int (*integer_compute_srred_funque)( + const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, size_t width, + size_t height, float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); + + int (*integer_compute_strred_funque)( + const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, + size_t height, float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); int (*integer_copy_prev_frame_strred_funque)(const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, @@ -276,7 +281,7 @@ static const uint8_t i_hill_interim_shift[4][4] = { {13, 15, 14, 15}, {13, 16, 16, 16}, }; - + static const uint8_t i_hill_pending_div_factors[4][4] = { #if BAND_HVD_SAME_PENDING_DIV {6, 10, 10, 10}, // L0 diff --git a/libvmaf/src/feature/third_party/funque/integer_funque_motion.c b/libvmaf/src/feature/third_party/funque/integer_funque_motion.c index 772375e34..dbccf0843 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque_motion.c +++ b/libvmaf/src/feature/third_party/funque/integer_funque_motion.c @@ -30,7 +30,7 @@ /** * Note: img1_stride and img2_stride are in terms of (sizeof(double) bytes) */ -double integer_funque_image_mad_c(const 
dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor) +double integer_funque_image_mad_c(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, int pending_div_factor) { motion_accum_dtype accum = 0; @@ -52,11 +52,38 @@ double integer_funque_image_mad_c(const dwt2_dtype *img1, const dwt2_dtype *img2 } /** - * Note: ref_stride and dis_stride are in terms of bytes + * Note: prev_stride and curr_stride are in terms of bytes */ -int integer_compute_motion_funque(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, float pending_div_factor, double *score) +int integer_compute_motion_funque_c(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score) { + int pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (prev_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: prev_stride %% sizeof(dwt2_dtype) != 0, prev_stride = %d, sizeof(dwt2_dtype) = %zu.\n", prev_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (curr_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: curr_stride %% sizeof(dwt2_dtype) != 0, curr_stride = %d, sizeof(dwt2_dtype) = %zu.\n", curr_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_c(prev, curr, w, h, prev_stride / sizeof(dwt2_dtype), curr_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + +fail: + return 1; +} + +int integer_compute_mad_funque_c(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score) +{ + int pending_div_factor = (1 << pending_div_factor_arg) * 255; if (ref_stride % sizeof(dwt2_dtype) != 0) { diff --git 
a/libvmaf/src/feature/third_party/funque/integer_funque_motion.h b/libvmaf/src/feature/third_party/funque/integer_funque_motion.h index 3b4f2c465..ea46659a1 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque_motion.h +++ b/libvmaf/src/feature/third_party/funque/integer_funque_motion.h @@ -17,6 +17,9 @@ */ #include "integer_funque_filters.h" +#define DEFAULT_MOTION_LEVELS 4 +#define DEFAULT_MAD_LEVELS 4 -double integer_funque_image_mad_c(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor); -int integer_compute_motion_funque(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, float pending_div_factor, double *score); +double integer_funque_image_mad_c(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, int pending_div_factor); +int integer_compute_motion_funque_c(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor, double *score); +int integer_compute_mad_funque_c(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor, double *score); diff --git a/libvmaf/src/feature/third_party/funque/integer_funque_strred.c b/libvmaf/src/feature/third_party/funque/integer_funque_strred.c index 1651f62a7..b5e544601 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque_strred.c +++ b/libvmaf/src/feature/third_party/funque/integer_funque_strred.c @@ -27,26 +27,6 @@ #include "common/macros.h" #include "integer_funque_strred.h" -// just change the store offset to reduce multiple calculation when getting log2f value -void strred_funque_log_generate(uint32_t *log_18) -{ - uint64_t i; - uint64_t start = (unsigned int) pow(2, 17); - uint64_t end = (unsigned int) pow(2, 18); - for(i = start; i < end; i++) { - log_18[i] = (uint32_t) round(log2((double) i) * (1 << 
STRRED_Q_FORMAT)); - } -} - -void strred_funque_generate_log22(uint32_t *log_22) -{ - uint64_t i; - uint64_t start = (unsigned int) pow(2, 21); - uint64_t end = (unsigned int) pow(2, 22); - for(i = start; i < end; i++) { - log_22[i] = (uint32_t) round(log2((double) i) * (1 << STRRED_Q_FORMAT)); - } -} void strred_integer_reflect_pad(const dwt2_dtype *src, size_t width, size_t height, int reflect, dwt2_dtype *dest) @@ -117,11 +97,10 @@ void integer_subract_subbands_c(const dwt2_dtype *ref_src, const dwt2_dtype *ref } } -int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, - const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, - size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, +int integer_compute_srred_funque_c(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, uint32_t *log_lut, int32_t shift_val_arg, double sigma_nsq_t, uint8_t check_enable_spatial_csf, uint8_t csf_pending_div[4]) { @@ -129,19 +108,10 @@ int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, UNUSED(block_size); size_t total_subbands = DEFAULT_STRRED_SUBBANDS; size_t subband; - float spat_values[DEFAULT_STRRED_SUBBANDS], temp_values[DEFAULT_STRRED_SUBBANDS]; - float fspat_val[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + float spat_values[DEFAULT_STRRED_SUBBANDS], fspat_val[DEFAULT_STRRED_SUBBANDS]; uint8_t enable_temp = 0; int32_t shift_val; - /* amount of reflecting */ - int x_reflect = (int) ((STRRED_WINDOW_SIZE - 1) / 2); - size_t r_width = width + (2 * x_reflect); - size_t r_height = height + (2 * x_reflect); - - float *scales_spat_x = (float *) calloc(r_width * r_height, sizeof(float)); - float *scales_spat_y = (float *) calloc(r_width * r_height, sizeof(float)); - for(subband = 1; 
subband < total_subbands; subband++) { enable_temp = 0; spat_values[subband] = 0; @@ -153,8 +123,49 @@ int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, } spat_values[subband] = integer_rred_entropies_and_scales( ref->bands[subband], dist->bands[subband], width, height, log_lut, sigma_nsq_t, - shift_val, enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + shift_val, enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); fspat_val[subband] = spat_values[subband] / (width * height); + } + + strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; + + // Add equations to compute S-RRED using norm factors + int norm_factor = 1, num_level; + for(num_level = 0; num_level <= level; num_level++) + norm_factor = num_level + 1; + + strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; + + strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; + + ret = 0; + return ret; +} + +int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf, uint8_t csf_pending_div[4]) +{ + int ret; + UNUSED(block_size); + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + size_t subband; + float temp_values[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + uint8_t enable_temp = 0; + int32_t shift_val; + + for(subband = 1; subband < total_subbands; subband++) { + if(check_enable_spatial_csf == 1) + shift_val = 2 * shift_val_arg; + else { + shift_val = 2 * csf_pending_div[subband]; + } if(prev_ref != NULL && prev_dist != NULL) { enable_temp = 1; @@ -167,14 +178,14 @@ int 
integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, dist_temporal, width, height); temp_values[subband] = integer_rred_entropies_and_scales( ref_temporal, dist_temporal, width, height, log_lut, sigma_nsq_t, shift_val, - enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); ftemp_val[subband] = temp_values[subband] / (width * height); free(ref_temporal); free(dist_temporal); } } - strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; strred_scores->temp_vals[level] = (ftemp_val[1] + ftemp_val[2] + ftemp_val[3]) / 3; strred_scores->spat_temp_vals[level] = strred_scores->spat_vals[level] * strred_scores->temp_vals[level]; @@ -184,17 +195,12 @@ int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; - strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level]; strred_scores->spat_temp_vals_cumsum += strred_scores->spat_temp_vals[level]; - strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor; strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor; - free(scales_spat_x); - free(scales_spat_y); - ret = 0; return ret; } \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/integer_funque_strred.h b/libvmaf/src/feature/third_party/funque/integer_funque_strred.h index 949051a51..09d23ce8f 100644 --- a/libvmaf/src/feature/third_party/funque/integer_funque_strred.h +++ b/libvmaf/src/feature/third_party/funque/integer_funque_strred.h @@ -26,13 +26,22 @@ #define LOGE_BASE2 1.442684682 +int integer_compute_srred_funque_c(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t 
width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val, + double sigma_nsq_t, uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); + int integer_compute_strred_funque_c(const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, - size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val, - double sigma_nsq_t, uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val, double sigma_nsq_t, + uint8_t enable_spatial_csf, uint8_t csf_pending_div[4]); + int integer_copy_prev_frame_strred_funque_c(const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, struct i_dwt2buffers *prev_ref, @@ -74,30 +83,6 @@ FORCE_INLINE inline uint32_t strred_get_best_bits_from_u64(uint64_t temp, int *x return (uint32_t) temp; } -FORCE_INLINE inline uint32_t strred_get_best_u22_from_u64(uint64_t temp, int *x) -{ - int k = __builtin_clzll(temp); - - if(k > 42) { - k -= 42; - temp = temp << k; - *x = -k; - - } else if(k < 41) { - k = 42 - k; - temp = (temp + (1 << (k - 1))) >> k; - *x = k; - } else { - *x = 0; - if(temp >> 22) { - temp = temp >> 1; - *x = 1; - } - } - - return (uint32_t) temp; -} - static inline float strred_horz_integralsum_spatial_csf( int kw, int width_p1, int16_t knorm_fact, int16_t knorm_shift, uint32_t entr_const, double sigma_nsq_arg, uint32_t *log_lut, int32_t *interim_1_x, diff --git a/libvmaf/src/feature/third_party/funque/integer_picture_copy.c b/libvmaf/src/feature/third_party/funque/integer_picture_copy.c index 0f3166dc2..d1f1c76fc 100644 --- 
a/libvmaf/src/feature/third_party/funque/integer_picture_copy.c +++ b/libvmaf/src/feature/third_party/funque/integer_picture_copy.c @@ -17,6 +17,7 @@ */ #include +#include #include @@ -49,3 +50,27 @@ void integer_funque_picture_copy(void *src, spat_fil_output_dtype *dst, int dst_ return; } + +int integer_copy_frame_funque(const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, + struct i_dwt2buffers *shared_ref, struct i_dwt2buffers *shared_dist, + size_t width, size_t height) +{ + int subband; + int total_subbands = DEFAULT_BANDS; + + for(subband = 0; subband < total_subbands; subband++) { + memcpy(shared_ref->bands[subband], ref->bands[subband], + width * height * sizeof(dwt2_dtype)); + memcpy(shared_dist->bands[subband], dist->bands[subband], + width * height * sizeof(dwt2_dtype)); + } + shared_ref->width = ref->width; + shared_ref->height = ref->height; + shared_ref->stride = ref->stride; + + shared_dist->width = dist->width; + shared_dist->height = dist->height; + shared_dist->stride = dist->stride; + + return 0; +} \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/integer_picture_copy.h b/libvmaf/src/feature/third_party/funque/integer_picture_copy.h index 71e4a401f..181a79649 100644 --- a/libvmaf/src/feature/third_party/funque/integer_picture_copy.h +++ b/libvmaf/src/feature/third_party/funque/integer_picture_copy.h @@ -20,3 +20,7 @@ void integer_funque_picture_copy(void *src, spat_fil_output_dtype *dst, int dst_stride, int width, int height, int bitdepth); + +int integer_copy_frame_funque(const struct i_dwt2buffers* ref, const struct i_dwt2buffers* dist, + struct i_dwt2buffers* shared_ref, struct i_dwt2buffers* shared_dist, + size_t width, size_t height); \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/resizer.h b/libvmaf/src/feature/third_party/funque/resizer.h index 1c5193935..a67e062f6 100644 --- a/libvmaf/src/feature/third_party/funque/resizer.h +++ 
b/libvmaf/src/feature/third_party/funque/resizer.h @@ -32,6 +32,8 @@ : X) #define MAX(LEFT, RIGHT) (LEFT > RIGHT ? LEFT : RIGHT) #define MIN(LEFT, RIGHT) (LEFT < RIGHT ? LEFT : RIGHT) +#define MAX7(A, B, C, D, E, F, G) MAX(MAX(MAX(MAX(MAX(MAX(A, B), C), D), E), F), G) +#define MAX6(A, B, C, D, E, F) MAX(MAX(MAX(MAX(MAX(A, B), C), D), E), F) #define MAX5(A, B, C, D, E) MAX(MAX(MAX(MAX(A, B), C), D), E) #define MAX4(A, B, C, D) MAX(MAX(MAX(A, B), C), D) diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_filters_avx2.h b/libvmaf/src/feature/third_party/funque/x86/integer_funque_filters_avx2.h index 617a8155f..c2a4b5c9b 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_filters_avx2.h +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_filters_avx2.h @@ -55,5 +55,4 @@ void integer_spatial_5tap_filter_avx2(void *src, spat_fil_output_dtype *dst, int void integer_funque_dwt2_inplace_csf_avx2(const i_dwt2buffers *src, spat_fil_coeff_dtype factors[4], int min_theta, int max_theta, uint16_t interim_rnd_factors[4], - uint8_t interim_shift_factors[4], int level, - i_dwt2buffers *dst); + uint8_t interim_shift_factors[4], int level); \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.c b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.c index 653e4c783..af4167fec 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.c +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.c @@ -44,7 +44,7 @@ double integer_funque_image_mad_avx2(const dwt2_dtype *img1, const dwt2_dtype *i for (int i = 0; i < height; ++i) { int j = 0; - motion_interaccum_dtype accum_line = 0; + motion_interaccum_dtype accum_line = 0; __m256i accum_line_256 = _mm256_setzero_si256(); for (; j < width_32; j+=32) { @@ -122,4 +122,58 @@ double integer_funque_image_mad_avx2(const dwt2_dtype *img1, const dwt2_dtype *i double d_accum = 
(double) accum / pending_div_factor; return (d_accum / (width * height)); -} \ No newline at end of file +} + +int integer_compute_motion_funque_avx2(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score) +{ + + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (prev_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: prev_stride %% sizeof(dwt2_dtype) != 0, prev_stride = %d, sizeof(dwt2_dtype) = %zu.\n", prev_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (curr_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: curr_stride %% sizeof(dwt2_dtype) != 0, curr_stride = %d, sizeof(dwt2_dtype) = %zu.\n", curr_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_avx2(prev, curr, w, h, prev_stride / sizeof(dwt2_dtype), curr_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + +fail: + return 1; +} + +int integer_compute_mad_funque_avx2(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score) +{ + + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (ref_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: ref_stride %% sizeof(dwt2_dtype) != 0, ref_stride = %d, sizeof(dwt2_dtype) = %zu.\n", ref_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (dis_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: dis_stride %% sizeof(dwt2_dtype) != 0, dis_stride = %d, sizeof(dwt2_dtype) = %zu.\n", dis_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_avx2(ref, dis, w, h, ref_stride / sizeof(dwt2_dtype), dis_stride / sizeof(dwt2_dtype), pending_div_factor); + 
+ return 0; + +fail: + return 1; +} diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.h b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.h index 13ccfa5f3..530bc5823 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.h +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx2.h @@ -22,4 +22,5 @@ #include "../integer_funque_filters.h" #include "../integer_funque_motion.h" -double integer_funque_image_mad_avx2(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor); \ No newline at end of file +int integer_compute_motion_funque_avx2(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score); +int integer_compute_mad_funque_avx2(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score); diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.c b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.c new file mode 100644 index 000000000..e340ed9aa --- /dev/null +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.c @@ -0,0 +1,134 @@ +/** + * + * Copyright 2016-2020 Netflix, Inc. + * + * Licensed under the BSD+Patent License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSDplusPatent + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "integer_funque_motion_avx512.h" + +/** + * Note: img1_stride and img2_stride are in terms of (sizeof(double) bytes) + */ +double integer_funque_image_mad_avx512(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor) +{ + motion_accum_dtype accum = 0; + int i = 0; + int j = 0; + for(i = 0; i < height; ++i) + { + motion_interaccum_dtype accum_line = 0; + for(j = 0; j < width - 16; j =+ 16) + { + __m512i img1px = _mm512_loadu_si512((__m512i*) (img1 + i * img1_stride + j)); + __m512i img2px = _mm512_loadu_si512((__m512i*) (img2 + i * img2_stride + j)); + + __m512i img1px_lower = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(img1px , 0)); + __m512i img1px_upper = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(img1px , 1)); + + __m512i img2px_lower = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(img2px , 0)); + __m512i img2px_upper = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(img2px , 1)); + + __m512i img_diff_lower = _mm512_abs_epi32(_mm512_sub_epi32(img1px_lower , img2px_lower)); + __m512i img_diff_upper = _mm512_abs_epi32(_mm512_sub_epi32(img1px_upper , img2px_upper)); + + __m512i sum_32x16 = _mm512_add_epi32(img_diff_lower , img_diff_upper); + + accum_line += (motion_interaccum_dtype) _mm512_reduce_add_epi32(sum_32x16); + // assuming it is 4k video, max accum_inner is 2^16*3840 + } + for(; j < width; ++j) + { + dwt2_dtype img1px = img1[i * img1_stride + j]; + dwt2_dtype img2px = img2[i * img2_stride + j]; + + accum_line += (motion_interaccum_dtype) abs(img1px - img2px); + // assuming it is 4k video, max accum_inner is 2^16*3840 + } + accum += (motion_accum_dtype) accum_line; + // assuming it is 4k video, max accum is 2^16*3840*1920 which uses upto 39bits + } + + double d_accum = (double) accum / pending_div_factor; + return (d_accum / (width * 
height)); +} +/** + * Note: prev_stride and curr_stride are in terms of bytes + */ + +int integer_compute_motion_funque_avx512(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score) +{ + + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (prev_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: prev_stride %% sizeof(dwt2_dtype) != 0, prev_stride = %d, sizeof(dwt2_dtype) = %zu.\n", prev_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (curr_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: curr_stride %% sizeof(dwt2_dtype) != 0, curr_stride = %d, sizeof(dwt2_dtype) = %zu.\n", curr_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_avx512(prev, curr, w, h, prev_stride / sizeof(dwt2_dtype), curr_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + +fail: + return 1; +} + +int integer_compute_mad_funque_avx512(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score) +{ + + float pending_div_factor = (1 << pending_div_factor_arg) * 255; + + if (ref_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: ref_stride %% sizeof(dwt2_dtype) != 0, ref_stride = %d, sizeof(dwt2_dtype) = %zu.\n", ref_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + if (dis_stride % sizeof(dwt2_dtype) != 0) + { + printf("error: dis_stride %% sizeof(dwt2_dtype) != 0, dis_stride = %d, sizeof(dwt2_dtype) = %zu.\n", dis_stride, sizeof(dwt2_dtype)); + fflush(stdout); + goto fail; + } + // stride for integer_funque_image_mad_c is in terms of (sizeof(dwt2_dtype) bytes) + + *score = integer_funque_image_mad_avx512(ref, dis, w, h, ref_stride / sizeof(dwt2_dtype), dis_stride / sizeof(dwt2_dtype), pending_div_factor); + + return 0; + 
+fail: + return 1; +} diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.h b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.h new file mode 100644 index 000000000..512b762d9 --- /dev/null +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_motion_avx512.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: BSD-3-Clause +* Copyright (C) 2022 Intel Corporation. +*/ +/** + * + * Copyright 2016-2020 Netflix, Inc. + * + * Licensed under the BSD+Patent License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSDplusPatent + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "../integer_funque_filters.h" +#include "../integer_funque_motion.h" + +int integer_compute_mad_funque_avx512(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score); +int integer_compute_motion_funque_avx512(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score); \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.c b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.c index 95c6ae69b..dbf54a44e 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.c +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.c @@ -437,29 +437,21 @@ float integer_rred_entropies_and_scales_avx2(const dwt2_dtype *x_t, const dwt2_d return agg_abs_accum; } -int integer_compute_strred_funque_avx2( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val_arg, double sigma_nsq_t, uint8_t check_enable_spatial_csf) +int integer_compute_srred_funque_avx2(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf) { int ret; UNUSED(block_size); size_t total_subbands = DEFAULT_STRRED_SUBBANDS; size_t subband; - float spat_values[DEFAULT_STRRED_SUBBANDS], temp_values[DEFAULT_STRRED_SUBBANDS]; - float fspat_val[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + float spat_values[DEFAULT_STRRED_SUBBANDS], 
fspat_val[DEFAULT_STRRED_SUBBANDS]; uint8_t enable_temp = 0; int32_t shift_val; - /* amount of reflecting */ - int x_reflect = (int) ((STRRED_WINDOW_SIZE - 1) / 2); - size_t r_width = width + (2 * x_reflect); - size_t r_height = height + (2 * x_reflect); - - float *scales_spat_x = (float *) calloc(r_width * r_height, sizeof(float)); - float *scales_spat_y = (float *) calloc(r_width * r_height, sizeof(float)); - for(subband = 1; subband < total_subbands; subband++) { enable_temp = 0; spat_values[subband] = 0; @@ -471,8 +463,48 @@ int integer_compute_strred_funque_avx2( } spat_values[subband] = integer_rred_entropies_and_scales_avx2( ref->bands[subband], dist->bands[subband], width, height, log_lut, sigma_nsq_t, - shift_val, enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + shift_val, enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); fspat_val[subband] = spat_values[subband] / (width * height); + } + + strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; + + // Add equations to compute S-RRED using norm factors + int norm_factor = 1, num_level; + for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; + + strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; + + strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; + + ret = 0; + return ret; +} + +int integer_compute_strred_funque_avx2(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf) +{ + int ret; + UNUSED(block_size); + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + size_t subband; + float 
temp_values[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + uint8_t enable_temp = 0; + int32_t shift_val; + + for(subband = 1; subband < total_subbands; subband++) { + if(check_enable_spatial_csf == 1) + shift_val = 2 * shift_val_arg; + else { + shift_val = 2 * i_nadenau_pending_div_factors[level][subband]; + } if(prev_ref != NULL && prev_dist != NULL) { enable_temp = 1; @@ -480,19 +512,19 @@ int integer_compute_strred_funque_avx2( dwt2_dtype *dist_temporal = (dwt2_dtype *) calloc(width * height, sizeof(dwt2_dtype)); temp_values[subband] = 0; - integer_subract_subbands_avx2(ref->bands[subband], prev_ref->bands[subband], - ref_temporal, dist->bands[subband], - prev_dist->bands[subband], dist_temporal, width, height); + integer_subract_subbands_avx2(ref->bands[subband], prev_ref->bands[subband], ref_temporal, + dist->bands[subband], prev_dist->bands[subband], + dist_temporal, width, height); temp_values[subband] = integer_rred_entropies_and_scales_avx2( ref_temporal, dist_temporal, width, height, log_lut, sigma_nsq_t, shift_val, - enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); ftemp_val[subband] = temp_values[subband] / (width * height); free(ref_temporal); free(dist_temporal); } } - strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; strred_scores->temp_vals[level] = (ftemp_val[1] + ftemp_val[2] + ftemp_val[3]) / 3; strred_scores->spat_temp_vals[level] = strred_scores->spat_vals[level] * strred_scores->temp_vals[level]; @@ -501,17 +533,12 @@ int integer_compute_strred_funque_avx2( int norm_factor = 1, num_level; for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; - strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level]; strred_scores->spat_temp_vals_cumsum += 
strred_scores->spat_temp_vals[level]; - strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor; strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor; - free(scales_spat_x); - free(scales_spat_y); - ret = 0; return ret; } \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.h b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.h index e5a67c2bf..26c59c86c 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.h +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx2.h @@ -21,11 +21,21 @@ #include "../common/macros.h" #include "../funque_global_options.h" -int integer_compute_strred_funque_avx2( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf); +int integer_compute_srred_funque_avx2(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf); + +int integer_compute_strred_funque_avx2(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf); void 
integer_subract_subbands_avx2(const dwt2_dtype *ref_src, const dwt2_dtype *ref_prev_src, dwt2_dtype *ref_dst, const dwt2_dtype *dist_src, diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.c b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.c index d6175aab4..77d12e02f 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.c +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.c @@ -446,29 +446,21 @@ float integer_rred_entropies_and_scales_avx512(const dwt2_dtype *x_t, const dwt2 return agg_abs_accum; } -int integer_compute_strred_funque_avx512( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val_arg, double sigma_nsq_t, uint8_t check_enable_spatial_csf) +int integer_compute_srred_funque_avx512(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf) { int ret; UNUSED(block_size); size_t total_subbands = DEFAULT_STRRED_SUBBANDS; size_t subband; - float spat_values[DEFAULT_STRRED_SUBBANDS], temp_values[DEFAULT_STRRED_SUBBANDS]; - float fspat_val[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + float spat_values[DEFAULT_STRRED_SUBBANDS], fspat_val[DEFAULT_STRRED_SUBBANDS]; uint8_t enable_temp = 0; int32_t shift_val; - /* amount of reflecting */ - int x_reflect = (int) ((STRRED_WINDOW_SIZE - 1) / 2); - size_t r_width = width + (2 * x_reflect); - size_t r_height = height + (2 * x_reflect); - - float *scales_spat_x = (float *) calloc(r_width * r_height, sizeof(float)); - 
float *scales_spat_y = (float *) calloc(r_width * r_height, sizeof(float)); - for(subband = 1; subband < total_subbands; subband++) { enable_temp = 0; spat_values[subband] = 0; @@ -479,9 +471,49 @@ int integer_compute_strred_funque_avx512( shift_val = 2 * i_nadenau_pending_div_factors[level][subband]; } spat_values[subband] = integer_rred_entropies_and_scales_avx512( - ref->bands[subband], dist->bands[subband], width, height, log_18, log_22, sigma_nsq_t, - shift_val, enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + ref->bands[subband], dist->bands[subband], width, height, log_lut, sigma_nsq_t, + shift_val, enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); fspat_val[subband] = spat_values[subband] / (width * height); + } + + strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; + + // Add equations to compute S-RRED using norm factors + int norm_factor = 1, num_level; + for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; + + strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; + + strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; + + ret = 0; + return ret; +} + +int integer_compute_strred_funque_avx512(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf) +{ + int ret; + UNUSED(block_size); + size_t total_subbands = DEFAULT_STRRED_SUBBANDS; + size_t subband; + float temp_values[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS]; + uint8_t enable_temp = 0; + int32_t shift_val; + + for(subband = 1; subband < total_subbands; subband++) { + 
if(check_enable_spatial_csf == 1) + shift_val = 2 * shift_val_arg; + else { + shift_val = 2 * i_nadenau_pending_div_factors[level][subband]; + } if(prev_ref != NULL && prev_dist != NULL) { enable_temp = 1; @@ -489,19 +521,19 @@ int integer_compute_strred_funque_avx512( dwt2_dtype *dist_temporal = (dwt2_dtype *) calloc(width * height, sizeof(dwt2_dtype)); temp_values[subband] = 0; - integer_subract_subbands_avx512( - ref->bands[subband], prev_ref->bands[subband], ref_temporal, dist->bands[subband], - prev_dist->bands[subband], dist_temporal, width, height); + integer_subract_subbands_avx512(ref->bands[subband], prev_ref->bands[subband], ref_temporal, + dist->bands[subband], prev_dist->bands[subband], + dist_temporal, width, height); temp_values[subband] = integer_rred_entropies_and_scales_avx512( ref_temporal, dist_temporal, width, height, log_lut, sigma_nsq_t, shift_val, - enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf); + enable_temp, spat_scales_ref[subband], spat_scales_dist[subband], + check_enable_spatial_csf); ftemp_val[subband] = temp_values[subband] / (width * height); free(ref_temporal); free(dist_temporal); } } - strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3; strred_scores->temp_vals[level] = (ftemp_val[1] + ftemp_val[2] + ftemp_val[3]) / 3; strred_scores->spat_temp_vals[level] = strred_scores->spat_vals[level] * strred_scores->temp_vals[level]; @@ -510,17 +542,12 @@ int integer_compute_strred_funque_avx512( int norm_factor = 1, num_level; for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1; - strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level]; strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level]; strred_scores->spat_temp_vals_cumsum += strred_scores->spat_temp_vals[level]; - strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor; strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor; 
strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor; - free(scales_spat_x); - free(scales_spat_y); - ret = 0; return ret; } \ No newline at end of file diff --git a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.h b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.h index 280e06fad..879f5b0eb 100644 --- a/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.h +++ b/libvmaf/src/feature/third_party/funque/x86/integer_funque_strred_avx512.h @@ -21,11 +21,21 @@ #include "../common/macros.h" #include "../funque_global_options.h" -int integer_compute_strred_funque_avx512( - const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist, - struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height, - struct strred_results *strred_scores, int block_size, int level, - uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf); +int integer_compute_srred_funque_avx512(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, size_t width, size_t height, + float **spat_scales_ref, float **spat_scales_dist, + struct strred_results *strred_scores, int block_size, int level, + uint32_t *log_lut, int32_t shift_val_arg, + double sigma_nsq_t, uint8_t check_enable_spatial_csf); + +int integer_compute_strred_funque_avx512(const struct i_dwt2buffers *ref, + const struct i_dwt2buffers *dist, + struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, + size_t width, size_t height, float **spat_scales_ref, + float **spat_scales_dist, struct strred_results *strred_scores, + int block_size, int level, uint32_t *log_lut, + int32_t shift_val_arg, double sigma_nsq_t, + uint8_t check_enable_spatial_csf); void integer_subract_subbands_avx512(const dwt2_dtype *ref_src, const dwt2_dtype *ref_prev_src, dwt2_dtype *ref_dst, const dwt2_dtype *dist_src, diff --git a/libvmaf/src/meson.build b/libvmaf/src/meson.build 
index ca2069016..03677d6f6 100644 --- a/libvmaf/src/meson.build +++ b/libvmaf/src/meson.build @@ -312,6 +312,7 @@ if is_asm_enabled funque_feature_dir + 'x86/integer_funque_filters_avx512.c', funque_feature_dir + 'x86/resizer_avx512.c', funque_feature_dir + 'x86/hbd_resizer_avx512.c', + funque_feature_dir + 'x86/integer_funque_motion_avx512.c', funque_feature_dir + 'x86/integer_funque_ssim_avx512.c', funque_feature_dir + 'x86/integer_funque_adm_avx512.c', funque_feature_dir + 'x86/integer_funque_vif_avx512.c', @@ -467,6 +468,7 @@ libvmaf_feature_sources = [ feature_src_dir + 'cambi.c', feature_src_dir + 'luminance_tools.c', feature_src_dir + 'null.c', + src_dir + 'framesync.c', ] if float_enabled @@ -543,6 +545,7 @@ libvmaf_feature_static_lib = static_library( libvmaf_sources = [ src_dir + 'libvmaf.c', + src_dir + 'compute_vmaf.c', src_dir + 'predict.c', src_dir + 'model.c', src_dir + 'svm.cpp', diff --git a/libvmaf/src/output.c b/libvmaf/src/output.c index a70123cc1..890382ac5 100644 --- a/libvmaf/src/output.c +++ b/libvmaf/src/output.c @@ -44,6 +44,26 @@ static const char *pool_method_name[] = { [VMAF_POOL_METHOD_HARMONIC_MEAN] = "harmonic_mean", }; +int countLeadingZeroesFloat(double x) +{ + if(x < 0) + x = fabs(x); + + double intPart; // kept as a double: casting x to int is undefined once |x| exceeds INT_MAX + double fractionalPart = modf(x, &intPart); + + // Count leading zeroes in the fractional part + int leadingZeroesCount = 0; + + while (fractionalPart < 1.0 && fractionalPart != 0) + { + fractionalPart *= 10; // Shift decimal point to the right + leadingZeroesCount++; + } + + return leadingZeroesCount; +} + int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, FILE *outfile, unsigned subsample, unsigned width, unsigned height, double fps, unsigned pic_cnt) @@ -58,6 +78,7 @@ int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, fprintf(outfile, " \n", fps); unsigned n_frames = 0; + int leadingZeroesCount; fprintf(outfile, " \n"); for (unsigned i = 0 ; i < max_capacity(fc); i++) { if ((subsample > 
1) && (i % subsample)) @@ -78,10 +99,15 @@ int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, continue; if (!fc->feature_vector[j]->score[i].written) continue; - fprintf(outfile, "%s=\"%.6f\" ", - vmaf_feature_name_alias(fc->feature_vector[j]->name), - fc->feature_vector[j]->score[i].value - ); + leadingZeroesCount = countLeadingZeroesFloat(fc->feature_vector[j]->score[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, "%s=\"%.6f\" ", + vmaf_feature_name_alias(fc->feature_vector[j]->name), + fc->feature_vector[j]->score[i].value); + else + fprintf(outfile, "%s=\"%.16f\" ", + vmaf_feature_name_alias(fc->feature_vector[j]->name), + fc->feature_vector[j]->score[i].value); } n_frames++; fprintf(outfile, "/>\n"); @@ -99,7 +125,13 @@ int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, int err = vmaf_feature_score_pooled(vmaf, feature_name, j, &score, 0, pic_cnt - 1); if (!err) - fprintf(outfile, "%s=\"%.30f\" ", pool_method_name[j], score); + { + leadingZeroesCount = countLeadingZeroesFloat(score); + if (leadingZeroesCount <= 6) + fprintf(outfile, "%s=\"%.6f\" ", pool_method_name[j], score); + else + fprintf(outfile, "%s=\"%.16f\" ", pool_method_name[j], score); + } } fprintf(outfile, "/>\n"); } @@ -108,7 +140,13 @@ int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, fprintf(outfile, " aggregate_vector.cnt; i++) { - fprintf(outfile, "%s=\"%.6f\" ", + leadingZeroesCount = countLeadingZeroesFloat(fc->aggregate_vector.metric[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, "%s=\"%.6f\" ", + fc->aggregate_vector.metric[i].name, + fc->aggregate_vector.metric[i].value); + else + fprintf(outfile, "%s=\"%.16f\" ", fc->aggregate_vector.metric[i].name, fc->aggregate_vector.metric[i].value); } @@ -123,6 +161,7 @@ int vmaf_write_output_json(VmafContext *vmaf, VmafFeatureCollector *fc, FILE *outfile, unsigned subsample, double fps, unsigned pic_cnt) { + int leadingZeroesCount; fprintf(outfile, "{\n"); 
fprintf(outfile, " \"version\": \"%s\",\n", vmaf_version()); switch(fpclassify(fps)) { @@ -167,11 +206,18 @@ int vmaf_write_output_json(VmafContext *vmaf, VmafFeatureCollector *fc, case FP_NORMAL: case FP_ZERO: case FP_SUBNORMAL: - fprintf(outfile, " \"%s\": %.30f%s\n", - vmaf_feature_name_alias(fc->feature_vector[j]->name), - fc->feature_vector[j]->score[i].value, - cnt2 < cnt ? "," : "" - ); + leadingZeroesCount = countLeadingZeroesFloat(fc->feature_vector[j]->score[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, " \"%s\": %.6f%s\n", + vmaf_feature_name_alias(fc->feature_vector[j]->name), + fc->feature_vector[j]->score[i].value, + cnt2 < cnt ? "," : ""); + else + fprintf(outfile, " \"%s\": %.16f%s\n", + vmaf_feature_name_alias(fc->feature_vector[j]->name), + fc->feature_vector[j]->score[i].value, + cnt2 < cnt ? "," : ""); + break; case FP_INFINITE: case FP_NAN: @@ -203,7 +249,12 @@ int vmaf_write_output_json(VmafContext *vmaf, VmafFeatureCollector *fc, case FP_NORMAL: case FP_ZERO: case FP_SUBNORMAL: - fprintf(outfile, " \"%s\": %.30f", + leadingZeroesCount = countLeadingZeroesFloat((double)score); + if (leadingZeroesCount <= 6) + fprintf(outfile, " \"%s\": %.6f", + pool_method_name[j], score); + else + fprintf(outfile, " \"%s\": %.16f", pool_method_name[j], score); break; case FP_INFINITE: @@ -225,9 +276,17 @@ int vmaf_write_output_json(VmafContext *vmaf, VmafFeatureCollector *fc, case FP_NORMAL: case FP_ZERO: case FP_SUBNORMAL: - fprintf(outfile, "\n \"%s\": %.6f", + leadingZeroesCount = countLeadingZeroesFloat(fc->aggregate_vector.metric[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, "\n \"%s\": %.6f", fc->aggregate_vector.metric[i].name, fc->aggregate_vector.metric[i].value); + else + fprintf(outfile, "\n \"%s\": %.16f", + fc->aggregate_vector.metric[i].name, + fc->aggregate_vector.metric[i].value); + + break; case FP_INFINITE: case FP_NAN: @@ -246,7 +305,7 @@ int vmaf_write_output_json(VmafContext *vmaf, VmafFeatureCollector *fc, 
int vmaf_write_output_csv(VmafFeatureCollector *fc, FILE *outfile, unsigned subsample) { - + int leadingZeroesCount; fprintf(outfile, "Frame,"); for (unsigned i = 0; i < fc->cnt; i++) { fprintf(outfile, "%s,", @@ -273,7 +332,12 @@ int vmaf_write_output_csv(VmafFeatureCollector *fc, FILE *outfile, continue; if (!fc->feature_vector[j]->score[i].written) continue; - fprintf(outfile, "%.30f,", fc->feature_vector[j]->score[i].value); + + leadingZeroesCount = countLeadingZeroesFloat(fc->feature_vector[j]->score[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, "%.6f,", fc->feature_vector[j]->score[i].value); + else + fprintf(outfile, "%.16f,", fc->feature_vector[j]->score[i].value); } fprintf(outfile, "\n"); } @@ -284,6 +348,7 @@ int vmaf_write_output_csv(VmafFeatureCollector *fc, FILE *outfile, int vmaf_write_output_sub(VmafFeatureCollector *fc, FILE *outfile, unsigned subsample) { + int leadingZeroesCount; for (unsigned i = 0 ; i < max_capacity(fc); i++) { if ((subsample > 1) && (i % subsample)) continue; @@ -303,7 +368,13 @@ int vmaf_write_output_sub(VmafFeatureCollector *fc, FILE *outfile, continue; if (!fc->feature_vector[j]->score[i].written) continue; - fprintf(outfile, "%s: %.30f|", + leadingZeroesCount = countLeadingZeroesFloat(fc->feature_vector[j]->score[i].value); + if (leadingZeroesCount <= 6) + fprintf(outfile, "%s: %.6f|", + vmaf_feature_name_alias(fc->feature_vector[j]->name), + fc->feature_vector[j]->score[i].value); + else + fprintf(outfile, "%s: %.16f|", vmaf_feature_name_alias(fc->feature_vector[j]->name), fc->feature_vector[j]->score[i].value); } diff --git a/libvmaf/src/output.h b/libvmaf/src/output.h index 4c6e2a00c..31f11f5f7 100644 --- a/libvmaf/src/output.h +++ b/libvmaf/src/output.h @@ -19,6 +19,8 @@ #ifndef __VMAF_OUTPUT_H__ #define __VMAF_OUTPUT_H__ +int countLeadingZeroesFloat(double x); /* parameter type must match the definition in output.c */ + int vmaf_write_output_xml(VmafContext *vmaf, VmafFeatureCollector *fc, FILE *outfile, unsigned subsample, unsigned width, unsigned height, 
double fps, unsigned pic_cnt); diff --git a/model/funque_float.json b/model/funque_float.json index 3faff32bb..c5275f259 100644 --- a/model/funque_float.json +++ b/model/funque_float.json @@ -23,7 +23,9 @@ "adm_levels": 4, "ssim_levels": 4, "ms_ssim_levels": 4, - "strred_levels": 4 + "strred_levels": 4, + "motion_levels": 4, + "mad_levels": 4 } ], "feature_names": [ @@ -42,6 +44,13 @@ "FUNQUE_feature_strred_scale0_score", "FUNQUE_feature_strred_scale1_score", "FUNQUE_feature_strred_scale2_score", "FUNQUE_feature_strred_scale3_score", + + "FUNQUE_feature_motion_scale0_score", "FUNQUE_feature_motion_scale1_score", + "FUNQUE_feature_motion_scale2_score", "FUNQUE_feature_motion_scale3_score", + + "FUNQUE_feature_mad_scale0_score", "FUNQUE_feature_mad_scale1_score", + "FUNQUE_feature_mad_scale2_score", "FUNQUE_feature_mad_scale3_score", + "FUNQUE_feature_ms_ssim_mean_scale0_score", "FUNQUE_feature_ms_ssim_mean_scale1_score", "FUNQUE_feature_ms_ssim_mean_scale2_score", "FUNQUE_feature_ms_ssim_mean_scale3_score", "FUNQUE_feature_ms_ssim_mink3_scale0_score", "FUNQUE_feature_ms_ssim_mink3_scale1_score", @@ -84,7 +93,11 @@ "strred_scale0", "strred_scale1", "strred_scale2", - "strred_scale3" + "strred_scale3", + "motion_scale0", + "motion_scale1", + "motion_scale2", + "motion_scale3" ] }, "score_clip": [ diff --git a/model/funque_integer.json b/model/funque_integer.json index 21c41a019..0a8bd9b52 100644 --- a/model/funque_integer.json +++ b/model/funque_integer.json @@ -22,7 +22,9 @@ "adm_levels": 4, "strred_levels": 4, "ssim_levels": 4, - "ms_ssim_levels": 4 + "ms_ssim_levels": 4, + "motion_levels": 4, + "mad_levels": 4 }, {}, {}, @@ -51,6 +53,10 @@ "FUNQUE_integer_feature_strred_scale1_score", "FUNQUE_integer_feature_strred_scale2_score", "FUNQUE_integer_feature_strred_scale3_score", + "FUNQUE_integer_feature_motion_scale0_score", "FUNQUE_integer_feature_motion_scale1_score", + "FUNQUE_integer_feature_motion_scale2_score", "FUNQUE_integer_feature_motion_scale3_score", + 
"FUNQUE_integer_feature_mad_scale0_score", "FUNQUE_integer_feature_mad_scale1_score", + "FUNQUE_integer_feature_mad_scale2_score", "FUNQUE_integer_feature_mad_scale3_score", "FUNQUE_integer_feature_ms_ssim_mean_scale0_score", "FUNQUE_integer_feature_ms_ssim_mean_scale1_score", "FUNQUE_integer_feature_ms_ssim_mean_scale2_score", "FUNQUE_integer_feature_ms_ssim_mean_scale3_score", "FUNQUE_integer_feature_ms_ssim_mink3_scale0_score", "FUNQUE_integer_feature_ms_ssim_mink3_scale1_score", @@ -93,7 +99,11 @@ "strred_scale0", "strred_scale1", "strred_scale2", - "strred_scale3" + "strred_scale3", + "motion_scale0", + "motion_scale1", + "motion_scale2", + "motion_scale3" ] }, "score_clip": [