Skip to content

Commit

Permalink
Multi-threading Implementation for Float and Integer (#15)
Browse files Browse the repository at this point in the history
Co-authored-by: Priyanka-885 <[email protected]>
Co-authored-by: Mallikarjun Kamble <[email protected]>
  • Loading branch information
3 people authored and cosmin committed Apr 22, 2024
1 parent 7263a81 commit 2d18502
Show file tree
Hide file tree
Showing 37 changed files with 1,404 additions and 341 deletions.
4 changes: 0 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
VENV=.venv
.PHONY: all install clean distclean deps

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <arm_neon.h>
#include <stdlib.h>
#include "../integer_funque_filters.h"
#include "../integer_funque_motion.h"

double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor)
{
Expand Down Expand Up @@ -98,3 +100,56 @@ double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *i
double d_accum = (double)accum / pending_div_factor;
return (d_accum / (width * height));
}

/**
 * Compute the motion score between the previous and current frame bands.
 *
 * Both strides arrive in bytes and must be whole multiples of
 * sizeof(dwt2_dtype); they are converted to element counts before being
 * handed to integer_funque_image_mad_neon().
 *
 * @param prev                    previous-frame samples
 * @param curr                    current-frame samples
 * @param w                       width forwarded to the MAD kernel
 * @param h                       height forwarded to the MAD kernel
 * @param prev_stride             stride of prev, in bytes
 * @param curr_stride             stride of curr, in bytes
 * @param pending_div_factor_arg  shift amount; the divisor handed to the
 *                                kernel is (1 << pending_div_factor_arg) * 255
 * @param score                   receives the value returned by the kernel
 *
 * @return 0 on success; 1 if either stride is not a multiple of
 *         sizeof(dwt2_dtype) (a message is printed to stdout and *score is
 *         left untouched).
 */
int integer_compute_motion_funque_neon(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score)
{
    const size_t elem_size = sizeof(dwt2_dtype);
    float div_factor = (1 << pending_div_factor_arg) * 255;

    if (prev_stride % elem_size != 0)
    {
        printf("error: prev_stride %% sizeof(dwt2_dtype) != 0, prev_stride = %d, sizeof(dwt2_dtype) = %zu.\n", prev_stride, elem_size);
        fflush(stdout);
        return 1;
    }

    if (curr_stride % elem_size != 0)
    {
        printf("error: curr_stride %% sizeof(dwt2_dtype) != 0, curr_stride = %d, sizeof(dwt2_dtype) = %zu.\n", curr_stride, elem_size);
        fflush(stdout);
        return 1;
    }

    /* The MAD kernel takes its strides in elements, not bytes. */
    *score = integer_funque_image_mad_neon(prev, curr, w, h,
                                           prev_stride / elem_size,
                                           curr_stride / elem_size,
                                           div_factor);
    return 0;
}

/**
 * Compute the MAD score between a reference and a distorted band.
 *
 * Both strides arrive in bytes and must be whole multiples of
 * sizeof(dwt2_dtype); they are converted to element counts before being
 * handed to integer_funque_image_mad_neon().
 *
 * @param ref                     reference samples
 * @param dis                     distorted samples
 * @param w                       width forwarded to the MAD kernel
 * @param h                       height forwarded to the MAD kernel
 * @param ref_stride              stride of ref, in bytes
 * @param dis_stride              stride of dis, in bytes
 * @param pending_div_factor_arg  shift amount; the divisor handed to the
 *                                kernel is (1 << pending_div_factor_arg) * 255
 * @param score                   receives the value returned by the kernel
 *
 * @return 0 on success; 1 if either stride is not a multiple of
 *         sizeof(dwt2_dtype) (a message is printed to stdout and *score is
 *         left untouched).
 */
int integer_compute_mad_funque_neon(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score)
{
    const size_t elem_size = sizeof(dwt2_dtype);
    float div_factor = (1 << pending_div_factor_arg) * 255;

    if (ref_stride % elem_size != 0)
    {
        printf("error: ref_stride %% sizeof(dwt2_dtype) != 0, ref_stride = %d, sizeof(dwt2_dtype) = %zu.\n", ref_stride, elem_size);
        fflush(stdout);
        return 1;
    }

    if (dis_stride % elem_size != 0)
    {
        printf("error: dis_stride %% sizeof(dwt2_dtype) != 0, dis_stride = %d, sizeof(dwt2_dtype) = %zu.\n", dis_stride, elem_size);
        fflush(stdout);
        return 1;
    }

    /* The MAD kernel takes its strides in elements, not bytes. */
    *score = integer_funque_image_mad_neon(ref, dis, w, h,
                                           ref_stride / elem_size,
                                           dis_stride / elem_size,
                                           div_factor);
    return 0;
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
/**
*
* Copyright 2016-2020 Netflix, Inc.
*
* Licensed under the BSD+Patent License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://opensource.org/licenses/BSDplusPatent
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

#include "../integer_funque_filters.h"

/**
 * NEON MAD kernel over two images.
 *
 * The accumulated value is divided by pending_div_factor and then by
 * (width * height) before being returned.
 * NOTE(review): the wrappers below pass strides in dwt2_dtype elements
 * (bytes / sizeof(dwt2_dtype)); presumably this kernel expects element
 * strides -- confirm against the definition.
 */
double integer_funque_image_mad_neon(const dwt2_dtype *img1, const dwt2_dtype *img2, int width, int height, int img1_stride, int img2_stride, float pending_div_factor);

/**
 * Motion score between a previous and a current frame band.
 *
 * prev_stride and curr_stride are in bytes and must be multiples of
 * sizeof(dwt2_dtype). The kernel divisor is (1 << pending_div_factor_arg) * 255.
 * Writes the result to *score and returns 0; returns 1 (after printing an
 * error to stdout) when a stride is not a multiple of sizeof(dwt2_dtype).
 */
int integer_compute_motion_funque_neon(const dwt2_dtype *prev, const dwt2_dtype *curr, int w, int h, int prev_stride, int curr_stride, int pending_div_factor_arg, double *score);

/**
 * MAD score between a reference and a distorted band.
 *
 * ref_stride and dis_stride are in bytes and must be multiples of
 * sizeof(dwt2_dtype). The kernel divisor is (1 << pending_div_factor_arg) * 255.
 * Writes the result to *score and returns 0; returns 1 (after printing an
 * error to stdout) when a stride is not a multiple of sizeof(dwt2_dtype).
 */
int integer_compute_mad_funque_neon(const dwt2_dtype *ref, const dwt2_dtype *dis, int w, int h, int ref_stride, int dis_stride, int pending_div_factor_arg, double *score);
Original file line number Diff line number Diff line change
Expand Up @@ -215,29 +215,21 @@ float integer_rred_entropies_and_scales_neon(const dwt2_dtype *x_t, const dwt2_d
return agg_abs_accum;
}

int integer_compute_strred_funque_neon(
const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist,
struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height,
struct strred_results *strred_scores, int block_size, int level,
uint32_t *log_lut, int32_t shift_val_arg, double sigma_nsq_t, uint8_t check_enable_spatial_csf)
int integer_compute_srred_funque_neon(const struct i_dwt2buffers *ref,
const struct i_dwt2buffers *dist, size_t width, size_t height,
float **spat_scales_ref, float **spat_scales_dist,
struct strred_results *strred_scores, int block_size, int level,
uint32_t *log_lut, int32_t shift_val_arg,
double sigma_nsq_t, uint8_t check_enable_spatial_csf)
{
int ret;
UNUSED(block_size);
size_t total_subbands = DEFAULT_STRRED_SUBBANDS;
size_t subband;
float spat_values[DEFAULT_STRRED_SUBBANDS], temp_values[DEFAULT_STRRED_SUBBANDS];
float fspat_val[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS];
float spat_values[DEFAULT_STRRED_SUBBANDS], fspat_val[DEFAULT_STRRED_SUBBANDS];
uint8_t enable_temp = 0;
int32_t shift_val;

/* amount of reflecting */
int x_reflect = (int) ((STRRED_WINDOW_SIZE - 1) / 2);
size_t r_width = width + (2 * x_reflect);
size_t r_height = height + (2 * x_reflect);

float *scales_spat_x = (float *) calloc(r_width * r_height, sizeof(float));
float *scales_spat_y = (float *) calloc(r_width * r_height, sizeof(float));

for(subband = 1; subband < total_subbands; subband++) {
enable_temp = 0;
spat_values[subband] = 0;
Expand All @@ -249,28 +241,68 @@ int integer_compute_strred_funque_neon(
}
spat_values[subband] = integer_rred_entropies_and_scales_neon(
ref->bands[subband], dist->bands[subband], width, height, log_lut, sigma_nsq_t,
shift_val, enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf);
shift_val, enable_temp, spat_scales_ref[subband], spat_scales_dist[subband],
check_enable_spatial_csf);
fspat_val[subband] = spat_values[subband] / (width * height);
}

strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3;

// Add equations to compute S-RRED using norm factors
int norm_factor = 1, num_level;
for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1;

strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level];

strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor;

ret = 0;
return ret;
}

int integer_compute_strred_funque_neon(const struct i_dwt2buffers *ref,
const struct i_dwt2buffers *dist,
struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist,
size_t width, size_t height, float **spat_scales_ref,
float **spat_scales_dist, struct strred_results *strred_scores,
int block_size, int level, uint32_t *log_lut,
int32_t shift_val_arg, double sigma_nsq_t,
uint8_t check_enable_spatial_csf)
{
int ret;
UNUSED(block_size);
size_t total_subbands = DEFAULT_STRRED_SUBBANDS;
size_t subband;
float temp_values[DEFAULT_STRRED_SUBBANDS], ftemp_val[DEFAULT_STRRED_SUBBANDS];
uint8_t enable_temp = 0;
int32_t shift_val;

for(subband = 1; subband < total_subbands; subband++) {
if(check_enable_spatial_csf == 1)
shift_val = 2 * shift_val_arg;
else {
shift_val = 2 * i_nadenau_pending_div_factors[level][subband];
}

if(prev_ref != NULL && prev_dist != NULL) {
enable_temp = 1;
dwt2_dtype *ref_temporal = (dwt2_dtype *) calloc(width * height, sizeof(dwt2_dtype));
dwt2_dtype *dist_temporal = (dwt2_dtype *) calloc(width * height, sizeof(dwt2_dtype));
temp_values[subband] = 0;

integer_subract_subbands_neon(ref->bands[subband], prev_ref->bands[subband],
ref_temporal, dist->bands[subband],
prev_dist->bands[subband], dist_temporal, width, height);
integer_subract_subbands_neon(ref->bands[subband], prev_ref->bands[subband], ref_temporal,
dist->bands[subband], prev_dist->bands[subband],
dist_temporal, width, height);
temp_values[subband] = integer_rred_entropies_and_scales_neon(
ref_temporal, dist_temporal, width, height, log_lut, sigma_nsq_t, shift_val,
enable_temp, scales_spat_x, scales_spat_y, check_enable_spatial_csf);
enable_temp, spat_scales_ref[subband], spat_scales_dist[subband],
check_enable_spatial_csf);
ftemp_val[subband] = temp_values[subband] / (width * height);

free(ref_temporal);
free(dist_temporal);
}
}
strred_scores->spat_vals[level] = (fspat_val[1] + fspat_val[2] + fspat_val[3]) / 3;
strred_scores->temp_vals[level] = (ftemp_val[1] + ftemp_val[2] + ftemp_val[3]) / 3;
strred_scores->spat_temp_vals[level] =
strred_scores->spat_vals[level] * strred_scores->temp_vals[level];
Expand All @@ -279,17 +311,12 @@ int integer_compute_strred_funque_neon(
int norm_factor = 1, num_level;
for(num_level = 0; num_level <= level; num_level++) norm_factor = num_level + 1;

strred_scores->spat_vals_cumsum += strred_scores->spat_vals[level];
strred_scores->temp_vals_cumsum += strred_scores->temp_vals[level];
strred_scores->spat_temp_vals_cumsum += strred_scores->spat_temp_vals[level];

strred_scores->srred_vals[level] = strred_scores->spat_vals_cumsum / norm_factor;
strred_scores->trred_vals[level] = strred_scores->temp_vals_cumsum / norm_factor;
strred_scores->strred_vals[level] = strred_scores->spat_temp_vals_cumsum / norm_factor;

free(scales_spat_x);
free(scales_spat_y);

ret = 0;
return ret;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,21 @@
#include "../common/macros.h"
#include "../funque_global_options.h"

int integer_compute_strred_funque_neon(
const struct i_dwt2buffers *ref, const struct i_dwt2buffers *dist,
struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist, size_t width, size_t height,
struct strred_results *strred_scores, int block_size, int level,
uint32_t *log_lut, int32_t shift_val, double sigma_nsq_t, uint8_t enable_spatial_csf);
int integer_compute_srred_funque_neon(const struct i_dwt2buffers *ref,
const struct i_dwt2buffers *dist, size_t width, size_t height,
float **spat_scales_ref, float **spat_scales_dist,
struct strred_results *strred_scores, int block_size, int level,
uint32_t *log_lut, int32_t shift_val_arg,
double sigma_nsq_t, uint8_t check_enable_spatial_csf);

int integer_compute_strred_funque_neon(const struct i_dwt2buffers *ref,
const struct i_dwt2buffers *dist,
struct i_dwt2buffers *prev_ref, struct i_dwt2buffers *prev_dist,
size_t width, size_t height, float **spat_scales_ref,
float **spat_scales_dist, struct strred_results *strred_scores,
int block_size, int level, uint32_t *log_lut,
int32_t shift_val_arg, double sigma_nsq_t,
uint8_t check_enable_spatial_csf);

void integer_subract_subbands_neon(const dwt2_dtype *ref_src, const dwt2_dtype *ref_prev_src,
dwt2_dtype *ref_dst, const dwt2_dtype *dist_src,
Expand Down
Loading

0 comments on commit 2d18502

Please sign in to comment.