From 6acf57348ff7ef9a4507260dfafb93a4a52f7a0f Mon Sep 17 00:00:00 2001 From: Federico Peccia Date: Thu, 27 Apr 2023 09:37:02 +0200 Subject: [PATCH 1/2] Added non_zero_padding --- bareMetalC/conv.c | 2 +- bareMetalC/conv_dw.c | 4 +- bareMetalC/conv_dw_perf.c | 2 +- bareMetalC/conv_first_layer.c | 2 +- bareMetalC/conv_perf.c | 2 +- bareMetalC/conv_trans_input_3120.c | 4 +- ...nv_trans_input_3120_with_kernel_dilation.c | 4 +- bareMetalC/conv_trans_output_1203.c | 4 +- bareMetalC/conv_trans_weight_0132.c | 4 +- bareMetalC/conv_trans_weight_1203.c | 4 +- bareMetalC/conv_with_input_dilation.c | 2 +- ...conv_with_input_dilation_and_neg_padding.c | 2 +- .../conv_with_input_dilation_and_rot180.c | 2 +- bareMetalC/conv_with_kernel_dilation.c | 2 +- bareMetalC/conv_with_pool.c | 2 +- bareMetalC/conv_with_rot180.c | 2 +- .../templates/conv_template.c | 2 +- imagenet/alexnet.c | 4 +- imagenet/mobilenet.c | 36 ++++++------- imagenet/resnet50.c | 38 ++++++------- include/gemmini.h | 53 ++++++++++--------- 21 files changed, 90 insertions(+), 87 deletions(-) diff --git a/bareMetalC/conv.c b/bareMetalC/conv.c index 1e776bfe..6e7d87aa 100644 --- a/bareMetalC/conv.c +++ b/bareMetalC/conv.c @@ -216,7 +216,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_dw.c b/bareMetalC/conv_dw.c index 7a22bd5d..1f26a531 100644 --- a/bareMetalC/conv_dw.c +++ b/bareMetalC/conv_dw.c @@ -114,7 +114,7 @@ int main() { uint64_t start_cpu = read_cycles(); #ifndef FAST tiled_conv_dw_auto(BATCH_SIZE, IN_DIM, CHANNELS, OUT_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, @@ -133,7 +133,7 @@ int main() { printf("Gemmini conv...\n"); uint64_t start_gemmini = read_cycles(); tiled_conv_dw_auto(BATCH_SIZE, IN_DIM, CHANNELS, OUT_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, diff --git a/bareMetalC/conv_dw_perf.c b/bareMetalC/conv_dw_perf.c index 3d0d0a8b..a7b2ad27 100644 --- a/bareMetalC/conv_dw_perf.c +++ b/bareMetalC/conv_dw_perf.c @@ -85,7 +85,7 @@ int main (int argc, char * argv[]) { uint64_t start_gemmini = read_cycles(); tiled_conv_dw_auto(BATCH_SIZE, IN_DIM, CHANNELS, OUT_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, diff --git a/bareMetalC/conv_first_layer.c b/bareMetalC/conv_first_layer.c index 48e7dab1..85bd2a99 100644 --- a/bareMetalC/conv_first_layer.c +++ b/bareMetalC/conv_first_layer.c @@ -219,7 +219,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_perf.c b/bareMetalC/conv_perf.c index afc3be06..c055c217 100644 --- a/bareMetalC/conv_perf.c +++ b/bareMetalC/conv_perf.c @@ -114,7 +114,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_trans_input_3120.c b/bareMetalC/conv_trans_input_3120.c index c3fca89d..0fd3d129 100644 --- a/bareMetalC/conv_trans_input_3120.c +++ b/bareMetalC/conv_trans_input_3120.c @@ -158,7 +158,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -178,7 +178,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c b/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c index f0b1f365..b6676134 100644 --- a/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c +++ b/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c @@ -160,7 +160,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, 1, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -180,7 +180,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, 1, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_output_1203.c b/bareMetalC/conv_trans_output_1203.c index 52666d8d..78911381 100644 --- a/bareMetalC/conv_trans_output_1203.c +++ b/bareMetalC/conv_trans_output_1203.c @@ -155,7 +155,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, false, false, (elem_t*)input, @@ -175,7 +175,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_trans_weight_0132.c b/bareMetalC/conv_trans_weight_0132.c index 2f726fa5..dd449e98 100644 --- a/bareMetalC/conv_trans_weight_0132.c +++ b/bareMetalC/conv_trans_weight_0132.c @@ -157,7 +157,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -177,7 +177,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_weight_1203.c b/bareMetalC/conv_trans_weight_1203.c index 5ef544ba..aa8c4dda 100644 --- a/bareMetalC/conv_trans_weight_1203.c +++ b/bareMetalC/conv_trans_weight_1203.c @@ -156,7 +156,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, false, (elem_t*)input, @@ -176,7 +176,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation.c b/bareMetalC/conv_with_input_dilation.c index 4a7fd1d0..4b5d8fc8 100644 --- a/bareMetalC/conv_with_input_dilation.c +++ b/bareMetalC/conv_with_input_dilation.c @@ -239,7 +239,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation_and_neg_padding.c b/bareMetalC/conv_with_input_dilation_and_neg_padding.c index ab3b9d09..c054dfb0 100644 --- a/bareMetalC/conv_with_input_dilation_and_neg_padding.c +++ b/bareMetalC/conv_with_input_dilation_and_neg_padding.c @@ -240,7 +240,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation_and_rot180.c b/bareMetalC/conv_with_input_dilation_and_rot180.c index e5fa3795..cd2b0707 100644 --- a/bareMetalC/conv_with_input_dilation_and_rot180.c +++ b/bareMetalC/conv_with_input_dilation_and_rot180.c @@ -254,7 +254,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, WROT180, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_kernel_dilation.c b/bareMetalC/conv_with_kernel_dilation.c index 6e274304..54f11c1d 100644 --- a/bareMetalC/conv_with_kernel_dilation.c +++ b/bareMetalC/conv_with_kernel_dilation.c @@ -261,7 +261,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, INPUT_DILATION, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_pool.c b/bareMetalC/conv_with_pool.c index 1f0e7b59..e8ce1bfb 100644 --- a/bareMetalC/conv_with_pool.c +++ b/bareMetalC/conv_with_pool.c @@ -275,7 +275,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, // 1, diff --git a/bareMetalC/conv_with_rot180.c b/bareMetalC/conv_with_rot180.c index 52f692d0..3544e4e9 100644 --- a/bareMetalC/conv_with_rot180.c +++ b/bareMetalC/conv_with_rot180.c @@ -254,7 +254,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, WROT180, false, false, false, false, (elem_t*)input, diff --git a/gemmini-data-collection/templates/conv_template.c b/gemmini-data-collection/templates/conv_template.c index 49df0aa1..de65da02 100644 --- a/gemmini-data-collection/templates/conv_template.c +++ b/gemmini-data-collection/templates/conv_template.c @@ -201,7 +201,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/imagenet/alexnet.c b/imagenet/alexnet.c index 18268397..b2d6ed7b 100644 --- a/imagenet/alexnet.c +++ b/imagenet/alexnet.c @@ -104,7 +104,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_1_params.batch_size, conv_1_params.in_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_dim, - conv_1_params.stride, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, conv_1_params.padding, 0, conv_1_params.kernel_size, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out_pooled, @@ -156,7 +156,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_2_params.batch_size, conv_2_params.in_dim, conv_2_params.in_channels, conv_2_params.out_channels, conv_2_params.out_dim, - conv_2_params.stride, conv_2_params.padding, conv_2_params.kernel_size, + conv_2_params.stride, conv_2_params.padding, 0, conv_2_params.kernel_size, (elem_t*)conv_1_out_pooled, (elem_t*)conv_2_w, (acc_t*)conv_2_b, (elem_t*)conv_2_out_pooled, diff --git a/imagenet/mobilenet.c b/imagenet/mobilenet.c index 0f1884a5..1a8cc9cb 100644 --- a/imagenet/mobilenet.c +++ b/imagenet/mobilenet.c @@ -95,7 +95,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_1_params.batch_size, conv_1_params.in_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_dim, - conv_1_params.stride, 1, 1, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, 1, 1, conv_1_params.padding, 0, conv_1_params.kernel_size, false, false, false, false, false, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out, @@ -121,7 +121,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_2_params.batch_size, conv_dw_2_params.in_dim, conv_dw_2_params.in_channels, conv_dw_2_params.out_dim, - conv_dw_2_params.stride, conv_dw_2_params.padding, conv_dw_2_params.kernel_size, + conv_dw_2_params.stride, conv_dw_2_params.padding, 0, conv_dw_2_params.kernel_size, (elem_t*)conv_1_out, (elem_t*)conv_dw_2_w, (acc_t*)conv_dw_2_b, (elem_t*)conv_dw_2_out, @@ -198,7 +198,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_5_params.batch_size, conv_dw_5_params.in_dim, conv_dw_5_params.in_channels, conv_dw_5_params.out_dim, - conv_dw_5_params.stride, conv_dw_5_params.padding, conv_dw_5_params.kernel_size, + conv_dw_5_params.stride, conv_dw_5_params.padding, 0, conv_dw_5_params.kernel_size, (elem_t*)conv_4_out, (elem_t*)conv_dw_5_w, (acc_t*)conv_dw_5_b, (elem_t*)conv_dw_5_out, @@ -275,7 +275,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_8_params.batch_size, conv_dw_8_params.in_dim, conv_dw_8_params.in_channels, conv_dw_8_params.out_dim, - conv_dw_8_params.stride, conv_dw_8_params.padding, conv_dw_8_params.kernel_size, + conv_dw_8_params.stride, conv_dw_8_params.padding, 0, conv_dw_8_params.kernel_size, (elem_t*)conv_7_out, (elem_t*)conv_dw_8_w, (acc_t*)conv_dw_8_b, (elem_t*)conv_dw_8_out, @@ -368,7 +368,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_11_params.batch_size, conv_dw_11_params.in_dim, conv_dw_11_params.in_channels, conv_dw_11_params.out_dim, - conv_dw_11_params.stride, conv_dw_11_params.padding, conv_dw_11_params.kernel_size, + conv_dw_11_params.stride, conv_dw_11_params.padding, 0, conv_dw_11_params.kernel_size, (elem_t*)conv_10_out, (elem_t*)conv_dw_11_w, (acc_t*)conv_dw_11_b, (elem_t*)conv_dw_11_out, @@ -445,7 +445,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_14_params.batch_size, conv_dw_14_params.in_dim, conv_dw_14_params.in_channels, conv_dw_14_params.out_dim, - conv_dw_14_params.stride, conv_dw_14_params.padding, conv_dw_14_params.kernel_size, + conv_dw_14_params.stride, conv_dw_14_params.padding, 0, conv_dw_14_params.kernel_size, (elem_t*)conv_13_out, (elem_t*)conv_dw_14_w, (acc_t*)conv_dw_14_b, (elem_t*)conv_dw_14_out, @@ -538,7 +538,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_17_params.batch_size, conv_dw_17_params.in_dim, conv_dw_17_params.in_channels, conv_dw_17_params.out_dim, - conv_dw_17_params.stride, conv_dw_17_params.padding, conv_dw_17_params.kernel_size, + conv_dw_17_params.stride, conv_dw_17_params.padding, 0, conv_dw_17_params.kernel_size, (elem_t*)conv_16_out, (elem_t*)conv_dw_17_w, (acc_t*)conv_dw_17_b, (elem_t*)conv_dw_17_out, @@ -631,7 +631,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_20_params.batch_size, conv_dw_20_params.in_dim, conv_dw_20_params.in_channels, conv_dw_20_params.out_dim, - conv_dw_20_params.stride, conv_dw_20_params.padding, conv_dw_20_params.kernel_size, + conv_dw_20_params.stride, conv_dw_20_params.padding, 0, conv_dw_20_params.kernel_size, (elem_t*)conv_19_out, (elem_t*)conv_dw_20_w, (acc_t*)conv_dw_20_b, (elem_t*)conv_dw_20_out, @@ -707,7 +707,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_23_params.batch_size, conv_dw_23_params.in_dim, conv_dw_23_params.in_channels, conv_dw_23_params.out_dim, - conv_dw_23_params.stride, conv_dw_23_params.padding, conv_dw_23_params.kernel_size, + conv_dw_23_params.stride, conv_dw_23_params.padding, 0, conv_dw_23_params.kernel_size, (elem_t*)conv_22_out, (elem_t*)conv_dw_23_w, (acc_t*)conv_dw_23_b, (elem_t*)conv_dw_23_out, @@ -800,7 +800,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_26_params.batch_size, conv_dw_26_params.in_dim, conv_dw_26_params.in_channels, conv_dw_26_params.out_dim, - conv_dw_26_params.stride, conv_dw_26_params.padding, conv_dw_26_params.kernel_size, + conv_dw_26_params.stride, conv_dw_26_params.padding, 0, conv_dw_26_params.kernel_size, (elem_t*)conv_25_out, (elem_t*)conv_dw_26_w, (acc_t*)conv_dw_26_b, (elem_t*)conv_dw_26_out, @@ -893,7 +893,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_29_params.batch_size, conv_dw_29_params.in_dim, conv_dw_29_params.in_channels, conv_dw_29_params.out_dim, - conv_dw_29_params.stride, conv_dw_29_params.padding, conv_dw_29_params.kernel_size, + conv_dw_29_params.stride, conv_dw_29_params.padding, 0, conv_dw_29_params.kernel_size, (elem_t*)conv_28_out, (elem_t*)conv_dw_29_w, (acc_t*)conv_dw_29_b, (elem_t*)conv_dw_29_out, @@ -986,7 +986,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_32_params.batch_size, conv_dw_32_params.in_dim, conv_dw_32_params.in_channels, conv_dw_32_params.out_dim, - conv_dw_32_params.stride, conv_dw_32_params.padding, conv_dw_32_params.kernel_size, + conv_dw_32_params.stride, conv_dw_32_params.padding, 0, conv_dw_32_params.kernel_size, (elem_t*)conv_31_out, (elem_t*)conv_dw_32_w, (acc_t*)conv_dw_32_b, (elem_t*)conv_dw_32_out, @@ -1063,7 +1063,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_35_params.batch_size, conv_dw_35_params.in_dim, conv_dw_35_params.in_channels, conv_dw_35_params.out_dim, - conv_dw_35_params.stride, conv_dw_35_params.padding, conv_dw_35_params.kernel_size, + conv_dw_35_params.stride, conv_dw_35_params.padding, 0, conv_dw_35_params.kernel_size, (elem_t*)conv_34_out, (elem_t*)conv_dw_35_w, (acc_t*)conv_dw_35_b, (elem_t*)conv_dw_35_out, @@ -1156,7 +1156,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_38_params.batch_size, conv_dw_38_params.in_dim, conv_dw_38_params.in_channels, conv_dw_38_params.out_dim, - conv_dw_38_params.stride, conv_dw_38_params.padding, conv_dw_38_params.kernel_size, + conv_dw_38_params.stride, conv_dw_38_params.padding, 0, conv_dw_38_params.kernel_size, (elem_t*)conv_37_out, (elem_t*)conv_dw_38_w, (acc_t*)conv_dw_38_b, (elem_t*)conv_dw_38_out, @@ -1249,7 +1249,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_41_params.batch_size, conv_dw_41_params.in_dim, conv_dw_41_params.in_channels, conv_dw_41_params.out_dim, - conv_dw_41_params.stride, conv_dw_41_params.padding, conv_dw_41_params.kernel_size, + conv_dw_41_params.stride, conv_dw_41_params.padding, 0, conv_dw_41_params.kernel_size, (elem_t*)conv_40_out, (elem_t*)conv_dw_41_w, (acc_t*)conv_dw_41_b, (elem_t*)conv_dw_41_out, @@ -1326,7 +1326,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_44_params.batch_size, conv_dw_44_params.in_dim, conv_dw_44_params.in_channels, conv_dw_44_params.out_dim, - conv_dw_44_params.stride, conv_dw_44_params.padding, conv_dw_44_params.kernel_size, + conv_dw_44_params.stride, conv_dw_44_params.padding, 0, conv_dw_44_params.kernel_size, (elem_t*)conv_43_out, (elem_t*)conv_dw_44_w, (acc_t*)conv_dw_44_b, (elem_t*)conv_dw_44_out, @@ -1419,7 +1419,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_47_params.batch_size, conv_dw_47_params.in_dim, conv_dw_47_params.in_channels, conv_dw_47_params.out_dim, - conv_dw_47_params.stride, conv_dw_47_params.padding, conv_dw_47_params.kernel_size, + conv_dw_47_params.stride, conv_dw_47_params.padding, 0, conv_dw_47_params.kernel_size, (elem_t*)conv_46_out, (elem_t*)conv_dw_47_w, (acc_t*)conv_dw_47_b, (elem_t*)conv_dw_47_out, @@ -1512,7 +1512,7 @@ int main (int argc, char * argv[]) { tiled_conv_dw_auto( conv_dw_50_params.batch_size, conv_dw_50_params.in_dim, conv_dw_50_params.in_channels, conv_dw_50_params.out_dim, - conv_dw_50_params.stride, conv_dw_50_params.padding, conv_dw_50_params.kernel_size, + conv_dw_50_params.stride, conv_dw_50_params.padding, 0, conv_dw_50_params.kernel_size, (elem_t*)conv_49_out, (elem_t*)conv_dw_50_w, (acc_t*)conv_dw_50_b, (elem_t*)conv_dw_50_out, diff --git a/imagenet/resnet50.c b/imagenet/resnet50.c index 35294b7c..655e6d90 100644 --- a/imagenet/resnet50.c +++ b/imagenet/resnet50.c @@ -105,7 +105,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_1_params.batch_size, conv_1_params.in_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_dim, - conv_1_params.stride, 1, 1, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, 1, 1, conv_1_params.padding, 0, conv_1_params.kernel_size, false, false, false, false, false, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out_pooled, @@ -181,7 +181,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_3_params.batch_size, conv_3_params.in_dim, conv_3_params.in_channels, conv_3_params.out_channels, conv_3_params.out_dim, - conv_3_params.stride, 1, 1, conv_3_params.padding, conv_3_params.kernel_size, + conv_3_params.stride, 1, 1, conv_3_params.padding, 0, conv_3_params.kernel_size, false, false, false, false, false, (elem_t*)conv_2_out, (elem_t*)conv_3_w, (acc_t*)conv_3_b, (elem_t*)conv_3_out, @@ -324,7 +324,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_7_params.batch_size, conv_7_params.in_dim, conv_7_params.in_channels, conv_7_params.out_channels, conv_7_params.out_dim, - conv_7_params.stride, 1, 1, conv_7_params.padding, conv_7_params.kernel_size, + conv_7_params.stride, 1, 1, conv_7_params.padding, 0, conv_7_params.kernel_size, false, false, false, false, false, (elem_t*)conv_6_out, (elem_t*)conv_7_w, (acc_t*)conv_7_b, (elem_t*)conv_7_out, @@ -432,7 +432,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_10_params.batch_size, conv_10_params.in_dim, conv_10_params.in_channels, conv_10_params.out_channels, conv_10_params.out_dim, - conv_10_params.stride, 1, 1, conv_10_params.padding, conv_10_params.kernel_size, + conv_10_params.stride, 1, 1, conv_10_params.padding, 0, conv_10_params.kernel_size, false, false, false, false, false, (elem_t*)conv_9_out, (elem_t*)conv_10_w, (acc_t*)conv_10_b, (elem_t*)conv_10_out, @@ -540,7 +540,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_13_params.batch_size, conv_13_params.in_dim, conv_13_params.in_channels, conv_13_params.out_channels, conv_13_params.out_dim, - conv_13_params.stride, 1, 1, conv_13_params.padding, conv_13_params.kernel_size, + conv_13_params.stride, 1, 1, conv_13_params.padding, 0, conv_13_params.kernel_size, false, false, false, false, false, (elem_t*)conv_12_out, (elem_t*)conv_13_w, (acc_t*)conv_13_b, (elem_t*)conv_13_out, @@ -609,7 +609,7 @@ int main (int argc, char * argv[]) { tiled_conv_downsample( conv_15_params.batch_size, conv_15_params.in_dim, conv_15_params.in_channels, conv_15_params.out_channels, conv_15_params.out_dim, - // conv_15_params.stride, 1, 1, conv_15_params.padding, conv_15_params.kernel_size, + // conv_15_params.stride, 1, 1, conv_15_params.padding, 0, conv_15_params.kernel_size, // false, false, false, false, false, (elem_t*)conv_11_out, (elem_t*)conv_15_w, (acc_t*)conv_15_b, (elem_t*)conv_15_out, @@ -692,7 +692,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_17_params.batch_size, conv_17_params.in_dim, conv_17_params.in_channels, conv_17_params.out_channels, conv_17_params.out_dim, - conv_17_params.stride, 1, 1, conv_17_params.padding, conv_17_params.kernel_size, + conv_17_params.stride, 1, 1, conv_17_params.padding, 0, conv_17_params.kernel_size, false, false, false, false, false, (elem_t*)conv_16_out, (elem_t*)conv_17_w, (acc_t*)conv_17_b, (elem_t*)conv_17_out, @@ -800,7 +800,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_20_params.batch_size, conv_20_params.in_dim, conv_20_params.in_channels, conv_20_params.out_channels, conv_20_params.out_dim, - conv_20_params.stride, 1, 1, conv_20_params.padding, conv_20_params.kernel_size, + conv_20_params.stride, 1, 1, conv_20_params.padding, 0, conv_20_params.kernel_size, false, false, false, false, false, (elem_t*)conv_19_out, (elem_t*)conv_20_w, (acc_t*)conv_20_b, (elem_t*)conv_20_out, @@ -908,7 +908,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_23_params.batch_size, conv_23_params.in_dim, conv_23_params.in_channels, conv_23_params.out_channels, conv_23_params.out_dim, - conv_23_params.stride, 1, 1, conv_23_params.padding, conv_23_params.kernel_size, + conv_23_params.stride, 1, 1, conv_23_params.padding, 0, conv_23_params.kernel_size, false, false, false, false, false, (elem_t*)conv_22_out, (elem_t*)conv_23_w, (acc_t*)conv_23_b, (elem_t*)conv_23_out, @@ -1016,7 +1016,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_26_params.batch_size, conv_26_params.in_dim, conv_26_params.in_channels, conv_26_params.out_channels, conv_26_params.out_dim, - conv_26_params.stride, 1, 1, conv_26_params.padding, conv_26_params.kernel_size, + conv_26_params.stride, 1, 1, conv_26_params.padding, 0, conv_26_params.kernel_size, false, false, false, false, false, (elem_t*)conv_25_out, (elem_t*)conv_26_w, (acc_t*)conv_26_b, (elem_t*)conv_26_out, @@ -1168,7 +1168,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_30_params.batch_size, conv_30_params.in_dim, conv_30_params.in_channels, conv_30_params.out_channels, conv_30_params.out_dim, - conv_30_params.stride, 1, 1, conv_30_params.padding, conv_30_params.kernel_size, + conv_30_params.stride, 1, 1, conv_30_params.padding, 0, conv_30_params.kernel_size, false, false, false, false, false, (elem_t*)conv_29_out, (elem_t*)conv_30_w, (acc_t*)conv_30_b, (elem_t*)conv_30_out, @@ -1276,7 +1276,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_33_params.batch_size, conv_33_params.in_dim, conv_33_params.in_channels, conv_33_params.out_channels, conv_33_params.out_dim, - conv_33_params.stride, 1, 1, conv_33_params.padding, conv_33_params.kernel_size, + conv_33_params.stride, 1, 1, conv_33_params.padding, 0, conv_33_params.kernel_size, false, false, false, false, false, (elem_t*)conv_32_out, (elem_t*)conv_33_w, (acc_t*)conv_33_b, (elem_t*)conv_33_out, @@ -1384,7 +1384,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_36_params.batch_size, conv_36_params.in_dim, conv_36_params.in_channels, conv_36_params.out_channels, conv_36_params.out_dim, - conv_36_params.stride, 1, 1, conv_36_params.padding, conv_36_params.kernel_size, + conv_36_params.stride, 1, 1, conv_36_params.padding, 0, conv_36_params.kernel_size, false, false, false, false, false, (elem_t*)conv_35_out, (elem_t*)conv_36_w, (acc_t*)conv_36_b, (elem_t*)conv_36_out, @@ -1492,7 +1492,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_39_params.batch_size, conv_39_params.in_dim, conv_39_params.in_channels, conv_39_params.out_channels, conv_39_params.out_dim, - conv_39_params.stride, 1, 1, conv_39_params.padding, conv_39_params.kernel_size, + conv_39_params.stride, 1, 1, conv_39_params.padding, 0, conv_39_params.kernel_size, false, false, false, false, false, (elem_t*)conv_38_out, (elem_t*)conv_39_w, (acc_t*)conv_39_b, (elem_t*)conv_39_out, @@ -1600,7 +1600,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_42_params.batch_size, conv_42_params.in_dim, conv_42_params.in_channels, conv_42_params.out_channels, conv_42_params.out_dim, - conv_42_params.stride, 1, 1, conv_42_params.padding, conv_42_params.kernel_size, + conv_42_params.stride, 1, 1, conv_42_params.padding, 0, conv_42_params.kernel_size, false, false, false, false, false, (elem_t*)conv_41_out, (elem_t*)conv_42_w, (acc_t*)conv_42_b, (elem_t*)conv_42_out, @@ -1708,7 +1708,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_45_params.batch_size, conv_45_params.in_dim, conv_45_params.in_channels, conv_45_params.out_channels, conv_45_params.out_dim, - conv_45_params.stride, 1, 1, conv_45_params.padding, conv_45_params.kernel_size, + conv_45_params.stride, 1, 1, conv_45_params.padding, 0, conv_45_params.kernel_size, false, false, false, false, false, (elem_t*)conv_44_out, (elem_t*)conv_45_w, (acc_t*)conv_45_b, (elem_t*)conv_45_out, @@ -1776,7 +1776,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_47_params.batch_size, conv_47_params.in_dim, conv_47_params.in_channels, conv_47_params.out_channels, conv_47_params.out_dim, - conv_47_params.stride, 1, 1, conv_47_params.padding, conv_47_params.kernel_size, + conv_47_params.stride, 1, 1, conv_47_params.padding, 0, conv_47_params.kernel_size, false, false, false, false, false, (elem_t*)conv_43_out, (elem_t*)conv_47_w, (acc_t*)conv_47_b, (elem_t*)conv_47_out, @@ -1859,7 +1859,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_49_params.batch_size, conv_49_params.in_dim, conv_49_params.in_channels, conv_49_params.out_channels, conv_49_params.out_dim, - conv_49_params.stride, 1, 1, conv_49_params.padding, conv_49_params.kernel_size, + conv_49_params.stride, 1, 1, conv_49_params.padding, 0, conv_49_params.kernel_size, false, false, false, false, false, (elem_t*)conv_48_out, (elem_t*)conv_49_w, (acc_t*)conv_49_b, (elem_t*)conv_49_out, @@ -1967,7 +1967,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_52_params.batch_size, conv_52_params.in_dim, conv_52_params.in_channels, conv_52_params.out_channels, conv_52_params.out_dim, - conv_52_params.stride, 1, 1, conv_52_params.padding, conv_52_params.kernel_size, + conv_52_params.stride, 1, 1, conv_52_params.padding, 0, conv_52_params.kernel_size, false, false, false, false, false, (elem_t*)conv_51_out, (elem_t*)conv_52_w, (acc_t*)conv_52_b, (elem_t*)conv_52_out, diff --git a/include/gemmini.h b/include/gemmini.h index 4ae4d4b6..434be6f4 100644 --- a/include/gemmini.h +++ b/include/gemmini.h @@ -254,8 +254,11 @@ static acc_scale_t_bits acc_scale_t_to_acc_scale_t_bits(acc_scale_t x) { // Note: The "pixel_repeats" parameter below is still experimental, andthere is // a high chance that it will be removed in future releases. +#define gemmini_extended6_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, padding_value, id) \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(scale_t_to_scale_t_bits(scale)) << 32) | ((uint64_t)(block_mvin_stride) << 16) | ((uint64_t)(pixel_repeats) << 8) | ((id) << 3) | ((shrunk) << 2) | CONFIG_LD, ((uint64_t)(padding_value) << 32) | stride, k_CONFIG) + #define gemmini_extended5_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, id) \ - ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(scale_t_to_scale_t_bits(scale)) << 32) | ((uint64_t)(block_mvin_stride) << 16) | ((uint64_t)(pixel_repeats) << 8) | ((id) << 3) | ((shrunk) << 2) | CONFIG_LD, stride, k_CONFIG) + gemmini_extended6_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, 0, id) \ #define gemmini_extended4_config_ld(stride, scale, shrunk, block_mvin_stride, id) \ gemmini_extended5_config_ld(stride, scale, shrunk, block_mvin_stride, 1, id) \ @@ -351,7 +354,7 @@ static void counter_reset() { } // weight-stationary conv loop -#define gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, activation, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw) \ +#define gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, activation, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw) \ { \ ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(out_channels) << 48) | ((uint64_t)(in_channels) << 32) | ((uint64_t)(in_dim) << 16) | (uint64_t)(batch_size), \ ((uint64_t)(padding) << 48) | ((uint64_t)(stride) << 32) | ((uint64_t)(pool_out_dim) << 16) | (uint64_t)(out_dim), k_LOOP_CONV_WS_CONFIG_1) \ @@ -365,7 +368,7 @@ static void counter_reset() { output, k_LOOP_CONV_WS_CONFIG_5) \ ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, bias, \ input, k_LOOP_CONV_WS_CONFIG_6) \ - ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(max_pixels_per_row) << 8) | ((dw) << 6) | ((trans_input_3120) << 5) | ((trans_weight_0132) << 4) | ((trans_weight_1203) << 3) | ((trans_output_1203) << 2) | ((wrot180) << 1) | (no_bias), \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(padding_value) << 48) | ((uint64_t)(max_pixels_per_row) << 8) | ((dw) << 6) | ((trans_input_3120) << 5) | ((trans_weight_0132) << 4) | ((trans_weight_1203) << 3) | ((trans_output_1203) << 2) | ((wrot180) << 1) | (no_bias), \ ((activation) << 3)| ((input_dilated) << 2) | ((downsample) << 1) | (no_pool), \ k_LOOP_CONV_WS) \ } @@ -1327,7 +1330,7 @@ static void sp_tiled_conv( int batch_size, int in_dim, int in_channels, int out_channels, int out_dim, int pool_out_dim, - int stride, int padding, int kernel_dim, int kernel_dilation, + int stride, int padding, elem_t padding_value, int kernel_dim, int kernel_dilation, int pool_size, int pool_stride, int pool_padding, @@ -1412,7 +1415,7 @@ static void sp_tiled_conv( C_sp_addr_row = (C_sp_addr_row + ACC_ROWS / 2) % ACC_ROWS; } - gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, act, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw); + gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, act, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw); /* // mvin bias @@ -1796,7 +1799,7 @@ static int tiled_conv_total_spad_rows(bool acc, static void conv_cpu_without_pool( int batch_size, int in_dim, int in_channels, int out_channels, int out_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -1836,7 +1839,7 @@ static void conv_cpu_without_pool( } elem_t ipixel = irow < 0 || irow >= in_dim || icol < 0 || icol >= in_dim ? - 0 : *in; + padding_value : *in; const int krow_ = wrot180 ? kernel_dim - krow - 1 : krow; const int kcol_ = wrot180 ? kernel_dim - kcol - 1 : kcol; @@ -1871,7 +1874,7 @@ static void conv_cpu_without_pool( static void conv_dw_cpu_without_pool( int batch_size, int in_dim, int channels, int out_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, const elem_t * input, const elem_t * weights, @@ -1897,7 +1900,7 @@ static void conv_dw_cpu_without_pool( const elem_t * in = input + (b * in_dim * in_dim + irow * in_dim + icol) * channels + ch; const elem_t ipixel = irow < 0 || irow >= in_dim || icol < 0 || icol >= in_dim ? - 0 : *in; + padding_value : *in; const elem_t weight = *(weights + (ch * kernel_dim + krow) * kernel_dim + kcol); @@ -1918,7 +1921,7 @@ static void conv_dw_cpu_without_pool( static void conv_cpu( int batch_size, int in_dim, int in_channels, int out_channels, int out_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -1935,7 +1938,7 @@ static void conv_cpu( conv_cpu_without_pool( batch_size, in_dim, in_channels, out_channels, out_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, input, weights, bias, output, @@ -1989,7 +1992,7 @@ static void conv_cpu( } elem_t ipixel = irow < 0 || irow >= in_dim || icol < 0 || icol >= in_dim ? - 0 : *in; + padding_value : *in; const int krow_ = wrot180 ? kernel_dim - krow - 1 : krow; const int kcol_ = wrot180 ? kernel_dim - kcol - 1 : kcol; @@ -2035,7 +2038,7 @@ static void conv_cpu( static void conv_dw_cpu( int batch_size, int in_dim, int channels, int out_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, const elem_t * input, const elem_t * weights, @@ -2049,7 +2052,7 @@ static void conv_dw_cpu( if (no_pool) { conv_dw_cpu_without_pool( batch_size, in_dim, channels, out_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, input, weights, bias, output, act, scale); return; @@ -2090,7 +2093,7 @@ static void conv_dw_cpu( const elem_t * in = input + (b * in_dim * in_dim + irow * in_dim + icol) * channels + ch; elem_t ipixel = irow < 0 || irow >= in_dim || icol < 0 || icol >= in_dim ? - 0 : *in; + padding_value : *in; const elem_t weight = *(weights + (ch * kernel_dim + krow) * kernel_dim + kcol); @@ -2122,7 +2125,7 @@ static void conv_dw_cpu( static void tiled_conv( int batch_size, int in_dim, int in_channels, int out_channels, int out_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -2155,7 +2158,7 @@ static void tiled_conv( conv_cpu( batch_size, in_dim, in_channels, out_channels, out_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, input, weights, bias, output, @@ -2333,7 +2336,7 @@ static void tiled_conv( batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, - stride, padding, kernel_dim, kernel_dilation, + stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, @@ -2369,7 +2372,7 @@ static void tiled_conv( static void tiled_conv_dw( int batch_size, int in_dim, int channels, int out_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, int batches, int porows, int pocols, @@ -2392,7 +2395,7 @@ static void tiled_conv_dw( conv_dw_cpu( batch_size, in_dim, channels, out_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, input, weights, bias, output, act, scale, pool_size, pool_stride, pool_padding); @@ -2511,7 +2514,7 @@ static void tiled_conv_dw( batch_size, in_dim, channels, channels, out_dim, pool_out_dim, - stride, padding, kernel_dim, 1, + stride, padding, padding_value, kernel_dim, 1, pool_size, pool_stride, pool_padding, @@ -2547,7 +2550,7 @@ static void tiled_conv_dw( static void tiled_conv_auto( int batch_size, int in_dim, int in_channels, int out_channels, int out_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -2717,7 +2720,7 @@ static void tiled_conv_auto( tiled_conv( batch_size, in_dim, in_channels, out_channels, out_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, @@ -2783,7 +2786,7 @@ static void tiled_conv_downsample( //for mobilenet's depthwise convs static void tiled_conv_dw_auto( int batch_size, int in_dim, int channels, int out_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, elem_t * input, elem_t * weights, @@ -2943,7 +2946,7 @@ static void tiled_conv_dw_auto( tiled_conv_dw( batch_size, in_dim, channels, out_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, batches, orows, ocols, From 1a9ff7e9002bffe02db2aebccbaa24a878fd1107 Mon Sep 17 00:00:00 2001 From: Federico Peccia Date: Thu, 1 Jun 2023 10:51:21 +0200 Subject: [PATCH 2/2] Fix for rectangular convolutions with non zero padding --- include/gemmini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gemmini.h b/include/gemmini.h index 5fffc5f5..c28dafa7 100644 --- a/include/gemmini.h +++ b/include/gemmini.h @@ -1473,7 +1473,7 @@ static void sp_tiled_conv( ichs * (irows >> downsample) * (icols >> downsample) : batches * (irows >> downsample) * (icols >> downsample); - gemmini_extended5_config_ld(dram_stride << downsample, MVIN_SCALE_IDENTITY, false, spad_stride, max_pixels_per_row, 0); + gemmini_extended6_config_ld(dram_stride << downsample, MVIN_SCALE_IDENTITY, false, spad_stride, max_pixels_per_row, padding_value, 0); const int b_it = trans_input_3120 ? max_chs_per_mvin : 1; const int ich_it = trans_input_3120 ? 1 : max_chs_per_mvin;