diff --git a/bareMetalC/conv.c b/bareMetalC/conv.c index 4ace514a..82bb32cd 100644 --- a/bareMetalC/conv.c +++ b/bareMetalC/conv.c @@ -228,7 +228,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_dw.c b/bareMetalC/conv_dw.c index a0b00217..dcfa646a 100644 --- a/bareMetalC/conv_dw.c +++ b/bareMetalC/conv_dw.c @@ -120,7 +120,7 @@ int main() { #ifndef FAST tiled_conv_dw_auto(BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, @@ -140,7 +140,7 @@ int main() { uint64_t start_gemmini = read_cycles(); tiled_conv_dw_auto(BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, diff --git a/bareMetalC/conv_dw_perf.c b/bareMetalC/conv_dw_perf.c index d516cce5..1373d070 100644 --- a/bareMetalC/conv_dw_perf.c +++ b/bareMetalC/conv_dw_perf.c @@ -85,7 +85,7 @@ int main (int argc, char * argv[]) { uint64_t start_gemmini = read_cycles(); tiled_conv_dw_auto(BATCH_SIZE, IN_DIM, IN_DIM, CHANNELS, OUT_DIM, OUT_DIM, - STRIDE, PADDING, KERNEL_DIM, + STRIDE, PADDING, 0, KERNEL_DIM, (elem_t*)input, (elem_t*)weights, diff --git a/bareMetalC/conv_first_layer.c b/bareMetalC/conv_first_layer.c index 47e3c2af..7303d17e 100644 --- a/bareMetalC/conv_first_layer.c +++ b/bareMetalC/conv_first_layer.c @@ -229,7 +229,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_perf.c b/bareMetalC/conv_perf.c index bacb09c4..a1278f66 100644 --- a/bareMetalC/conv_perf.c +++ b/bareMetalC/conv_perf.c @@ -114,7 +114,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_trans_input_3120.c b/bareMetalC/conv_trans_input_3120.c index f909682c..8534816e 100644 --- a/bareMetalC/conv_trans_input_3120.c +++ b/bareMetalC/conv_trans_input_3120.c @@ -163,7 +163,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -183,7 +183,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c b/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c index e6d45f55..86e07853 100644 --- a/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c +++ b/bareMetalC/conv_trans_input_3120_with_kernel_dilation.c @@ -165,7 +165,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, 1, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -185,7 +185,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, 1, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, TRANS_INPUT_3120, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_output_1203.c b/bareMetalC/conv_trans_output_1203.c index 652da5e2..99b2ec6e 100644 --- a/bareMetalC/conv_trans_output_1203.c +++ b/bareMetalC/conv_trans_output_1203.c @@ -160,7 +160,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, false, false, (elem_t*)input, @@ -180,7 +180,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_trans_weight_0132.c b/bareMetalC/conv_trans_weight_0132.c index b5bb31f6..7ff1b72c 100644 --- a/bareMetalC/conv_trans_weight_0132.c +++ b/bareMetalC/conv_trans_weight_0132.c @@ -162,7 +162,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, @@ -182,7 +182,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, TRANS_WEIGHT_0132, (elem_t*)input, diff --git a/bareMetalC/conv_trans_weight_1203.c b/bareMetalC/conv_trans_weight_1203.c index 6d53f944..4564d64a 100644 --- a/bareMetalC/conv_trans_weight_1203.c +++ b/bareMetalC/conv_trans_weight_1203.c @@ -161,7 +161,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, false, (elem_t*)input, @@ -181,7 +181,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, TRANS_OUTPUT_1203, false, TRANS_WEIGHT_1203, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation.c b/bareMetalC/conv_with_input_dilation.c index f667fdff..d144827c 100644 --- a/bareMetalC/conv_with_input_dilation.c +++ b/bareMetalC/conv_with_input_dilation.c @@ -263,7 +263,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation_and_neg_padding.c b/bareMetalC/conv_with_input_dilation_and_neg_padding.c index 2258618d..dafadfa4 100644 --- a/bareMetalC/conv_with_input_dilation_and_neg_padding.c +++ b/bareMetalC/conv_with_input_dilation_and_neg_padding.c @@ -255,7 +255,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_input_dilation_and_rot180.c b/bareMetalC/conv_with_input_dilation_and_rot180.c index c1fbdf3e..b3ff1ce7 100644 --- a/bareMetalC/conv_with_input_dilation_and_rot180.c +++ b/bareMetalC/conv_with_input_dilation_and_rot180.c @@ -269,7 +269,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, WROT180, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_kernel_dilation.c b/bareMetalC/conv_with_kernel_dilation.c index 8ffeab75..d8217eaf 100644 --- a/bareMetalC/conv_with_kernel_dilation.c +++ b/bareMetalC/conv_with_kernel_dilation.c @@ -276,7 +276,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, INPUT_DILATION, KERNEL_DILATION, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, KERNEL_DILATION, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/bareMetalC/conv_with_pool.c b/bareMetalC/conv_with_pool.c index f896c4a6..3efbb226 100644 --- a/bareMetalC/conv_with_pool.c +++ b/bareMetalC/conv_with_pool.c @@ -288,7 +288,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, // 1, diff --git a/bareMetalC/conv_with_rot180.c b/bareMetalC/conv_with_rot180.c index ccbec995..13bb19b1 100644 --- a/bareMetalC/conv_with_rot180.c +++ b/bareMetalC/conv_with_rot180.c @@ -269,7 +269,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_ROW_DIM, IN_COL_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_ROW_DIM, OUT_COL_DIM, - STRIDE, INPUT_DILATION, 1, PADDING, KERNEL_DIM, + STRIDE, INPUT_DILATION, 1, PADDING, 0, KERNEL_DIM, WROT180, false, false, false, false, (elem_t*)input, diff --git a/gemmini-data-collection/templates/conv_template.c b/gemmini-data-collection/templates/conv_template.c index 49df0aa1..de65da02 100644 --- a/gemmini-data-collection/templates/conv_template.c +++ b/gemmini-data-collection/templates/conv_template.c @@ -201,7 +201,7 @@ int main() { tiled_conv_auto( BATCH_SIZE, IN_DIM, IN_CHANNELS, OUT_CHANNELS, OUT_DIM, - STRIDE, 1, 1, PADDING, KERNEL_DIM, + STRIDE, 1, 1, PADDING, 0, KERNEL_DIM, false, false, false, false, false, (elem_t*)input, diff --git a/imagenet/alexnet.c b/imagenet/alexnet.c index 18268397..b2d6ed7b 100644 --- a/imagenet/alexnet.c +++ b/imagenet/alexnet.c @@ -104,7 +104,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_1_params.batch_size, conv_1_params.in_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_dim, - conv_1_params.stride, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, conv_1_params.padding, 0, conv_1_params.kernel_size, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out_pooled, @@ -156,7 +156,7 @@ int main (int argc, char * argv[]) { tiled_conv_auto( conv_2_params.batch_size, conv_2_params.in_dim, conv_2_params.in_channels, conv_2_params.out_channels, conv_2_params.out_dim, - conv_2_params.stride, conv_2_params.padding, conv_2_params.kernel_size, + conv_2_params.stride, conv_2_params.padding, 0, conv_2_params.kernel_size, (elem_t*)conv_1_out_pooled, (elem_t*)conv_2_w, (acc_t*)conv_2_b, (elem_t*)conv_2_out_pooled, diff --git a/imagenet/mobilenet.c b/imagenet/mobilenet.c index a88292be..1f16e1f1 100644 --- a/imagenet/mobilenet.c +++ b/imagenet/mobilenet.c @@ -97,7 +97,7 @@ int main (int argc, char * argv[]) { conv_1_params.batch_size, conv_1_params.in_row_dim, conv_1_params.in_col_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_row_dim, conv_1_params.out_col_dim, - conv_1_params.stride, 1, 1, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, 1, 1, conv_1_params.padding, 0, conv_1_params.kernel_size, false, false, false, false, false, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out, @@ -126,7 +126,7 @@ int main (int argc, char * argv[]) { conv_dw_2_params.batch_size, conv_dw_2_params.in_row_dim, conv_dw_2_params.in_col_dim, conv_dw_2_params.in_channels, conv_dw_2_params.out_row_dim, conv_dw_2_params.out_col_dim, - conv_dw_2_params.stride, conv_dw_2_params.padding, conv_dw_2_params.kernel_size, + conv_dw_2_params.stride, conv_dw_2_params.padding, 0, conv_dw_2_params.kernel_size, (elem_t*)conv_1_out, (elem_t*)conv_dw_2_w, (acc_t*)conv_dw_2_b, (elem_t*)conv_dw_2_out, @@ -206,7 +206,7 @@ int main (int argc, char * argv[]) { conv_dw_5_params.batch_size, conv_dw_5_params.in_row_dim, conv_dw_5_params.in_col_dim, conv_dw_5_params.in_channels, conv_dw_5_params.out_row_dim, conv_dw_5_params.out_col_dim, - conv_dw_5_params.stride, conv_dw_5_params.padding, conv_dw_5_params.kernel_size, + conv_dw_5_params.stride, conv_dw_5_params.padding, 0, conv_dw_5_params.kernel_size, (elem_t*)conv_4_out, (elem_t*)conv_dw_5_w, (acc_t*)conv_dw_5_b, (elem_t*)conv_dw_5_out, @@ -287,7 +287,7 @@ int main (int argc, char * argv[]) { conv_dw_8_params.batch_size, conv_dw_8_params.in_row_dim, conv_dw_8_params.in_col_dim, conv_dw_8_params.in_channels, conv_dw_8_params.out_row_dim, conv_dw_8_params.out_col_dim, - conv_dw_8_params.stride, conv_dw_8_params.padding, conv_dw_8_params.kernel_size, + conv_dw_8_params.stride, conv_dw_8_params.padding, 0, conv_dw_8_params.kernel_size, (elem_t*)conv_7_out, (elem_t*)conv_dw_8_w, (acc_t*)conv_dw_8_b, (elem_t*)conv_dw_8_out, @@ -383,7 +383,7 @@ int main (int argc, char * argv[]) { conv_dw_11_params.batch_size, conv_dw_11_params.in_row_dim, conv_dw_11_params.in_col_dim, conv_dw_11_params.in_channels, conv_dw_11_params.out_row_dim, conv_dw_11_params.out_col_dim, - conv_dw_11_params.stride, conv_dw_11_params.padding, conv_dw_11_params.kernel_size, + conv_dw_11_params.stride, conv_dw_11_params.padding, 0, conv_dw_11_params.kernel_size, (elem_t*)conv_10_out, (elem_t*)conv_dw_11_w, (acc_t*)conv_dw_11_b, (elem_t*)conv_dw_11_out, @@ -463,7 +463,7 @@ int main (int argc, char * argv[]) { conv_dw_14_params.batch_size, conv_dw_14_params.in_row_dim, conv_dw_14_params.in_col_dim, conv_dw_14_params.in_channels, conv_dw_14_params.out_row_dim, conv_dw_14_params.out_col_dim, - conv_dw_14_params.stride, conv_dw_14_params.padding, conv_dw_14_params.kernel_size, + conv_dw_14_params.stride, conv_dw_14_params.padding, 0, conv_dw_14_params.kernel_size, (elem_t*)conv_13_out, (elem_t*)conv_dw_14_w, (acc_t*)conv_dw_14_b, (elem_t*)conv_dw_14_out, @@ -559,7 +559,7 @@ int main (int argc, char * argv[]) { conv_dw_17_params.batch_size, conv_dw_17_params.in_row_dim, conv_dw_17_params.in_col_dim, conv_dw_17_params.in_channels, conv_dw_17_params.out_row_dim, conv_dw_17_params.out_col_dim, - conv_dw_17_params.stride, conv_dw_17_params.padding, conv_dw_17_params.kernel_size, + conv_dw_17_params.stride, conv_dw_17_params.padding, 0, conv_dw_17_params.kernel_size, (elem_t*)conv_16_out, (elem_t*)conv_dw_17_w, (acc_t*)conv_dw_17_b, (elem_t*)conv_dw_17_out, @@ -655,7 +655,7 @@ int main (int argc, char * argv[]) { conv_dw_20_params.batch_size, conv_dw_20_params.in_row_dim, conv_dw_20_params.in_col_dim, conv_dw_20_params.in_channels, conv_dw_20_params.out_row_dim, conv_dw_20_params.out_col_dim, - conv_dw_20_params.stride, conv_dw_20_params.padding, conv_dw_20_params.kernel_size, + conv_dw_20_params.stride, conv_dw_20_params.padding, 0, conv_dw_20_params.kernel_size, (elem_t*)conv_19_out, (elem_t*)conv_dw_20_w, (acc_t*)conv_dw_20_b, (elem_t*)conv_dw_20_out, @@ -734,7 +734,7 @@ int main (int argc, char * argv[]) { conv_dw_23_params.batch_size, conv_dw_23_params.in_row_dim, conv_dw_23_params.in_col_dim, conv_dw_23_params.in_channels, conv_dw_23_params.out_row_dim, conv_dw_23_params.out_col_dim, - conv_dw_23_params.stride, conv_dw_23_params.padding, conv_dw_23_params.kernel_size, + conv_dw_23_params.stride, conv_dw_23_params.padding, 0, conv_dw_23_params.kernel_size, (elem_t*)conv_22_out, (elem_t*)conv_dw_23_w, (acc_t*)conv_dw_23_b, (elem_t*)conv_dw_23_out, @@ -830,7 +830,7 @@ int main (int argc, char * argv[]) { conv_dw_26_params.batch_size, conv_dw_26_params.in_row_dim, conv_dw_26_params.in_col_dim, conv_dw_26_params.in_channels, conv_dw_26_params.out_row_dim, conv_dw_26_params.out_col_dim, - conv_dw_26_params.stride, conv_dw_26_params.padding, conv_dw_26_params.kernel_size, + conv_dw_26_params.stride, conv_dw_26_params.padding, 0, conv_dw_26_params.kernel_size, (elem_t*)conv_25_out, (elem_t*)conv_dw_26_w, (acc_t*)conv_dw_26_b, (elem_t*)conv_dw_26_out, @@ -926,7 +926,7 @@ int main (int argc, char * argv[]) { conv_dw_29_params.batch_size, conv_dw_29_params.in_row_dim, conv_dw_29_params.in_col_dim, conv_dw_29_params.in_channels, conv_dw_29_params.out_row_dim, conv_dw_29_params.out_col_dim, - conv_dw_29_params.stride, conv_dw_29_params.padding, conv_dw_29_params.kernel_size, + conv_dw_29_params.stride, conv_dw_29_params.padding, 0, conv_dw_29_params.kernel_size, (elem_t*)conv_28_out, (elem_t*)conv_dw_29_w, (acc_t*)conv_dw_29_b, (elem_t*)conv_dw_29_out, @@ -1022,7 +1022,7 @@ int main (int argc, char * argv[]) { conv_dw_32_params.batch_size, conv_dw_32_params.in_row_dim, conv_dw_32_params.in_col_dim, conv_dw_32_params.in_channels, conv_dw_32_params.out_row_dim, conv_dw_32_params.out_col_dim, - conv_dw_32_params.stride, conv_dw_32_params.padding, conv_dw_32_params.kernel_size, + conv_dw_32_params.stride, conv_dw_32_params.padding, 0, conv_dw_32_params.kernel_size, (elem_t*)conv_31_out, (elem_t*)conv_dw_32_w, (acc_t*)conv_dw_32_b, (elem_t*)conv_dw_32_out, @@ -1102,7 +1102,7 @@ int main (int argc, char * argv[]) { conv_dw_35_params.batch_size, conv_dw_35_params.in_row_dim, conv_dw_35_params.in_col_dim, conv_dw_35_params.in_channels, conv_dw_35_params.out_row_dim, conv_dw_35_params.out_col_dim, - conv_dw_35_params.stride, conv_dw_35_params.padding, conv_dw_35_params.kernel_size, + conv_dw_35_params.stride, conv_dw_35_params.padding, 0, conv_dw_35_params.kernel_size, (elem_t*)conv_34_out, (elem_t*)conv_dw_35_w, (acc_t*)conv_dw_35_b, (elem_t*)conv_dw_35_out, @@ -1198,7 +1198,7 @@ int main (int argc, char * argv[]) { conv_dw_38_params.batch_size, conv_dw_38_params.in_row_dim, conv_dw_38_params.in_col_dim, conv_dw_38_params.in_channels, conv_dw_38_params.out_row_dim, conv_dw_38_params.out_col_dim, - conv_dw_38_params.stride, conv_dw_38_params.padding, conv_dw_38_params.kernel_size, + conv_dw_38_params.stride, conv_dw_38_params.padding, 0, conv_dw_38_params.kernel_size, (elem_t*)conv_37_out, (elem_t*)conv_dw_38_w, (acc_t*)conv_dw_38_b, (elem_t*)conv_dw_38_out, @@ -1294,7 +1294,7 @@ int main (int argc, char * argv[]) { conv_dw_41_params.batch_size, conv_dw_41_params.in_row_dim, conv_dw_41_params.in_col_dim, conv_dw_41_params.in_channels, conv_dw_41_params.out_row_dim, conv_dw_41_params.out_col_dim, - conv_dw_41_params.stride, conv_dw_41_params.padding, conv_dw_41_params.kernel_size, + conv_dw_41_params.stride, conv_dw_41_params.padding, 0, conv_dw_41_params.kernel_size, (elem_t*)conv_40_out, (elem_t*)conv_dw_41_w, (acc_t*)conv_dw_41_b, (elem_t*)conv_dw_41_out, @@ -1374,7 +1374,7 @@ int main (int argc, char * argv[]) { conv_dw_44_params.batch_size, conv_dw_44_params.in_row_dim, conv_dw_44_params.in_col_dim, conv_dw_44_params.in_channels, conv_dw_44_params.out_row_dim, conv_dw_44_params.out_col_dim, - conv_dw_44_params.stride, conv_dw_44_params.padding, conv_dw_44_params.kernel_size, + conv_dw_44_params.stride, conv_dw_44_params.padding, 0, conv_dw_44_params.kernel_size, (elem_t*)conv_43_out, (elem_t*)conv_dw_44_w, (acc_t*)conv_dw_44_b, (elem_t*)conv_dw_44_out, @@ -1470,7 +1470,7 @@ int main (int argc, char * argv[]) { conv_dw_47_params.batch_size, conv_dw_47_params.in_row_dim, conv_dw_47_params.in_col_dim, conv_dw_47_params.in_channels, conv_dw_47_params.out_row_dim, conv_dw_47_params.out_col_dim, - conv_dw_47_params.stride, conv_dw_47_params.padding, conv_dw_47_params.kernel_size, + conv_dw_47_params.stride, conv_dw_47_params.padding, 0, conv_dw_47_params.kernel_size, (elem_t*)conv_46_out, (elem_t*)conv_dw_47_w, (acc_t*)conv_dw_47_b, (elem_t*)conv_dw_47_out, @@ -1566,7 +1566,7 @@ int main (int argc, char * argv[]) { conv_dw_50_params.batch_size, conv_dw_50_params.in_row_dim, conv_dw_50_params.in_col_dim, conv_dw_50_params.in_channels, conv_dw_50_params.out_row_dim, conv_dw_50_params.out_col_dim, - conv_dw_50_params.stride, conv_dw_50_params.padding, conv_dw_50_params.kernel_size, + conv_dw_50_params.stride, conv_dw_50_params.padding, 0, conv_dw_50_params.kernel_size, (elem_t*)conv_49_out, (elem_t*)conv_dw_50_w, (acc_t*)conv_dw_50_b, (elem_t*)conv_dw_50_out, diff --git a/imagenet/resnet50.c b/imagenet/resnet50.c index 4bbd7849..f92e6400 100644 --- a/imagenet/resnet50.c +++ b/imagenet/resnet50.c @@ -109,7 +109,7 @@ int main (int argc, char * argv[]) { conv_1_params.batch_size, conv_1_params.in_row_dim, conv_1_params.in_col_dim, conv_1_params.in_channels, conv_1_params.out_channels, conv_1_params.out_row_dim, conv_1_params.out_col_dim, - conv_1_params.stride, 1, 1, conv_1_params.padding, conv_1_params.kernel_size, + conv_1_params.stride, 1, 1, conv_1_params.padding, 0, conv_1_params.kernel_size, false, false, false, false, false, (elem_t*)images, (elem_t*)conv_1_w, (acc_t*)conv_1_b, (elem_t*)conv_1_out_pooled, @@ -187,7 +187,7 @@ int main (int argc, char * argv[]) { conv_3_params.batch_size, conv_3_params.in_row_dim, conv_3_params.in_col_dim, conv_3_params.in_channels, conv_3_params.out_channels, conv_3_params.out_row_dim, conv_3_params.out_col_dim, - conv_3_params.stride, 1, 1, conv_3_params.padding, conv_3_params.kernel_size, + conv_3_params.stride, 1, 1, conv_3_params.padding, 0, conv_3_params.kernel_size, false, false, false, false, false, (elem_t*)conv_2_out, (elem_t*)conv_3_w, (acc_t*)conv_3_b, (elem_t*)conv_3_out, @@ -332,7 +332,7 @@ int main (int argc, char * argv[]) { conv_7_params.batch_size, conv_7_params.in_row_dim, conv_7_params.in_col_dim, conv_7_params.in_channels, conv_7_params.out_channels, conv_7_params.out_row_dim, conv_7_params.out_col_dim, - conv_7_params.stride, 1, 1, conv_7_params.padding, conv_7_params.kernel_size, + conv_7_params.stride, 1, 1, conv_7_params.padding, 0, conv_7_params.kernel_size, false, false, false, false, false, (elem_t*)conv_6_out, (elem_t*)conv_7_w, (acc_t*)conv_7_b, (elem_t*)conv_7_out, @@ -441,7 +441,7 @@ int main (int argc, char * argv[]) { conv_10_params.batch_size, conv_10_params.in_row_dim, conv_10_params.in_col_dim, conv_10_params.in_channels, conv_10_params.out_channels, conv_10_params.out_row_dim, conv_10_params.out_col_dim, - conv_10_params.stride, 1, 1, conv_10_params.padding, conv_10_params.kernel_size, + conv_10_params.stride, 1, 1, conv_10_params.padding, 0, conv_10_params.kernel_size, false, false, false, false, false, (elem_t*)conv_9_out, (elem_t*)conv_10_w, (acc_t*)conv_10_b, (elem_t*)conv_10_out, @@ -550,7 +550,7 @@ int main (int argc, char * argv[]) { conv_13_params.batch_size, conv_13_params.in_row_dim, conv_13_params.in_col_dim, conv_13_params.in_channels, conv_13_params.out_channels, conv_13_params.out_row_dim, conv_13_params.out_col_dim, - conv_13_params.stride, 1, 1, conv_13_params.padding, conv_13_params.kernel_size, + conv_13_params.stride, 1, 1, conv_13_params.padding, 0, conv_13_params.kernel_size, false, false, false, false, false, (elem_t*)conv_12_out, (elem_t*)conv_13_w, (acc_t*)conv_13_b, (elem_t*)conv_13_out, @@ -620,7 +620,7 @@ int main (int argc, char * argv[]) { conv_15_params.batch_size, conv_15_params.in_row_dim, conv_15_params.in_col_dim, conv_15_params.in_channels, conv_15_params.out_channels, conv_15_params.out_row_dim, conv_15_params.out_col_dim, - // conv_15_params.stride, 1, 1, conv_15_params.padding, conv_15_params.kernel_size, + // conv_15_params.stride, 1, 1, conv_15_params.padding, 0, conv_15_params.kernel_size, // false, false, false, false, false, (elem_t*)conv_11_out, (elem_t*)conv_15_w, (acc_t*)conv_15_b, (elem_t*)conv_15_out, @@ -704,7 +704,7 @@ int main (int argc, char * argv[]) { conv_17_params.batch_size, conv_17_params.in_row_dim, conv_17_params.in_col_dim, conv_17_params.in_channels, conv_17_params.out_channels, conv_17_params.out_row_dim, conv_17_params.out_col_dim, - conv_17_params.stride, 1, 1, conv_17_params.padding, conv_17_params.kernel_size, + conv_17_params.stride, 1, 1, conv_17_params.padding, 0, conv_17_params.kernel_size, false, false, false, false, false, (elem_t*)conv_16_out, (elem_t*)conv_17_w, (acc_t*)conv_17_b, (elem_t*)conv_17_out, @@ -813,7 +813,7 @@ int main (int argc, char * argv[]) { conv_20_params.batch_size, conv_20_params.in_row_dim, conv_20_params.in_col_dim, conv_20_params.in_channels, conv_20_params.out_channels, conv_20_params.out_row_dim, conv_20_params.out_col_dim, - conv_20_params.stride, 1, 1, conv_20_params.padding, conv_20_params.kernel_size, + conv_20_params.stride, 1, 1, conv_20_params.padding, 0, conv_20_params.kernel_size, false, false, false, false, false, (elem_t*)conv_19_out, (elem_t*)conv_20_w, (acc_t*)conv_20_b, (elem_t*)conv_20_out, @@ -922,7 +922,7 @@ int main (int argc, char * argv[]) { conv_23_params.batch_size, conv_23_params.in_row_dim, conv_23_params.in_col_dim, conv_23_params.in_channels, conv_23_params.out_channels, conv_23_params.out_row_dim, conv_23_params.out_col_dim, - conv_23_params.stride, 1, 1, conv_23_params.padding, conv_23_params.kernel_size, + conv_23_params.stride, 1, 1, conv_23_params.padding, 0, conv_23_params.kernel_size, false, false, false, false, false, (elem_t*)conv_22_out, (elem_t*)conv_23_w, (acc_t*)conv_23_b, (elem_t*)conv_23_out, @@ -1031,7 +1031,7 @@ int main (int argc, char * argv[]) { conv_26_params.batch_size, conv_26_params.in_row_dim, conv_26_params.in_col_dim, conv_26_params.in_channels, conv_26_params.out_channels, conv_26_params.out_row_dim, conv_26_params.out_col_dim, - conv_26_params.stride, 1, 1, conv_26_params.padding, conv_26_params.kernel_size, + conv_26_params.stride, 1, 1, conv_26_params.padding, 0, conv_26_params.kernel_size, false, false, false, false, false, (elem_t*)conv_25_out, (elem_t*)conv_26_w, (acc_t*)conv_26_b, (elem_t*)conv_26_out, @@ -1101,7 +1101,7 @@ int main (int argc, char * argv[]) { conv_28_params.batch_size, conv_28_params.in_row_dim, conv_28_params.in_col_dim, conv_28_params.in_channels, conv_28_params.out_channels, conv_28_params.out_row_dim, conv_28_params.out_col_dim, - // conv_28_params.stride, 1, 1, conv_28_params.padding, conv_28_params.kernel_size, + // conv_28_params.stride, 1, 1, conv_28_params.padding, 0, conv_28_params.kernel_size, // false, false, false, false, false, (elem_t*)conv_24_out, (elem_t*)conv_28_w, (acc_t*)conv_28_b, (elem_t*)conv_28_out, @@ -1185,7 +1185,7 @@ int main (int argc, char * argv[]) { conv_30_params.batch_size, conv_30_params.in_row_dim, conv_30_params.in_col_dim, conv_30_params.in_channels, conv_30_params.out_channels, conv_30_params.out_row_dim, conv_30_params.out_col_dim, - conv_30_params.stride, 1, 1, conv_30_params.padding, conv_30_params.kernel_size, + conv_30_params.stride, 1, 1, conv_30_params.padding, 0, conv_30_params.kernel_size, false, false, false, false, false, (elem_t*)conv_29_out, (elem_t*)conv_30_w, (acc_t*)conv_30_b, (elem_t*)conv_30_out, @@ -1294,7 +1294,7 @@ int main (int argc, char * argv[]) { conv_33_params.batch_size, conv_33_params.in_row_dim, conv_33_params.in_col_dim, conv_33_params.in_channels, conv_33_params.out_channels, conv_33_params.out_row_dim, conv_33_params.out_col_dim, - conv_33_params.stride, 1, 1, conv_33_params.padding, conv_33_params.kernel_size, + conv_33_params.stride, 1, 1, conv_33_params.padding, 0, conv_33_params.kernel_size, false, false, false, false, false, (elem_t*)conv_32_out, (elem_t*)conv_33_w, (acc_t*)conv_33_b, (elem_t*)conv_33_out, @@ -1403,7 +1403,7 @@ int main (int argc, char * argv[]) { conv_36_params.batch_size, conv_36_params.in_row_dim, conv_36_params.in_col_dim, conv_36_params.in_channels, conv_36_params.out_channels, conv_36_params.out_row_dim, conv_36_params.out_col_dim, - conv_36_params.stride, 1, 1, conv_36_params.padding, conv_36_params.kernel_size, + conv_36_params.stride, 1, 1, conv_36_params.padding, 0, conv_36_params.kernel_size, false, false, false, false, false, (elem_t*)conv_35_out, (elem_t*)conv_36_w, (acc_t*)conv_36_b, (elem_t*)conv_36_out, @@ -1512,7 +1512,7 @@ int main (int argc, char * argv[]) { conv_39_params.batch_size, conv_39_params.in_row_dim, conv_39_params.in_col_dim, conv_39_params.in_channels, conv_39_params.out_channels, conv_39_params.out_row_dim, conv_39_params.out_col_dim, - conv_39_params.stride, 1, 1, conv_39_params.padding, conv_39_params.kernel_size, + conv_39_params.stride, 1, 1, conv_39_params.padding, 0, conv_39_params.kernel_size, false, false, false, false, false, (elem_t*)conv_38_out, (elem_t*)conv_39_w, (acc_t*)conv_39_b, (elem_t*)conv_39_out, @@ -1621,7 +1621,7 @@ int main (int argc, char * argv[]) { conv_42_params.batch_size, conv_42_params.in_row_dim, conv_42_params.in_col_dim, conv_42_params.in_channels, conv_42_params.out_channels, conv_42_params.out_row_dim, conv_42_params.out_col_dim, - conv_42_params.stride, 1, 1, conv_42_params.padding, conv_42_params.kernel_size, + conv_42_params.stride, 1, 1, conv_42_params.padding, 0, conv_42_params.kernel_size, false, false, false, false, false, (elem_t*)conv_41_out, (elem_t*)conv_42_w, (acc_t*)conv_42_b, (elem_t*)conv_42_out, @@ -1730,7 +1730,7 @@ int main (int argc, char * argv[]) { conv_45_params.batch_size, conv_45_params.in_row_dim, conv_45_params.in_col_dim, conv_45_params.in_channels, conv_45_params.out_channels, conv_45_params.out_row_dim, conv_45_params.out_col_dim, - conv_45_params.stride, 1, 1, conv_45_params.padding, conv_45_params.kernel_size, + conv_45_params.stride, 1, 1, conv_45_params.padding, 0, conv_45_params.kernel_size, false, false, false, false, false, (elem_t*)conv_44_out, (elem_t*)conv_45_w, (acc_t*)conv_45_b, (elem_t*)conv_45_out, @@ -1799,7 +1799,7 @@ int main (int argc, char * argv[]) { conv_47_params.batch_size, conv_47_params.in_row_dim, conv_47_params.in_col_dim, conv_47_params.in_channels, conv_47_params.out_channels, conv_47_params.out_row_dim, conv_47_params.out_col_dim, - conv_47_params.stride, 1, 1, conv_47_params.padding, conv_47_params.kernel_size, + conv_47_params.stride, 1, 1, conv_47_params.padding, 0, conv_47_params.kernel_size, false, false, false, false, false, (elem_t*)conv_43_out, (elem_t*)conv_47_w, (acc_t*)conv_47_b, (elem_t*)conv_47_out, @@ -1883,7 +1883,7 @@ int main (int argc, char * argv[]) { conv_49_params.batch_size, conv_49_params.in_row_dim, conv_49_params.in_col_dim, conv_49_params.in_channels, conv_49_params.out_channels, conv_49_params.out_row_dim, conv_49_params.out_col_dim, - conv_49_params.stride, 1, 1, conv_49_params.padding, conv_49_params.kernel_size, + conv_49_params.stride, 1, 1, conv_49_params.padding, 0, conv_49_params.kernel_size, false, false, false, false, false, (elem_t*)conv_48_out, (elem_t*)conv_49_w, (acc_t*)conv_49_b, (elem_t*)conv_49_out, @@ -1992,7 +1992,7 @@ int main (int argc, char * argv[]) { conv_52_params.batch_size, conv_52_params.in_row_dim, conv_52_params.in_col_dim, conv_52_params.in_channels, conv_52_params.out_channels, conv_52_params.out_row_dim, conv_52_params.out_col_dim, - conv_52_params.stride, 1, 1, conv_52_params.padding, conv_52_params.kernel_size, + conv_52_params.stride, 1, 1, conv_52_params.padding, 0, conv_52_params.kernel_size, false, false, false, false, false, (elem_t*)conv_51_out, (elem_t*)conv_52_w, (acc_t*)conv_52_b, (elem_t*)conv_52_out, diff --git a/include/gemmini.h b/include/gemmini.h index 76b50a48..c28dafa7 100644 --- a/include/gemmini.h +++ b/include/gemmini.h @@ -254,8 +254,11 @@ static acc_scale_t_bits acc_scale_t_to_acc_scale_t_bits(acc_scale_t x) { // Note: The "pixel_repeats" parameter below is still experimental, andthere is // a high chance that it will be removed in future releases. +#define gemmini_extended6_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, padding_value, id) \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(scale_t_to_scale_t_bits(scale)) << 32) | ((uint64_t)(block_mvin_stride) << 16) | ((uint64_t)(pixel_repeats) << 8) | ((id) << 3) | ((shrunk) << 2) | CONFIG_LD, ((uint64_t)(padding_value) << 32) | stride, k_CONFIG) + #define gemmini_extended5_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, id) \ - ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(scale_t_to_scale_t_bits(scale)) << 32) | ((uint64_t)(block_mvin_stride) << 16) | ((uint64_t)(pixel_repeats) << 8) | ((id) << 3) | ((shrunk) << 2) | CONFIG_LD, stride, k_CONFIG) + gemmini_extended6_config_ld(stride, scale, shrunk, block_mvin_stride, pixel_repeats, 0, id) \ #define gemmini_extended4_config_ld(stride, scale, shrunk, block_mvin_stride, id) \ gemmini_extended5_config_ld(stride, scale, shrunk, block_mvin_stride, 1, id) \ @@ -351,7 +354,7 @@ static void counter_reset() { } // weight-stationary conv loop -#define gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, activation, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw) \ +#define gemmini_loop_conv_ws(batch_size, in_dim, in_channels, out_channels, out_dim, pool_out_dim, stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, activation, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw) \ { \ ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(out_channels) << 48) | ((uint64_t)(in_channels) << 32) | ((uint64_t)(in_dim) << 16) | (uint64_t)(batch_size), \ ((uint64_t)(padding) << 48) | ((uint64_t)(stride) << 32) | ((uint64_t)(pool_out_dim) << 16) | (uint64_t)(out_dim), k_LOOP_CONV_WS_CONFIG_1) \ @@ -365,7 +368,7 @@ static void counter_reset() { output, k_LOOP_CONV_WS_CONFIG_5) \ ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, bias, \ input, k_LOOP_CONV_WS_CONFIG_6) \ - ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(max_pixels_per_row) << 8) | ((dw) << 6) | ((trans_input_3120) << 5) | ((trans_weight_0132) << 4) | ((trans_weight_1203) << 3) | ((trans_output_1203) << 2) | ((wrot180) << 1) | (no_bias), \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(padding_value) << 48) | ((uint64_t)(max_pixels_per_row) << 8) | ((dw) << 6) | ((trans_input_3120) << 5) | ((trans_weight_0132) << 4) | ((trans_weight_1203) << 3) | ((trans_output_1203) << 2) | ((wrot180) << 1) | (no_bias), \ ((activation) << 3)| ((input_dilated) << 2) | ((downsample) << 1) | (no_pool), \ k_LOOP_CONV_WS) \ } @@ -1328,7 +1331,7 @@ static void sp_tiled_conv( int out_channels, int out_row_dim, int out_col_dim, int pool_out_row_dim, int pool_out_col_dim, - int stride, int padding, int kernel_dim, int kernel_dilation, + int stride, int padding, elem_t padding_value, int kernel_dim, int kernel_dilation, int pool_size, int pool_stride, int pool_padding, @@ -1414,7 +1417,7 @@ static void sp_tiled_conv( } if (in_row_dim == in_col_dim && out_row_dim == out_col_dim && pool_out_row_dim == pool_out_col_dim) { - gemmini_loop_conv_ws(batch_size, in_row_dim, in_channels, out_channels, out_row_dim, pool_out_row_dim, stride, padding, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, act, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw); + gemmini_loop_conv_ws(batch_size, in_row_dim, in_channels, out_channels, out_row_dim, pool_out_row_dim, stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, batches, porows, pocols, pochs, krows, kcols, kchs, lpad, rpad, upad, dpad, plpad, prpad, pupad, pdpad, orows, ocols, weights, output, bias, input, no_bias, no_pool, downsample, wrot180, input_dilated, act, trans_output_1203, trans_weight_1203, trans_weight_0132, trans_input_3120, max_pixels_per_row, dw); return; } @@ -1470,7 +1473,7 @@ static void sp_tiled_conv( ichs * (irows >> downsample) * (icols >> downsample) : batches * (irows >> downsample) * (icols >> downsample); - gemmini_extended5_config_ld(dram_stride << downsample, MVIN_SCALE_IDENTITY, false, spad_stride, max_pixels_per_row, 0); + gemmini_extended6_config_ld(dram_stride << downsample, MVIN_SCALE_IDENTITY, false, spad_stride, max_pixels_per_row, padding_value, 0); const int b_it = trans_input_3120 ? max_chs_per_mvin : 1; const int ich_it = trans_input_3120 ? 1 : max_chs_per_mvin; @@ -1810,7 +1813,7 @@ static int tiled_conv_total_spad_rows(bool acc, static void conv_cpu_without_pool( int batch_size, int in_row_dim, int in_col_dim, int in_channels, int out_channels, int out_row_dim, int out_col_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -1850,7 +1853,7 @@ static void conv_cpu_without_pool( } elem_t ipixel = irow < 0 || irow >= in_row_dim || icol < 0 || icol >= in_col_dim ? - 0 : *in; + padding_value : *in; const int krow_ = wrot180 ? kernel_dim - krow - 1 : krow; const int kcol_ = wrot180 ? kernel_dim - kcol - 1 : kcol; @@ -1886,7 +1889,7 @@ static void conv_cpu_without_pool( static void conv_dw_cpu_without_pool( int batch_size, int in_row_dim, int in_col_dim, int channels, int out_row_dim, int out_col_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, const elem_t * input, const elem_t * weights, @@ -1912,7 +1915,7 @@ static void conv_dw_cpu_without_pool( const elem_t * in = input + (b * in_row_dim * in_col_dim + irow * in_col_dim + icol) * channels + ch; const elem_t ipixel = irow < 0 || irow >= in_row_dim || icol < 0 || icol >= in_col_dim ? - 0 : *in; + padding_value : *in; const elem_t weight = *(weights + (ch * kernel_dim + krow) * kernel_dim + kcol); @@ -1933,7 +1936,7 @@ static void conv_dw_cpu_without_pool( static void conv_cpu( int batch_size, int in_row_dim, int in_col_dim, int in_channels, int out_channels, int out_row_dim, int out_col_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -1950,7 +1953,7 @@ static void conv_cpu( conv_cpu_without_pool( batch_size, in_row_dim, in_col_dim, in_channels, out_channels, out_row_dim, out_col_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, input, weights, bias, output, @@ -2005,7 +2008,7 @@ static void conv_cpu( } elem_t ipixel = irow < 0 || irow >= in_row_dim || icol < 0 || icol >= in_col_dim ? - 0 : *in; + padding_value : *in; const int krow_ = wrot180 ? kernel_dim - krow - 1 : krow; const int kcol_ = wrot180 ? kernel_dim - kcol - 1 : kcol; @@ -2052,7 +2055,7 @@ static void conv_cpu( static void conv_dw_cpu( int batch_size, int in_row_dim, int in_col_dim, int channels, int out_row_dim, int out_col_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, const elem_t * input, const elem_t * weights, @@ -2067,7 +2070,7 @@ static void conv_dw_cpu( conv_dw_cpu_without_pool( batch_size, in_row_dim, in_col_dim, channels, out_row_dim, out_col_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, input, weights, bias, output, act, scale); return; @@ -2109,7 +2112,7 @@ static void conv_dw_cpu( const elem_t * in = input + (b * in_row_dim * in_col_dim + irow * in_col_dim + icol) * channels + ch; elem_t ipixel = irow < 0 || irow >= in_row_dim || icol < 0 || icol >= in_col_dim ? - 0 : *in; + padding_value : *in; const elem_t weight = *(weights + (ch * kernel_dim + krow) * kernel_dim + kcol); @@ -2142,7 +2145,7 @@ static void tiled_conv( int batch_size, int in_row_dim, int in_col_dim, int in_channels, int out_channels, int out_row_dim, int out_col_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -2177,7 +2180,7 @@ static void tiled_conv( conv_cpu( batch_size, in_row_dim, in_col_dim, in_channels, out_channels, out_row_dim, out_col_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, input, weights, bias, output, @@ -2358,7 +2361,7 @@ static void tiled_conv( out_channels, out_row_dim, out_col_dim, pool_out_row_dim, pool_out_col_dim, - stride, padding, kernel_dim, kernel_dilation, + stride, padding, padding_value, kernel_dim, kernel_dilation, pool_size, pool_stride, pool_padding, @@ -2395,7 +2398,7 @@ static void tiled_conv( static void tiled_conv_dw( int batch_size, int in_row_dim, int in_col_dim, int channels, int out_row_dim, int out_col_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, int batches, int porows, int pocols, @@ -2419,7 +2422,7 @@ static void tiled_conv_dw( conv_dw_cpu( batch_size, in_row_dim, in_col_dim, channels, out_row_dim, out_col_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, input, weights, bias, output, act, scale, pool_size, pool_stride, pool_padding); @@ -2540,7 +2543,7 @@ static void tiled_conv_dw( channels, out_row_dim, out_col_dim, pool_out_row_dim, pool_out_col_dim, - stride, padding, kernel_dim, 1, + stride, padding, padding_value, kernel_dim, 1, pool_size, pool_stride, pool_padding, @@ -2576,7 +2579,7 @@ static void tiled_conv_dw( static void tiled_conv_auto( int batch_size, int in_row_dim, int in_col_dim, int in_channels, int out_channels, int out_row_dim, int out_col_dim, - int stride, int input_dilation, int kernel_dilation, int padding, int kernel_dim, + int stride, int input_dilation, int kernel_dilation, int padding, elem_t padding_value, int kernel_dim, bool wrot180, bool trans_output_1203, bool trans_input_3120, bool trans_weight_1203, bool trans_weight_0132, @@ -2747,7 +2750,7 @@ static void tiled_conv_auto( tiled_conv( batch_size, in_row_dim, in_col_dim, in_channels, out_channels, out_row_dim, out_col_dim, - stride, input_dilation, kernel_dilation, padding, kernel_dim, + stride, input_dilation, kernel_dilation, padding, padding_value, kernel_dim, wrot180, trans_output_1203, trans_input_3120, trans_weight_1203, trans_weight_0132, @@ -2823,7 +2826,7 @@ static void tiled_conv_downsample( static void tiled_conv_dw_auto( int batch_size, int in_row_dim, int in_col_dim, int channels, int out_row_dim, int out_col_dim, - int stride, int padding, int kernel_dim, + int stride, int padding, elem_t padding_value, int kernel_dim, elem_t * input, elem_t * weights, @@ -2985,7 +2988,7 @@ static void tiled_conv_dw_auto( tiled_conv_dw( batch_size, in_row_dim, in_col_dim, channels, out_row_dim, out_col_dim, - stride, padding, kernel_dim, + stride, padding, padding_value, kernel_dim, batches, orows, ocols,