diff --git a/aha/util/application_parameters.json b/aha/util/application_parameters.json index 17382d42..fdf5b135 100644 --- a/aha/util/application_parameters.json +++ b/aha/util/application_parameters.json @@ -880,7 +880,7 @@ }, "apps/InvRes3_dw_bias_relu6_fp":{ "default":{ - "HALIDE_GEN_ARGS":"out_img=46 n_oc=120 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=6", + "HALIDE_GEN_ARGS":"out_img=46 n_oc=120 unroll=4 pad_o_left=0 pad_o_right=2 trunc_size=8", "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", "DISABLE_GP":"1", "USE_GLB_BANK_CONFIG":"1", @@ -889,5 +889,281 @@ "HW_OUTPUT_STENCIL_POS":"24, 26, 28, 30", "GLB_INPUTS":"" } + }, + "apps/InvRes3_pw_sq_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=48 ksize=1 stride=1 n_ic=120 n_oc=24 k_ic=4 k_oc=24 glb_i=4 glb_k=4 glb_o=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"24, 26, 28, 30", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes3_skip_padding_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=44 n_oc=24 unroll=4 pad_o_left=0 pad_o_right=4 trunc_size=0 use_relu6=0", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_OUTPUT_STENCIL_POS":"0, 2, 4, 6", + "GLB_INPUTS":"" + } + }, + "apps/InvRes3_pw_sq_bias_skip_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=48 n_oc=24 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=8", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"16, 18, 20, 22", + "HW_BIAS_STENCIL_POS":"8, 10, 12, 14", + "HW_SKIP_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"24, 26, 28, 30", + "GLB_INPUTS":"" + } + }, + "apps/InvRes4_pw_exp_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=48 ksize=1 stride=1 n_ic=24 n_oc=96 k_ic=8 k_oc=8 glb_i=4 glb_k=4 glb_o=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"24, 26, 28, 30", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes4_pw_exp_bias_relu6_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=48 n_oc=96 unroll=4 pad_o_left=2 pad_o_right=2 trunc_size=10", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } + }, + "apps/InvRes4_dw_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=52 ksize=5 stride=1 n_ic=4 unroll=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18, 20, 22", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes4_dw_stream_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=52 ksize=5 stride=1 n_ic=4 unroll=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18, 20, 22", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes4_dw_bias_relu6_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=48 n_oc=96 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=8", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } + }, + "apps/InvRes4_pw_sq_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=48 ksize=1 stride=1 n_ic=96 n_oc=24 k_ic=4 k_oc=24 glb_i=4 glb_k=4 glb_o=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18, 20, 22", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes4_pw_sq_bias_skip_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=48 n_oc=24 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=8", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_SKIP_STENCIL_POS":"24, 26, 28, 30", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } + }, + "apps/InvRes5_pw_exp_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=48 ksize=1 stride=1 n_ic=24 n_oc=120 k_ic=8 k_oc=8 glb_i=4 glb_k=4 glb_o=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18, 20, 22", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_pw_exp_bias_relu6_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=48 n_oc=120 unroll=4 pad_o_left=3 pad_o_right=0 trunc_size=8", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } + }, + "apps/InvRes5_dw_1_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=51 ksize=7 stride=2 n_ic=2 unroll=2", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18", + "HW_OUTPUT_STENCIL_POS":"8, 10", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_dw_1_stream_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=51 ksize=7 stride=2 n_ic=2 unroll=2", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18", + "KERNEL_HOST_STENCIL_POS":"0, 2", + "HW_OUTPUT_STENCIL_POS":"8, 10", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_dw_2_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=51 ksize=7 stride=2 n_ic=2 unroll=2", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"20, 22", + "HW_OUTPUT_STENCIL_POS":"12, 14", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_dw_2_stream_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=51 ksize=7 stride=2 n_ic=2 unroll=2", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"20, 22", + "KERNEL_HOST_STENCIL_POS":"4, 6", + "HW_OUTPUT_STENCIL_POS":"12, 14", + "NUM_GLB_TILING":"1", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_dw_bias_relu6_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=23 n_oc=120 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=3", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } + }, + "apps/InvRes5_pw_sq_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"in_img=23 ksize=1 stride=1 n_ic=120 n_oc=40 k_ic=4 k_oc=40 glb_i=4 glb_k=4 glb_o=4", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "INPUT_HOST_STENCIL_POS":"16, 18, 20, 22", + "KERNEL_HOST_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"8, 10, 12, 14", + "GLB_INPUTS":"" + }, + "fastest":{ + }, + "sweep":{ + } + }, + "apps/InvRes5_pw_sq_bias_fp":{ + "default":{ + "HALIDE_GEN_ARGS":"out_img=23 n_oc=40 unroll=4 pad_o_left=0 pad_o_right=0 trunc_size=3 use_relu6=0", + "HL_TARGET":"host-x86-64-enable_ponds-bfloat_hardware", + "DISABLE_GP":"1", + "USE_GLB_BANK_CONFIG":"1", + "HW_INPUT_STENCIL_POS":"8, 10, 12, 14", + "HW_BIAS_STENCIL_POS":"0, 2, 4, 6", + "HW_OUTPUT_STENCIL_POS":"16, 18, 20, 22", + "GLB_INPUTS":"" + } } } diff --git a/aha/util/regress.py b/aha/util/regress.py index af4ad138..963cdc21 100644 --- a/aha/util/regress.py +++ b/aha/util/regress.py @@ -248,13 +248,48 @@ def dispatch(args, extra_args=None): # "apps/glb_exchange", ] glb_tests_fp = [ - "apps/sequential_0_fp", - # "apps/pointwise_fp", + # "apps/sequential_0_fp", + # "apps/sequential_0_bias_relu6_fp", + # "apps/InvRes1_dw_bias_relu6_fp", + # "apps/InvRes1_pw_fp", + # "apps/InvRes1_pw_bias_fp", + # "apps/InvRes2_pw_exp_fp", + # "apps/InvRes2_pw_exp_bias_relu6_fp", + # "apps/InvRes2_dw_bias_relu6_fp", + # "apps/InvRes2_pw_sq_fp", + # "apps/InvRes2_pw_sq_bias_fp", + # "apps/InvRes3_pw_exp_fp", + # "apps/InvRes3_pw_exp_bias_relu6_fp", + # "apps/InvRes3_dw_bias_relu6_fp", + # "apps/InvRes3_pw_sq_fp", + # "apps/InvRes3_skip_padding_fp", + # "apps/InvRes3_pw_sq_bias_skip_fp", + # "apps/InvRes4_pw_exp_fp", + # "apps/InvRes4_pw_exp_bias_relu6_fp", + # "apps/InvRes4_dw_bias_relu6_fp", + # "apps/InvRes4_pw_sq_fp", + # "apps/InvRes4_pw_sq_bias_skip_fp", + # "apps/InvRes5_pw_exp_fp", + # "apps/InvRes5_pw_exp_bias_relu6_fp", + # "apps/InvRes5_dw_bias_relu6_fp", + "apps/InvRes5_pw_sq_fp", + # "apps/InvRes5_pw_sq_bias_fp", + + # "apps/InvRes1_dw_fp", + # "apps/InvRes1_dw_stream_fp", + # "apps/InvRes1_dw_bias_relu6", + # "apps/InvRes1_pw_fp", + # "apps/matrix_multiplication_fp", + # "apps/conv2D_no_bias_fp", # "apps/relu_layer_fp", # "apps/depthwise_conv_preload_fp", ] - resnet_tests = [] - resnet_tests_fp = [] + resnet_tests = [ + # "conv5_x" + ] + resnet_tests_fp = [ + # "InvRes2_pw_exp_fp", + ] hardcoded_dense_tests = [] elif args.config == "pr": width, height = 32, 16 diff --git a/garnet b/garnet index 109a53a0..df7c108a 160000 --- a/garnet +++ b/garnet @@ -1 +1 @@ -Subproject commit 109a53a08951fb62019505bb04f3860ec4ea9de8 +Subproject commit df7c108aa56f8e409d93b3aad38203b8a6376bde