diff --git a/experiments/results_bs8.csv b/experiments/results_bs8.csv new file mode 100644 index 0000000..f7b97aa --- /dev/null +++ b/experiments/results_bs8.csv @@ -0,0 +1,9 @@ +technique,time,sam_commit_name,pytorch_version,sam_model_type,batch_size,memory(MiB),memory(%),img_s(avg),batch_ms(avg)/batch_size,mIoU,use_compile,use_half,compress,epilogue_fusion_first,use_compile_decoder,use_nested_tensor,use_rel_pos,pad_input_image_batch,num_workers,num_batches,num_images,profile_path,memory_path +fp32,8.32037784655889,default,2.2.0.dev20231023+cu121,vit_b,8,19934,49,12.123363560576202,82.4853593644495,0.5335705502003885,False,None,None,False,False,False,True,True,32,619,4952,None,None +bf16,2.861330274740855,codesign,2.2.0.dev20231023+cu121,vit_b,8,10003,24,39.7795247545384,25.13856075884645,0.5415806118803995,False,torch.bfloat16,None,False,False,False,True,True,32,619,4952,None,None +compile,2.652463134129842,codesign,2.2.0.dev20231023+cu121,vit_b,8,7916,19,54.71426032562412,18.276770882922342,0.5407576752390846,max-autotune,torch.bfloat16,None,False,False,False,True,True,32,619,4952,None,None +SDPA,2.148758562405904,sdpa-decoder,2.2.0.dev20231023+cu121,vit_b,8,4679,11,73.1570663251564,13.669219533153035,0.5355346808697282,max-autotune,torch.bfloat16,None,False,False,False,True,True,32,619,4952,None,None +Triton,2.0386854648590087,local-fork,2.2.0.dev20231023+cu121,vit_b,8,1703,4,85.53658249838097,11.690904298391018,0.5339075529136259,max-autotune,torch.bfloat16,None,False,False,False,True,True,32,619,4952,None,None +NT,1.9225259701410928,local-fork,2.2.0.dev20231023+cu121,vit_b,8,2797,6,92.11983959049361,10.85542489484747,0.5337810700594795,max-autotune,torch.bfloat16,None,False,False,True,True,True,32,619,4952,None,None +int8,4.885190570354462,local-fork,2.2.0.dev20231023+cu121,vit_b,8,2710,6,91.04449841914705,10.983640059130643,0.5331727804156572,max-autotune,torch.bfloat16,dynamic_quant,False,False,True,True,True,32,619,4952,None,None +sparse,3.841790223121643,local-fork,2.2.0.dev20231023+cu121,vit_b,8,3217,7,81.4912293589238,12.271259224665185,0.4783508911148021,max-autotune,torch.bfloat16,sparse,False,False,True,True,True,32,619,4952,None,None diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index e97e1db..9415e40 100755 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -177,9 +177,9 @@ def run(batch_size, rexp("SDPA", "sdpa-decoder", use_half="bfloat16", use_compile="max-autotune") rexp("Triton", "local-fork", use_half="bfloat16", use_compile="max-autotune") if batch_size > 1: - rexp("NT", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(bs > 1)) - rexp("int8", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(bs > 1), compress="dynamic_quant") - rexp("sparse", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(bs > 1), compress="sparse") + rexp("NT", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1)) + rexp("int8", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="dynamic_quant") + rexp("sparse", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="sparse") if __name__ == '__main__': diff --git a/segment_anything_fast/configs/flash_4_configs_a100.p b/segment_anything_fast/configs/flash_4_configs_a100.p index 57e1fa7..3da124e 100644 Binary files a/segment_anything_fast/configs/flash_4_configs_a100.p and b/segment_anything_fast/configs/flash_4_configs_a100.p differ diff --git a/segment_anything_fast/configs/int_mm_configs_a100.p b/segment_anything_fast/configs/int_mm_configs_a100.p index 3979eb1..90c6456 100644 Binary files a/segment_anything_fast/configs/int_mm_configs_a100.p and b/segment_anything_fast/configs/int_mm_configs_a100.p differ