Skip to content

Commit

Permalink
sync
Browse files (browse the repository at this point in the history)
  • Loading branch information
ttl10101 committed Feb 7, 2024
1 parent 31397d9 commit 23d4d89
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
2 changes: 1 addition & 1 deletion build/examples/15_ampere_sparse_tensorop_gemm/ncu_run.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,4 +4,4 @@ if [ $# != 1 ]; then
echo "e.g. ./ncu_run.sh output_file_name "
exit 100
fi
sudo /usr/local/cuda/bin/ncu --call-stack --nvtx -o $1 --set full ./15_ampere_sparse_tensorop_gemm
sudo /usr/local/cuda/bin/ncu --call-stack --nvtx -o $1 --set full ./15_ampere_sparse_tensorop_gemm
6 changes: 5 additions & 1 deletion build/examples/15_ampere_sparse_tensorop_gemm/nsys_run.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,14 +4,18 @@ if [ $# != 1 ]; then
echo "e.g. ./nsys_run.sh output_file_name "
exit 100
fi
#for i in 0 32 64 96 128 224 320 416 512 1024 2048 4096 8192
#for i in 0 32 64 96 128 224 320 416 512
#for i in 0 32 64 96 128 224 256
#for i in 0 32 64 96 128
#for i in 0 32 64
for i in 0 32
#for i in 0 32
for i in 3264
#for i in 0
do
nsys profile -t cuda,osrt,nvtx,cudnn,cublas -o $1_$i.qdstrm --stats=true -w true ./15_ampere_sparse_tensorop_gemm $1 $i | grep -e SparseGemm -e Gemm -e vecAddOpt -e M: >>$1.log
echo ////////////////////////////////////////////////////////// >>$1.log
echo >>$1.log
done
cat $1.log
#nsys profile -t cuda,osrt,nvtx,cudnn,cublas -o $1.qdstrm --stats=true -w true ./15_ampere_sparse_tensorop_gemm 1024 0 |grep -e SparseGemm -e Gemm -e vecAddOpt -e M:
Original file line number | Diff line number | Diff line change
Expand Up @@ -59,16 +59,18 @@ efficiently.
///////////////////////////////////////////////
///// TEST CONFIGURATION
///////////////////////////////////////////////
#define DENSE_GEMM_EN 1 // 0: disable, 1: enable
#define DENSE_GEMM_EN 0 // 0: disable, 1: enable
#define VEC_ADD_EN 1 // 0: disable, 1: enable
#define REF_EN 2 // 0: disable, 1: host, 2: cutlass
#define DBG_LOG_EN 0 // 0: disable, 1: enable

#define LIST_ENTRY_NUM 2048
#define LIST_ENTRY_NUM 8192

#define M_SIZE 512
#define K_SIZE 20480
#define N_SIZE 5120
#define M_SIZE 8192
#define K_SIZE 16384
#define N_SIZE 4096
//#define K_SIZE 20480
//#define N_SIZE 5120
#define M_EXTRA_SIZE 0


Expand Down

0 comments on commit 23d4d89

Please sign in to comment.