Skip to content

Commit

Permalink
Merge pull request #805 from ROCm/6.2-hiprand-changes
Browse files Browse the repository at this point in the history
Matrix init using hiprand
  • Loading branch information
mamaydeo authored Sep 13, 2024
2 parents 3bc740a + 9fa1e36 commit a8e6cdd
Show file tree
Hide file tree
Showing 21 changed files with 552 additions and 117 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,9 @@ add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/rvs_smi-build/librocm_smi64.so

endif() # if (RVS_ROCMSMI EQUAL 1)

set(HIPRAND_INC_DIR "${ROCM_PATH}/include")
set(HIPRAND_LIB_DIR "${ROCM_PATH}/lib")

if (RVS_ROCBLAS EQUAL 1)
set(ROCBLAS_INC_DIR "${CMAKE_BINARY_DIR}/rvs_rblas-src/build/release/rocblas-install")
set(ROCBLAS_LIB_DIR "${CMAKE_BINARY_DIR}/rvs_rblas-src/build/release/rocblas-install/lib/")
Expand Down
3 changes: 2 additions & 1 deletion gm.so/tests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
################################################################################

set(ROCBLAS_LIB "rocblas")
set(HIPRAND_LIB "hiprand")
set(ROC_THUNK_NAME "hsakmt")
set(CORE_RUNTIME_NAME "hsa-runtime")
set(CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}64")

set(UT_LINK_LIBS libpthread.so libpci.so libm.so libdl.so "lib${ROCM_SMI_LIB}.so"
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES}
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES} ${HIPRAND_LIB}
)

# Add directories to look for library files to link
Expand Down
3 changes: 2 additions & 1 deletion gpup.so/tests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
################################################################################

set(ROCBLAS_LIB "rocblas")
set(HIPRAND_LIB "hiprand")
set(ROC_THUNK_NAME "hsakmt")
set(CORE_RUNTIME_NAME "hsa-runtime")
set(CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}64")

set(UT_LINK_LIBS libpthread.so libm.so libdl.so ${ROCM_SMI_LIB}
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES})
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES} ${HIPRAND_LIB})

# Add directories to look for library files to link
link_directories(${RVS_LIB_DIR} ${ROCM_SMI_LIB_DIR} ${ROCT_LIB_DIR} ${ROCBLAS_LIB_DIR})
Expand Down
14 changes: 7 additions & 7 deletions gst.so/src/gst_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void GSTWorker::setup_blas(int *error, string *err_description) {
gpu_blas->generate_random_matrix_data();
if (!copy_matrix) {
// copy matrix only once
if (!gpu_blas->copy_data_to_gpu(gst_ops_type)) {
if (!gpu_blas->copy_data_to_gpu()) {
*error = 1;
*err_description = GST_BLAS_MEMCPY_ERROR;
}
Expand Down Expand Up @@ -137,15 +137,15 @@ void GSTWorker::hit_max_gflops(int *error, string *err_description) {

if (copy_matrix) {
// copy matrix before each GEMM
if (!gpu_blas->copy_data_to_gpu(gst_ops_type)) {
if (!gpu_blas->copy_data_to_gpu()) {
*error = 1;
*err_description = GST_BLAS_MEMCPY_ERROR;
return;
}
}

// run GEMM operation
if (!gpu_blas->run_blass_gemm(gst_ops_type))
if (!gpu_blas->run_blas_gemm())
continue; // failed to run the GEMM operation

// Waits for GEMM operation to complete
Expand Down Expand Up @@ -235,7 +235,7 @@ bool GSTWorker::do_gst_ramp(int *error, string *err_description) {
// Generate random matrix data
gpu_blas->generate_random_matrix_data();
// copy matrix before each GEMM
if (!gpu_blas->copy_data_to_gpu(gst_ops_type)) {
if (!gpu_blas->copy_data_to_gpu()) {
*error = 1;
*err_description = GST_BLAS_MEMCPY_ERROR;
return false;
Expand All @@ -246,7 +246,7 @@ bool GSTWorker::do_gst_ramp(int *error, string *err_description) {
start_time = gpu_blas->get_time_us();

// run GEMM operation
if(!gpu_blas->run_blass_gemm(gst_ops_type))
if(!gpu_blas->run_blas_gemm())
continue;

// Wait for GEMM operation to complete
Expand Down Expand Up @@ -436,7 +436,7 @@ bool GSTWorker::do_gst_stress_test(int *error, std::string *err_description) {

if (copy_matrix) {
// copy matrix before each GEMM
if (!gpu_blas->copy_data_to_gpu(gst_ops_type)) {
if (!gpu_blas->copy_data_to_gpu()) {
*error = 1;
*err_description = GST_BLAS_MEMCPY_ERROR;
return false;
Expand All @@ -449,7 +449,7 @@ bool GSTWorker::do_gst_stress_test(int *error, std::string *err_description) {
for (uint64_t i = 0; i < gst_hot_calls; i++) {

// run GEMM operation
if(!gpu_blas->run_blass_gemm(gst_ops_type)) {
if(!gpu_blas->run_blas_gemm()) {

*err_description = GST_BLAS_ERROR;
*error = 1;
Expand Down
7 changes: 6 additions & 1 deletion iet.so/include/action.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ class iet_action: public rvs::actionbase {
//! TRUE if JSON output is required
bool bjson = false;

std::string iet_ops_type;
//! gemm operation type
std::string iet_ops_type;
//! target power level for the test
float iet_target_power;
//! IET test ramp duration
Expand Down Expand Up @@ -118,6 +119,10 @@ class iet_action: public rvs::actionbase {
int iet_ldc_offset;
int iet_ldd_offset;

//! matrix initialization method :
//! default, random integer or trignometric float
std::string iet_matrix_init;

friend class IETWorker;

//! list of GPUs (along with some identification data) which are
Expand Down
47 changes: 28 additions & 19 deletions iet.so/include/iet_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ class IETWorker : public rvs::ThreadBase {
//! returns the target power level for the test
float get_target_power(void) { return target_power; }

//! sets the SGEMM matrix size
//! sets the matrix size
void set_matrix_size(uint64_t _matrix_size) {
matrix_size = _matrix_size;
}
//! returns the SGEMM matrix size
//! returns the matrix size
uint64_t get_matrix_size(void) { return matrix_size; }

//! sets the EDPp power tolerance
//! sets gemm operation type
void set_iet_ops_type(std::string ops_type) { iet_ops_type = ops_type; }
//! returns the EDPp power tolerance
//! get gemm operation type
std::string get_ops_type(void) { return iet_ops_type; }

//! sets the EDPp power tolerance
Expand All @@ -157,10 +157,11 @@ class IETWorker : public rvs::ThreadBase {

//! returns the JSON flag
static bool get_use_json(void) { return bjson; }
//! returns the SGEMM matrix size

//! returns the matrix size a
uint64_t get_matrix_size_a(void) { return matrix_size_a; }

//! returns the SGEMM matrix size
//! returns the matrix size b
uint64_t get_matrix_size_b(void) { return matrix_size_b; }

//! returns the matrix size c
Expand Down Expand Up @@ -199,15 +200,15 @@ class IETWorker : public rvs::ThreadBase {
void set_ldd_offset(int ldd) {
iet_ldd_offset = ldd;
}
//! sets the SGEMM matrix size
//! sets the matrix size a
void set_matrix_size_a(uint64_t _matrix_size_a) {
matrix_size_a = _matrix_size_a;
}
//! sets the SGEMM matrix size
//! sets the matrix size b
void set_matrix_size_b(uint64_t _matrix_size_b) {
matrix_size_b = _matrix_size_b;
}
//! sets the SGEMM matrix size
//! sets the matrix size c
void set_matrix_size_c(uint64_t _matrix_size_c) {
matrix_size_c = _matrix_size_c;
}
Expand All @@ -218,6 +219,12 @@ class IETWorker : public rvs::ThreadBase {
//! returns bandwidth workload status
bool get_bw_workload(void) { return iet_bw_workload; }

//! sets the matrix init
void set_matrix_init(std::string _matrix_init) { matrix_init = _matrix_init; }

//! returns matrix init
std::string get_matrix_init(void) { return matrix_init; }

//! BLAS callback
static void blas_callback (bool status, void *user_data);

Expand Down Expand Up @@ -269,13 +276,13 @@ class IETWorker : public rvs::ThreadBase {
//! power tolerance (how much the target_power can fluctuare after
//! the ramp period for the test to succeed)
float tolerance;
//! SGEMM matrix size
//! matrix size
uint64_t matrix_size;
//! TRUE if JSON output is required
static bool bjson;
bool sgemm_success;
//! blas_worker pointer
std::string iet_ops_type;
//! gemm operation type
std::string iet_ops_type;

//! actual training time
uint64_t training_time_ms;
Expand All @@ -287,27 +294,29 @@ class IETWorker : public rvs::ThreadBase {
float avg_power_training;
//! the SGEMM delay which gives the actual GPU SGEMM frequency
float sgemm_si_delay;
//! SGEMM matrix size
//! matrix sizes
uint64_t matrix_size_a;
uint64_t matrix_size_b;
uint64_t matrix_size_c;
//leading offsets
//! leading offsets
int iet_lda_offset;
int iet_ldb_offset;
int iet_ldc_offset;
int iet_ldd_offset;
//Matrix transpose A
//! Matrix transpose A
int iet_trans_a;
//Matrix transpose B
//! Matrix transpose B
int iet_trans_b;
//IET aplha value
//! IET aplha value
float iet_alpha_val;
//IET beta value
//! IET beta value
float iet_beta_val;
//IET TP flag
//! IET TP flag
bool iet_tp_flag;
//! Bandwidth workload enable/disable
bool iet_bw_workload;
//! matrix init
std::string matrix_init;

bool endtest = false;
//! GEMM operations synchronization mutex
Expand Down
11 changes: 11 additions & 0 deletions iet.so/src/action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ using std::fstream;
#define RVS_CONF_TP_FLAG "targetpower_met"
#define RVS_TP_MESSAGE "target_power"
#define RVS_DTYPE_MESSAGE "dtype"
#define RVS_CONF_MATRIX_INIT "matrix_init"

#define MODULE_NAME "iet"
#define MODULE_NAME_CAPS "IET"
Expand All @@ -110,6 +111,7 @@ using std::fstream;
#define IET_DEFAULT_LDD_OFFSET 0
#define IET_DEFAULT_TP_FLAG false
#define IET_DEFAULT_BW_WORKLOAD false
#define IET_DEFAULT_MATRIX_INIT "default"

#define IET_NO_COMPATIBLE_GPUS "No AMD compatible GPU found!"
#define PCI_ALLOC_ERROR "pci_alloc() error"
Expand Down Expand Up @@ -316,6 +318,14 @@ bool iet_action::get_all_iet_config_keys(void) {
bsts = false;
}

error = property_get<std::string>(RVS_CONF_MATRIX_INIT, &iet_matrix_init, IET_DEFAULT_MATRIX_INIT);
if (error == 1) {
msg = "invalid '" +
std::string(RVS_CONF_MATRIX_INIT) + "' key value";
rvs::lp::Err(msg, MODULE_NAME_CAPS, action_name);
bsts = false;
}

/* Set minimum sample interval as default */
if (iet_sample_interval < IET_DEFAULT_SAMPLE_INTERVAL) {
iet_sample_interval = IET_DEFAULT_SAMPLE_INTERVAL;
Expand Down Expand Up @@ -507,6 +517,7 @@ bool iet_action::do_edp_test(map<int, uint16_t> iet_gpus_device_index) {
workers[i].set_ldd_offset(iet_ldd_offset);
workers[i].set_tp_flag(iet_tp_flag);
workers[i].set_bw_workload(iet_bw_workload);
workers[i].set_matrix_init(iet_matrix_init);

i++;
}
Expand Down
6 changes: 3 additions & 3 deletions iet.so/src/iet_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,22 +145,22 @@ void IETWorker::blasThread(int gpuIdx, uint64_t matrix_size, std::string iet_o
duration = 0;
gem_ops = 0;
// setup rvsBlas
gpu_blas = std::unique_ptr<rvs_blas>(new rvs_blas(gpuIdx, matrix_size, matrix_size, matrix_size, "default", transa, transb, alpha, beta,
gpu_blas = std::unique_ptr<rvs_blas>(new rvs_blas(gpuIdx, matrix_size, matrix_size, matrix_size, matrix_init, transa, transb, alpha, beta,
iet_lda_offset, iet_ldb_offset, iet_ldc_offset, iet_ldd_offset, iet_ops_type, ""));

//Genreate random matrix data
gpu_blas->generate_random_matrix_data();

//Copy data to GPU
gpu_blas->copy_data_to_gpu(iet_ops_type);
gpu_blas->copy_data_to_gpu();

iet_start_time = std::chrono::system_clock::now();

//Hit the GPU with load to increase temperature
while ((duration < run_duration_ms) && (endtest == false)) {

//call the gemm blas
gpu_blas->run_blass_gemm(iet_ops_type);
gpu_blas->run_blas_gemm();

// Waits for GEMM operation to complete
if(!gpu_blas->is_gemm_op_complete())
Expand Down
10 changes: 7 additions & 3 deletions include/rvs_blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "hip/hip_runtime.h"
#include "hip/hip_runtime_api.h"
#include <sys/time.h>
#include <hiprand/hiprand.h>

typedef void (*rvsBlasCallback_t) (bool status, void *userData);

Expand All @@ -53,7 +54,7 @@ typedef void (*rvsBlasCallback_t) (bool status, void *userData);
class rvs_blas {
public:
rvs_blas(int _gpu_device_index, int _m, int _n, int _k, std::string _matrix_init,
int transa, int transb, float aplha, float beta,
int transa, int transb, float alpha, float beta,
rocblas_int lda, rocblas_int ldb, rocblas_int ldc, rocblas_int ldd,
std::string _ops_type, std::string _data_type);
rvs_blas() = delete;
Expand Down Expand Up @@ -88,8 +89,8 @@ class rvs_blas {
//! returns TRUE if an error occured
bool error(void) { return is_error; }
void generate_random_matrix_data(void);
bool copy_data_to_gpu(std::string);
bool run_blass_gemm(std::string);
bool copy_data_to_gpu(void);
bool run_blas_gemm(void);
bool is_gemm_op_complete(void);
bool validate_gemm(bool self_check, bool accu_check, double &self_error, double &accu_error);
void set_gemm_error(uint64_t _error_freq, uint64_t _error_count);
Expand Down Expand Up @@ -215,6 +216,9 @@ class rvs_blas {

//! HIP API stream - used to query for GEMM completion
hipStream_t hip_stream;
//! random number generator
hiprandGenerator_t hiprand_generator;

//! rocBlas related handle
rocblas_handle blas_handle;
//! TRUE is rocBlas handle was successfully initialized
Expand Down
4 changes: 2 additions & 2 deletions perf.so/src/perf_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void PERFWorker::setup_blas(int *error, string *err_description) {
gpu_blas->generate_random_matrix_data();
if (!copy_matrix) {
// copy matrix only once
if (!gpu_blas->copy_data_to_gpu(perf_ops_type)) {
if (!gpu_blas->copy_data_to_gpu()) {
*error = 1;
*err_description = PERF_BLAS_MEMCPY_ERROR;
}
Expand Down Expand Up @@ -181,7 +181,7 @@ bool PERFWorker::do_perf_stress_test(int *error, std::string *err_description) {

while(num_gemm_ops++ <= perf_hot_calls) {
// run GEMM & wait for completion
gpu_blas->run_blass_gemm(perf_ops_type);
gpu_blas->run_blas_gemm();
}

//End the timer
Expand Down
5 changes: 3 additions & 2 deletions pesm.so/tests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,17 @@
################################################################################

set(ROCBLAS_LIB "rocblas")
set(HIPRAND_LIB "hiprand")
set(ROC_THUNK_NAME "hsakmt")
set(CORE_RUNTIME_NAME "hsa-runtime")
set(CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}64")

set(UT_LINK_LIBS libpthread.so libpci.so libm.so libdl.so "lib${ROCM_SMI_LIB}.so"
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES}
${ROCBLAS_LIB} ${ROC_THUNK_NAME} ${CORE_RUNTIME_TARGET} ${ROCM_CORE} ${YAML_CPP_LIBRARIES} ${HIPRAND_LIB}
)

# Add directories to look for library files to link
link_directories(${ROCM_SMI_LIB_DIR} ${ROCT_LIB_DIR} ${ROCBLAS_LIB_DIR})
link_directories(${ROCM_SMI_LIB_DIR} ${ROCT_LIB_DIR} ${ROCBLAS_LIB_DIR} ${HIPRAND_LIB_DIR})

set (UT_SOURCES test/unitactionbase.cpp
)
Expand Down
Loading

0 comments on commit a8e6cdd

Please sign in to comment.