Skip to content

Commit

Permalink
Merge branch 'inference' into update_register_interface
Browse files (browse the repository at this point in the history)
  • Loading branch information
jiazhihao authored Sep 25, 2024
2 parents: 6ba0304 + 70e47b2; commit d7deb89
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
21 changes: 12 additions & 9 deletions src/ops/residual_layer_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ void ResidualLayerNorm::inference_kernel(ResidualLayerNormMeta const *m,
beta_ptr,
output_ptr);
}

#ifdef DEADCODE
template <typename T>
void save_inference_tensors(ResidualLayerNormMeta const *m) {
if (m->inference_debugging) {
Expand Down Expand Up @@ -206,6 +208,7 @@ void save_inference_tensors(ResidualLayerNormMeta const *m) {
filename3.c_str());
}
}
#endif

/*static*/
void ResidualLayerNorm::inference_kernel_wrapper(
Expand Down Expand Up @@ -314,15 +317,15 @@ void ResidualLayerNorm::inference_kernel_wrapper(
}
}

if (m->inference_debugging) {
if (m->input_type[0] == DT_FLOAT) {
save_inference_tensors<float>(m);
} else if (m->input_type[0] == DT_HALF) {
save_inference_tensors<half>(m);
} else {
assert(false && "unsupport datatype in layernorm");
}
}
// if (m->inference_debugging) {
// if (m->input_type[0] == DT_FLOAT) {
// save_inference_tensors<float>(m);
// } else if (m->input_type[0] == DT_HALF) {
// save_inference_tensors<half>(m);
// } else {
// assert(false && "unsupport datatype in layernorm");
// }
// }

if (m->profiling) {
checkCUDA(hipEventRecord(t_end, stream));
Expand Down
21 changes: 12 additions & 9 deletions src/ops/residual_layer_norm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ void ResidualLayerNorm::inference_kernel(ResidualLayerNormMeta const *m,
beta_ptr,
output_ptr);
}
#ifdef DEADCODE
template <typename T>
void save_inference_tensors(ResidualLayerNormMeta const *m) {
if (m->inference_debugging) {
Expand Down Expand Up @@ -204,6 +206,7 @@ void save_inference_tensors(ResidualLayerNormMeta const *m) {
filename3.c_str());
}
}
#endif
/*static*/
void ResidualLayerNorm::inference_kernel_wrapper(
Expand Down Expand Up @@ -312,15 +315,15 @@ void ResidualLayerNorm::inference_kernel_wrapper(
}
}
if (m->inference_debugging) {
if (m->input_type[0] == DT_FLOAT) {
save_inference_tensors<float>(m);
} else if (m->input_type[0] == DT_HALF) {
save_inference_tensors<half>(m);
} else {
assert(false && "unsupport datatype in layernorm");
}
}
// if (m->inference_debugging) {
// if (m->input_type[0] == DT_FLOAT) {
// save_inference_tensors<float>(m);
// } else if (m->input_type[0] == DT_HALF) {
// save_inference_tensors<half>(m);
// } else {
// assert(false && "unsupport datatype in layernorm");
// }
// }
if (m->profiling) {
cudaEventRecord(t_end, stream);
Expand Down
16 changes: 16 additions & 0 deletions src/runtime/cuda_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ __host__ void
host_ptr, ptr, sizeof(float) * num_elements, cudaMemcpyDeviceToHost));
FILE *tensor_file;
tensor_file = fopen(file_name, "w");
if (!tensor_file) {
fprintf(stderr, "Error %i creating file %s\n", errno, file_name);
assert(false);
}
assert(tensor_file != NULL);
for (unsigned i = 0; i < num_elements; i++) {
if (i < num_elements - 1) {
Expand All @@ -299,6 +303,10 @@ __host__ void
host_ptr, ptr, sizeof(half) * num_elements, cudaMemcpyDeviceToHost));
FILE *tensor_file;
tensor_file = fopen(file_name, "w");
if (!tensor_file) {
fprintf(stderr, "Error %i creating file %s\n", errno, file_name);
assert(false);
}
assert(tensor_file != NULL);
for (unsigned i = 0; i < num_elements; i++) {
if (i < num_elements - 1) {
Expand All @@ -321,6 +329,10 @@ __host__ void save_tensor(int32_t const *ptr,
host_ptr, ptr, sizeof(int32_t) * num_elements, cudaMemcpyDeviceToHost));
FILE *tensor_file;
tensor_file = fopen(file_name, "w");
if (!tensor_file) {
fprintf(stderr, "Error %i creating file %s\n", errno, file_name);
assert(false);
}
assert(tensor_file != NULL);
for (unsigned i = 0; i < num_elements; i++) {
if (i < num_elements - 1) {
Expand All @@ -343,6 +355,10 @@ __host__ void save_tensor(int64_t const *ptr,
host_ptr, ptr, sizeof(int64_t) * num_elements, cudaMemcpyDeviceToHost));
FILE *tensor_file;
tensor_file = fopen(file_name, "w");
if (!tensor_file) {
fprintf(stderr, "Error %i creating file %s\n", errno, file_name);
assert(false);
}
assert(tensor_file != NULL);
for (unsigned i = 0; i < num_elements; i++) {
if (i < num_elements - 1) {
Expand Down

0 comments on commit d7deb89

Please sign in to comment.