Skip to content

Commit

Permalink
fix
Browse files: browse the repository at this point in the history
  • Loading branch information
黄宇扬 committed Aug 8, 2024
1 parent aa577f7 commit 28c6fdc
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/devices/cuda/fastllm-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ __global__ void FastllmLayerNormKernelTop1(float *input, float *output, int chan
float *inputData = input + blockIdx.x * channels;
float *outputData = output + blockIdx.x * 2;
int tid = threadIdx.x;
idData[tid] = tid;
maxData[tid] = -1e100;
for (int j = tid; j < channels; j += THREAD_PER_BLOCK) {
if (inputData[j] > maxData[tid]) {
Expand Down Expand Up @@ -1134,6 +1135,7 @@ __global__ void FastllmLayerNormKernelTopK(float *input, float *output, int K, i
float *inputData = input + blockIdx.x * channels;
float *outputData = output + blockIdx.x * 2 * K;
int tid = threadIdx.x;
idData[tid][0] = tid;
for (int i = 0; i < K; i++) {
maxData[tid][i] = -1e100;
}
Expand Down

0 comments on commit 28c6fdc

Please sign in to comment.