Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
xinhaoc committed Oct 29, 2023
1 parent 02e4fad commit 90caa1a
Showing 1 changed file with 19 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,25 @@ namespace FlexFlow {
namespace Kernels {
namespace IncMultiHeadAttention {

// Declaration (no body here) of the templated __global__ kernel
// compute_attention_kernel_generation_kernel. The definition is not visible
// in this part of the file; this prototype must stay in sync with it.
//
// Template parameters:
//   DT                - element type of the query, key_cache, value_cache and
//                       output_ptr buffers.
//   THREADS_PER_BLOCK - NOTE(review): presumably the block size the kernel is
//                       launched with; confirm against the launch site.
//   Dh, Dh_MAX        - NOTE(review): presumably the per-head dimension and a
//                       padded upper bound for it; confirm against the
//                       definition.
//   THREADS_PER_KEY,
//   THREADS_PER_VALUE - NOTE(review): presumably how many threads cooperate
//                       on one key / value vector; confirm against the
//                       definition.
//
// Parameters:
//   query, key_cache, value_cache - pointers to const DT (not written through
//                       these pointers). Layout is not visible here.
//   output_ptr        - pointer to non-const DT; presumably where the
//                       attention result is written (TODO confirm).
//   scale             - float factor; presumably the scaling applied to the
//                       query-key products (TODO confirm).
//   max_seq_length, per_head_size, hidden_size - integer sizes; their exact
//                       use in indexing is defined by the kernel body, which
//                       is not shown here.
//   request_infos     - non-const pointer to BatchConfig::PerRequestInfo
//                       entries. NOTE(review): presumably one entry per
//                       request, in device-accessible memory; verify against
//                       the caller.
//   is_beam, max_beam_width - NOTE(review): appear to select/configure a
//                       beam-search variant; confirm against the definition.
template <typename DT,
int THREADS_PER_BLOCK,
int Dh,
int Dh_MAX,
int THREADS_PER_KEY,
int THREADS_PER_VALUE>
__global__ void compute_attention_kernel_generation_kernel(
DT const *query,
DT const *key_cache,
DT const *value_cache,
DT *output_ptr,
float const scale,
int max_seq_length,
int per_head_size,
int hidden_size,
BatchConfig::PerRequestInfo *request_infos,
bool is_beam,
int max_beam_width);

template <typename DT>
__global__ void apply_position_bias_qkprd(DT *input_ptr,
int num_tokens,
Expand Down

0 comments on commit 90caa1a

Please sign in to comment.