linting

sfc-gh-goliaro committed Oct 22, 2024
1 parent 2dab7cb commit 92199d0
Showing 12 changed files with 179 additions and 166 deletions.
3 changes: 2 additions & 1 deletion include/flexflow/inference.h
@@ -54,7 +54,8 @@ struct GenerationRequest {
                     double emission_time_ms_,
                     bool add_special_tokens_ = true)
       : prompt(prompt_), slo_ratio(slo_ratio_),
-        emission_time_ms(emission_time_ms_), add_special_tokens(add_special_tokens_) {}
+        emission_time_ms(emission_time_ms_),
+        add_special_tokens(add_special_tokens_) {}
 };
 
 struct GenerationResult {
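The constructor's signature is unchanged by this reflow, only its line breaks. A minimal call-site sketch (a hypothetical helper, not part of this commit; the -1.0 SLO ratio and 0 ms emission time match the values the drivers later in this diff pass):

    #include <string>

    // Hypothetical helper: builds a request the same way the
    // incr_decoding / spec_infer drivers below do.
    GenerationRequest make_request(std::string const &prompt,
                                   bool add_special_tokens = true) {
      return GenerationRequest(prompt, /*slo_ratio_=*/-1.0,
                               /*emission_time_ms_=*/0, add_special_tokens);
    }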
79 changes: 40 additions & 39 deletions include/flexflow/model.h
@@ -765,45 +765,46 @@ class FFModel {
                                  bool qk_prod_scaling = true,
                                  bool position_bias = false,
                                  char const *name = NULL);
-  Tensor groupquery_self_attention(Tensor const input,
-                                   int embed_dim,
-                                   int num_q_heads,
-                                   int num_kv_heads,
-                                   int kdim = 0,
-                                   int vdim = 0,
-                                   float dropout = 0.0f,
-                                   bool bias = false,
-                                   bool add_bias_kv = false,
-                                   bool add_zero_attn = false,
-                                   DataType data_type = DT_NONE,
-                                   Initializer *kernel_initializer = NULL,
-                                   RotaryEmbeddingMeta rotary_embedding_meta = RotaryEmbeddingMeta(),
-                                   bool scaling_query = false,
-                                   float scaling_factor = 1.0f,
-                                   bool qk_prod_scaling = true,
-                                   bool position_bias = false,
-                                   bool streaming_cache = false,
-                                   char const *name = NULL);
-  Tensor
-      spec_inc_multiquery_self_attention(Tensor const input,
-                                         int embed_dim,
-                                         int num_q_heads,
-                                         int num_kv_heads,
-                                         int kdim = 0,
-                                         int vdim = 0,
-                                         float dropout = 0.0f,
-                                         bool bias = false,
-                                         bool add_bias_kv = false,
-                                         bool add_zero_attn = false,
-                                         DataType data_type = DT_NONE,
-                                         Initializer *kernel_initializer = NULL,
-                                         RotaryEmbeddingMeta rotary_embedding_meta = RotaryEmbeddingMeta(),
-                                         bool scaling_query = false,
-                                         float scaling_factor = 1.0f,
-                                         bool qk_prod_scaling = true,
-                                         bool position_bias = false,
-                                         bool streaming_cache = false,
-                                         char const *name = NULL);
+  Tensor groupquery_self_attention(
+      Tensor const input,
+      int embed_dim,
+      int num_q_heads,
+      int num_kv_heads,
+      int kdim = 0,
+      int vdim = 0,
+      float dropout = 0.0f,
+      bool bias = false,
+      bool add_bias_kv = false,
+      bool add_zero_attn = false,
+      DataType data_type = DT_NONE,
+      Initializer *kernel_initializer = NULL,
+      RotaryEmbeddingMeta rotary_embedding_meta = RotaryEmbeddingMeta(),
+      bool scaling_query = false,
+      float scaling_factor = 1.0f,
+      bool qk_prod_scaling = true,
+      bool position_bias = false,
+      bool streaming_cache = false,
+      char const *name = NULL);
+  Tensor spec_inc_multiquery_self_attention(
+      Tensor const input,
+      int embed_dim,
+      int num_q_heads,
+      int num_kv_heads,
+      int kdim = 0,
+      int vdim = 0,
+      float dropout = 0.0f,
+      bool bias = false,
+      bool add_bias_kv = false,
+      bool add_zero_attn = false,
+      DataType data_type = DT_NONE,
+      Initializer *kernel_initializer = NULL,
+      RotaryEmbeddingMeta rotary_embedding_meta = RotaryEmbeddingMeta(),
+      bool scaling_query = false,
+      float scaling_factor = 1.0f,
+      bool qk_prod_scaling = true,
+      bool position_bias = false,
+      bool streaming_cache = false,
+      char const *name = NULL);
   Tensor inc_multiquery_self_attention_verify(
       Tensor const input,
       int embed_dim,
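The two declarations above are now wrapped with one parameter per line, which makes the long tail of defaulted flags easier to scan. A hedged call sketch (the dimensions and the surrounding ff, input, and rotary_embedding_meta objects are illustrative assumptions, not values from this commit; the model files below show the real call sites):

    // Sketch only: calling the reformatted API with made-up dimensions,
    // mirroring the call pattern in falcon.cc / llama.cc later in this diff.
    Tensor attn = ff.groupquery_self_attention(input,
                                               /*embed_dim=*/4096,
                                               /*num_q_heads=*/32,
                                               /*num_kv_heads=*/8,
                                               /*kdim=*/128,
                                               /*vdim=*/128,
                                               /*dropout=*/0.0f,
                                               /*bias=*/false,
                                               /*add_bias_kv=*/false,
                                               /*add_zero_attn=*/false,
                                               DT_NONE,
                                               /*kernel_initializer=*/nullptr,
                                               rotary_embedding_meta,
                                               /*scaling_query=*/false,
                                               /*scaling_factor=*/1.0f,
                                               /*qk_prod_scaling=*/true,
                                               /*position_bias=*/false,
                                               /*streaming_cache=*/false,
                                               "layers_0_attention");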
@@ -11,7 +11,8 @@ struct SpecIncMultiHeadSelfAttentionParams {
   LayerID layer_guid;
   int embed_dim, num_q_heads, num_kv_heads, kdim, vdim;
   float dropout, scaling_factor;
-  bool qkv_bias, final_bias, add_zero_attn, scaling_query, qk_prod_scaling, position_bias;
+  bool qkv_bias, final_bias, add_zero_attn, scaling_query, qk_prod_scaling,
+      position_bias;
   RotaryEmbeddingMeta rotary_embedding_meta;
   bool streaming_cache;
   char name[MAX_OPNAME];
10 changes: 7 additions & 3 deletions inference/incr_decoding/incr_decoding.cc
@@ -398,8 +398,11 @@ void FlexFlow::top_level_task(Task const *task,
     assert(false);
   }
   for (size_t i = 1; i < prompt_json.size(); ++i) {
-    requests.push_back(GenerationRequest(
-        prompt_json[i]["prompt"].get<std::string>(), -1.0, 0, add_special_tokens));
+    requests.push_back(
+        GenerationRequest(prompt_json[i]["prompt"].get<std::string>(),
+                          -1.0,
+                          0,
+                          add_special_tokens));
   }
   PoissonEmissionMachine emission_machine(request_per_second, slo_ratios);
   // ConstantEmissionMachine emission_machine(-1, slo_ratios);
@@ -414,7 +417,8 @@ void FlexFlow::top_level_task(Task const *task,
   std::vector<double> timestamps, ratios;
   for (auto const &json_obj : trace_json) {
     EmissionTrace trace(json_obj);
-    requests.push_back(GenerationRequest(trace.prompt, -1.0, 0, add_special_tokens));
+    requests.push_back(
+        GenerationRequest(trace.prompt, -1.0, 0, add_special_tokens));
     timestamps.push_back(trace.emission_time_ms);
     ratios.push_back(trace.slo_ratio);
   }
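This driver feeds the collected requests into a PoissonEmissionMachine(request_per_second, slo_ratios); spec_infer.cc below does the same. The class itself is not shown in this diff, so the following is a rough mental model rather than FlexFlow's implementation: a Poisson-process emitter at a given request rate draws exponentially distributed inter-arrival gaps.

    #include <random>

    // Assumption sketch: how a Poisson-process emitter typically spaces
    // requests. Not taken from FlexFlow; the real PoissonEmissionMachine
    // may differ.
    double next_emission_gap_ms(std::mt19937 &rng, double requests_per_second) {
      std::exponential_distribution<double> gap(requests_per_second);
      return gap(rng) * 1000.0; // gap in seconds, converted to milliseconds
    }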
28 changes: 14 additions & 14 deletions inference/models/falcon.cc
@@ -113,11 +113,11 @@ void FALCON::create_falcon_model(FFModel &ff,
             DT_NONE, /*data_type*/
             NULL,    /*kernel_initializer*/
             falcon_config.rotary_embedding_meta,
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
-            false, /*streaming_cache*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
+            false, /*streaming_cache*/
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
         );
@@ -139,10 +139,10 @@ void FALCON::create_falcon_model(FFModel &ff,
             DT_NONE, /*data_type*/
             nullptr, /*kernel_initializer*/
             falcon_config.rotary_embedding_meta,
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
         );
@@ -164,11 +164,11 @@ void FALCON::create_falcon_model(FFModel &ff,
             DT_NONE, /*data_type*/
             nullptr, /*kernel_initializer*/
             falcon_config.rotary_embedding_meta,
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
-            false, /*streaming_cache*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
+            false, /*streaming_cache*/
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
         );
28 changes: 14 additions & 14 deletions inference/models/llama.cc
@@ -110,10 +110,10 @@ void LLAMA::create_llama_model(FFModel &ff,
             DT_NONE, /*data_type*/
             NULL,    /*kernel_initializer*/
             llama_config.rotary_embedding_meta,
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
             streaming_cache,
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
@@ -135,10 +135,10 @@ void LLAMA::create_llama_model(FFModel &ff,
             DT_NONE, /*data_type*/
             nullptr, /*kernel_initializer*/
             llama_config.rotary_embedding_meta,
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
         );
@@ -152,12 +152,12 @@ void LLAMA::create_llama_model(FFModel &ff,
             llama_config.num_key_value_heads,
             llama_config.hidden_size / llama_config.num_attention_heads,
             llama_config.hidden_size / llama_config.num_attention_heads,
-            0.0f, /*dropout*/
-            false, /*qkv_bias*/
-            false, /*final_bias*/
-            false, /*add_zero_attn*/
-            DT_NONE, /*data_type*/
-            nullptr, /*kernel_initializer*/
+            0.0f,    /*dropout*/
+            false,   /*qkv_bias*/
+            false,   /*final_bias*/
+            false,   /*add_zero_attn*/
+            DT_NONE, /*data_type*/
+            nullptr, /*kernel_initializer*/
             llama_config.rotary_embedding_meta,
             false, /*scaling query*/
             1.0f,  /*scaling factor*/
22 changes: 11 additions & 11 deletions inference/models/starcoder.cc
@@ -114,18 +114,18 @@ void STARCODER::create_starcoder_model(
             startcoder_config.num_attention_heads,
             startcoder_config.hidden_size /
                 startcoder_config.num_attention_heads,
-            startcoder_config.dropout_p, /*dropout*/
-            true, /*bias*/
-            false, /*add_bias_kv*/
-            false, /*add_zero_attn*/
-            DT_NONE, /*data_type*/
-            nullptr, /*kernel_initializer*/
+            startcoder_config.dropout_p, /*dropout*/
+            true,                        /*bias*/
+            false,                       /*add_bias_kv*/
+            false,                       /*add_zero_attn*/
+            DT_NONE,                     /*data_type*/
+            nullptr,                     /*kernel_initializer*/
             startcoder_config.rotary_embedding_meta, /*apply_rotary_embedding*/
-            false, /*scaling query*/
-            1.0f, /*scaling factor*/
-            true, /*qk_prod_scaling*/
-            false, /*position_bias*/
-            false, /*streaming_cache*/
+            false, /*scaling query*/
+            1.0f,  /*scaling factor*/
+            true,  /*qk_prod_scaling*/
+            false, /*position_bias*/
+            false, /*streaming_cache*/
             std::string("layers_" + std::to_string(i) + "_attention")
                 .c_str() /*name*/
         );
10 changes: 7 additions & 3 deletions inference/spec_infer/spec_infer.cc
@@ -604,8 +604,11 @@ void FlexFlow::top_level_task(Task const *task,
     assert(false);
   }
   for (size_t i = 1; i < prompt_json.size(); ++i) {
-    requests.push_back(GenerationRequest(
-        prompt_json[i]["prompt"].get<std::string>(), -1.0, 0, add_special_tokens));
+    requests.push_back(
+        GenerationRequest(prompt_json[i]["prompt"].get<std::string>(),
+                          -1.0,
+                          0,
+                          add_special_tokens));
   }
   PoissonEmissionMachine emission_machine(request_per_second, slo_ratios);
   // ConstantEmissionMachine emission_machine(-1, slo_ratios);
@@ -620,7 +623,8 @@ void FlexFlow::top_level_task(Task const *task,
   std::vector<double> timestamps, ratios;
   for (auto const &json_obj : trace_json) {
     EmissionTrace trace(json_obj);
-    requests.push_back(GenerationRequest(trace.prompt, -1.0, 0, add_special_tokens));
+    requests.push_back(
+        GenerationRequest(trace.prompt, -1.0, 0, add_special_tokens));
     timestamps.push_back(trace.emission_time_ms);
     ratios.push_back(trace.slo_ratio);
   }
76 changes: 39 additions & 37 deletions src/ops/inc_multihead_self_attention.cc
@@ -54,24 +54,25 @@ bool IncMultiHeadSelfAttentionParams::is_valid(
   return is_valid;
 }
 
-Tensor FFModel::inc_multihead_self_attention(const Tensor input,
-                                             int embed_dim,
-                                             int num_heads,
-                                             int kdim,
-                                             int vdim,
-                                             float dropout,
-                                             bool qkv_bias,
-                                             bool final_bias,
-                                             bool add_zero_attn,
-                                             DataType data_type,
-                                             Initializer *kernel_initializer,
-                                             RotaryEmbeddingMeta rotary_embedding_meta,
-                                             bool scaling_query,
-                                             float scaling_factor,
-                                             bool qk_prod_scaling,
-                                             bool position_bias,
-                                             bool streaming_cache,
-                                             char const *name) {
+Tensor FFModel::inc_multihead_self_attention(
+    const Tensor input,
+    int embed_dim,
+    int num_heads,
+    int kdim,
+    int vdim,
+    float dropout,
+    bool qkv_bias,
+    bool final_bias,
+    bool add_zero_attn,
+    DataType data_type,
+    Initializer *kernel_initializer,
+    RotaryEmbeddingMeta rotary_embedding_meta,
+    bool scaling_query,
+    float scaling_factor,
+    bool qk_prod_scaling,
+    bool position_bias,
+    bool streaming_cache,
+    char const *name) {
   return groupquery_self_attention(input,
                                    embed_dim,
                                    num_heads,
@@ -93,25 +94,26 @@ Tensor FFModel::inc_multihead_self_attention(const Tensor input,
                                    name);
 }
 
-Tensor FFModel::groupquery_self_attention(const Tensor input,
-                                          int embed_dim,
-                                          int num_q_heads,
-                                          int num_kv_heads,
-                                          int kdim,
-                                          int vdim,
-                                          float dropout,
-                                          bool qkv_bias,
-                                          bool final_bias,
-                                          bool add_zero_attn,
-                                          DataType data_type,
-                                          Initializer *kernel_initializer,
-                                          RotaryEmbeddingMeta rotary_embedding_meta,
-                                          bool scaling_query,
-                                          float scaling_factor,
-                                          bool qk_prod_scaling,
-                                          bool position_bias,
-                                          bool streaming_cache,
-                                          char const *name) {
+Tensor FFModel::groupquery_self_attention(
+    const Tensor input,
+    int embed_dim,
+    int num_q_heads,
+    int num_kv_heads,
+    int kdim,
+    int vdim,
+    float dropout,
+    bool qkv_bias,
+    bool final_bias,
+    bool add_zero_attn,
+    DataType data_type,
+    Initializer *kernel_initializer,
+    RotaryEmbeddingMeta rotary_embedding_meta,
+    bool scaling_query,
+    float scaling_factor,
+    bool qk_prod_scaling,
+    bool position_bias,
+    bool streaming_cache,
+    char const *name) {
   if (data_type == DT_NONE) {
     data_type = input->data_type;
   }
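The hunks above show that inc_multihead_self_attention is a thin wrapper that forwards to groupquery_self_attention. The forwarded argument list is truncated in this view, so the following is an assumption rather than the verbatim wrapper: multi-head attention appears to be treated as the special case of grouped-query attention in which the query and key/value head counts coincide.

    // Assumption sketch (the full forwarding call is cut off above):
    // MHA as GQA with num_kv_heads == num_q_heads; the remaining
    // parameters fall back to the defaults declared in model.h.
    Tensor multihead_as_groupquery(FFModel &ff, Tensor input,
                                   int embed_dim, int num_heads) {
      return ff.groupquery_self_attention(input, embed_dim,
                                          /*num_q_heads=*/num_heads,
                                          /*num_kv_heads=*/num_heads);
    }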
(Diffs for the remaining three changed files were not loaded in this view.)
