Attention projections (QKV, O) disaggregation #1436

Merged (28 commits, Oct 9, 2024)
3 changes: 3 additions & 0 deletions .gitignore
@@ -193,3 +193,6 @@ lora_training_logs
 Untitled-1.ipynb
 Untitled-2.ipynb
 tests/inference/python_test_configs/*.json
+
+core.*
+fine_grained_alignment_config.json
6 changes: 0 additions & 6 deletions examples/python/native/ops/inc_multihead_self_attention.py
@@ -11,8 +11,6 @@ def test_inc_multihead_self_attention(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -34,8 +32,6 @@ def test_inc_multihead_self_attention(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -85,8 +81,6 @@ def test_inc_multihead_self_attention(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
@@ -11,8 +11,6 @@ def test_inc_multihead_self_attention_verify(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -34,8 +32,6 @@ def test_inc_multihead_self_attention_verify(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -85,8 +81,6 @@ def test_inc_multihead_self_attention_verify(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
6 changes: 0 additions & 6 deletions examples/python/native/ops/inc_multiquery_self_attention.py
@@ -12,8 +12,6 @@ def test_inc_multiquery_self_attention(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -36,8 +34,6 @@ def test_inc_multiquery_self_attention(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -89,8 +85,6 @@ def test_inc_multiquery_self_attention(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
@@ -12,8 +12,6 @@ def test_inc_multiquery_self_attention_verify(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -36,8 +34,6 @@ def test_inc_multiquery_self_attention_verify(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -89,8 +85,6 @@ def test_inc_multiquery_self_attention_verify(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
@@ -11,8 +11,6 @@ def test_spec_inc_multihead_self_attention(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -34,8 +32,6 @@ def test_spec_inc_multihead_self_attention(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -85,8 +81,6 @@ def test_spec_inc_multihead_self_attention(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
@@ -12,8 +12,6 @@ def test_spec_inc_multiquery_self_attention(
     kdim: int = 0,
     vdim: int = 0,
     dropout: float = 0.0,
-    bias: bool = True,
-    add_bias_kv: bool = False,
     add_zero_attn: bool = False,
     data_type: DataType = DataType.DT_NONE,
     kernel_initializer=None,
@@ -36,8 +34,6 @@ def test_spec_inc_multiquery_self_attention(
         kdim=kdim,
         vdim=vdim,
         dropout=dropout,
-        bias=bias,
-        add_bias_kv=add_bias_kv,
         add_zero_attn=add_zero_attn,
         data_type=data_type,
         kernel_initializer=kernel_initializer,
@@ -89,8 +85,6 @@ def test_spec_inc_multiquery_self_attention(
         kdim=0, # Example value for kdim
         vdim=0, # Example value for vdim
         dropout=0.1, # Example value for dropout
-        bias=True,
-        add_bias_kv=False,
         add_zero_attn=False,
         data_type=DataType.DT_FLOAT,
         kernel_initializer=None, # Example value for kernel_initializer
48 changes: 36 additions & 12 deletions include/flexflow/flexflow_c.h
@@ -445,12 +445,16 @@ flexflow_tensor_t flexflow_model_add_inc_multihead_self_attention(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
@@ -465,12 +469,16 @@ flexflow_tensor_t flexflow_model_add_spec_inc_multihead_self_attention(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
@@ -485,12 +493,16 @@ flexflow_tensor_t flexflow_model_add_inc_multihead_self_attention_verify(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
@@ -506,12 +518,16 @@ flexflow_tensor_t flexflow_model_add_inc_multiquery_self_attention(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
@@ -527,12 +543,16 @@ flexflow_tensor_t flexflow_model_add_spec_inc_multiquery_self_attention(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
@@ -548,12 +568,16 @@ flexflow_tensor_t flexflow_model_add_inc_multiquery_self_attention_verify(
     int kdim,
     int vdim,
     float dropout,
-    bool bias,
-    bool add_bias_kv,
     bool add_zero_attn,
     enum DataType data_type,
     flexflow_initializer_t kernel_initializer_,
     bool apply_rotary_embedding,
+    float rope_theta,
+    char const *rope_type,
+    float rope_factor,
+    float low_freq_factor,
+    float high_freq_factor,
+    int original_max_position_embeddings,
     bool scaling_query,
     float scaling_factor,
     bool qk_prod_scaling,
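Net effect of these declarations: every attention variant in the C API drops the bias and add_bias_kv flags and gains six RoPE parameters (rope_theta, rope_type, rope_factor, low_freq_factor, high_freq_factor, original_max_position_embeddings). A minimal sketch of how a wrapper could fold those scalars into the RotaryEmbeddingMeta struct added in include/flexflow/inference.h below; the helper name, include path, namespace handling, and null-check fallback are assumptions for illustration, not code from this PR:

#include <string>
#include "flexflow/inference.h" // assumed include path for RotaryEmbeddingMeta

using namespace FlexFlow; // exact namespace placement of RotaryEmbeddingMeta assumed

// Hypothetical helper, not part of this diff: pack the new C-API scalars into
// the rotary-embedding metadata consumed by the attention operators.
RotaryEmbeddingMeta make_rotary_meta(bool apply_rotary_embedding,
                                     float rope_theta,
                                     char const *rope_type,
                                     float rope_factor,
                                     float low_freq_factor,
                                     float high_freq_factor,
                                     int original_max_position_embeddings) {
  // Guard against a null rope_type from C callers; "default" matches the
  // struct's default initializer.
  std::string type = (rope_type != nullptr) ? rope_type : "default";
  return RotaryEmbeddingMeta(apply_rotary_embedding,
                             rope_theta,
                             type,
                             rope_factor,
                             low_freq_factor,
                             high_freq_factor,
                             original_max_position_embeddings);
}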
39 changes: 37 additions & 2 deletions include/flexflow/inference.h
@@ -43,8 +43,43 @@ struct GenerationResult {
   std::vector<float> finetuning_losses;
 };
 
-#include <string>
-#include <vector>
+struct RotaryEmbeddingMeta {
+  bool apply_rotary_embedding = false;
+  float rope_theta = 10000.0f;
+  std::string rope_type = "default";
+  float factor = 8.0f;
+  float low_freq_factor = 1.0f;
+  float high_freq_factor = 4.0f;
+  int original_max_position_embeddings = 8192;
+
+  RotaryEmbeddingMeta(bool apply_rotary_embedding_ = false,
+                      float rope_theta_ = 10000.0f,
+                      std::string rope_type_ = "default",
+                      float factor_ = 8.0f,
+                      float low_freq_factor_ = 1.0f,
+                      float high_freq_factor_ = 4.0f,
+                      int original_max_position_embeddings_ = 8192)
+      : apply_rotary_embedding(apply_rotary_embedding_),
+        rope_theta(rope_theta_), rope_type(rope_type_), factor(factor_),
+        low_freq_factor(low_freq_factor_), high_freq_factor(high_freq_factor_),
+        original_max_position_embeddings(original_max_position_embeddings_) {}
+
+  friend std::ostream &operator<<(std::ostream &os,
+                                  RotaryEmbeddingMeta const &meta) {
+    os << std::boolalpha // To print bool as true/false instead of 1/0
+       << "RotaryEmbeddingMeta {\n"
+       << "  apply_rotary_embedding: " << meta.apply_rotary_embedding << ",\n"
+       << "  rope_theta: " << meta.rope_theta << ",\n"
+       << "  rope_type: \"" << meta.rope_type << "\",\n"
+       << "  factor: " << meta.factor << ",\n"
+       << "  low_freq_factor: " << meta.low_freq_factor << ",\n"
+       << "  high_freq_factor: " << meta.high_freq_factor << ",\n"
+       << "  original_max_position_embeddings: "
+       << meta.original_max_position_embeddings << "\n"
+       << "}";
+    return os;
+  }
+};
 
 std::string join_path(std::vector<std::string> const &paths);
 
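A minimal usage sketch for the new struct, assuming the include path below; the long-context scaling values and the "llama3" rope_type string are illustrative placeholders, not values mandated by this diff:

#include <iostream>
#include "flexflow/inference.h" // assumed include path for RotaryEmbeddingMeta

using namespace FlexFlow; // exact namespace placement of RotaryEmbeddingMeta assumed

int main() {
  // Placeholder RoPE configuration; arguments follow the constructor order above.
  RotaryEmbeddingMeta meta(true,      // apply_rotary_embedding_
                           500000.0f, // rope_theta_
                           "llama3",  // rope_type_ (placeholder label)
                           8.0f,      // factor_
                           1.0f,      // low_freq_factor_
                           4.0f,      // high_freq_factor_
                           8192);     // original_max_position_embeddings_
  std::cout << meta << std::endl; // pretty-printed via the friend operator<<
  return 0;
}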
3 changes: 3 additions & 0 deletions include/flexflow/layer.h
@@ -32,11 +32,13 @@ class Layer {
   void add_float_property(std::string const &key, float value);
   void add_int_vector_property(std::string const &key,
                                std::vector<int> const &value);
+  void add_string_property(std::string const &key, std::string const &value);
   void add_initializer(std::string const &key, Initializer *initializer);
   bool get_int_property(std::string const &key, long long &value) const;
   bool get_float_property(std::string const &key, float &value) const;
   bool get_int_vector_property(std::string const &key,
                                std::vector<int> &value) const;
+  bool get_string_property(std::string const &key, std::string &value) const;
   bool get_initializer(std::string const &key, Initializer *&initializer) const;
   Tensor get_parameter(int index);
   void print();
@@ -59,6 +61,7 @@ class Layer {
   std::unordered_map<std::string, float> float_properties;
   std::unordered_map<std::string, Initializer *> initializers;
   std::unordered_map<std::string, std::vector<int>> int_vector_properties;
+  std::unordered_map<std::string, std::string> string_properties;
 };
 
 }; // namespace FlexFlow
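The new string-property map mirrors the existing int, float, and initializer plumbing, giving a Layer a place to carry the textual part of the RoPE configuration (such as rope_type) between layer construction and operator creation. A small sketch of that round trip; the key name and helper functions are hypothetical, not code from this PR:

#include <string>
#include "flexflow/layer.h" // assumed include path for FlexFlow::Layer

// Hypothetical helpers, not part of this diff.
void stash_rope_type(FlexFlow::Layer *layer, std::string const &rope_type) {
  layer->add_string_property("rope_type", rope_type);
}

std::string recover_rope_type(FlexFlow::Layer const *layer) {
  std::string rope_type;
  if (!layer->get_string_property("rope_type", rope_type)) {
    rope_type = "default"; // fall back to RotaryEmbeddingMeta's default label
  }
  return rope_type;
}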