Skip to content

Commit

Permalink
Add experimental CUDA support for StarCoder, StarChat models
Browse files Browse the repository at this point in the history
Add `gpt_bigcode` as model type for StarCoder, StarChat models
  • Loading branch information
marella committed Aug 12, 2023
1 parent bb4a5ce commit 7fc3407
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 9 deletions.
2 changes: 1 addition & 1 deletion models/llm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ LLM* ctransformers_llm_create(const char* model_path, const char* model_type,
llm = new mpt_llm;
} else if (type == "replit") {
llm = new replit_llm;
} else if (type == "starcoder") {
} else if (type == "starcoder" || type == "gptbigcode") {
llm = new starcoder_llm;
}

Expand Down
49 changes: 41 additions & 8 deletions models/llms/starcoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct starcoder_model {

// load the model's weights from a file
bool starcoder_model_load(const std::string &fname, starcoder_model &model,
gpt_vocab &vocab) {
gpt_vocab &vocab, const int gpu_layers) {
auto fin = std::ifstream(fname, std::ios::binary);
if (!fin) {
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
Expand Down Expand Up @@ -251,6 +251,7 @@ bool starcoder_model_load(const std::string &fname, starcoder_model &model,

for (int i = 0; i < n_layer; ++i) {
auto &layer = model.layers[i];
const bool gpu = i >= (int)n_layer - gpu_layers;

layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
Expand All @@ -259,18 +260,18 @@ bool starcoder_model_load(const std::string &fname, starcoder_model &model,
layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

layer.c_attn_attn_w =
ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd + 2 * kv_dim);
ct_new_tensor(ctx, wtype, n_embd, n_embd + 2 * kv_dim, gpu);
layer.c_attn_attn_b =
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd + 2 * kv_dim);

layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd);
layer.c_attn_proj_w = ct_new_tensor(ctx, wtype, n_embd, n_embd, gpu);
layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

layer.c_mlp_fc_w = ggml_new_tensor_2d(
ctx, wtype, n_embd, 4 * n_embd); // TODO: 4*n_embd = config.n_inner
layer.c_mlp_fc_w = ct_new_tensor(ctx, wtype, n_embd, 4 * n_embd,
gpu); // TODO: 4*n_embd = config.n_inner
layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd);

layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd);
layer.c_mlp_proj_w = ct_new_tensor(ctx, wtype, 4 * n_embd, n_embd, gpu);
layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

// map by name
Expand Down Expand Up @@ -390,7 +391,9 @@ bool starcoder_model_load(const std::string &fname, starcoder_model &model,
return false;
}

fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
uint8_t *data = ct_alloc(tensor);
fin.read(reinterpret_cast<char *>(data), ggml_nbytes(tensor));
ct_transform(data, tensor);

// GPT-2 models share the WTE tensor as the LM head
if (name == "model/wte" && has_lm_head == false) {
Expand Down Expand Up @@ -762,4 +765,34 @@ bool starcoder_eval(const starcoder_model &model, const int n_threads,
return true;
}

REGISTER_LLM(starcoder);
// StarCoder / StarChat (GPT-BigCode) model wrapper that exposes the
// generic LLM interface. Owns the ggml context backing the weights and
// any GPU-side tensor buffers for the lifetime of the object.
class starcoder_llm : public LLM {
 public:
  // Release GPU-side tensor allocations first, then the ggml context.
  // NOTE(review): ordering presumably matters — ct_free operates on
  // tensors whose host metadata lives inside model_.ctx; confirm before
  // reordering.
  virtual ~starcoder_llm() {
    ct_free(model_.tensors);
    if (model_.ctx != nullptr) {
      ggml_free(model_.ctx);
    }
  }

 protected:
  // Loads the model weights from `filename`. A positive `context_length`
  // overrides the context size stored in the file; `gpu_layers` selects
  // how many of the last layers are offloaded to the GPU.
  // Returns false when the file cannot be opened or parsed.
  bool Load(const std::string &filename, const int context_length,
            const int gpu_layers) override {
    if (context_length > 0) {
      model_.hparams.n_ctx = context_length;
    }
    const bool loaded =
        starcoder_model_load(filename, model_, vocab_, gpu_layers);
    if (loaded) {
      // Publish the effective context size for the base class.
      n_ctx_ = model_.hparams.n_ctx;
    }
    return loaded;
  }

  // Runs one forward pass over `tokens`, given `n_past` tokens already in
  // the KV cache, writing logits into the base-class buffer.
  bool Eval(const std::vector<gpt_vocab::id> &tokens, const int threads,
            const int n_past) override {
    return starcoder_eval(model_, threads, n_past, tokens, logits_,
                          mem_per_token_);
  }

 private:
  starcoder_model model_;  // weights, hyperparameters, and KV cache
};

0 comments on commit 7fc3407

Please sign in to comment.