Commit: fix the sync issue

ThanatosShinji committed Jun 5, 2024
1 parent 08b77f1 commit 87c5823
Showing 2 changed files with 32 additions and 82 deletions.
110 changes: 30 additions & 80 deletions neural_speed/core/ne_layers.c
@@ -1507,13 +1507,9 @@ struct ne_tensor* ne_debug_op(struct ne_context* ctx, struct ne_tensor* a, ne_de
   return result;
 }

-struct ne_tensor* ne_dup(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_dup_impl(ctx, a, false);
-}
+struct ne_tensor* ne_dup(struct ne_context* ctx, struct ne_tensor* a) { return ne_dup_impl(ctx, a, false); }

-struct ne_tensor* ne_dup_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_dup_impl(ctx, a, true);
-}
+struct ne_tensor* ne_dup_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_dup_impl(ctx, a, true); }

 // ne_add

@@ -1872,13 +1868,9 @@ struct ne_tensor* ne_sqr_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_sqr(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sqr_impl(ctx, a, false);
-}
+struct ne_tensor* ne_sqr(struct ne_context* ctx, struct ne_tensor* a) { return ne_sqr_impl(ctx, a, false); }

-struct ne_tensor* ne_sqr_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sqr_impl(ctx, a, true);
-}
+struct ne_tensor* ne_sqr_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_sqr_impl(ctx, a, true); }

 // ne_sqrt

@@ -1898,13 +1890,9 @@ struct ne_tensor* ne_sqrt_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_sqrt(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sqrt_impl(ctx, a, false);
-}
+struct ne_tensor* ne_sqrt(struct ne_context* ctx, struct ne_tensor* a) { return ne_sqrt_impl(ctx, a, false); }

-struct ne_tensor* ne_sqrt_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sqrt_impl(ctx, a, true);
-}
+struct ne_tensor* ne_sqrt_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_sqrt_impl(ctx, a, true); }

 // ne_log

@@ -1924,13 +1912,9 @@ struct ne_tensor* ne_log_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_log(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_log_impl(ctx, a, false);
-}
+struct ne_tensor* ne_log(struct ne_context* ctx, struct ne_tensor* a) { return ne_log_impl(ctx, a, false); }

-struct ne_tensor* ne_log_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_log_impl(ctx, a, true);
-}
+struct ne_tensor* ne_log_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_log_impl(ctx, a, true); }

 // ne_sum

@@ -2035,13 +2019,9 @@ struct ne_tensor* ne_abs_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_abs(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_abs_impl(ctx, a, false);
-}
+struct ne_tensor* ne_abs(struct ne_context* ctx, struct ne_tensor* a) { return ne_abs_impl(ctx, a, false); }

-struct ne_tensor* ne_abs_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_abs_impl(ctx, a, true);
-}
+struct ne_tensor* ne_abs_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_abs_impl(ctx, a, true); }

 // ne_sgn

@@ -2061,13 +2041,9 @@ struct ne_tensor* ne_sgn_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_sgn(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sgn_impl(ctx, a, false);
-}
+struct ne_tensor* ne_sgn(struct ne_context* ctx, struct ne_tensor* a) { return ne_sgn_impl(ctx, a, false); }

-struct ne_tensor* ne_sgn_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_sgn_impl(ctx, a, true);
-}
+struct ne_tensor* ne_sgn_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_sgn_impl(ctx, a, true); }

 // ne_neg

@@ -2087,13 +2063,9 @@ struct ne_tensor* ne_neg_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_neg(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_neg_impl(ctx, a, false);
-}
+struct ne_tensor* ne_neg(struct ne_context* ctx, struct ne_tensor* a) { return ne_neg_impl(ctx, a, false); }

-struct ne_tensor* ne_neg_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_neg_impl(ctx, a, true);
-}
+struct ne_tensor* ne_neg_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_neg_impl(ctx, a, true); }

 // ne_step

@@ -2113,13 +2085,9 @@ struct ne_tensor* ne_step_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_step(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_step_impl(ctx, a, false);
-}
+struct ne_tensor* ne_step(struct ne_context* ctx, struct ne_tensor* a) { return ne_step_impl(ctx, a, false); }

-struct ne_tensor* ne_step_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_step_impl(ctx, a, true);
-}
+struct ne_tensor* ne_step_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_step_impl(ctx, a, true); }

 // ne_relu

@@ -2139,13 +2107,9 @@ struct ne_tensor* ne_relu_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_relu(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_relu_impl(ctx, a, false);
-}
+struct ne_tensor* ne_relu(struct ne_context* ctx, struct ne_tensor* a) { return ne_relu_impl(ctx, a, false); }

-struct ne_tensor* ne_relu_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_relu_impl(ctx, a, true);
-}
+struct ne_tensor* ne_relu_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_relu_impl(ctx, a, true); }

 // ne_gelu

@@ -2165,13 +2129,9 @@ struct ne_tensor* ne_gelu_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_gelu(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_gelu_impl(ctx, a, false);
-}
+struct ne_tensor* ne_gelu(struct ne_context* ctx, struct ne_tensor* a) { return ne_gelu_impl(ctx, a, false); }

-struct ne_tensor* ne_gelu_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_gelu_impl(ctx, a, true);
-}
+struct ne_tensor* ne_gelu_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_gelu_impl(ctx, a, true); }

 // ne_silu

@@ -2191,13 +2151,9 @@ struct ne_tensor* ne_silu_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_silu(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_silu_impl(ctx, a, false);
-}
+struct ne_tensor* ne_silu(struct ne_context* ctx, struct ne_tensor* a) { return ne_silu_impl(ctx, a, false); }

-struct ne_tensor* ne_silu_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_silu_impl(ctx, a, true);
-}
+struct ne_tensor* ne_silu_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_silu_impl(ctx, a, true); }

 // ne_silu_back

@@ -2755,13 +2711,9 @@ struct ne_tensor* ne_cont_impl(struct ne_context* ctx, struct ne_tensor* a, bool
   return result;
 }

-struct ne_tensor* ne_cont(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_cont_impl(ctx, a, false);
-}
+struct ne_tensor* ne_cont(struct ne_context* ctx, struct ne_tensor* a) { return ne_cont_impl(ctx, a, false); }

-struct ne_tensor* ne_cont_inplace(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_cont_impl(ctx, a, true);
-}
+struct ne_tensor* ne_cont_inplace(struct ne_context* ctx, struct ne_tensor* a) { return ne_cont_impl(ctx, a, true); }

 // ne_reshape

@@ -3297,9 +3249,7 @@ struct ne_tensor* ne_soft_max_impl(struct ne_context* ctx, struct ne_tensor* a,
   return result;
 }

-struct ne_tensor* ne_soft_max(struct ne_context* ctx, struct ne_tensor* a) {
-  return ne_soft_max_impl(ctx, a, false);
-}
+struct ne_tensor* ne_soft_max(struct ne_context* ctx, struct ne_tensor* a) { return ne_soft_max_impl(ctx, a, false); }

 struct ne_tensor* ne_soft_max_inplace(struct ne_context* ctx, struct ne_tensor* a) {
   return ne_soft_max_impl(ctx, a, true);
@@ -7202,7 +7152,7 @@ static void ne_compute_forward_mul_mat_q_f32_bestla(const struct ne_compute_para
 #ifdef NS_SYCL
   if (params->ith == 0) {
     if (dst->backend == NE_BACKEND_SYCL && src1->backend == NE_BACKEND_CPU) {
-      bestla_device_memcpy(actptr, src1->data, src1->size, params->dev_queue);
+      bestla_device_memcpy_sync(actptr, src1->data, src1->size, params->dev_queue);
     }
   }
 #endif
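Note: the one-word rename above, bestla_device_memcpy to bestla_device_memcpy_sync, is the functional fix behind the commit message. Thread 0 stages src1 from host memory into the SYCL activation buffer; if that copy returns while the transfer is still in flight, the kernel that consumes actptr can read incomplete data. Below is a minimal sketch of the async/sync distinction, assuming a standard SYCL queue underneath the bestla wrappers (the wrappers' real signatures live in the bestla backend and are not shown in this diff):

#include <sycl/sycl.hpp>

// Asynchronous copy: queue::memcpy returns an event immediately, so the
// transfer may still be in progress when the caller moves on.
void copy_async(sycl::queue& q, void* dst, const void* src, size_t bytes) {
  q.memcpy(dst, src, bytes);
}

// Synchronous copy: waiting on the returned event blocks the host until
// the transfer completes, matching what the _sync suffix suggests.
void copy_sync(sycl::queue& q, void* dst, const void* src, size_t bytes) {
  q.memcpy(dst, src, bytes).wait();
}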
@@ -7320,7 +7270,7 @@ static void ne_compute_forward_mul_mat_id_q_f32(const struct ne_compute_params*
   // char * wdata_src1_end = (char *)params->wdata;
   // int64_t wdata_src1_end = 0;

-#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
+#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id) * ne11 + (i1)]

   // nb01 >= nb00 - src0 is not transposed
   // compute by src0 rows
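Note: this hunk and the three identical ones below are whitespace-only formatting fixes; the macro's expansion is unchanged. mmid_matrix_row flattens the conceptual [n_as][ne11] row-mapping table (declared as matrix_rows in the following hunks) into a 1-D array. A self-contained sketch of the indexing follows; only the 30000 capacity comes from the diff, while ne11 and the stored values are made up:

#include <cstdint>
#include <cstring>

int main() {
  const int64_t ne11 = 8;  // hypothetical value for ne11, the table's second dimension
  static int64_t matrix_rows[30000];
  std::memset(matrix_rows, -1, sizeof(matrix_rows));  // -1 marks unused slots

#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id) * ne11 + (i1)]

  mmid_matrix_row(2, 0) = 3;                  // entry (2, 0) maps to row 3
  return mmid_matrix_row(2, 0) == 3 ? 0 : 1;  // exits 0 on success
}

The parentheses around row_id and i1, which predate this commit, are what keep an argument like r + 1 grouping correctly; the added spaces around * are purely clang-format style.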
@@ -7482,7 +7432,7 @@ static void ne_compute_forward_mul_mat_id_f32(const struct ne_compute_params* pa
   }
   int64_t matrix_row_counts[100];  // [n_as]
   int64_t matrix_rows[30000];      // [n_as][ne11]
-#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
+#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id) * ne11 + (i1)]
   memset(matrix_row_counts, 0, n_as * sizeof(int64_t));
   memset(matrix_rows, -1, 30000 * sizeof(int64_t));
   for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
@@ -7630,7 +7580,7 @@ static void ne_compute_forward_mul_mat_id_f16_f32(const struct ne_compute_params
   }
   int64_t matrix_row_counts[100];  // [n_as]
   int64_t matrix_rows[30000];      // [n_as][ne11]
-#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
+#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id) * ne11 + (i1)]
   memset(matrix_row_counts, 0, n_as * sizeof(int64_t));
   memset(matrix_rows, -1, 30000 * sizeof(int64_t));
   for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
Expand Down Expand Up @@ -7759,7 +7709,7 @@ static void ne_compute_forward_mul_mat_id_q_f32_bestla(const struct ne_compute_p
// int64_t wdata_src1_end = 0;
int64_t matrix_row_counts[100]; // [n_as]
int64_t matrix_rows[30000]; // [n_as][ne11]
#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
#define mmid_matrix_row(row_id, i1) matrix_rows[(row_id) * ne11 + (i1)]

// nb01 >= nb00 - src0 is not transposed
// compute by src0 rows
Expand Down
4 changes: 2 additions & 2 deletions neural_speed/models/llama/llama_utils.cpp
@@ -202,7 +202,7 @@ void Llama::load(model_context* ctx, model_progress_callback progress_callback,
       NE_ASSERT(n_expert == 0);
       NE_ASSERT(n_expert_used == 0);
       layer.ffn[0] = ml->get_tensor(layers_i + ".feed_forward.w1.weight", {n_embd, n_ff}, backend);
-      layer.ffn[1] = ml->get_tensor(layers_i + ".feed_forward.w2.weight", {n_ff, n_embd}, backend);
+      layer.ffn[1] = ml->get_tensor(layers_i + ".feed_forward.w2.weight", {n_ff, n_embd}, NE_BACKEND_SYCL);
       layer.ffn[2] = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
     } else {
       NE_ASSERT(n_expert > 0);
@@ -212,7 +212,7 @@ void Llama::load(model_context* ctx, model_progress_callback progress_callback,
         layer.ffn_gate_exp[x] =
             ml->get_tensor(layers_i + ".ffn_gate." + std::to_string(x) + ".weight", {n_embd, n_ff}, backend);
         layer.ffn_down_exp[x] =
-            ml->get_tensor(layers_i + ".ffn_down." + std::to_string(x) + ".weight", {n_ff, n_embd}, NE_BACKEND_SYCL);
+            ml->get_tensor(layers_i + ".ffn_down." + std::to_string(x) + ".weight", {n_ff, n_embd}, backend);
         layer.ffn_up_exp[x] =
             ml->get_tensor(layers_i + ".ffn_up." + std::to_string(x) + ".weight", {n_embd, n_ff}, backend);
       }
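Note: the net change in this file moves the hard-coded SYCL placement from the per-expert MoE down-projections (ffn_down) to the dense FFN down-projection (feed_forward.w2), while every other weight keeps the caller-supplied backend. With w2 resident on the SYCL device, its matmul presumably takes the dst->backend == NE_BACKEND_SYCL && src1->backend == NE_BACKEND_CPU path shown in ne_layers.c above, which is exactly the copy this commit makes synchronous.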
