This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit e7c70bd

Undo ne_layers.c changes
aahouzi committed Mar 12, 2024
1 parent 7889188 commit e7c70bd
Showing 1 changed file with 8 additions and 10 deletions.
neural_speed/core/ne_layers.c (18 changes: 8 additions & 10 deletions)
@@ -2060,7 +2060,7 @@ struct ne_tensor* ne_silu_back(struct ne_context* ctx, struct ne_tensor* a, stru
 
 // ne_norm
 
-struct ne_tensor* ne_norm_impl(struct ne_context* ctx, struct ne_tensor* a, bool inplace, float eps) {
+struct ne_tensor* ne_norm_impl(struct ne_context* ctx, struct ne_tensor* a, bool inplace) {
   bool is_node = false;
 
   if (!inplace && (a->grad)) {
@@ -2070,21 +2070,20 @@ struct ne_tensor* ne_norm_impl(struct ne_context* ctx, struct ne_tensor* a, bool
 
   struct ne_tensor* result = inplace ? ne_view_tensor(ctx, a) : ne_dup_tensor(ctx, a);
 
-  ne_set_op_params(result, &eps, sizeof(eps));
-
   result->op = NE_OP_NORM;
   result->grad = is_node ? ne_dup_tensor(ctx, result) : NULL;
   result->src0 = a;
   result->src1 = NULL;  // TODO: maybe store epsilon here?
 
   return result;
 }
 
-struct ne_tensor* ne_norm(struct ne_context* ctx, struct ne_tensor* a, float eps) {
-  return ne_norm_impl(ctx, a, false, eps);
+struct ne_tensor* ne_norm(struct ne_context* ctx, struct ne_tensor* a) {
+  return ne_norm_impl(ctx, a, false);
 }
 
-struct ne_tensor* ne_norm_inplace(struct ne_context* ctx, struct ne_tensor* a, float eps) {
-  return ne_norm_impl(ctx, a, true, eps);
+struct ne_tensor* ne_norm_inplace(struct ne_context* ctx, struct ne_tensor* a) {
+  return ne_norm_impl(ctx, a, true);
 }
 
 struct ne_tensor* ne_rms_norm_impl(struct ne_context* ctx, struct ne_tensor* a, bool inplace, float eps) {
@@ -6180,8 +6179,7 @@ static void ne_compute_forward_norm_f32(const struct ne_compute_params* params,
   const size_t nb2 = dst->nb[2];
   const size_t nb3 = dst->nb[3];
 
-  float eps;
-  memcpy(&eps, dst->op_params, sizeof(float));
+  const float eps = 1e-5f;  // TODO: make this a parameter
 
   if (ne_is_contiguous(src0) && ne_is_contiguous(dst)) {
     bestla_layernormalization(ne03 * ne02 * ne01, ne00, false, eps, (const float*)src0->data, (float*)dst->data);
@@ -12597,4 +12595,4 @@ int ne_cpu_has_sse3(void) {
 
 int ne_cpu_has_vsx(void) { return 0; }
 
-////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
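
With this revert, eps is no longer carried through op_params; ne_compute_forward_norm_f32 simply uses the hardcoded 1e-5f shown above. For reference, below is a minimal standalone C sketch of the per-row mean/variance normalization that this forward pass computes: each row is shifted to zero mean and scaled to unit variance, with eps guarding against division by zero. This is illustrative only, not neural_speed code, and norm_row_f32 is a made-up helper name.

/* Standalone sketch of the per-row normalization performed by the norm op.
 * The eps value mirrors the hardcoded 1e-5f used after this revert. */
#include <math.h>
#include <stdio.h>

static void norm_row_f32(const float* x, float* y, int n) {
  const float eps = 1e-5f;  /* hardcoded, as in the reverted code */

  /* mean of the row */
  float mean = 0.0f;
  for (int i = 0; i < n; ++i) mean += x[i];
  mean /= (float)n;

  /* variance of the row */
  float var = 0.0f;
  for (int i = 0; i < n; ++i) {
    const float d = x[i] - mean;
    var += d * d;
  }
  var /= (float)n;

  /* normalize: zero mean, unit variance; eps avoids division by zero */
  const float scale = 1.0f / sqrtf(var + eps);
  for (int i = 0; i < n; ++i) y[i] = (x[i] - mean) * scale;
}

int main(void) {
  const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float y[4];
  norm_row_f32(x, y, 4);
  for (int i = 0; i < 4; ++i) printf("%f\n", y[i]);
  return 0;
}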
