sync : ggml (ggml-alloc + linker + gguf fixes) (#1501)
- ggml-alloc.c +12 -11
- ggml-quants.c +5 -0
- ggml.c +83 -258
- ggml.h +5 -0
ggml-alloc.c
CHANGED

@@ -446,12 +446,14 @@ static ggml_tallocr_t node_tallocr(ggml_gallocr_t galloc, struct ggml_tensor * n
     return galloc->hash_allocs[ggml_hash_find_or_insert(galloc->hash_set, node)];
 }
 
-static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view) {
+static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view, bool update_backend) {
     ggml_tallocr_t alloc = node_tallocr(galloc, view);
 
     //printf("init_view: %s from src %s\n", view->name, view->view_src->name);
     GGML_ASSERT(view->view_src != NULL && view->view_src->data != NULL);
-    view->backend = view->view_src->backend;
+    if (update_backend) {
+        view->backend = view->view_src->backend;
+    }
     view->buffer  = view->view_src->buffer;
     view->data    = (char *)view->view_src->data + view->view_offs;
 

@@ -469,7 +471,7 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
 
     if (node->data == NULL) {
         if (ggml_is_view(node)) {
-            init_view(galloc, node);
+            init_view(galloc, node, true);
         } else {
             // see if we can reuse a parent's buffer (inplace)
             if (ggml_op_can_inplace(node->op)) {

@@ -499,15 +501,14 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
                         AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
                         node->view_src = view_src;
                         view_src_hn->n_views += 1;
-                        init_view(galloc, node);
+                        init_view(galloc, node, false);
                         return;
                     }
-                }
-                else {
+                } else {
                     AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
                     node->view_src = parent;
                     p_hn->n_views += 1;
-                    init_view(galloc, node);
+                    init_view(galloc, node, false);
                     return;
                 }
             }

@@ -537,7 +538,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
             hash_get(galloc, view_src)->n_views += 1;
             if (node->buffer == NULL && node->data != NULL) {
                 // view of a pre-allocated tensor, didn't call init_view() yet
-                init_view(galloc, node);
+                init_view(galloc, node, true);
             }
         }
 

@@ -548,7 +549,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
             }
             hash_get(galloc, parent)->n_children += 1;
             if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
-                init_view(galloc, parent);
+                init_view(galloc, parent, true);
             }
         }
     }

@@ -663,7 +664,7 @@ size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, st
     return max_size;
 }
 
-void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_alloct) {
+void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_talloc) {
     const size_t hash_size = hash_set.size;
 
     GGML_ASSERT(hash_size >= (size_t)(graph->n_nodes + graph->n_leafs));

@@ -686,7 +687,7 @@ void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * grap
     // reset hash values
     memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size);
 
-    galloc->hash_allocs = hash_node_alloct;
+    galloc->hash_allocs = hash_node_talloc;
 
     ggml_tallocr_alloc_graph_impl(galloc, graph);
 
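A note on what the new update_backend flag buys: a genuine view (a tensor created with ggml_view, ggml_reshape and friends) should inherit the backend of its view_src, but when the allocator merely repurposes a parent's buffer for an in-place op, the node already has a backend assigned and must not have it clobbered. Hence init_view(..., true) at the real-view call sites and init_view(..., false) on the two in-place reuse paths. A toy model of that contract (an illustration, not the real allocator):

#include <assert.h>
#include <stdio.h>

/* toy stand-ins for ggml's tensor/backend bookkeeping */
enum toy_backend { BACKEND_CPU = 0, BACKEND_GPU = 1 };

struct toy_tensor {
    enum toy_backend    backend;
    struct toy_tensor * view_src;
};

/* mirrors the new init_view(galloc, view, update_backend) contract */
static void toy_init_view(struct toy_tensor * view, int update_backend) {
    if (update_backend) {
        view->backend = view->view_src->backend;  /* genuine view: follow the source */
    }
    /* buffer/data wiring would happen here in the real allocator */
}

int main(void) {
    struct toy_tensor src  = { BACKEND_GPU, NULL };

    /* a user-created view inherits the source's backend ... */
    struct toy_tensor view = { BACKEND_CPU, &src };
    toy_init_view(&view, 1);
    assert(view.backend == BACKEND_GPU);

    /* ... while in-place buffer reuse keeps the node's own backend */
    struct toy_tensor node = { BACKEND_CPU, &src };
    toy_init_view(&node, 0);
    assert(node.backend == BACKEND_CPU);

    printf("backend assignments ok\n");
    return 0;
}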
ggml-quants.c
CHANGED

@@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
     float max = x[0];
     float sum_w = weights[0];
     float sum_x = sum_w * x[0];
+#ifdef HAVE_BUGGY_APPLE_LINKER
+    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
+    for (volatile int i = 1; i < n; ++i) {
+#else
     for (int i = 1; i < n; ++i) {
+#endif
         if (x[i] < min) min = x[i];
         if (x[i] > max) max = x[i];
         float w = weights[i];
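This is the "linker" part of the sync: with the loop unrolled, the resulting object code trips a bug in Apple's ld64 1015.7, so marking the induction variable volatile forces a real load and store of i every iteration, which keeps the compiler from unrolling. A standalone sketch of the same pattern; the macro is assumed to be supplied by the build, e.g. cc -DHAVE_BUGGY_APPLE_LINKER minmax.c:

#include <stdio.h>

int main(void) {
    const float x[5] = { 3.0f, 1.0f, 4.0f, 1.0f, 5.0f };
    float min = x[0];
    float max = x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
    // 'volatile' defeats unrolling: the compiler cannot cache or batch i
    for (volatile int i = 1; i < 5; ++i) {
#else
    for (int i = 1; i < 5; ++i) {
#endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
    }
    printf("min=%.1f max=%.1f\n", min, max);
    return 0;
}

The cost is a slightly slower loop in one quantization helper on affected toolchains; every other build compiles the unchanged loop.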
ggml.c
CHANGED

@@ -5024,8 +5024,13 @@ struct ggml_tensor * ggml_rope_back(
         int                   n_dims,
         int                   mode,
         int                   n_ctx,
+        int                   n_orig_ctx,
         float                 freq_base,
         float                 freq_scale,
+        float                 ext_factor,
+        float                 attn_factor,
+        float                 beta_fast,
+        float                 beta_slow,
         float                 xpos_base,
         bool                  xpos_down) {
     GGML_ASSERT(ggml_is_vector(b));

@@ -5042,11 +5047,15 @@ struct ggml_tensor * ggml_rope_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
-    int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
-    memcpy(params + 4, &freq_base,  sizeof(float));
-    memcpy(params + 5, &freq_scale, sizeof(float));
-    memcpy(params + 6, &xpos_base,  sizeof(float));
-    memcpy(params + 7, &xpos_down,  sizeof(bool));
+    int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
+    memcpy(params +  5, &freq_base,    sizeof(float));
+    memcpy(params +  6, &freq_scale,   sizeof(float));
+    memcpy(params +  7, &ext_factor,   sizeof(float));
+    memcpy(params +  8, &attn_factor,  sizeof(float));
+    memcpy(params +  9, &beta_fast,    sizeof(float));
+    memcpy(params + 10, &beta_slow,    sizeof(float));
+    memcpy(params + 11, &xpos_base,    sizeof(float));
+    memcpy(params + 12, &xpos_down,    sizeof(bool));
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_ROPE_BACK;
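A side note on how these hyperparameters travel: ggml serializes an op's scalar arguments into a single int32_t op_params array, with memcpy used to bit-copy floats into int32 slots (a cast would round the value, and pointer type-punning would be undefined behavior); the consumer memcpys them back out, as the backward pass further down does. A minimal round-trip sketch, with illustrative values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    // pack: integers are stored directly, floats are bit-copied into slots
    int32_t params[13] = { /*n_past*/ 0, /*n_dims*/ 128, /*mode*/ 0,
                           /*n_ctx*/ 4096, /*n_orig_ctx*/ 4096 };
    const float freq_base = 10000.0f;
    memcpy(params + 5, &freq_base, sizeof(float));

    // unpack: the reverse memcpy recovers the exact float bits
    float freq_base_out = 0.0f;
    memcpy(&freq_base_out, params + 5, sizeof(float));
    assert(freq_base_out == freq_base);

    printf("freq_base round-trips as %.1f\n", freq_base_out);
    return 0;
}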
@@ -9376,7 +9385,6 @@ static bool ggml_compute_forward_mul_mat_use_blas(
 }
 #endif
 
-
 static void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,

@@ -10946,7 +10954,8 @@ static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }

@@ -11005,6 +11014,11 @@ static void ggml_compute_forward_rope_f32(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {

@@ -11021,9 +11035,9 @@ static void ggml_compute_forward_rope_f32(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base *= theta_scale;
                     block_theta *= theta_scale;

@@ -11047,6 +11061,7 @@ static void ggml_compute_forward_rope_f32(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     // zeta scaling for xPos only:
                     float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;

@@ -11077,6 +11092,7 @@ static void ggml_compute_forward_rope_f32(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 

@@ -11102,7 +11118,8 @@ static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }

@@ -11154,6 +11171,11 @@ static void ggml_compute_forward_rope_f16(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {

@@ -11170,9 +11192,9 @@ static void ggml_compute_forward_rope_f16(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base *= theta_scale;
                     block_theta *= theta_scale;

@@ -11196,6 +11218,7 @@ static void ggml_compute_forward_rope_f16(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     theta_base *= theta_scale;
 

@@ -11222,6 +11245,7 @@ static void ggml_compute_forward_rope_f16(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 

@@ -11251,11 +11275,11 @@ static void ggml_compute_forward_rope(
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
             } break;
         case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
            } break;
        default:
            {
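The inline comments added to both kernels compress a small linear-algebra fact. Each (cos_theta, sin_theta) pair applies a 2x2 rotation to one pair of elements, and for a rotation matrix the inverse is the transpose, which is the same rotation with the sign of the sine flipped:

R(\theta) =
\begin{pmatrix}
\cos\theta & -\sin\theta \\
\sin\theta & \cos\theta
\end{pmatrix},
\qquad
R(\theta)^{-1} = R(\theta)^{\top} =
\begin{pmatrix}
\cos\theta & \sin\theta \\
-\sin\theta & \cos\theta
\end{pmatrix}
= R(-\theta).

Because cosine is even and sine is odd, computing R(-theta) never touches the cosines, so a single multiplication by sin_sign = -1.0f turns the forward kernel into the exact backward one.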
@@ -11266,216 +11290,6 @@ static void ggml_compute_forward_rope(
 
 // ggml_compute_forward_rope_back
 
-static void ggml_compute_forward_rope_back_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    float freq_base;
-    float freq_scale;
-
-    // these two only relevant for xPos RoPE:
-    float xpos_base;
-    bool xpos_down;
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-    const int n_ctx  = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
-    memcpy(&freq_base,  (int32_t *) dst->op_params + 4, sizeof(float));
-    memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
-    memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
-    memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(float));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(freq_base, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = freq_scale * (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        // zeta scaling for xPos only:
-                        float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
-                        if (xpos_down) zeta = 1.0f / zeta;
-
-                        theta_base *= theta_scale;
-
-                        const float * const dy  = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        float       *       dx  = (float *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
-
-                        const float dy0 = dy[0];
-                        const float dy1 = dy[1];
-
-                        dx[0] =   dy0*cos_theta*zeta + dy1*sin_theta*zeta;
-                        dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const float * const dy  = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            float       *       dx  = (float *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
-
-                            const float dy0 = dy[0];
-                            const float dy1 = dy[n_dims/2];
-
-                            dx[0]        =   dy0*cos_theta + dy1*sin_theta;
-                            dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void ggml_compute_forward_rope_back_f16(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(ggml_fp16_t));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        theta_base *= theta_scale;
-
-                        const ggml_fp16_t * const dy  = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        ggml_fp16_t       *       dx  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
-
-                        const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                        const float dy1 = GGML_FP16_TO_FP32(dy[1]);
-
-                        dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                        dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const ggml_fp16_t * const dy  = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            ggml_fp16_t       *       dx  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
-
-                            const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                            const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
-
-                            dx[0]        = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                            dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,

@@ -11484,11 +11298,11 @@ static void ggml_compute_forward_rope_back(
     switch (src0->type) {
        case GGML_TYPE_F16:
            {
-                ggml_compute_forward_rope_back_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
            } break;
        case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
            } break;
        default:
            {
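These two hunks are the payoff of the sin_sign flag: roughly 210 lines of dedicated backward kernels are deleted, and ggml_compute_forward_rope_back now reuses the forward kernels with forward = false, as the switch above shows. Besides the size win, the backward rotation can no longer drift out of sync with forward changes such as YaRN. A self-contained numerical check of the idea (an illustration, not the ggml code):

#include <assert.h>
#include <math.h>
#include <stdio.h>

// the 2x2 rotation applied to one element pair;
// sin_sign = +1.0f rotates forward, -1.0f applies the inverse (transpose)
static void rotate_pair(float theta, float sin_sign, float * x0, float * x1) {
    const float c = cosf(theta);
    const float s = sinf(theta) * sin_sign;
    const float y0 = *x0 * c - *x1 * s;
    const float y1 = *x0 * s + *x1 * c;
    *x0 = y0;
    *x1 = y1;
}

int main(void) {
    float x0 = 0.25f, x1 = -1.5f;
    rotate_pair(0.8f,  1.0f, &x0, &x1);  // forward pass
    rotate_pair(0.8f, -1.0f, &x0, &x1);  // backward pass: same kernel, flipped sin
    assert(fabsf(x0 - 0.25f) < 1e-5f);
    assert(fabsf(x1 + 1.5f)  < 1e-5f);
    printf("recovered (%.2f, %.2f)\n", x0, x1);
    return 0;
}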
@@ -14923,17 +14737,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 // necessary for llama
                 if (src0->grad) {
                     //const int n_past = ((int32_t *) tensor->op_params)[0];
-                    const int n_dims = ((int32_t *) tensor->op_params)[1];
-                    const int mode   = ((int32_t *) tensor->op_params)[2];
-                    const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
-                    float freq_scale;
-                    float xpos_base;
-                    bool  xpos_down;
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                    const int mode       = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                     src0->grad = ggml_add_or_set(ctx,
                             src0->grad,

@@ -14943,8 +14760,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             n_dims,
                             mode,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down),
                         zero_table);

@@ -14954,17 +14776,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 if (src0->grad) {
                     //const int n_past = ((int32_t *) tensor->op_params)[0];
-                    const int n_dims = ((int32_t *) tensor->op_params)[1];
-                    const int mode   = ((int32_t *) tensor->op_params)[2];
-                    const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
-                    float freq_scale;
-                    float xpos_base;
-                    bool  xpos_down;
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                    const int mode       = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                     src0->grad = ggml_add_or_set(ctx,
                             src0->grad,

@@ -14973,14 +14798,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             src1,
                             n_dims,
                             mode,
-                            0,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
-                            0.0f,
-                            1.0f,
-                            0.0f,
-                            0.0f,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down,
                             false),
@@ -18248,7 +18073,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
 
-        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];
 
             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);

@@ -18295,7 +18120,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_STRING:
                     {
                         kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
-                        for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                        for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                             ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                         }
                     } break;

@@ -18323,7 +18148,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
 
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
 
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {

@@ -18370,7 +18195,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // compute the total size of the data section, taking into account the alignment
     {
         ctx->size = 0;
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
 
             const int64_t ne =

@@ -18439,7 +18264,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, true);
 
         // create the tensors
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             const int64_t ne[GGML_MAX_DIMS] = {
                 ctx->infos[i].ne[0],
                 ctx->infos[i].ne[1],
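The gguf fixes are all of one kind: the GGUF header stores n_kv and n_tensors as 64-bit counts, but these loops indexed them with 32-bit counters. The comparison promotes the index to the count's type while the increment still wraps at 2^32, so a malformed or malicious file with a huge count could make the loop spin forever instead of terminating. A small sketch of that promotion/wrap interaction, assuming only standard C:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    // a 64-bit count larger than any uint32_t can represent
    const uint64_t n_kv = (uint64_t) UINT32_MAX + 2;

    // with a 32-bit index, `i < n_kv` promotes i to 64 bits for the compare,
    // but the increment wraps 0xFFFFFFFF -> 0, so i never reaches n_kv
    uint32_t i32 = UINT32_MAX;
    i32++;                                       // wraps to 0: the loop restarts
    printf("wrapped 32-bit index: %u\n", i32);

    // a 64-bit index, as in the fix, covers the count's full range
    uint64_t i64 = UINT32_MAX;
    i64++;
    printf("64-bit index %llu reaches the bound: %d\n",
           (unsigned long long) i64, i64 < n_kv);
    return 0;
}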
ggml.h
CHANGED

@@ -1371,8 +1371,13 @@ extern "C" {
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
+            int                   n_orig_ctx,
             float                 freq_base,
             float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow,
             float                 xpos_base,
             bool                  xpos_down);
 
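On the caller side, ggml_rope_back now takes the same YaRN arguments that ggml_rope already did, keeping the forward and backward signatures parallel. A minimal sketch of an updated call against this revision's API; the tensor shapes and hyperparameter values are illustrative only:

#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    // dy: a gradient flowing back through a rope'd activation (head_dim x heads x tokens)
    // pos: one int32 position per token, as the rope ops expect
    struct ggml_tensor * dy  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 128, 32, 4);
    struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);

    struct ggml_tensor * dx = ggml_rope_back(ctx, dy, pos,
            /*n_dims*/      128,
            /*mode*/        0,
            /*n_ctx*/       0,
            /*n_orig_ctx*/  0,        // new
            /*freq_base*/   10000.0f,
            /*freq_scale*/  1.0f,
            /*ext_factor*/  0.0f,     // new (YaRN)
            /*attn_factor*/ 1.0f,     // new (YaRN)
            /*beta_fast*/   32.0f,    // new (YaRN)
            /*beta_slow*/   1.0f,     // new (YaRN)
            /*xpos_base*/   0.0f,
            /*xpos_down*/   false);

    (void) dx;
    ggml_free(ctx);
    return 0;
}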