Commit 0828065 · Parent: 0653499

examples: add MNIST training + missing ops

Files changed:
- ggml/include/ggml.h (+67 -38)
- ggml/src/ggml.c (+376 -18)
ggml/include/ggml.h
CHANGED
@@ -220,7 +220,7 @@
 #include <stdio.h>
 
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
 
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -490,9 +490,11 @@ extern "C" {
|
|
| 490 |
GGML_OP_CLAMP,
|
| 491 |
GGML_OP_CONV_TRANSPOSE_1D,
|
| 492 |
GGML_OP_IM2COL,
|
|
|
|
| 493 |
GGML_OP_CONV_TRANSPOSE_2D,
|
| 494 |
GGML_OP_POOL_1D,
|
| 495 |
GGML_OP_POOL_2D,
|
|
|
|
| 496 |
GGML_OP_UPSCALE, // nearest interpolate
|
| 497 |
GGML_OP_PAD,
|
| 498 |
GGML_OP_ARANGE,
|
|
@@ -1582,34 +1584,49 @@ extern "C" {
|
|
| 1582 |
float min,
|
| 1583 |
float max);
|
| 1584 |
|
|
|
|
|
|
|
| 1585 |
GGML_API struct ggml_tensor * ggml_im2col(
|
| 1586 |
struct ggml_context * ctx,
|
| 1587 |
-
struct ggml_tensor * a,
|
| 1588 |
-
struct ggml_tensor * b,
|
| 1589 |
-
int
|
| 1590 |
-
int
|
| 1591 |
-
int
|
| 1592 |
-
int
|
| 1593 |
-
int
|
| 1594 |
-
int
|
| 1595 |
-
bool
|
| 1596 |
-
enum ggml_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1597 |
|
| 1598 |
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
| 1599 |
struct ggml_context * ctx,
|
| 1600 |
-
struct ggml_tensor * a,
|
| 1601 |
-
struct ggml_tensor * b,
|
| 1602 |
-
int s0,
|
| 1603 |
-
int s1,
|
| 1604 |
-
int p0,
|
| 1605 |
-
int p1,
|
| 1606 |
-
int d0,
|
| 1607 |
-
int d1);
|
| 1608 |
|
| 1609 |
GGML_API struct ggml_tensor * ggml_conv_1d(
|
| 1610 |
struct ggml_context * ctx,
|
| 1611 |
-
struct ggml_tensor * a,
|
| 1612 |
-
struct ggml_tensor * b,
|
| 1613 |
int s0, // stride
|
| 1614 |
int p0, // padding
|
| 1615 |
int d0); // dilation
|
|
@@ -1618,29 +1635,29 @@ extern "C" {
|
|
| 1618 |
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
| 1619 |
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
| 1620 |
struct ggml_context * ctx,
|
| 1621 |
-
struct ggml_tensor * a,
|
| 1622 |
-
struct ggml_tensor * b,
|
| 1623 |
-
int s,
|
| 1624 |
-
int d);
|
| 1625 |
|
| 1626 |
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
| 1627 |
struct ggml_context * ctx,
|
| 1628 |
-
struct ggml_tensor * a,
|
| 1629 |
-
struct ggml_tensor * b,
|
| 1630 |
-
int s0,
|
| 1631 |
-
int p0,
|
| 1632 |
-
int d0);
|
| 1633 |
|
| 1634 |
GGML_API struct ggml_tensor * ggml_conv_2d(
|
| 1635 |
struct ggml_context * ctx,
|
| 1636 |
-
struct ggml_tensor * a,
|
| 1637 |
-
struct ggml_tensor * b,
|
| 1638 |
-
int s0,
|
| 1639 |
-
int s1,
|
| 1640 |
-
int p0,
|
| 1641 |
-
int p1,
|
| 1642 |
-
int d0,
|
| 1643 |
-
int d1);
|
| 1644 |
|
| 1645 |
|
| 1646 |
// kernel size is a->ne[0] x a->ne[1]
|
|
@@ -1702,6 +1719,18 @@ extern "C" {
             float                 p0,
             float                 p1);
 
+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
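Taken together, these header changes make the convolution and pooling path differentiable: ggml_im2col and ggml_pool_2d stop aborting when a gradient is requested, and the new *_BACK ops carry the gradients. A minimal sketch of what this enables (hypothetical shapes and memory size chosen for illustration; the graph-building calls are the standard ggml API of this era, not part of this diff):

    #include "ggml.h"

    // build a tiny conv -> pool -> sum graph and expand its backward pass
    struct ggml_init_params ip = { /*.mem_size =*/ 256*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 1,  8); // [KW, KH, IC, OC]
    struct ggml_tensor * images = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 28, 28, 1, 32); // [IW, IH, IC, N]
    ggml_set_param(ctx, kernel); // mark as trainable so gradients are tracked

    // conv_2d now keeps the im2col intermediate in the kernel's type (a->type), so an F32 kernel works
    struct ggml_tensor * conv = ggml_conv_2d(ctx, kernel, images, 1, 1, 1, 1, 1, 1);
    struct ggml_tensor * pool = ggml_pool_2d(ctx, conv, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    struct ggml_tensor * loss = ggml_sum(ctx, pool); // stand-in scalar objective

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);
    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*keep =*/ false); // inserts IM2COL_BACK and POOL_2D_BACK nodes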
ggml/src/ggml.c
CHANGED
@@ -2801,9 +2801,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CLAMP",
     "CONV_TRANSPOSE_1D",
     "IM2COL",
+    "IM2COL_BACK",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
     "POOL_2D",
+    "POOL_2D_BACK",
     "UPSCALE",
     "PAD",
     "ARANGE",
@@ -2837,7 +2839,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -2891,9 +2893,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "clamp(x)",
     "conv_transpose_1d(x)",
     "im2col(x)",
+    "im2col_back(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
     "pool_2d(x)",
+    "pool_2d_back(x)",
     "upscale(x)",
     "pad(x)",
     "arange(start, stop, step)",
@@ -2927,7 +2931,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -3741,6 +3745,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
     size_t data_size = ggml_row_size(type, ne[0]);
     for (int i = 1; i < n_dims; i++) {
+        assert(ne[i] > 0);
         data_size *= ne[i];
     }
 
@@ -3773,6 +3778,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     }
 
     struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+    GGML_ASSERT(obj_new);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
@@ -4492,8 +4498,6 @@ static struct ggml_tensor * ggml_add_impl(
     bool is_node = false;
 
     if (!inplace && (a->grad || b->grad)) {
-        // TODO: support backward pass for broadcasting
-        GGML_ASSERT(ggml_are_same_shape(a, b));
        is_node = true;
     }
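The two lines removed from ggml_add_impl are what previously forced both addends to have identical shapes whenever gradients were involved; with broadcasting now allowed in training graphs (e.g. adding a bias tensor across a batch), the matching gradient-side handling appears in the GGML_OP_ADD backward hunk further below.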
@@ -6801,17 +6805,20 @@ struct ggml_tensor * ggml_im2col(
         GGML_ASSERT(a->ne[2] == b->ne[2]);
     } else {
         GGML_ASSERT(a->ne[1] == b->ne[1]);
+        GGML_ASSERT(b->ne[3] == 1);
     }
     bool is_node = false;
 
-    if (a->grad || b->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
         is_node = true;
     }
 
     const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
     const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
 
+    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0)           && "b too small compared to a");
+
     const int64_t ne[4] = {
         is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
         OW,
@@ -6831,6 +6838,37 @@ struct ggml_tensor * ggml_im2col(
     return result;
 }
 
+struct ggml_tensor * ggml_im2col_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t             * ne,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1,
+        bool                  is_2D) {
+
+    bool is_node = false;
+
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_IM2COL_BACK;
+    result->grad   = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
 // result: [N, OC, OH, OW]
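For orientation, ggml_calc_conv_output_size used above computes the standard convolution output-size formula, and the two new GGML_ASSERTs simply reject parameter combinations that would make it non-positive (illustrative numbers below, not from this diff):

    // OW = (IW + 2*p0 - d0*(KW - 1) - 1) / s0 + 1
    // e.g. IW = 28, KW = 3, s0 = 1, p0 = 1, d0 = 1  ->  (28 + 2 - 2 - 1)/1 + 1 = 28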
@@ -6844,7 +6882,7 @@ struct ggml_tensor * ggml_conv_2d(
         int                   p1,
         int                   d0,
         int                   d1) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
 
     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
@@ -6970,17 +7008,17 @@ struct ggml_tensor * ggml_pool_2d(
     bool is_node = false;
 
     if (a->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
         is_node = true;
     }
 
     struct ggml_tensor * result;
-    const int64_t ne[3] = {
+    const int64_t ne[4] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
+        a->ne[3],
     };
-    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
@@ -6991,6 +7029,37 @@ struct ggml_tensor * ggml_pool_2d(
     return result;
 }
 
+struct ggml_tensor * ggml_pool_2d_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * af,
+        enum ggml_op_pool     op,
+        int                   k0,
+        int                   k1,
+        int                   s0,
+        int                   s1,
+        float                 p0,
+        float                 p1) {
+
+    bool is_node = false;
+
+    if (a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * result;
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+
+    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_POOL_2D_BACK;
+    result->grad   = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = af;
+    return result;
+}
+
 // ggml_upscale
 
 static struct ggml_tensor * ggml_upscale_impl(
@@ -14714,6 +14783,7 @@ static void ggml_compute_forward_conv_transpose_1d(
     }
 }
 
+// ggml_compute_forward_im2col_f32
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14724,7 +14794,6 @@ static void ggml_compute_forward_im2col_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -14755,7 +14824,6 @@ static void ggml_compute_forward_im2col_f32(
     int ofs0 = is_2D ? nb13 : nb12;
     int ofs1 = is_2D ? nb12 : nb11;
 
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
     // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@@ -14791,6 +14859,7 @@ static void ggml_compute_forward_im2col_f32(
     }
 
 
+// ggml_compute_forward_im2col_f16
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14886,6 +14955,99 @@ static void ggml_compute_forward_im2col(
     }
 }
 
+// ggml_compute_forward_im2col_back_f32
+
+static void ggml_compute_forward_im2col_back_f32(
+        const struct ggml_compute_params * params,
+              struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
+    const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne3 : ne2;
+    const int64_t IC = is_2D ? ne2 : ne1;
+    const int64_t IH = is_2D ? ne1 : 1;
+    const int64_t IW = ne0;
+
+    const int64_t KH = is_2D ? ne01 : 1;
+    const int64_t KW = ne00;
+
+    const int64_t OH = is_2D ? ne12 : 1;
+    const int64_t OW = ne11;
+
+    int ofs0 = is_2D ? nb3 : nb2;
+    int ofs1 = is_2D ? nb2 : nb1;
+
+    GGML_ASSERT(nb0 == sizeof(float));
+
+    // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+    {
+        float * const wdata = (float *) dst->data;
+
+        for (int64_t in = 0; in < N; in++) {
+            for (int64_t iic = ith; iic < IC; iic += nth) {
+                for (int64_t iih = 0; iih < IH; iih++) {
+                    for (int64_t iiw = 0; iiw < IW; iiw++) {
+
+                        // micro kernel
+                        float grad = 0.0f;
+                        for (int64_t ikh = 0; ikh < KH; ikh++) {
+                            for (int64_t ikw = 0; ikw < KW; ikw++) {
+                                // For s0 > 1 some values were skipped over in the forward pass.
+                                // These values have tmpw % s0 != 0 and need to be skipped in the backwards pass as well.
+                                const int64_t tmpw = (iiw + p0 - ikw*d0);
+                                if (tmpw % s0 != 0) {
+                                    continue;
+                                }
+                                const int64_t iow = tmpw / s0;
+
+                                // Equivalent logic as above except for s1.
+                                int64_t ioh;
+                                if (is_2D) {
+                                    const int64_t tmph = iih + p1 - ikh*d1;
+
+                                    if (tmph % s1 != 0) {
+                                        continue;
+                                    }
+
+                                    ioh = tmph / s1;
+                                } else {
+                                    ioh = 0;
+                                }
+
+                                if (iow < 0 || iow >= OW || ioh < 0 || ioh >= OH) {
+                                    continue;
+                                }
+
+                                const float * const src_data = (const float *) src1->data
+                                    + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+                                grad += src_data[iic*(KH*KW) + ikh*KW + ikw];
+                            }
+                        }
+                        float * dst_data = (float *)((char *) wdata + (in*ofs0 + iic*ofs1)); // [IH, IW]
+                        dst_data[iih*IW + iiw] = grad;
+                    }
+                }
+            }
+        }
+    }
+}
 
 // ggml_compute_forward_conv_transpose_2d
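The tmpw % s0 test in the kernel above mirrors the forward subsampling: input column iiw contributed to forward output column iow only if iiw + p0 - ikw*d0 was an exact multiple of the stride. As a worked case (hypothetical numbers): with s0 = 2, p0 = 0, d0 = 1 and iiw = 5, kernel offset ikw = 0 gives tmpw = 5, which is odd, so that offset contributes no gradient; ikw = 1 gives tmpw = 4, hence iow = 2, and the gradient flows back from output column 2.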
@@ -15128,6 +15290,128 @@ static void ggml_compute_forward_pool_2d(
     }
 }
 
+// ggml_compute_forward_pool_2d_back
+
+static void ggml_compute_forward_pool_2d_back(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src  = dst->src[0];
+    const struct ggml_tensor * dstf = dst->src[1]; // forward tensor of dst
+
+    assert(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    const int32_t * opts = (const int32_t *)dst->op_params;
+    enum ggml_op_pool op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
+
+    char       * cdata  = (char *) dst->data;
+    const char * cdataf = (const char *) dstf->data;
+    const char * const data_end = cdata + ggml_nbytes(dst);
+
+    GGML_ASSERT(params->ith == 0);
+    memset(cdata, 0, ggml_nbytes(dst));
+
+    const int64_t px = src->ne[0];
+    const int64_t py = src->ne[1];
+    const int64_t pa = px * py;
+
+    const float * splane = (const float *) src->data;
+
+    const int ka = k0 * k1;
+    const int offset0 = -p0;
+    const int offset1 = -p1;
+
+    while (cdata < data_end) {
+        for (int oy = 0; oy < py; ++oy) {
+            const float * const srow = splane + oy * px;
+            for (int ox = 0; ox < px; ++ox) {
+                const float grad0 = srow[ox];
+
+                const int ix = offset0 + ox * s0;
+                const int iy = offset1 + oy * s1;
+
+                if (op == GGML_OP_POOL_MAX) {
+                    float maxval = -FLT_MAX;
+                    int kxmax = -1;
+                    int kymax = -1;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        const void * drowf = (const void *)(cdataf + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            const float val = dst->type == GGML_TYPE_F32 ?
+                                ((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
+                            if (val <= maxval) {
+                                continue;
+                            }
+
+                            maxval = val;
+                            kxmax = kx;
+                            kymax = ky;
+                        }
+                    }
+
+                    if (kxmax == -1 || kymax == -1) {
+                        continue;
+                    }
+
+                    void * drow = (void *)(cdata + dst->nb[1] * (iy + kymax));
+                    const int j = ix + kxmax;
+                    if (dst->type == GGML_TYPE_F32) {
+                        ((float *) drow)[j] += grad0;
+                    } else {
+                        ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
+                    }
+                } else if (op == GGML_OP_POOL_AVG) {
+                    const float grad = grad0 / ka;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        void * drow = (void *)(cdata + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            if (dst->type == GGML_TYPE_F32) {
+                                ((float *) drow)[j] += grad;
+                            } else {
+                                ((ggml_fp16_t *) drow)[j] += GGML_FP32_TO_FP16(grad);
+                            }
+                        }
+                    }
+                } else {
+                    GGML_ASSERT(false);
+                }
+            }
+        }
+
+        cdata  += dst->nb[2];
+        cdataf += dst->nb[2];
+        splane += pa;
+    }
+}
+
 // ggml_compute_forward_upscale
 
 static void ggml_compute_forward_upscale_f32(
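In short, the two pooling modes in the kernel above route gradients differently: GGML_OP_POOL_MAX re-scans the saved forward input (dstf, src[1]) to find the argmax cell of each window and adds the entire output gradient there, while GGML_OP_POOL_AVG spreads grad0 / (k0*k1) uniformly over the window; cells that fall into the padding region are skipped in both cases.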
@@ -17097,6 +17381,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_im2col(params, tensor);
             } break;
+        case GGML_OP_IM2COL_BACK:
+            {
+                ggml_compute_forward_im2col_back_f32(params, tensor);
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 ggml_compute_forward_conv_transpose_2d(params, tensor);
@@ -17109,6 +17397,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_pool_2d(params, tensor);
             } break;
+        case GGML_OP_POOL_2D_BACK:
+            {
+                ggml_compute_forward_pool_2d_back(params, tensor);
+            } break;
         case GGML_OP_UPSCALE:
             {
                 ggml_compute_forward_upscale(params, tensor);
@@ -17477,7 +17769,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
                     src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table);
                 }
                 if (src1->grad) {
-                    src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+                    if (ggml_are_same_shape(src0, src1)) {
+                        src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+                    } else {
+                        src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table);
+                    }
                 }
             } break;
         case GGML_OP_ADD1:
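This pairs with the assert removed from ggml_add_impl earlier in the diff: when b was broadcast, the incoming gradient has a's (larger) shape, so ggml_repeat_back is used to sum it back down to src1's shape before accumulating.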
@@ -18074,6 +18370,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_IM2COL:
+            {
+                if (src1->grad) {
+                    const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
+                    const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
+                    const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
+                    const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
+                    const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
+                    const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
+                    const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
+
+                    src1->grad = ggml_add_or_set(ctx,
+                            src1->grad,
+                            ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_IM2COL_BACK:
             {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
@@ -18086,6 +18399,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_POOL_2D:
+            {
+                if (src0->grad) {
+                    const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
+                    const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
+                    const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
+                    const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
+                    const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
+                    const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
+                    const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
+
+                    src0->grad = ggml_add_or_set(ctx,
+                            src0->grad,
+                            ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_POOL_2D_BACK:
             {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
@@ -18375,6 +18705,7 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
 
 void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);
+    GGML_ASSERT(gf->grads);
 
     // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
     if (keep) {
@@ -18802,6 +19133,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
         case GGML_OP_IM2COL:
+        case GGML_OP_IM2COL_BACK:
        case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
@@ -18809,6 +19141,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_POOL_1D:
         case GGML_OP_POOL_2D:
+        case GGML_OP_POOL_2D_BACK:
             {
                 n_tasks = 1;
             } break;
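Note the threading split here: GGML_OP_IM2COL_BACK joins the multi-threaded group, matching the "iic = ith; iic += nth" channel loop in its kernel, whereas GGML_OP_POOL_2D_BACK runs with n_tasks = 1, matching the early return for params->ith != 0 in ggml_compute_forward_pool_2d_back.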
@@ -19322,9 +19655,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
                 const uint32_t type = tensor->type;
                 const uint32_t op   = tensor->op;
+                const int32_t flags = tensor->flags;
 
                 fwrite(&type, sizeof(uint32_t), 1, fout);
                 fwrite(&op,   sizeof(uint32_t), 1, fout);
+                fwrite(&flags, sizeof(int32_t), 1, fout);
 
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     const uint64_t ne = tensor->ne[j];
@@ -19354,9 +19689,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
                 const uint32_t type = tensor->type;
                 const uint32_t op   = tensor->op;
+                const int32_t flags = tensor->flags;
 
                 fwrite(&type, sizeof(uint32_t), 1, fout);
                 fwrite(&op,   sizeof(uint32_t), 1, fout);
+                fwrite(&flags, sizeof(int32_t), 1, fout);
 
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     const uint64_t ne = tensor->ne[j];
@@ -19415,6 +19752,14 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                     }
                 }
             }
+
+            // dump the data
+            // TODO: pad this to 32 byte boundary
+            if ((flags & GGML_TENSOR_FLAG_PARAM)) {
+                const size_t size = ggml_nbytes(tensor);
+
+                fwrite(tensor->data, sizeof(char), size, fout);
+            }
         }
     }
 
@@ -19528,10 +19873,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
         {
             uint32_t type;
             uint32_t op;
+            int32_t flags;
 
             for (uint32_t i = 0; i < n_leafs; ++i) {
                 type = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op   = *(const uint32_t *) ptr; ptr += sizeof(op);
+                flags = *(const int32_t *) ptr; ptr += sizeof(flags);
 
                 int64_t ne[GGML_MAX_DIMS];
                 size_t  nb[GGML_MAX_DIMS];
@@ -19549,20 +19896,19 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
 
                 struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
 
-                tensor->op = (enum ggml_op) op;
+                tensor->op    = (enum ggml_op) op;
+                tensor->flags = flags;
 
                 memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
                 memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
 
-                tensor->data = (void *) ptr;
-
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     tensor->nb[j] = nb[j];
                 }
 
-                result->leafs[i] = tensor;
-
-                ptr += ggml_nbytes(tensor);
+                tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
+
+                result->leafs[i] = tensor;
 
                 fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
@@ -19574,10 +19920,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
         {
             uint32_t type;
             uint32_t op;
+            int32_t flags;
 
             for (uint32_t i = 0; i < n_nodes; ++i) {
                 type = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op   = *(const uint32_t *) ptr; ptr += sizeof(op);
+                flags = *(const int32_t *) ptr; ptr += sizeof(flags);
 
                 enum ggml_op eop = (enum ggml_op) op;
 
@@ -19667,6 +20015,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
 
                 result->nodes[i] = tensor;
 
+                // TODO: tensor data is duplicated due to the ggml_new_tensor call above
+                if (flags & GGML_TENSOR_FLAG_PARAM) {
+                    tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
+                }
+
                 fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
         }
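With the flags field added, each tensor record in the exported graph now carries, in the write order of the fwrite/memcpy sequence above: type (uint32), op (uint32), flags (int32), the ne and nb entries, the name and op_params blobs, and, new in this commit, the raw tensor data whenever GGML_TENSOR_FLAG_PARAM is set (leaf data was already serialized). The import side reads the same fields back in the same order, so graphs written by older code cannot be read; this is presumably why GGML_FILE_VERSION was bumped to 2 in the header.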
@@ -20701,6 +21054,8 @@ enum ggml_opt_result ggml_opt(
         struct ggml_context * ctx,
         struct ggml_opt_params params,
         struct ggml_tensor * f) {
+    GGML_ASSERT(f->grad && "ggml_set_param called for at least one parent tensor.");
+
     bool free_ctx = false;
     if (ctx == NULL) {
         struct ggml_init_params params_ctx = {
@@ -20755,6 +21110,8 @@ enum ggml_opt_result ggml_opt_resume_g(
         ggml_opt_callback callback,
         void * callback_data) {
 
+    GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
+
     // build forward + backward compute graphs
     enum ggml_opt_result result = GGML_OPT_RESULT_OK;
@@ -21842,6 +22199,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
 void gguf_add_tensor(
              struct gguf_context * ctx,
         const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor);
     if (gguf_find_tensor(ctx, tensor->name) != -1) {
         GGML_ABORT("duplicated tensor name");
     }