JohannesGaessler committed
Commit 0828065 · 1 Parent(s): 0653499

examples: add MNIST training + missing ops

Files changed (2)
  1. ggml/include/ggml.h +67 -38
  2. ggml/src/ggml.c +376 -18
ggml/include/ggml.h CHANGED
@@ -220,7 +220,7 @@
 #include <stdio.h>
 
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
 
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -490,9 +490,11 @@ extern "C" {
        GGML_OP_CLAMP,
        GGML_OP_CONV_TRANSPOSE_1D,
        GGML_OP_IM2COL,
+       GGML_OP_IM2COL_BACK,
        GGML_OP_CONV_TRANSPOSE_2D,
        GGML_OP_POOL_1D,
        GGML_OP_POOL_2D,
+       GGML_OP_POOL_2D_BACK,
        GGML_OP_UPSCALE, // nearest interpolate
        GGML_OP_PAD,
        GGML_OP_ARANGE,
@@ -1582,34 +1584,49 @@ extern "C" {
            float                 min,
            float                 max);
 
+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
-           int                   s0,
-           int                   s1,
-           int                   p0,
-           int                   p1,
-           int                   d0,
-           int                   d1,
-           bool                  is_2D,
-           enum ggml_type        dst_type);
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
+           int                   s0, // stride dimension 0
+           int                   s1, // stride dimension 1
+           int                   p0, // padding dimension 0
+           int                   p1, // padding dimension 1
+           int                   d0, // dilation dimension 0
+           int                   d1, // dilation dimension 1
+           bool                  is_2D,
+           enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+           struct ggml_context * ctx,
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // gradient of im2col output
+           int64_t             * ne, // shape of im2col input
+           int                   s0, // stride dimension 0
+           int                   s1, // stride dimension 1
+           int                   p0, // padding dimension 0
+           int                   p1, // padding dimension 1
+           int                   d0, // dilation dimension 0
+           int                   d1, // dilation dimension 1
+           bool                  is_2D);
 
     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
-           int                   s0,
-           int                   s1,
-           int                   p0,
-           int                   p1,
-           int                   d0,
-           int                   d1);
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
+           int                   s0, // stride dimension 0
+           int                   s1, // stride dimension 1
+           int                   p0, // padding dimension 0
+           int                   p1, // padding dimension 1
+           int                   d0, // dilation dimension 0
+           int                   d1); // dilation dimension 1
 
     GGML_API struct ggml_tensor * ggml_conv_1d(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
            int                   s0, // stride
            int                   p0, // padding
            int                   d0); // dilation
@@ -1618,29 +1635,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor* ggml_conv_1d_ph(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
-           int                   s,
-           int                   d);
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
+           int                   s,  // stride
+           int                   d); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
-           int                   s0,
-           int                   p0,
-           int                   d0);
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
+           int                   s0, // stride
+           int                   p0, // padding
+           int                   d0); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_2d(
            struct ggml_context * ctx,
-           struct ggml_tensor  * a,
-           struct ggml_tensor  * b,
-           int                   s0,
-           int                   s1,
-           int                   p0,
-           int                   p1,
-           int                   d0,
-           int                   d1);
+           struct ggml_tensor  * a,  // convolution kernel
+           struct ggml_tensor  * b,  // data
+           int                   s0, // stride dimension 0
+           int                   s1, // stride dimension 1
+           int                   p0, // padding dimension 0
+           int                   p1, // padding dimension 1
+           int                   d0, // dilation dimension 0
+           int                   d1); // dilation dimension 1
 
 
     // kernel size is a->ne[0] x a->ne[1]
@@ -1702,6 +1719,18 @@ extern "C" {
            float                 p0,
            float                 p1);
 
+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+           struct ggml_context * ctx,
+           struct ggml_tensor  * a,
+           struct ggml_tensor  * af, // "a"/input used in forward pass
+           enum ggml_op_pool     op,
+           int                   k0,
+           int                   k1,
+           int                   s0,
+           int                   s1,
+           float                 p0,
+           float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
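
The new header comment on ggml_im2col states the idea behind both new ops: unfold the input so that a convolution becomes a plain matrix multiplication. A minimal standalone illustration of that layout (plain C with stride 1, no padding, no dilation, and a single kernel; an illustrative sketch, not code from this commit):

#include <stdio.h>

#define IW 4
#define IH 4
#define K  3
#define OW (IW - K + 1) // stride 1, no padding, no dilation
#define OH (IH - K + 1)

int main(void) {
    float input[IH][IW], kernel[K][K], cols[OH*OW][K*K];
    for (int i = 0; i < IH*IW; i++) ((float *) input)[i]  = (float) i;
    for (int i = 0; i < K*K;   i++) ((float *) kernel)[i] = 1.0f/(K*K);

    // im2col: each output position gets its own row holding the kernel-sized patch it sees
    for (int oy = 0; oy < OH; oy++) {
        for (int ox = 0; ox < OW; ox++) {
            for (int ky = 0; ky < K; ky++) {
                for (int kx = 0; kx < K; kx++) {
                    cols[oy*OW + ox][ky*K + kx] = input[oy + ky][ox + kx];
                }
            }
        }
    }

    // the "matrix multiplication": one dot product per row, since there is only one kernel
    for (int r = 0; r < OH*OW; r++) {
        float acc = 0.0f;
        for (int c = 0; c < K*K; c++) {
            acc += cols[r][c] * ((float *) kernel)[c];
        }
        printf("out[%d] = %f\n", r, acc);
    }
    return 0;
}

With multiple output channels the flattened kernels stack into a matrix and the dot product becomes a real matrix product, which is exactly how ggml_conv_2d composes ggml_im2col with ggml_mul_mat in the source file below.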
ggml/src/ggml.c CHANGED
@@ -2801,9 +2801,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CLAMP",
     "CONV_TRANSPOSE_1D",
     "IM2COL",
+    "IM2COL_BACK",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
     "POOL_2D",
+    "POOL_2D_BACK",
     "UPSCALE",
     "PAD",
     "ARANGE",
@@ -2837,7 +2839,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -2891,9 +2893,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "clamp(x)",
     "conv_transpose_1d(x)",
     "im2col(x)",
+    "im2col_back(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
     "pool_2d(x)",
+    "pool_2d_back(x)",
     "upscale(x)",
     "pad(x)",
     "arange(start, stop, step)",
@@ -2927,7 +2931,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -3741,6 +3745,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
     size_t data_size = ggml_row_size(type, ne[0]);
     for (int i = 1; i < n_dims; i++) {
+        assert(ne[i] > 0);
         data_size *= ne[i];
     }
 
@@ -3773,6 +3778,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     }
 
     struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+    GGML_ASSERT(obj_new);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
@@ -4492,8 +4498,6 @@ static struct ggml_tensor * ggml_add_impl(
     bool is_node = false;
 
     if (!inplace && (a->grad || b->grad)) {
-        // TODO: support backward pass for broadcasting
-        GGML_ASSERT(ggml_are_same_shape(a, b));
         is_node = true;
     }
 
@@ -6801,17 +6805,20 @@ struct ggml_tensor * ggml_im2col(
         GGML_ASSERT(a->ne[2] == b->ne[2]);
     } else {
         GGML_ASSERT(a->ne[1] == b->ne[1]);
+        GGML_ASSERT(b->ne[3] == 1);
     }
     bool is_node = false;
 
-    if (a->grad || b->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
        is_node = true;
     }
 
     const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
     const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
 
+    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0)           && "b too small compared to a");
+
     const int64_t ne[4] = {
         is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
         OW,
 
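The two new asserts reject kernels that are larger than the padded input. ggml_calc_conv_output_size itself is not shown in this diff; the standard output-size formula that such a helper evaluates (written out here for reference, not copied from ggml) goes non-positive in exactly that case:

#include <stdint.h>

// Standard convolution output-size formula for input size ins, kernel size ks,
// stride s, padding p, dilation d. The usual textbook definition, shown for reference.
static int64_t conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2*p - d*(ks - 1) - 1)/s + 1;
}
// e.g. conv_output_size(28, 3, 1, 1, 1) == 28: a padded 3x3 convolution preserves a
// 28x28 MNIST image, while conv_output_size(2, 5, 1, 0, 1) == -2 would now trip the
// "b too small compared to a" assert instead of silently producing an empty tensor.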
@@ -6831,6 +6838,37 @@
     return result;
 }
 
+struct ggml_tensor * ggml_im2col_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t             * ne,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1,
+        bool                  is_2D) {
+
+    bool is_node = false;
+
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op   = GGML_OP_IM2COL_BACK;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
 // result: [N, OC, OH, OW]
 
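ggml_im2col_back is the reverse mapping: it takes the gradient of an im2col output plus the original input shape ne and produces an F32 tensor of that shape; the kernel a contributes only its shape. A hedged wiring sketch of how the shapes pair up (invented sizes; in real use the second argument is the gradient of cols, which the backward pass supplies, so cols itself only stands in for a same-shaped tensor here):

#include "ggml.h"

void im2col_roundtrip_shapes(void) {
    struct ggml_init_params ip = { /*.mem_size =*/ 64*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    // kernel: 3x3, 8 input channels, 16 output channels; data: batch of 4 28x28x8 images
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 8, 16);
    struct ggml_tensor * data   = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 28, 28, 8, 4);

    // forward: [N, IC, IH, IW] -> [N, OH, OW, IC*KH*KW]
    struct ggml_tensor * cols = ggml_im2col(ctx, kernel, data, 1, 1, 1, 1, 1, 1, /*is_2D=*/true, GGML_TYPE_F32);

    // backward: a tensor shaped like cols is scattered back to a tensor shaped like data
    struct ggml_tensor * grad_data = ggml_im2col_back(ctx, kernel, cols, data->ne, 1, 1, 1, 1, 1, 1, /*is_2D=*/true);
    GGML_ASSERT(ggml_are_same_shape(grad_data, data));

    ggml_free(ctx);
}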
@@ -6844,7 +6882,7 @@ struct ggml_tensor * ggml_conv_2d(
         int                   p1,
         int                   d0,
         int                   d1) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
 
     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
@@ -6970,17 +7008,17 @@ struct ggml_tensor * ggml_pool_2d(
     bool is_node = false;
 
     if (a->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
         is_node = true;
     }
 
     struct ggml_tensor * result;
-    const int64_t ne[3] = {
+    const int64_t ne[4] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
+        a->ne[3],
     };
-    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
@@ -6991,6 +7029,37 @@
     return result;
 }
 
+struct ggml_tensor * ggml_pool_2d_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * af,
+        enum ggml_op_pool     op,
+        int                   k0,
+        int                   k1,
+        int                   s0,
+        int                   s1,
+        float                 p0,
+        float                 p1) {
+
+    bool is_node = false;
+
+    if (a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * result;
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+
+    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op   = GGML_OP_POOL_2D_BACK;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = af;
+    return result;
+}
+
 // ggml_upscale
 
 static struct ggml_tensor * ggml_upscale_impl(
@@ -14714,6 +14783,7 @@ static void ggml_compute_forward_conv_transpose_1d(
     }
 }
 
+// ggml_compute_forward_im2col_f32
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14724,7 +14794,6 @@ static void ggml_compute_forward_im2col_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -14755,7 +14824,6 @@ static void ggml_compute_forward_im2col_f32(
     int ofs0 = is_2D ? nb13 : nb12;
     int ofs1 = is_2D ? nb12 : nb11;
 
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
     // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@@ -14791,6 +14859,7 @@ static void ggml_compute_forward_im2col_f32(
 }
 
 
+// ggml_compute_forward_im2col_f16
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14886,6 +14955,99 @@ static void ggml_compute_forward_im2col(
     }
 }
 
+// ggml_compute_forward_im2col_back_f32
+
+static void ggml_compute_forward_im2col_back_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const int32_t s0    = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1    = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0    = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1    = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0    = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1    = ((const int32_t *)(dst->op_params))[5];
+    const bool    is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne3 : ne2;
+    const int64_t IC = is_2D ? ne2 : ne1;
+    const int64_t IH = is_2D ? ne1 : 1;
+    const int64_t IW = ne0;
+
+    const int64_t KH = is_2D ? ne01 : 1;
+    const int64_t KW = ne00;
+
+    const int64_t OH = is_2D ? ne12 : 1;
+    const int64_t OW = ne11;
+
+    int ofs0 = is_2D ? nb3 : nb2;
+    int ofs1 = is_2D ? nb2 : nb1;
+
+    GGML_ASSERT(nb0 == sizeof(float));
+
+    // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+    {
+        float * const wdata = (float *) dst->data;
+
+        for (int64_t in = 0; in < N; in++) {
+            for (int64_t iic = ith; iic < IC; iic += nth) {
+                for (int64_t iih = 0; iih < IH; iih++) {
+                    for (int64_t iiw = 0; iiw < IW; iiw++) {
+
+                        // micro kernel
+                        float grad = 0.0f;
+                        for (int64_t ikh = 0; ikh < KH; ikh++) {
+                            for (int64_t ikw = 0; ikw < KW; ikw++) {
+                                // For s0 > 1 some values were skipped over in the forward pass.
+                                // These values have tmpw % s0 != 0 and need to be skipped in the backwards pass as well.
+                                const int64_t tmpw = (iiw + p0 - ikw*d0);
+                                if (tmpw % s0 != 0) {
+                                    continue;
+                                }
+                                const int64_t iow = tmpw / s0;
+
+                                // Equivalent logic as above except for s1.
+                                int64_t ioh;
+                                if (is_2D) {
+                                    const int64_t tmph = iih + p1 - ikh*d1;
+
+                                    if (tmph % s1 != 0) {
+                                        continue;
+                                    }
+
+                                    ioh = tmph / s1;
+                                } else {
+                                    ioh = 0;
+                                }
+
+                                if (iow < 0 || iow >= OW || ioh < 0 || ioh >= OH) {
+                                    continue;
+                                }
+
+                                const float * const src_data = (const float *) src1->data
+                                    + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+                                grad += src_data[iic*(KH*KW) + ikh*KW + ikw];
+                            }
+                        }
+                        float * dst_data = (float *)((char *) wdata + (in*ofs0 + iic*ofs1)); // [IH, IW]
+                        dst_data[iih*IW + iiw] = grad;
+                    }
+                }
+            }
+        }
+    }
+}
 
 // ggml_compute_forward_conv_transpose_2d
 
 
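The backward kernel above inverts the forward gather: in the forward pass, output column iow reads input pixel iiw = iow*s0 - p0 + ikw*d0, so in the backward pass each input pixel accumulates from every (iow, ikw) pair where tmpw = iiw + p0 - ikw*d0 is divisible by s0 and iow = tmpw/s0 lands inside the output. A small standalone trace of that mapping with made-up parameters (s0 = 2, p0 = 1, d0 = 1, KW = 3, OW = 4; not code from this commit):

#include <stdio.h>

int main(void) {
    const int s0 = 2, p0 = 1, d0 = 1, KW = 3, OW = 4;
    for (int iiw = 0; iiw < 8; iiw++) {
        printf("iiw=%d:", iiw);
        for (int ikw = 0; ikw < KW; ikw++) {
            const int tmpw = iiw + p0 - ikw*d0;
            if (tmpw % s0 != 0) continue;       // skipped by the stride in the forward pass
            const int iow = tmpw / s0;
            if (iow < 0 || iow >= OW) continue; // outside the output
            printf(" (iow=%d, ikw=%d)", iow, ikw);
        }
        printf("\n");
    }
    return 0;
}

Each printed pair is one forward read of that input pixel, and therefore one gradient contribution summed into grad in the kernel above.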
@@ -15128,6 +15290,128 @@ static void ggml_compute_forward_pool_2d(
     }
 }
 
+// ggml_compute_forward_pool_2d_back
+
+static void ggml_compute_forward_pool_2d_back(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src  = dst->src[0];
+    const struct ggml_tensor * dstf = dst->src[1]; // forward tensor of dst
+
+    assert(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    const int32_t * opts = (const int32_t *) dst->op_params;
+    enum ggml_op_pool op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
+
+    char       * cdata  = (char *) dst->data;
+    const char * cdataf = (const char *) dstf->data;
+    const char * const data_end = cdata + ggml_nbytes(dst);
+
+    GGML_ASSERT(params->ith == 0);
+    memset(cdata, 0, ggml_nbytes(dst));
+
+    const int64_t px = src->ne[0];
+    const int64_t py = src->ne[1];
+    const int64_t pa = px * py;
+
+    const float * splane = (const float *) src->data;
+
+    const int ka = k0 * k1;
+    const int offset0 = -p0;
+    const int offset1 = -p1;
+
+    while (cdata < data_end) {
+        for (int oy = 0; oy < py; ++oy) {
+            const float * const srow = splane + oy * px;
+            for (int ox = 0; ox < px; ++ox) {
+                const float grad0 = srow[ox];
+
+                const int ix = offset0 + ox * s0;
+                const int iy = offset1 + oy * s1;
+
+                if (op == GGML_OP_POOL_MAX) {
+                    float maxval = -FLT_MAX;
+                    int kxmax = -1;
+                    int kymax = -1;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        const void * drowf = (const void *)(cdataf + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            const float val = dst->type == GGML_TYPE_F32 ?
+                                ((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
+                            if (val <= maxval) {
+                                continue;
+                            }
+
+                            maxval = val;
+                            kxmax = kx;
+                            kymax = ky;
+                        }
+                    }
+
+                    if (kxmax == -1 || kymax == -1) {
+                        continue;
+                    }
+
+                    void * drow = (void *)(cdata + dst->nb[1] * (iy + kymax));
+                    const int j = ix + kxmax;
+                    if (dst->type == GGML_TYPE_F32) {
+                        ((float *) drow)[j] += grad0;
+                    } else {
+                        ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
+                    }
+                } else if (op == GGML_OP_POOL_AVG) {
+                    const float grad = grad0 / ka;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        void * drow = (void *)(cdata + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            if (dst->type == GGML_TYPE_F32) {
+                                ((float *) drow)[j] += grad;
+                            } else {
+                                ((ggml_fp16_t *) drow)[j] += GGML_FP32_TO_FP16(grad);
+                            }
+                        }
+                    }
+                } else {
+                    GGML_ASSERT(false);
+                }
+            }
+        }
+
+        cdata  += dst->nb[2];
+        cdataf += dst->nb[2];
+        splane += pa;
+    }
+}
+
 // ggml_compute_forward_upscale
 
 static void ggml_compute_forward_upscale_f32(
 
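The two branches implement the usual pooling gradients: POOL_MAX re-finds the argmax in the saved forward input dstf and routes the whole incoming gradient to that one position, while POOL_AVG spreads grad0/(k0*k1) uniformly over the window. A toy standalone version of the two routing rules (one 1x2 window with stride 2 over a 4-element row; an illustration, not ggml code):

#include <stdio.h>

int main(void) {
    const float x[4]    = {1.0f, 5.0f, 2.0f, 2.0f}; // forward input
    const float gout[2] = {10.0f, 8.0f};            // gradient of the pooled output
    float gmax[4] = {0}, gavg[4] = {0};

    for (int o = 0; o < 2; o++) {
        const int i0 = 2*o, i1 = 2*o + 1;
        // max: all of the gradient goes to the position that won the forward max
        if (x[i0] >= x[i1]) gmax[i0] += gout[o]; else gmax[i1] += gout[o];
        // avg: the gradient is shared equally by every position in the window
        gavg[i0] += gout[o]/2.0f;
        gavg[i1] += gout[o]/2.0f;
    }
    for (int i = 0; i < 4; i++) printf("gmax[%d]=%4.1f gavg[%d]=%4.1f\n", i, gmax[i], i, gavg[i]);
    return 0;
}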
@@ -17097,6 +17381,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_im2col(params, tensor);
             } break;
+        case GGML_OP_IM2COL_BACK:
+            {
+                ggml_compute_forward_im2col_back_f32(params, tensor);
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 ggml_compute_forward_conv_transpose_2d(params, tensor);
@@ -17109,6 +17397,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_pool_2d(params, tensor);
             } break;
+        case GGML_OP_POOL_2D_BACK:
+            {
+                ggml_compute_forward_pool_2d_back(params, tensor);
+            } break;
         case GGML_OP_UPSCALE:
             {
                 ggml_compute_forward_upscale(params, tensor);
 
@@ -17477,7 +17769,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table);
                 }
                 if (src1->grad) {
-                    src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+                    if (ggml_are_same_shape(src0, src1)) {
+                        src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+                    } else {
+                        src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table);
+                    }
                 }
             } break;
         case GGML_OP_ADD1:
 
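This pairs with the relaxed assert in ggml_add_impl earlier in the file: when the forward add broadcast src1, the backward pass must sum tensor->grad over the broadcast copies to get back to src1's shape, which is what ggml_repeat_back provides. A plain-C toy of that reduction for a bias broadcast over rows (an illustration, not ggml code):

#include <stdio.h>

int main(void) {
    const float gout[3][4] = {{1,1,1,1}, {2,2,2,2}, {3,3,3,3}}; // gradient of a+b, shape of a
    float gb[4] = {0};                                          // gradient of b, shape of b

    // each broadcast copy of b contributed to one row, so the copies' gradients sum up
    for (int r = 0; r < 3; r++)
        for (int c = 0; c < 4; c++)
            gb[c] += gout[r][c];

    for (int c = 0; c < 4; c++) printf("gb[%d] = %.0f\n", c, gb[c]); // 6 6 6 6
    return 0;
}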
@@ -18074,6 +18370,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_IM2COL:
+            {
+                if (src1->grad) {
+                    const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
+                    const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
+                    const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
+                    const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
+                    const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
+                    const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
+                    const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
+
+                    src1->grad = ggml_add_or_set(ctx,
+                            src1->grad,
+                            ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_IM2COL_BACK:
             {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
@@ -18086,6 +18399,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_POOL_2D:
+            {
+                if (src0->grad) {
+                    const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
+                    const int32_t           k0 = ggml_get_op_params_i32(tensor, 1);
+                    const int32_t           k1 = ggml_get_op_params_i32(tensor, 2);
+                    const int32_t           s0 = ggml_get_op_params_i32(tensor, 3);
+                    const int32_t           s1 = ggml_get_op_params_i32(tensor, 4);
+                    const int32_t           p0 = ggml_get_op_params_i32(tensor, 5);
+                    const int32_t           p1 = ggml_get_op_params_i32(tensor, 6);
+
+                    src0->grad = ggml_add_or_set(ctx,
+                            src0->grad,
+                            ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_POOL_2D_BACK:
             {
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
@@ -18375,6 +18705,7 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
 
 void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);
+    GGML_ASSERT(gf->grads);
 
     // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
     if (keep) {
@@ -18802,6 +19133,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
         case GGML_OP_IM2COL:
+        case GGML_OP_IM2COL_BACK:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
@@ -18809,6 +19141,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_POOL_1D:
         case GGML_OP_POOL_2D:
+        case GGML_OP_POOL_2D_BACK:
             {
                 n_tasks = 1;
             } break;
 
@@ -19322,9 +19655,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
                 const uint32_t type = tensor->type;
                 const uint32_t op   = tensor->op;
+                const int32_t flags = tensor->flags;
 
                 fwrite(&type,  sizeof(uint32_t), 1, fout);
                 fwrite(&op,    sizeof(uint32_t), 1, fout);
+                fwrite(&flags, sizeof(int32_t),  1, fout);
 
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     const uint64_t ne = tensor->ne[j];
@@ -19354,9 +19689,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
                 const uint32_t type = tensor->type;
                 const uint32_t op   = tensor->op;
+                const int32_t flags = tensor->flags;
 
                 fwrite(&type,  sizeof(uint32_t), 1, fout);
                 fwrite(&op,    sizeof(uint32_t), 1, fout);
+                fwrite(&flags, sizeof(int32_t),  1, fout);
 
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     const uint64_t ne = tensor->ne[j];
@@ -19415,6 +19752,14 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                         }
                     }
                 }
+
+                // dump the data
+                // TODO: pad this to 32 byte boundary
+                if ((flags & GGML_TENSOR_FLAG_PARAM)) {
+                    const size_t size = ggml_nbytes(tensor);
+
+                    fwrite(tensor->data, sizeof(char), size, fout);
+                }
             }
         }
 
 
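Taken together with the import hunks below, the per-tensor record written by ggml_graph_export now looks roughly like this (a hedged sketch assembled from the fwrite/memcpy calls visible in the diff; fields not touched by the hunks are assumed unchanged from the previous GGML_FILE_VERSION):

// Per-tensor record layout implied by the export/import code in this commit:
//
//   uint32_t type;                        // ggml_type
//   uint32_t op;                          // ggml_op
//   int32_t  flags;                       // NEW: e.g. GGML_TENSOR_FLAG_PARAM
//   uint64_t ne[GGML_MAX_DIMS];           // shape
//   uint64_t nb[GGML_MAX_DIMS];           // strides (read back as size_t on import)
//   char     name[GGML_MAX_NAME];
//   char     op_params[GGML_MAX_OP_PARAMS];
//   ...                                   // leaf data / node extras as before
//   uint8_t  data[ggml_nbytes(tensor)];   // NEW: appended for nodes when
//                                         //      flags & GGML_TENSOR_FLAG_PARAM

The flags word is what lets the importer know which node tensors carry trainable-parameter data inline, which is the piece the MNIST training example needs to round-trip a graph.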
 
@@ -19528,10 +19873,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
     {
         uint32_t type;
         uint32_t op;
+        int32_t  flags;
 
         for (uint32_t i = 0; i < n_leafs; ++i) {
             type  = *(const uint32_t *) ptr; ptr += sizeof(type);
             op    = *(const uint32_t *) ptr; ptr += sizeof(op);
+            flags = *(const int32_t  *) ptr; ptr += sizeof(flags);
 
             int64_t ne[GGML_MAX_DIMS];
             size_t  nb[GGML_MAX_DIMS];
@@ -19549,20 +19896,19 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
             struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
 
-            tensor->op = (enum ggml_op) op;
+            tensor->op    = (enum ggml_op) op;
+            tensor->flags = flags;
 
             memcpy(tensor->name,      ptr, GGML_MAX_NAME);      ptr += GGML_MAX_NAME;
             memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
 
-            tensor->data = (void *) ptr;
-
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 tensor->nb[j] = nb[j];
             }
 
-            result->leafs[i] = tensor;
+            tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
 
-            ptr += ggml_nbytes(tensor);
+            result->leafs[i] = tensor;
 
             fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
         }
@@ -19574,10 +19920,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
     {
         uint32_t type;
         uint32_t op;
+        int32_t  flags;
 
         for (uint32_t i = 0; i < n_nodes; ++i) {
             type  = *(const uint32_t *) ptr; ptr += sizeof(type);
             op    = *(const uint32_t *) ptr; ptr += sizeof(op);
+            flags = *(const int32_t  *) ptr; ptr += sizeof(flags);
 
             enum ggml_op eop = (enum ggml_op) op;
 
@@ -19667,6 +20015,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
             result->nodes[i] = tensor;
 
+            // TODO: tensor data is duplicated due to the ggml_new_tensor call above
+            if (flags & GGML_TENSOR_FLAG_PARAM) {
+                tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
+            }
+
             fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
         }
     }
@@ -20701,6 +21054,8 @@ enum ggml_opt_result ggml_opt(
         struct ggml_context * ctx,
         struct ggml_opt_params params,
         struct ggml_tensor * f) {
+    GGML_ASSERT(f->grad && "ggml_set_param called for at least one parent tensor.");
+
     bool free_ctx = false;
     if (ctx == NULL) {
         struct ggml_init_params params_ctx = {
@@ -20755,6 +21110,8 @@ enum ggml_opt_result ggml_opt_resume_g(
         ggml_opt_callback callback,
         void * callback_data) {
 
+    GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
+
     // build forward + backward compute graphs
     enum ggml_opt_result result = GGML_OPT_RESULT_OK;
 
@@ -21842,6 +22199,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
 void gguf_add_tensor(
         struct gguf_context * ctx,
         const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor);
     if (gguf_find_tensor(ctx, tensor->name) != -1) {
         GGML_ABORT("duplicated tensor name");
     }