ggerganov committed on
Commit
caf2759
·
unverified ·
1 Parent(s): 44edc8a

ggml : sync with ggml repo (warning fixes + asserts)

Browse files
Files changed (2) hide show
  1. ggml.c +15 -9
  2. ggml.h +2 -2
ggml.c CHANGED
@@ -8245,8 +8245,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
8245
  ggml_fp16_t * d_X = ggml_cuda_pool_malloc(sizeof(float) * x_ne, &x_size);
8246
  ggml_fp16_t * d_Y = ggml_cuda_pool_malloc(sizeof(float) * y_ne, &y_size);
8247
  float * d_D = ggml_cuda_pool_malloc(sizeof(float) * d_ne, &d_size);
8248
- #else
8249
- float * const wdata = params->wdata;
8250
  #endif
8251
  for (int64_t i03 = 0; i03 < ne03; i03++) {
8252
  for (int64_t i02 = 0; i02 < ne02; i02++) {
@@ -8263,8 +8261,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
8263
  wdata[id++] = GGML_FP32_TO_FP16(*(float *) ((char *) src1->data + i03*nb13 + i02*nb12 + i01*nb11 + i00*nb10));
8264
  }
8265
  }
 
 
8266
  }
8267
  #else
 
8268
  {
8269
  size_t id = 0;
8270
  for (int64_t i01 = 0; i01 < ne01; ++i01) {
@@ -8272,6 +8273,8 @@ static void ggml_compute_forward_mul_mat_f16_f32(
8272
  wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
8273
  }
8274
  }
 
 
8275
  }
8276
  #endif
8277
 
@@ -8537,7 +8540,10 @@ static void ggml_compute_forward_mul_mat_q_f32(
8537
  dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00);
8538
  id += ne00;
8539
  }
 
 
8540
  }
 
8541
  const float * x = wdata;
8542
  #endif
8543
 
@@ -9118,7 +9124,7 @@ static void ggml_compute_forward_alibi_f32(
9118
  //const int nb3 = src0->nb[3];
9119
 
9120
  assert(nb0 == sizeof(float));
9121
- assert(ne1+n_past == ne0);
9122
 
9123
  // add alibi to src0 (KQ_scaled)
9124
  const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
@@ -9179,7 +9185,7 @@ static void ggml_compute_forward_alibi_f16(
9179
  //const int nb3 = src0->nb[3];
9180
 
9181
  assert(nb0 == sizeof(ggml_fp16_t));
9182
- assert(ne1+n_past == ne0);
9183
 
9184
  // add alibi to src0 (KQ_scaled)
9185
  const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
@@ -11571,12 +11577,12 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
11571
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
11572
  node->n_tasks = 1; // TODO: this actually is doing nothing
11573
  // the threads are still spinning
11574
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
 
 
 
11575
  // here we need memory just for single 2D matrix from src0
11576
  cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
11577
- #else
11578
- // with GPU, we need memory for the full 3D / 4D data
11579
- cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*MAX(ggml_nelements(node->src1), ggml_nelements(node->src0));
11580
  #endif
11581
  } else {
11582
  cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1);
@@ -11586,7 +11592,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
11586
  #endif
11587
  } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
11588
  cur = 0;
11589
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS)
11590
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
11591
  node->n_tasks = 1;
11592
  }
 
8245
  ggml_fp16_t * d_X = ggml_cuda_pool_malloc(sizeof(float) * x_ne, &x_size);
8246
  ggml_fp16_t * d_Y = ggml_cuda_pool_malloc(sizeof(float) * y_ne, &y_size);
8247
  float * d_D = ggml_cuda_pool_malloc(sizeof(float) * d_ne, &d_size);
 
 
8248
  #endif
8249
  for (int64_t i03 = 0; i03 < ne03; i03++) {
8250
  for (int64_t i02 = 0; i02 < ne02; i02++) {
 
8261
  wdata[id++] = GGML_FP32_TO_FP16(*(float *) ((char *) src1->data + i03*nb13 + i02*nb12 + i01*nb11 + i00*nb10));
8262
  }
8263
  }
8264
+
8265
+ assert(id*sizeof(ggml_fp16_t) <= params->wsize);
8266
  }
8267
  #else
8268
+ float * const wdata = params->wdata;
8269
  {
8270
  size_t id = 0;
8271
  for (int64_t i01 = 0; i01 < ne01; ++i01) {
 
8273
  wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
8274
  }
8275
  }
8276
+
8277
+ assert(id*sizeof(float) <= params->wsize);
8278
  }
8279
  #endif
8280
 
 
8540
  dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00);
8541
  id += ne00;
8542
  }
8543
+
8544
+ assert(id*sizeof(float) <= params->wsize);
8545
  }
8546
+
8547
  const float * x = wdata;
8548
  #endif
8549
 
 
9124
  //const int nb3 = src0->nb[3];
9125
 
9126
  assert(nb0 == sizeof(float));
9127
+ assert(ne1 + n_past == ne0); (void) n_past;
9128
 
9129
  // add alibi to src0 (KQ_scaled)
9130
  const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
 
9185
  //const int nb3 = src0->nb[3];
9186
 
9187
  assert(nb0 == sizeof(ggml_fp16_t));
9188
+ assert(ne1 + n_past == ne0); (void) n_past;
9189
 
9190
  // add alibi to src0 (KQ_scaled)
9191
  const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
 
11577
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
11578
  node->n_tasks = 1; // TODO: this actually is doing nothing
11579
  // the threads are still spinning
11580
+ #if defined(GGML_USE_CUBLAS)
11581
+ // with cuBLAS, we need memory for the full 3D / 4D data of src1
11582
+ cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1);
11583
+ #else
11584
  // here we need memory just for single 2D matrix from src0
11585
  cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
 
 
 
11586
  #endif
11587
  } else {
11588
  cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1);
 
11592
  #endif
11593
  } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
11594
  cur = 0;
11595
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
11596
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
11597
  node->n_tasks = 1;
11598
  }
ggml.h CHANGED
@@ -701,8 +701,8 @@ extern "C" {
701
  struct ggml_tensor * c1);
702
 
703
  // Mapping operations
704
- GGML_API typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
705
- GGML_API typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
706
 
707
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
708
  struct ggml_context * ctx,
 
701
  struct ggml_tensor * c1);
702
 
703
  // Mapping operations
704
+ typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
705
+ typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
706
 
707
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
708
  struct ggml_context * ctx,