ggerganov committed on
Commit
ea3aa71
·
1 Parent(s): ee56a37

ggml : improve ggml_is_contiguous logic (llama/7856)

Browse files

* ggml : improve ggml_is_contiguous logic

ggml-ci

* ggml : support more contiguous cases

ggml-ci

Files changed (1) hide show
  1. ggml.c +35 -40
ggml.c CHANGED
@@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
3212
  return tensor->nb[0] > tensor->nb[1];
3213
  }
3214
 
3215
- GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
3216
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3217
 
3218
- return
3219
- tensor->nb[0] == ggml_type_size(tensor->type) &&
3220
- tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
3221
- tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
3222
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
3223
  }
3224
 
3225
  GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
3226
- return ggml_is_contiguous(tensor);
3227
  }
3228
 
3229
  GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
3230
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3231
-
3232
- return
3233
- tensor->nb[0] == ggml_type_size(tensor->type) &&
3234
- tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
3235
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
3236
  }
3237
 
3238
  GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
3239
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3240
-
3241
- return
3242
- tensor->nb[0] == ggml_type_size(tensor->type) &&
3243
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
3244
  }
3245
 
3246
  GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
@@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor
3272
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3273
 
3274
  return
3275
- (t0->ne[0] == t1->ne[0] ) &&
3276
- (t0->ne[1] == t1->ne[1] ) &&
3277
- (t0->ne[2] == t1->ne[2] ) &&
3278
- (t0->ne[3] == t1->ne[3] );
3279
  }
3280
 
3281
  bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3282
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3283
 
3284
  return
3285
- (t0->nb[0] == t1->nb[0] ) &&
3286
- (t0->nb[1] == t1->nb[1] ) &&
3287
- (t0->nb[2] == t1->nb[2] ) &&
3288
- (t0->nb[3] == t1->nb[3] );
3289
  }
3290
 
3291
  // check if t1 can be represented as a repetition of t0
@@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
4078
  switch (tensor->type) {
4079
  case GGML_TYPE_I8:
4080
  {
4081
- GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
4082
  return ((int8_t *)(tensor->data))[i];
4083
  }
4084
  case GGML_TYPE_I16:
4085
  {
4086
- GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
4087
  return ((int16_t *)(tensor->data))[i];
4088
  }
4089
  case GGML_TYPE_I32:
4090
  {
4091
- GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
4092
  return ((int32_t *)(tensor->data))[i];
4093
  }
4094
  case GGML_TYPE_F16:
4095
  {
4096
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
4097
  return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
4098
  }
4099
  case GGML_TYPE_BF16:
4100
  {
4101
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
4102
  return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
4103
  }
4104
  case GGML_TYPE_F32:
4105
  {
4106
- GGML_ASSERT(tensor->nb[0] == sizeof(float));
4107
  return ((float *)(tensor->data))[i];
4108
  }
4109
  default:
@@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
4125
  switch (tensor->type) {
4126
  case GGML_TYPE_I8:
4127
  {
4128
- GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
4129
  ((int8_t *)(tensor->data))[i] = value;
4130
  } break;
4131
  case GGML_TYPE_I16:
4132
  {
4133
- GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
4134
  ((int16_t *)(tensor->data))[i] = value;
4135
  } break;
4136
  case GGML_TYPE_I32:
4137
  {
4138
- GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
4139
  ((int32_t *)(tensor->data))[i] = value;
4140
  } break;
4141
  case GGML_TYPE_F16:
4142
  {
4143
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
4144
  ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
4145
  } break;
4146
  case GGML_TYPE_BF16:
4147
  {
4148
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
4149
  ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
4150
  } break;
4151
  case GGML_TYPE_F32:
4152
  {
4153
- GGML_ASSERT(tensor->nb[0] == sizeof(float));
4154
  ((float *)(tensor->data))[i] = value;
4155
  } break;
4156
  default:
@@ -7343,7 +7338,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
7343
  return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
7344
  }
7345
 
7346
- // gmml_unary
7347
 
7348
  static struct ggml_tensor * ggml_unary_impl(
7349
  struct ggml_context * ctx,
 
3212
  return tensor->nb[0] > tensor->nb[1];
3213
  }
3214
 
3215
// Check whether `tensor` is contiguous in every dimension strictly greater than `n`.
// Dimensions 1..n may have arbitrary strides ("do not need to be contiguous");
// dimensions with ne[i] == 1 are skipped entirely, since their stride is never
// used when indexing. nb[] holds byte strides, ne[] holds element counts.
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
    // expected byte stride of the next dimension if the data is packed without gaps
    size_t next_nb = ggml_type_size(tensor->type);
    // dim 0 must be packed (nb[0] == type size), except when ne[0] equals the
    // block size — then the row is exactly one block and nb[0] is not checked
    // NOTE(review): confirm the intended semantics of the ne[0] == blck_size exemption
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
        return false;
    }
    // row size in bytes: ne[0] elements grouped into blocks of blck_size elements
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        if (tensor->ne[i] != 1) {
            if (i > n) {
                if (tensor->nb[i] != next_nb) {
                    return false;
                }
                next_nb *= tensor->ne[i];
            } else {
                // this dimension does not need to be contiguous
                next_nb = tensor->ne[i]*tensor->nb[i];
            }
        }
    }
    return true;
}
3236
 
3237
+ GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
3238
+ return ggml_is_contiguous_0(tensor);
 
 
 
3239
  }
3240
 
3241
// Strictest contiguity check: every dimension must be packed (n = 0).
GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 0);
}
3244
 
3245
// Contiguous above dim 1: dims 2..3 must be packed and elements within a row
// are packed (the dim-0 check in ggml_is_contiguous_n still applies), but
// rows (dim 1) may have an arbitrary stride.
GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 1);
}
3248
 
3249
// Contiguous above dim 2: dim 3 must be packed; strides of dims 1 and 2 are
// unconstrained (the dim-0 packing check in ggml_is_contiguous_n still applies).
GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 2);
}
3252
 
3253
  GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
 
3279
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3280
 
3281
  return
3282
+ (t0->ne[0] == t1->ne[0]) &&
3283
+ (t0->ne[1] == t1->ne[1]) &&
3284
+ (t0->ne[2] == t1->ne[2]) &&
3285
+ (t0->ne[3] == t1->ne[3]);
3286
  }
3287
 
3288
  bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3289
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3290
 
3291
  return
3292
+ (t0->nb[0] == t1->nb[0]) &&
3293
+ (t0->nb[1] == t1->nb[1]) &&
3294
+ (t0->nb[2] == t1->nb[2]) &&
3295
+ (t0->nb[3] == t1->nb[3]);
3296
  }
3297
 
3298
  // check if t1 can be represented as a repetition of t0
 
4085
  switch (tensor->type) {
4086
  case GGML_TYPE_I8:
4087
  {
 
4088
  return ((int8_t *)(tensor->data))[i];
4089
  }
4090
  case GGML_TYPE_I16:
4091
  {
 
4092
  return ((int16_t *)(tensor->data))[i];
4093
  }
4094
  case GGML_TYPE_I32:
4095
  {
 
4096
  return ((int32_t *)(tensor->data))[i];
4097
  }
4098
  case GGML_TYPE_F16:
4099
  {
 
4100
  return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
4101
  }
4102
  case GGML_TYPE_BF16:
4103
  {
 
4104
  return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
4105
  }
4106
  case GGML_TYPE_F32:
4107
  {
 
4108
  return ((float *)(tensor->data))[i];
4109
  }
4110
  default:
 
4126
  switch (tensor->type) {
4127
  case GGML_TYPE_I8:
4128
  {
 
4129
  ((int8_t *)(tensor->data))[i] = value;
4130
  } break;
4131
  case GGML_TYPE_I16:
4132
  {
 
4133
  ((int16_t *)(tensor->data))[i] = value;
4134
  } break;
4135
  case GGML_TYPE_I32:
4136
  {
 
4137
  ((int32_t *)(tensor->data))[i] = value;
4138
  } break;
4139
  case GGML_TYPE_F16:
4140
  {
 
4141
  ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
4142
  } break;
4143
  case GGML_TYPE_BF16:
4144
  {
 
4145
  ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
4146
  } break;
4147
  case GGML_TYPE_F32:
4148
  {
 
4149
  ((float *)(tensor->data))[i] = value;
4150
  } break;
4151
  default:
 
7338
  return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
7339
  }
7340
 
7341
+ // ggml_unary
7342
 
7343
  static struct ggml_tensor * ggml_unary_impl(
7344
  struct ggml_context * ctx,