ggml : improve ggml_is_contiguous logic (llama/7856)
* ggml : improve ggml_is_contiguous logic

ggml-ci

* ggml : support more contiguous cases

ggml-ci
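The commit collapses the three hand-written stride checks in ggml_is_contiguous, ggml_is_contiguous_1 and ggml_is_contiguous_2 into a single generic helper, ggml_is_contiguous_n, and removes the per-type GGML_ASSERT stride checks from ggml_get_f32_1d and ggml_set_f32_1d.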
ggml.c
CHANGED
@@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
 
-GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
+    size_t next_nb = ggml_type_size(tensor->type);
+    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
+        return false;
+    }
+    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        if (tensor->ne[i] != 1) {
+            if (i > n) {
+                if (tensor->nb[i] != next_nb) {
+                    return false;
+                }
+                next_nb *= tensor->ne[i];
+            } else {
+                // this dimension does not need to be contiguous
+                next_nb = tensor->ne[i]*tensor->nb[i];
+            }
+        }
+    }
+    return true;
+}
+
+GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
+    return ggml_is_contiguous_0(tensor);
 }
 
 GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
-    return ggml_is_contiguous(tensor);
+    return ggml_is_contiguous_n(tensor, 0);
 }
 
 GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 1);
 }
 
 GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 2);
 }
 
 GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
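The new helper generalizes the three removed checks: dimension 0 must match the type's packed block layout (via ggml_blck_size), dimensions above n must follow each other without gaps, dimensions at or below n may carry arbitrary strides, and dimensions of extent 1 above dimension 0 are skipped entirely, which is what lets more tensors count as contiguous. Below is a standalone sketch, not part of ggml: a mock of the same logic specialized to plain float tensors (block size 1), with made-up ne/nb values showing a row-padded matrix that fails the n = 0 test but passes n = 1.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MOCK_MAX_DIMS 4

struct mock_tensor {
    long   ne[MOCK_MAX_DIMS]; // elements per dimension
    size_t nb[MOCK_MAX_DIMS]; // stride in bytes per dimension
};

// Mirrors ggml_is_contiguous_n for an F32-like tensor
// (type size 4 bytes, block size 1).
static bool mock_is_contiguous_n(const struct mock_tensor * t, int n) {
    size_t next_nb = sizeof(float);
    if (t->ne[0] != 1 && t->nb[0] != next_nb) {
        return false;
    }
    next_nb *= t->ne[0];
    for (int i = 1; i < MOCK_MAX_DIMS; i++) {
        if (t->ne[i] != 1) {
            if (i > n) {
                if (t->nb[i] != next_nb) {
                    return false; // gap between dimension i-1 and i
                }
                next_nb *= t->ne[i];
            } else {
                // dimensions <= n may have any stride
                next_nb = t->ne[i]*t->nb[i];
            }
        }
    }
    return true;
}

int main(void) {
    // 4x3 float matrix whose rows are padded to 8 floats (32 bytes):
    // each row is packed internally, but rows are not adjacent.
    struct mock_tensor t = {
        .ne = { 4,  3,  1,  1 },
        .nb = { 4, 32, 96, 96 },
    };
    printf("n=0: %d\n", mock_is_contiguous_n(&t, 0)); // 0 (rows have gaps)
    printf("n=1: %d\n", mock_is_contiguous_n(&t, 1)); // 1 (row stride ignored)
    return 0;
}

A row-padded matrix like this is the kind of case the n = 1 variant is for: kernels that process one packed row at a time can accept it even though the tensor as a whole is not contiguous.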
@@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->ne[0] == t1->ne[0] ) &&
-        (t0->ne[1] == t1->ne[1] ) &&
-        (t0->ne[2] == t1->ne[2] ) &&
-        (t0->ne[3] == t1->ne[3] );
+        (t0->ne[0] == t1->ne[0]) &&
+        (t0->ne[1] == t1->ne[1]) &&
+        (t0->ne[2] == t1->ne[2]) &&
+        (t0->ne[3] == t1->ne[3]);
 }
 
 bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->nb[0] == t1->nb[0] ) &&
-        (t0->nb[1] == t1->nb[1] ) &&
-        (t0->nb[2] == t1->nb[2] ) &&
-        (t0->nb[3] == t1->nb[3] );
+        (t0->nb[0] == t1->nb[0]) &&
+        (t0->nb[1] == t1->nb[1]) &&
+        (t0->nb[2] == t1->nb[2]) &&
+        (t0->nb[3] == t1->nb[3]);
 }
 
 // check if t1 can be represented as a repeatition of t0
@@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
     switch (tensor->type) {
         case GGML_TYPE_I8:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 return ((int8_t *)(tensor->data))[i];
             }
         case GGML_TYPE_I16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 return ((int16_t *)(tensor->data))[i];
            }
        case GGML_TYPE_I32:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                return ((int32_t *)(tensor->data))[i];
            }
        case GGML_TYPE_F16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
            }
        case GGML_TYPE_BF16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
            }
        case GGML_TYPE_F32:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                return ((float *)(tensor->data))[i];
            }
        default:
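Dropping the asserts is consistent with the relaxed contiguity logic above: for instance, a hypothetical F32 tensor with ne = {1, 5, 1, 1} and nb = {20, 4, 20, 20} now counts as contiguous (ne[0] equals the block size, so nb[0] is never inspected), yet its nb[0] of 20 bytes would have tripped GGML_ASSERT(tensor->nb[0] == sizeof(float)) here. How the accessors treat non-contiguous tensors is handled outside this hunk.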
@@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
     switch (tensor->type) {
        case GGML_TYPE_I8:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                ((int8_t *)(tensor->data))[i] = value;
            } break;
        case GGML_TYPE_I16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                ((int16_t *)(tensor->data))[i] = value;
            } break;
        case GGML_TYPE_I32:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                ((int32_t *)(tensor->data))[i] = value;
            } break;
        case GGML_TYPE_F16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
            } break;
        case GGML_TYPE_BF16:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
            } break;
        case GGML_TYPE_F32:
            {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                ((float *)(tensor->data))[i] = value;
            } break;
        default:
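For reference, a minimal usage sketch of the two accessors touched by these hunks, against the public ggml API; the context size is an arbitrary choice and error handling is omitted:

#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16*1024*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // 4x3 F32 tensor; freshly allocated tensors are contiguous
    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);

    ggml_set_f32_1d(t, 5, 1.5f);           // linear index 5 -> (i0 = 1, i1 = 1)
    printf("%f\n", ggml_get_f32_1d(t, 5)); // prints 1.500000

    ggml_free(ctx);
    return 0;
}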
@@ -7343,7 +7338,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
     return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
 }
 
-//
+// ggml_unary
 
 static struct ggml_tensor * ggml_unary_impl(
         struct ggml_context * ctx,
|