Spaces:
Running
Running
Sigbjørn Skjæret
committed on
Commit
·
f7995cb
1
Parent(s):
add5c0f
ggml : fix unmerged GGML_FPxx_TO_FPxx refactoring (llama/14443)
Browse files
- ggml/src/ggml-cpu/vec.h +9 -9
ggml/src/ggml-cpu/vec.h
CHANGED
|
@@ -913,8 +913,8 @@ inline static void ggml_vec_reglu_f32 (const int n, float * y, const float * x,
|
|
| 913 |
|
| 914 |
inline static void ggml_vec_reglu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 915 |
for (int i = 0; i < n; ++i) {
|
| 916 |
-
float v = GGML_FP16_TO_FP32(x[i]);
|
| 917 |
-
y[i] = GGML_FP32_TO_FP16((v > 0.f) ? v * GGML_FP16_TO_FP32(g[i]) : 0.f);
|
| 918 |
}
|
| 919 |
}
|
| 920 |
|
|
@@ -927,9 +927,9 @@ inline static void ggml_vec_geglu_f32(const int n, float * y, const float * x, c
|
|
| 927 |
} else if (x[i] >= 10.0f) {
|
| 928 |
y[i] = x[i] * g[i];
|
| 929 |
} else {
|
| 930 |
-
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
| 931 |
memcpy(&t, &fp16, sizeof(uint16_t));
|
| 932 |
-
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]) * g[i];
|
| 933 |
}
|
| 934 |
}
|
| 935 |
}
|
|
@@ -944,8 +944,8 @@ inline static void ggml_vec_geglu_f32(const int n, float * y, const float * x, c
|
|
| 944 |
inline static void ggml_vec_geglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 945 |
const uint16_t * i16 = (const uint16_t *) x;
|
| 946 |
for (int i = 0; i < n; ++i) {
|
| 947 |
-
float v = GGML_FP16_TO_FP32(g[i]);
|
| 948 |
-
y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(ggml_table_gelu_f16[i16[i]]) * v);
|
| 949 |
}
|
| 950 |
}
|
| 951 |
|
|
@@ -953,9 +953,9 @@ void ggml_vec_swiglu_f32(const int n, float * y, const float * x, const float *
|
|
| 953 |
|
| 954 |
inline static void ggml_vec_swiglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 955 |
for (int i = 0; i < n; ++i) {
|
| 956 |
-
float v = GGML_FP16_TO_FP32(x[i]);
|
| 957 |
-
float w = GGML_FP16_TO_FP32(g[i]);
|
| 958 |
-
y[i] = GGML_FP32_TO_FP16((v/(1.0f + expf(-v))) * w);
|
| 959 |
}
|
| 960 |
}
|
| 961 |
|
|
|
|
| 913 |
|
| 914 |
inline static void ggml_vec_reglu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 915 |
for (int i = 0; i < n; ++i) {
|
| 916 |
+
float v = GGML_CPU_FP16_TO_FP32(x[i]);
|
| 917 |
+
y[i] = GGML_CPU_FP32_TO_FP16((v > 0.f) ? v * GGML_CPU_FP16_TO_FP32(g[i]) : 0.f);
|
| 918 |
}
|
| 919 |
}
|
| 920 |
|
|
|
|
| 927 |
} else if (x[i] >= 10.0f) {
|
| 928 |
y[i] = x[i] * g[i];
|
| 929 |
} else {
|
| 930 |
+
ggml_fp16_t fp16 = GGML_CPU_FP32_TO_FP16(x[i]);
|
| 931 |
memcpy(&t, &fp16, sizeof(uint16_t));
|
| 932 |
+
y[i] = GGML_CPU_FP16_TO_FP32(ggml_table_gelu_f16[t]) * g[i];
|
| 933 |
}
|
| 934 |
}
|
| 935 |
}
|
|
|
|
| 944 |
inline static void ggml_vec_geglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 945 |
const uint16_t * i16 = (const uint16_t *) x;
|
| 946 |
for (int i = 0; i < n; ++i) {
|
| 947 |
+
float v = GGML_CPU_FP16_TO_FP32(g[i]);
|
| 948 |
+
y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(ggml_table_gelu_f16[i16[i]]) * v);
|
| 949 |
}
|
| 950 |
}
|
| 951 |
|
|
|
|
| 953 |
|
| 954 |
inline static void ggml_vec_swiglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) {
|
| 955 |
for (int i = 0; i < n; ++i) {
|
| 956 |
+
float v = GGML_CPU_FP16_TO_FP32(x[i]);
|
| 957 |
+
float w = GGML_CPU_FP16_TO_FP32(g[i]);
|
| 958 |
+
y[i] = GGML_CPU_FP32_TO_FP16((v/(1.0f + expf(-v))) * w);
|
| 959 |
}
|
| 960 |
}
|
| 961 |
|