ggerganov committed on
Commit
88282d1
·
unverified ·
1 Parent(s): f84edd5

sync : llama.cpp (skip)

Browse files
Files changed (3) hide show
  1. ggml-cuda/common.cuh +0 -6
  2. ggml-cuda/dmmv.cu +0 -4
  3. ggml-cuda/dmmv.cuh +11 -0
ggml-cuda/common.cuh CHANGED
@@ -231,12 +231,6 @@ typedef float dfloat; // dequantize float
231
  typedef float2 dfloat2;
232
  #endif //GGML_CUDA_F16
233
 
234
- // dmmv = dequantize_mul_mat_vec
235
- // TODO: remove this?
236
- #ifndef GGML_CUDA_DMMV_X
237
- #define GGML_CUDA_DMMV_X 32
238
- #endif
239
-
240
  [[noreturn]]
241
  static __device__ void no_device_code(
242
  const char * file_name, const int line, const char * function_name, const int arch, const char * arch_list) {
 
231
  typedef float2 dfloat2;
232
  #endif //GGML_CUDA_F16
233
 
 
 
 
 
 
 
234
  [[noreturn]]
235
  static __device__ void no_device_code(
236
  const char * file_name, const int line, const char * function_name, const int arch, const char * arch_list) {
ggml-cuda/dmmv.cu CHANGED
@@ -2,10 +2,6 @@
2
  #include "dequantize.cuh"
3
  #include "convert.cuh"
4
 
5
- #ifndef GGML_CUDA_MMV_Y
6
- #define GGML_CUDA_MMV_Y 1
7
- #endif
8
-
9
  #ifndef K_QUANTS_PER_ITERATION
10
  #define K_QUANTS_PER_ITERATION 2
11
  #else
 
2
  #include "dequantize.cuh"
3
  #include "convert.cuh"
4
 
 
 
 
 
5
  #ifndef K_QUANTS_PER_ITERATION
6
  #define K_QUANTS_PER_ITERATION 2
7
  #else
ggml-cuda/dmmv.cuh CHANGED
@@ -1,5 +1,16 @@
1
  #include "common.cuh"
2
 
 
 
 
 
 
 
 
 
 
 
 
3
  void ggml_cuda_op_dequantize_mul_mat_vec(
4
  ggml_backend_cuda_context & ctx,
5
  const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
 
1
  #include "common.cuh"
2
 
3
+ // dmmv = dequantize_mul_mat_vec
4
+
5
+ // TODO: remove this?
6
+ #ifndef GGML_CUDA_DMMV_X
7
+ #define GGML_CUDA_DMMV_X 32
8
+ #endif
9
+
10
+ #ifndef GGML_CUDA_MMV_Y
11
+ #define GGML_CUDA_MMV_Y 1
12
+ #endif
13
+
14
  void ggml_cuda_op_dequantize_mul_mat_vec(
15
  ggml_backend_cuda_context & ctx,
16
  const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,