Commit: 0b52fcc
Parent(s): 459beb1
CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098)
ggml/src/ggml-cuda/mmq.cuh
CHANGED
@@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
 
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
 #ifdef GGML_CUDA_FORCE_MMQ
-    return MMQ_DP4A_MAX_BATCH_SIZE;
-#else // GGML_CUDA_FORCE_MMQ
     return 128;
+#else // GGML_CUDA_FORCE_MMQ
+    return MMQ_DP4A_MAX_BATCH_SIZE;
 #endif // GGML_CUDA_FORCE_MMQ
 #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
 
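For context, below is a minimal, self-contained CUDA sketch of how the selection logic reads after this commit. Only the #if/#ifdef structure and the two return values are taken from the hunk above; the placeholder values for GGML_CUDA_CC_VOLTA and MMQ_DP4A_MAX_BATCH_SIZE, the pre-Volta fallback, and the small test kernel are assumptions added for illustration, not the actual definitions from mmq.cuh.

// Hedged sketch: mirrors the post-commit branch structure of
// get_mmq_x_max_device(), with placeholder values for everything that is
// defined elsewhere in ggml-cuda.
#include <cstdio>

#define GGML_CUDA_CC_VOLTA      700   // assumption: placeholder for the real constant
#define MMQ_DP4A_MAX_BATCH_SIZE 64    // assumption: placeholder for the real constant
//#define GGML_CUDA_FORCE_MMQ         // define (e.g. via -DGGML_CUDA_FORCE_MMQ) to force MMQ

static constexpr __device__ int get_mmq_x_max_device() {
#if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
#ifdef GGML_CUDA_FORCE_MMQ
    return 128;                       // after this commit: forced MMQ returns 128
#else  // GGML_CUDA_FORCE_MMQ
    return MMQ_DP4A_MAX_BATCH_SIZE;   // after this commit: default path returns the DP4A limit
#endif // GGML_CUDA_FORCE_MMQ
#else  // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
    return 64;                        // assumption: pre-Volta fallback, not part of this hunk
#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
}

__global__ void print_mmq_x_max() {
    printf("mmq_x_max = %d\n", get_mmq_x_max_device());
}

int main() {
    print_mmq_x_max<<<1, 1>>>();
    cudaDeviceSynchronize();
    return 0;
}

In short, the commit swaps the two Volta-or-newer return values: with GGML_CUDA_FORCE_MMQ defined the function now returns 128, and without it, it returns MMQ_DP4A_MAX_BATCH_SIZE, which was previously the other way around.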