JohannesGaessler committed on
Commit
0b52fcc
·
1 Parent(s): 459beb1

CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cuda/mmq.cuh +2 -2
ggml/src/ggml-cuda/mmq.cuh CHANGED
@@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
109
 
110
  #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
111
  #ifdef GGML_CUDA_FORCE_MMQ
112
- return MMQ_DP4A_MAX_BATCH_SIZE;
113
- #else // GGML_CUDA_FORCE_MMQ
114
  return 128;
 
 
115
  #endif // GGML_CUDA_FORCE_MMQ
116
  #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
117
 
 
109
 
110
  #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
111
  #ifdef GGML_CUDA_FORCE_MMQ
 
 
112
  return 128;
113
+ #else // GGML_CUDA_FORCE_MMQ
114
+ return MMQ_DP4A_MAX_BATCH_SIZE;
115
  #endif // GGML_CUDA_FORCE_MMQ
116
  #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
117