OccamRazor committed on
Commit
08debcd
·
1 Parent(s): a726ecc

Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer (llama/14249)

Browse files
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
9495
  UNUSED(buft);
9496
  }
9497
 
 
 
 
 
 
 
9498
  // Should be changed to return device-specific host buffer type
9499
  // but that probably requires changes in llama.cpp
9500
  ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
@@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
9503
  /* .get_name = */ ggml_backend_vk_host_buffer_type_name,
9504
  /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
9505
  /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
9506
- /* .get_max_size = */ NULL, // defaults to SIZE_MAX
9507
  /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
9508
  /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
9509
  },
 
9495
  UNUSED(buft);
9496
  }
9497
 
9498
+ static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
9499
+ return vk_instance.devices[0]->suballocation_block_size;
9500
+
9501
+ UNUSED(buft);
9502
+ }
9503
+
9504
  // Should be changed to return device-specific host buffer type
9505
  // but that probably requires changes in llama.cpp
9506
  ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
 
9509
  /* .get_name = */ ggml_backend_vk_host_buffer_type_name,
9510
  /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
9511
  /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
9512
+ /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
9513
  /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
9514
  /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
9515
  },