Diego Devesa committed on
Commit
1ca87a8
·
1 Parent(s): a0fb22d

ggml-backend : only offload from host buffers (llama/11120)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-backend.cpp +1 -1
ggml/src/ggml-backend.cpp CHANGED
@@ -761,7 +761,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
761
  }
762
  // skip ROPE since the rope freqs tensor is too small to choose a backend based on it
763
  // not an ideal solution
764
- if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
765
  int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
766
  // check if a backend with higher prio wants to offload the op
767
  if (src_backend_id == sched->n_backends - 1) {
 
761
  }
762
  // skip ROPE since the rope freqs tensor is too small to choose a backend based on it
763
  // not an ideal solution
764
+ if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS && ggml_backend_buffer_is_host(src->buffer)) {
765
  int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
766
  // check if a backend with higher prio wants to offload the op
767
  if (src_backend_id == sched->n_backends - 1) {