slpnix committed on
Commit
b612415
·
1 Parent(s): f9baffc

kompute: add backend registry / device interfaces (llama/10045)

Browse files

Get in line with the other backends by supporting the newer
backend/device registry interfaces.

Signed-off-by: Sergio Lopez <[email protected]>

ggml/include/ggml-kompute.h CHANGED
@@ -11,6 +11,8 @@
11
  extern "C" {
12
  #endif
13
 
 
 
14
  struct ggml_vk_device {
15
  int index;
16
  int type; // same as VkPhysicalDeviceType
@@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
41
 
42
  GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
43
 
 
 
44
  #ifdef __cplusplus
45
  }
46
  #endif
 
11
  extern "C" {
12
  #endif
13
 
14
+ #define GGML_KOMPUTE_MAX_DEVICES 16
15
+
16
  struct ggml_vk_device {
17
  int index;
18
  int type; // same as VkPhysicalDeviceType
 
43
 
44
  GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
45
 
46
+ GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
47
+
48
  #ifdef __cplusplus
49
  }
50
  #endif
ggml/src/ggml-backend.cpp CHANGED
@@ -562,6 +562,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
562
  #include "ggml-cann.h"
563
  #endif
564
 
 
 
 
 
565
  struct ggml_backend_registry {
566
  std::vector<ggml_backend_reg_t> backends;
567
  std::vector<ggml_backend_dev_t> devices;
@@ -591,8 +595,9 @@ struct ggml_backend_registry {
591
  #ifdef GGML_USE_AMX
592
  register_backend(ggml_backend_amx_reg());
593
  #endif
594
-
595
- // TODO: kompute
 
596
 
597
  register_backend(ggml_backend_cpu_reg());
598
  }
 
562
  #include "ggml-cann.h"
563
  #endif
564
 
565
+ #ifdef GGML_USE_KOMPUTE
566
+ #include "ggml-kompute.h"
567
+ #endif
568
+
569
  struct ggml_backend_registry {
570
  std::vector<ggml_backend_reg_t> backends;
571
  std::vector<ggml_backend_dev_t> devices;
 
595
  #ifdef GGML_USE_AMX
596
  register_backend(ggml_backend_amx_reg());
597
  #endif
598
+ #ifdef GGML_USE_KOMPUTE
599
+ register_backend(ggml_backend_kompute_reg());
600
+ #endif
601
 
602
  register_backend(ggml_backend_cpu_reg());
603
  }
ggml/src/ggml-kompute.cpp CHANGED
@@ -42,6 +42,7 @@
42
  #include <cstring>
43
  #include <iostream>
44
  #include <memory>
 
45
  #include <stdexcept>
46
  #include <string>
47
  #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
273
  return results;
274
  }
275
 
276
- // public API returns a C-style array
277
- ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
278
- auto devices = ggml_vk_available_devices_internal(memoryRequired);
279
- *count = devices.size();
280
- if (devices.empty()) {
281
- return nullptr;
282
- }
283
-
284
- size_t nbytes = sizeof (ggml_vk_device) * (devices.size());
285
- auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
286
- memcpy(arr, devices.data(), nbytes);
287
- return arr;
288
  }
289
 
290
  static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
341
  if (!komputeManager()->hasDevice())
342
  return ggml_vk_device();
343
 
344
- auto devices = ggml_vk_available_devices_internal(0);
345
  ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
346
  GGML_ASSERT(!devices.empty());
347
  return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
1323
  ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
1324
  }
1325
 
1326
- static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
1327
- switch (op->type) {
1328
- case GGML_TYPE_F16:
1329
- case GGML_TYPE_F32:
1330
- case GGML_TYPE_Q4_0:
1331
- case GGML_TYPE_Q4_1:
1332
- break;
1333
- default:
1334
- return false;
1335
- }
1336
-
1337
  switch (op->op) {
1338
  case GGML_OP_UNARY:
1339
  switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
1410
  ;
1411
  }
1412
  return false;
 
 
1413
  }
1414
 
1415
  static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
@@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
1458
 
1459
  any_commands_recorded = true;
1460
 
1461
- if (!ggml_vk_supports_op(dst)) {
1462
- fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
1463
- GGML_ABORT("unsupported op");
1464
- }
1465
-
1466
  const int32_t ne00 = src0 ? src0->ne[0] : 0;
1467
  const int32_t ne01 = src0 ? src0->ne[1] : 0;
1468
  const int32_t ne02 = src0 ? src0->ne[2] : 0;
@@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
1907
  };
1908
 
1909
  ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
1910
- static std::vector<ggml_backend_buffer_type> bufts = []() {
1911
- std::vector<ggml_backend_buffer_type> vec;
1912
- auto devices = ggml_vk_available_devices_internal(0);
1913
- vec.reserve(devices.size());
1914
-
1915
- for (const auto & dev : devices) {
1916
- vec.push_back({
1917
- /* .iface = */ ggml_backend_kompute_buffer_type_interface,
1918
- /* .device = */ nullptr,
1919
- /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
1920
- });
 
 
 
 
 
 
 
 
 
1921
  }
1922
- return vec;
1923
- }();
1924
 
1925
- auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
1926
- return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
1927
- });
1928
- return it < bufts.end() ? &*it : nullptr;
1929
  }
1930
 
1931
  // backend
@@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
1953
  return GGML_STATUS_SUCCESS;
1954
  }
1955
 
1956
- static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
1957
- GGML_UNUSED(backend);
1958
- return ggml_vk_supports_op(op);
1959
- }
1960
-
1961
- static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
1962
- GGML_UNUSED(backend);
1963
- return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
1964
- }
1965
-
1966
  static struct ggml_backend_i kompute_backend_i = {
1967
  /* .get_name = */ ggml_backend_kompute_name,
1968
  /* .free = */ ggml_backend_kompute_free,
@@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
1991
  ggml_backend_t kompute_backend = new ggml_backend {
1992
  /* .guid = */ ggml_backend_kompute_guid(),
1993
  /* .interface = */ kompute_backend_i,
1994
- /* .device = */ nullptr,
1995
  /* .context = */ s_kompute_context,
1996
  };
1997
 
@@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
2001
  bool ggml_backend_is_kompute(ggml_backend_t backend) {
2002
  return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
2003
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  #include <cstring>
43
  #include <iostream>
44
  #include <memory>
45
+ #include <mutex>
46
  #include <stdexcept>
47
  #include <string>
48
  #include <unordered_map>
 
274
  return results;
275
  }
276
 
277
+ static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
278
+ static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
279
+ return devices;
 
 
 
 
 
 
 
 
 
280
  }
281
 
282
  static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
 
333
  if (!komputeManager()->hasDevice())
334
  return ggml_vk_device();
335
 
336
+ auto devices = ggml_vk_available_devices();
337
  ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
338
  GGML_ASSERT(!devices.empty());
339
  return devices.front();
 
1315
  ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
1316
  }
1317
 
1318
+ static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
 
 
 
 
 
 
 
 
 
 
1319
  switch (op->op) {
1320
  case GGML_OP_UNARY:
1321
  switch (ggml_get_unary_op(op)) {
 
1392
  ;
1393
  }
1394
  return false;
1395
+
1396
+ GGML_UNUSED(dev);
1397
  }
1398
 
1399
  static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
 
1442
 
1443
  any_commands_recorded = true;
1444
 
 
 
 
 
 
1445
  const int32_t ne00 = src0 ? src0->ne[0] : 0;
1446
  const int32_t ne01 = src0 ? src0->ne[1] : 0;
1447
  const int32_t ne02 = src0 ? src0->ne[2] : 0;
 
1886
  };
1887
 
1888
  ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
1889
+ static std::mutex mutex;
1890
+ std::lock_guard<std::mutex> lock(mutex);
1891
+
1892
+ auto devices = ggml_vk_available_devices();
1893
+ int32_t device_count = (int32_t) devices.size();
1894
+ GGML_ASSERT(device < device_count);
1895
+ GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
1896
+
1897
+ static ggml_backend_buffer_type
1898
+ ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
1899
+
1900
+ static bool ggml_backend_kompute_buffer_type_initialized = false;
1901
+
1902
+ if (!ggml_backend_kompute_buffer_type_initialized) {
1903
+ for (int32_t i = 0; i < device_count; i++) {
1904
+ ggml_backend_kompute_buffer_types[i] = {
1905
+ /* .iface = */ ggml_backend_kompute_buffer_type_interface,
1906
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
1907
+ /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
1908
+ };
1909
  }
1910
+ ggml_backend_kompute_buffer_type_initialized = true;
1911
+ }
1912
 
1913
+ return &ggml_backend_kompute_buffer_types[device];
 
 
 
1914
  }
1915
 
1916
  // backend
 
1938
  return GGML_STATUS_SUCCESS;
1939
  }
1940
 
 
 
 
 
 
 
 
 
 
 
1941
  static struct ggml_backend_i kompute_backend_i = {
1942
  /* .get_name = */ ggml_backend_kompute_name,
1943
  /* .free = */ ggml_backend_kompute_free,
 
1966
  ggml_backend_t kompute_backend = new ggml_backend {
1967
  /* .guid = */ ggml_backend_kompute_guid(),
1968
  /* .interface = */ kompute_backend_i,
1969
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
1970
  /* .context = */ s_kompute_context,
1971
  };
1972
 
 
1976
  bool ggml_backend_is_kompute(ggml_backend_t backend) {
1977
  return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
1978
  }
1979
+
1980
+ static size_t ggml_backend_kompute_get_device_count() {
1981
+ auto devices = ggml_vk_available_devices();
1982
+ return devices.size();
1983
+ }
1984
+
1985
+ static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
1986
+ auto devices = ggml_vk_available_devices();
1987
+ GGML_ASSERT((size_t) device < devices.size());
1988
+ snprintf(description, description_size, "%s", devices[device].name);
1989
+ }
1990
+
1991
+ static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
1992
+ auto devices = ggml_vk_available_devices();
1993
+ GGML_ASSERT((size_t) device < devices.size());
1994
+ *total = devices[device].heapSize;
1995
+ *free = devices[device].heapSize;
1996
+ }
1997
+
1998
+ //////////////////////////
1999
+
2000
+ struct ggml_backend_kompute_device_context {
2001
+ int device;
2002
+ std::string name;
2003
+ std::string description;
2004
+ };
2005
+
2006
+ static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
2007
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2008
+ return ctx->name.c_str();
2009
+ }
2010
+
2011
+ static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
2012
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2013
+ return ctx->description.c_str();
2014
+ }
2015
+
2016
+ static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
2017
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2018
+ ggml_backend_kompute_get_device_memory(ctx->device, free, total);
2019
+ }
2020
+
2021
+ static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
2022
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2023
+ return ggml_backend_kompute_buffer_type(ctx->device);
2024
+ }
2025
+
2026
+ static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
2027
+ if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
2028
+ return false;
2029
+ }
2030
+
2031
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2032
+ ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
2033
+
2034
+ return buft_ctx->device == ctx->device;
2035
+ }
2036
+
2037
+ static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
2038
+ GGML_UNUSED(dev);
2039
+ return GGML_BACKEND_DEVICE_TYPE_GPU;
2040
+ }
2041
+
2042
+ static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
2043
+ props->name = ggml_backend_kompute_device_get_name(dev);
2044
+ props->description = ggml_backend_kompute_device_get_description(dev);
2045
+ props->type = ggml_backend_kompute_device_get_type(dev);
2046
+ ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
2047
+ props->caps = {
2048
+ /* async = */ false,
2049
+ /* host_buffer = */ false,
2050
+ /* .buffer_from_host_ptr = */ false,
2051
+ /* events = */ false,
2052
+ };
2053
+ }
2054
+
2055
+ static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
2056
+ GGML_UNUSED(params);
2057
+ ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
2058
+ return ggml_backend_kompute_init(ctx->device);
2059
+ }
2060
+
2061
+ static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
2062
+ const int min_batch_size = 32;
2063
+
2064
+ return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
2065
+ (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
2066
+
2067
+ GGML_UNUSED(dev);
2068
+ }
2069
+
2070
+ static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
2071
+ /* .get_name = */ ggml_backend_kompute_device_get_name,
2072
+ /* .get_description = */ ggml_backend_kompute_device_get_description,
2073
+ /* .get_memory = */ ggml_backend_kompute_device_get_memory,
2074
+ /* .get_type = */ ggml_backend_kompute_device_get_type,
2075
+ /* .get_props = */ ggml_backend_kompute_device_get_props,
2076
+ /* .init_backend = */ ggml_backend_kompute_device_init,
2077
+ /* .get_buffer_type = */ ggml_backend_kompute_device_get_buffer_type,
2078
+ /* .get_host_buffer_type = */ NULL,
2079
+ /* .buffer_from_host_ptr = */ NULL,
2080
+ /* .supports_op = */ ggml_backend_kompute_device_supports_op,
2081
+ /* .supports_buft = */ ggml_backend_kompute_device_supports_buft,
2082
+ /* .offload_op = */ ggml_backend_kompute_device_offload_op,
2083
+ /* .event_new = */ NULL,
2084
+ /* .event_free = */ NULL,
2085
+ /* .event_synchronize = */ NULL,
2086
+ };
2087
+
2088
+ static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
2089
+ GGML_UNUSED(reg);
2090
+ return "Kompute";
2091
+ }
2092
+
2093
+ static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
2094
+ GGML_UNUSED(reg);
2095
+ return ggml_backend_kompute_get_device_count();
2096
+ }
2097
+
2098
+ static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
2099
+ static std::vector<ggml_backend_dev_t> devices;
2100
+
2101
+ static bool initialized = false;
2102
+
2103
+ {
2104
+ static std::mutex mutex;
2105
+ std::lock_guard<std::mutex> lock(mutex);
2106
+ if (!initialized) {
2107
+ for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
2108
+ ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
2109
+ char desc[256];
2110
+ ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
2111
+ ctx->device = i;
2112
+ ctx->name = "Kompute" + std::to_string(i);
2113
+ ctx->description = desc;
2114
+ devices.push_back(new ggml_backend_device {
2115
+ /* .iface = */ ggml_backend_kompute_device_i,
2116
+ /* .reg = */ reg,
2117
+ /* .context = */ ctx,
2118
+ });
2119
+ }
2120
+ initialized = true;
2121
+ }
2122
+ }
2123
+
2124
+ GGML_ASSERT(device < devices.size());
2125
+ return devices[device];
2126
+ }
2127
+
2128
+ static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
2129
+ /* .get_name = */ ggml_backend_kompute_reg_get_name,
2130
+ /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
2131
+ /* .get_device = */ ggml_backend_kompute_reg_get_device,
2132
+ /* .get_proc_address = */ NULL,
2133
+ };
2134
+
2135
+ ggml_backend_reg_t ggml_backend_kompute_reg() {
2136
+ static ggml_backend_reg reg = {
2137
+ /* .iface = */ ggml_backend_kompute_reg_i,
2138
+ /* .context = */ nullptr,
2139
+ };
2140
+
2141
+ return &reg;
2142
+ }