Spaces:
Running
Running
kompute: add backend registry / device interfaces (llama/10045)
Browse files
Get in line with the other backends by supporting the newer
backend/device registry interfaces.
Signed-off-by: Sergio Lopez <[email protected]>
- ggml/include/ggml-kompute.h +4 -0
- ggml/src/ggml-backend.cpp +7 -2
- ggml/src/ggml-kompute.cpp +196 -57
ggml/include/ggml-kompute.h
CHANGED
|
@@ -11,6 +11,8 @@
|
|
| 11 |
extern "C" {
|
| 12 |
#endif
|
| 13 |
|
|
|
|
|
|
|
| 14 |
struct ggml_vk_device {
|
| 15 |
int index;
|
| 16 |
int type; // same as VkPhysicalDeviceType
|
|
@@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
|
| 41 |
|
| 42 |
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
| 43 |
|
|
|
|
|
|
|
| 44 |
#ifdef __cplusplus
|
| 45 |
}
|
| 46 |
#endif
|
|
|
|
| 11 |
extern "C" {
|
| 12 |
#endif
|
| 13 |
|
| 14 |
+
#define GGML_KOMPUTE_MAX_DEVICES 16
|
| 15 |
+
|
| 16 |
struct ggml_vk_device {
|
| 17 |
int index;
|
| 18 |
int type; // same as VkPhysicalDeviceType
|
|
|
|
| 43 |
|
| 44 |
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
| 45 |
|
| 46 |
+
GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
|
| 47 |
+
|
| 48 |
#ifdef __cplusplus
|
| 49 |
}
|
| 50 |
#endif
|
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -562,6 +562,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
|
| 562 |
#include "ggml-cann.h"
|
| 563 |
#endif
|
| 564 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
struct ggml_backend_registry {
|
| 566 |
std::vector<ggml_backend_reg_t> backends;
|
| 567 |
std::vector<ggml_backend_dev_t> devices;
|
|
@@ -591,8 +595,9 @@ struct ggml_backend_registry {
|
|
| 591 |
#ifdef GGML_USE_AMX
|
| 592 |
register_backend(ggml_backend_amx_reg());
|
| 593 |
#endif
|
| 594 |
-
|
| 595 |
-
|
|
|
|
| 596 |
|
| 597 |
register_backend(ggml_backend_cpu_reg());
|
| 598 |
}
|
|
|
|
| 562 |
#include "ggml-cann.h"
|
| 563 |
#endif
|
| 564 |
|
| 565 |
+
#ifdef GGML_USE_KOMPUTE
|
| 566 |
+
#include "ggml-kompute.h"
|
| 567 |
+
#endif
|
| 568 |
+
|
| 569 |
struct ggml_backend_registry {
|
| 570 |
std::vector<ggml_backend_reg_t> backends;
|
| 571 |
std::vector<ggml_backend_dev_t> devices;
|
|
|
|
| 595 |
#ifdef GGML_USE_AMX
|
| 596 |
register_backend(ggml_backend_amx_reg());
|
| 597 |
#endif
|
| 598 |
+
#ifdef GGML_USE_KOMPUTE
|
| 599 |
+
register_backend(ggml_backend_kompute_reg());
|
| 600 |
+
#endif
|
| 601 |
|
| 602 |
register_backend(ggml_backend_cpu_reg());
|
| 603 |
}
|
ggml/src/ggml-kompute.cpp
CHANGED
|
@@ -42,6 +42,7 @@
|
|
| 42 |
#include <cstring>
|
| 43 |
#include <iostream>
|
| 44 |
#include <memory>
|
|
|
|
| 45 |
#include <stdexcept>
|
| 46 |
#include <string>
|
| 47 |
#include <unordered_map>
|
|
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
|
|
| 273 |
return results;
|
| 274 |
}
|
| 275 |
|
| 276 |
-
|
| 277 |
-
ggml_vk_device
|
| 278 |
-
|
| 279 |
-
*count = devices.size();
|
| 280 |
-
if (devices.empty()) {
|
| 281 |
-
return nullptr;
|
| 282 |
-
}
|
| 283 |
-
|
| 284 |
-
size_t nbytes = sizeof (ggml_vk_device) * (devices.size());
|
| 285 |
-
auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
|
| 286 |
-
memcpy(arr, devices.data(), nbytes);
|
| 287 |
-
return arr;
|
| 288 |
}
|
| 289 |
|
| 290 |
static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
|
|
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
|
|
| 341 |
if (!komputeManager()->hasDevice())
|
| 342 |
return ggml_vk_device();
|
| 343 |
|
| 344 |
-
auto devices =
|
| 345 |
ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
|
| 346 |
GGML_ASSERT(!devices.empty());
|
| 347 |
return devices.front();
|
|
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
|
|
| 1323 |
ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
|
| 1324 |
}
|
| 1325 |
|
| 1326 |
-
static bool
|
| 1327 |
-
switch (op->type) {
|
| 1328 |
-
case GGML_TYPE_F16:
|
| 1329 |
-
case GGML_TYPE_F32:
|
| 1330 |
-
case GGML_TYPE_Q4_0:
|
| 1331 |
-
case GGML_TYPE_Q4_1:
|
| 1332 |
-
break;
|
| 1333 |
-
default:
|
| 1334 |
-
return false;
|
| 1335 |
-
}
|
| 1336 |
-
|
| 1337 |
switch (op->op) {
|
| 1338 |
case GGML_OP_UNARY:
|
| 1339 |
switch (ggml_get_unary_op(op)) {
|
|
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
|
|
| 1410 |
;
|
| 1411 |
}
|
| 1412 |
return false;
|
|
|
|
|
|
|
| 1413 |
}
|
| 1414 |
|
| 1415 |
static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
|
|
@@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
|
|
| 1458 |
|
| 1459 |
any_commands_recorded = true;
|
| 1460 |
|
| 1461 |
-
if (!ggml_vk_supports_op(dst)) {
|
| 1462 |
-
fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
|
| 1463 |
-
GGML_ABORT("unsupported op");
|
| 1464 |
-
}
|
| 1465 |
-
|
| 1466 |
const int32_t ne00 = src0 ? src0->ne[0] : 0;
|
| 1467 |
const int32_t ne01 = src0 ? src0->ne[1] : 0;
|
| 1468 |
const int32_t ne02 = src0 ? src0->ne[2] : 0;
|
|
@@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
|
|
| 1907 |
};
|
| 1908 |
|
| 1909 |
ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
|
| 1910 |
-
static std::
|
| 1911 |
-
|
| 1912 |
-
|
| 1913 |
-
|
| 1914 |
-
|
| 1915 |
-
|
| 1916 |
-
|
| 1917 |
-
|
| 1918 |
-
|
| 1919 |
-
|
| 1920 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1921 |
}
|
| 1922 |
-
|
| 1923 |
-
}
|
| 1924 |
|
| 1925 |
-
|
| 1926 |
-
return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
|
| 1927 |
-
});
|
| 1928 |
-
return it < bufts.end() ? &*it : nullptr;
|
| 1929 |
}
|
| 1930 |
|
| 1931 |
// backend
|
|
@@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
|
|
| 1953 |
return GGML_STATUS_SUCCESS;
|
| 1954 |
}
|
| 1955 |
|
| 1956 |
-
// Backend-level "supports op" hook: the backend handle is ignored and the
// decision is delegated entirely to ggml_vk_supports_op().
static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
    GGML_UNUSED(backend);

    const bool supported = ggml_vk_supports_op(op);
    return supported;
}
|
| 1960 |
-
|
| 1961 |
-
// Backend-level "supports buffer type" hook: a buffer type is accepted when
// its get_name callback is the Kompute one. The backend handle is ignored.
static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(backend);

    const bool is_kompute_buft = (buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name);
    return is_kompute_buft;
}
|
| 1965 |
-
|
| 1966 |
static struct ggml_backend_i kompute_backend_i = {
|
| 1967 |
/* .get_name = */ ggml_backend_kompute_name,
|
| 1968 |
/* .free = */ ggml_backend_kompute_free,
|
|
@@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
|
|
| 1991 |
ggml_backend_t kompute_backend = new ggml_backend {
|
| 1992 |
/* .guid = */ ggml_backend_kompute_guid(),
|
| 1993 |
/* .interface = */ kompute_backend_i,
|
| 1994 |
-
/* .device = */
|
| 1995 |
/* .context = */ s_kompute_context,
|
| 1996 |
};
|
| 1997 |
|
|
@@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
|
|
| 2001 |
// Reports whether `backend` is a Kompute backend: true only for a non-null
// handle whose GUID matches ggml_backend_kompute_guid().
bool ggml_backend_is_kompute(ggml_backend_t backend) {
    if (backend == NULL) {
        return false;
    }
    return ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
#include <cstring>
|
| 43 |
#include <iostream>
|
| 44 |
#include <memory>
|
| 45 |
+
#include <mutex>
|
| 46 |
#include <stdexcept>
|
| 47 |
#include <string>
|
| 48 |
#include <unordered_map>
|
|
|
|
| 274 |
return results;
|
| 275 |
}
|
| 276 |
|
| 277 |
+
static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
|
| 278 |
+
static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
|
| 279 |
+
return devices;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
}
|
| 281 |
|
| 282 |
static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
|
|
|
|
| 333 |
if (!komputeManager()->hasDevice())
|
| 334 |
return ggml_vk_device();
|
| 335 |
|
| 336 |
+
auto devices = ggml_vk_available_devices();
|
| 337 |
ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
|
| 338 |
GGML_ASSERT(!devices.empty());
|
| 339 |
return devices.front();
|
|
|
|
| 1315 |
ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
|
| 1316 |
}
|
| 1317 |
|
| 1318 |
+
static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1319 |
switch (op->op) {
|
| 1320 |
case GGML_OP_UNARY:
|
| 1321 |
switch (ggml_get_unary_op(op)) {
|
|
|
|
| 1392 |
;
|
| 1393 |
}
|
| 1394 |
return false;
|
| 1395 |
+
|
| 1396 |
+
GGML_UNUSED(dev);
|
| 1397 |
}
|
| 1398 |
|
| 1399 |
static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
|
|
|
|
| 1442 |
|
| 1443 |
any_commands_recorded = true;
|
| 1444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1445 |
const int32_t ne00 = src0 ? src0->ne[0] : 0;
|
| 1446 |
const int32_t ne01 = src0 ? src0->ne[1] : 0;
|
| 1447 |
const int32_t ne02 = src0 ? src0->ne[2] : 0;
|
|
|
|
| 1886 |
};
|
| 1887 |
|
| 1888 |
// Returns the buffer type object for the Kompute device with index `device`.
//
// The per-device table is filled in lazily on the first call, under a
// function-local mutex, with one entry per device reported by
// ggml_vk_available_devices(). Aborts via GGML_ASSERT when there are more
// than GGML_KOMPUTE_MAX_DEVICES devices or when `device` is not in
// [0, device count).
ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
    static std::mutex mutex;
    std::lock_guard<std::mutex> lock(mutex);

    // Bind by reference: the helper returns a reference to a cached vector,
    // so `auto devices = ...` would copy it on every call.
    const auto & devices = ggml_vk_available_devices();

    // Validate the table capacity before narrowing the size to int32_t.
    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
    const int32_t device_count = (int32_t) devices.size();

    // Reject negative indices as well; they would index before the array.
    GGML_ASSERT(device >= 0 && device < device_count);

    static ggml_backend_buffer_type
        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];

    static bool ggml_backend_kompute_buffer_type_initialized = false;

    if (!ggml_backend_kompute_buffer_type_initialized) {
        for (int32_t i = 0; i < device_count; i++) {
            ggml_backend_kompute_buffer_types[i] = {
                /* .iface    = */ ggml_backend_kompute_buffer_type_interface,
                /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
                /* .context  = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
            };
        }
        ggml_backend_kompute_buffer_type_initialized = true;
    }

    return &ggml_backend_kompute_buffer_types[device];
}
|
| 1915 |
|
| 1916 |
// backend
|
|
|
|
| 1938 |
return GGML_STATUS_SUCCESS;
|
| 1939 |
}
|
| 1940 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1941 |
static struct ggml_backend_i kompute_backend_i = {
|
| 1942 |
/* .get_name = */ ggml_backend_kompute_name,
|
| 1943 |
/* .free = */ ggml_backend_kompute_free,
|
|
|
|
| 1966 |
ggml_backend_t kompute_backend = new ggml_backend {
|
| 1967 |
/* .guid = */ ggml_backend_kompute_guid(),
|
| 1968 |
/* .interface = */ kompute_backend_i,
|
| 1969 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
|
| 1970 |
/* .context = */ s_kompute_context,
|
| 1971 |
};
|
| 1972 |
|
|
|
|
| 1976 |
// Reports whether `backend` is a Kompute backend: true only for a non-null
// handle whose GUID matches ggml_backend_kompute_guid().
bool ggml_backend_is_kompute(ggml_backend_t backend) {
    if (backend == NULL) {
        return false;
    }
    return ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
}
|
| 1979 |
+
|
| 1980 |
+
static size_t ggml_backend_kompute_get_device_count() {
|
| 1981 |
+
auto devices = ggml_vk_available_devices();
|
| 1982 |
+
return devices.size();
|
| 1983 |
+
}
|
| 1984 |
+
|
| 1985 |
+
static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
|
| 1986 |
+
auto devices = ggml_vk_available_devices();
|
| 1987 |
+
GGML_ASSERT((size_t) device < devices.size());
|
| 1988 |
+
snprintf(description, description_size, "%s", devices[device].name);
|
| 1989 |
+
}
|
| 1990 |
+
|
| 1991 |
+
static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
|
| 1992 |
+
auto devices = ggml_vk_available_devices();
|
| 1993 |
+
GGML_ASSERT((size_t) device < devices.size());
|
| 1994 |
+
*total = devices[device].heapSize;
|
| 1995 |
+
*free = devices[device].heapSize;
|
| 1996 |
+
}
|
| 1997 |
+
|
| 1998 |
+
//////////////////////////
|
| 1999 |
+
|
| 2000 |
+
// Per-device state stored in a ggml_backend_device's `context` pointer.
struct ggml_backend_kompute_device_context {
    int         device;       // device index, as passed to ggml_backend_kompute_init()
    std::string name;         // string returned by the device get_name callback
    std::string description;  // string returned by the device get_description callback
};
|
| 2005 |
+
|
| 2006 |
+
// Device interface: returns the name stored in the device context. The
// pointer refers to the context's std::string storage.
static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
    const auto * dev_ctx = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    return dev_ctx->name.c_str();
}
|
| 2010 |
+
|
| 2011 |
+
// Device interface: returns the description stored in the device context.
// The pointer refers to the context's std::string storage.
static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
    const auto * dev_ctx = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    return dev_ctx->description.c_str();
}
|
| 2015 |
+
|
| 2016 |
+
// Device interface: forwards to ggml_backend_kompute_get_device_memory()
// using the device index stored in the device context.
static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    const auto * dev_ctx = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    const int device_index = dev_ctx->device;
    ggml_backend_kompute_get_device_memory(device_index, free, total);
}
|
| 2020 |
+
|
| 2021 |
+
// Device interface: returns the Kompute buffer type for the device index
// stored in the device context.
static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
    const auto * dev_ctx = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    const int device_index = dev_ctx->device;
    return ggml_backend_kompute_buffer_type(device_index);
}
|
| 2025 |
+
|
| 2026 |
+
// Device interface: a buffer type is supported when it is a Kompute buffer
// type (recognised by its get_name callback) and its context carries the same
// device index as this device.
static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    const bool is_kompute_buft = (buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name);
    if (!is_kompute_buft) {
        return false;
    }

    // Only read buft->context after confirming it is a Kompute buffer type.
    const auto * dev_ctx  = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    const auto * buft_ctx = static_cast<ggml_backend_kompute_buffer_type_context *>(buft->context);

    return dev_ctx->device == buft_ctx->device;
}
|
| 2036 |
+
|
| 2037 |
+
// Device interface: every Kompute device is reported as
// GGML_BACKEND_DEVICE_TYPE_GPU; the device handle is not inspected.
static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
    GGML_UNUSED(dev);

    const enum ggml_backend_dev_type type = GGML_BACKEND_DEVICE_TYPE_GPU;
    return type;
}
|
| 2041 |
+
|
| 2042 |
+
// Device interface: fills `props` with the device's name, description, type
// and memory figures (each taken from the matching device callback) and sets
// every capability flag to false.
static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_kompute_device_get_name(dev);
    props->description = ggml_backend_kompute_device_get_description(dev);
    props->type        = ggml_backend_kompute_device_get_type(dev);

    size_t * const mem_free  = &props->memory_free;
    size_t * const mem_total = &props->memory_total;
    ggml_backend_kompute_device_get_memory(dev, mem_free, mem_total);

    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ false,
        /* .events                = */ false,
    };
}
|
| 2054 |
+
|
| 2055 |
+
// Device interface: creates a backend for this device by calling
// ggml_backend_kompute_init() with the device index stored in the context.
// `params` is ignored.
static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
    GGML_UNUSED(params);

    const auto * dev_ctx = static_cast<ggml_backend_kompute_device_context *>(dev->context);
    const int device_index = dev_ctx->device;
    return ggml_backend_kompute_init(device_index);
}
|
| 2060 |
+
|
| 2061 |
+
// Device interface: returns true when
//   - op->ne[1] >= 32 and the op is not GGML_OP_GET_ROWS, or
//   - op->ne[2] >= 32 and the op is GGML_OP_MUL_MAT_ID.
// The device handle is not inspected.
static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
    GGML_UNUSED(dev);

    const int min_batch_size = 32;

    if (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) {
        return true;
    }
    return op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID;
}
|
| 2069 |
+
|
| 2070 |
+
static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
|
| 2071 |
+
/* .get_name = */ ggml_backend_kompute_device_get_name,
|
| 2072 |
+
/* .get_description = */ ggml_backend_kompute_device_get_description,
|
| 2073 |
+
/* .get_memory = */ ggml_backend_kompute_device_get_memory,
|
| 2074 |
+
/* .get_type = */ ggml_backend_kompute_device_get_type,
|
| 2075 |
+
/* .get_props = */ ggml_backend_kompute_device_get_props,
|
| 2076 |
+
/* .init_backend = */ ggml_backend_kompute_device_init,
|
| 2077 |
+
/* .get_buffer_type = */ ggml_backend_kompute_device_get_buffer_type,
|
| 2078 |
+
/* .get_host_buffer_type = */ NULL,
|
| 2079 |
+
/* .buffer_from_host_ptr = */ NULL,
|
| 2080 |
+
/* .supports_op = */ ggml_backend_kompute_device_supports_op,
|
| 2081 |
+
/* .supports_buft = */ ggml_backend_kompute_device_supports_buft,
|
| 2082 |
+
/* .offload_op = */ ggml_backend_kompute_device_offload_op,
|
| 2083 |
+
/* .event_new = */ NULL,
|
| 2084 |
+
/* .event_free = */ NULL,
|
| 2085 |
+
/* .event_synchronize = */ NULL,
|
| 2086 |
+
};
|
| 2087 |
+
|
| 2088 |
+
// Registry interface: the backend registry name is the fixed string
// "Kompute"; the registry handle is not inspected.
static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
    GGML_UNUSED(reg);

    static const char * const reg_name = "Kompute";
    return reg_name;
}
|
| 2092 |
+
|
| 2093 |
+
// Registry interface: returns the device count reported by
// ggml_backend_kompute_get_device_count(); the registry handle is ignored.
static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
    GGML_UNUSED(reg);

    const size_t device_count = ggml_backend_kompute_get_device_count();
    return device_count;
}
|
| 2097 |
+
|
| 2098 |
+
// Registry interface: returns the device object for index `device`.
//
// The device objects are created once, on the first call, under a
// function-local mutex: one ggml_backend_device per device reported by
// ggml_backend_kompute_get_device_count(), each with a heap-allocated context
// holding its index, the name "Kompute<index>" and its description. The
// objects are never freed in this function. Aborts via GGML_ASSERT if
// `device` is out of range.
static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
    static std::vector<ggml_backend_dev_t> devices;

    static bool initialized = false;

    {
        static std::mutex mutex;
        std::lock_guard<std::mutex> lock(mutex);
        if (!initialized) {
            // Query the count once rather than on every loop iteration.
            const size_t device_count = ggml_backend_kompute_get_device_count();
            devices.reserve(device_count);

            for (size_t i = 0; i < device_count; i++) {
                // The helpers and the context take an int index; make the
                // narrowing from size_t explicit.
                const int index = (int) i;

                char desc[256];
                ggml_backend_kompute_get_device_description(index, desc, sizeof(desc));

                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
                ctx->device      = index;
                ctx->name        = "Kompute" + std::to_string(i);
                ctx->description = desc;

                devices.push_back(new ggml_backend_device {
                    /* .iface   = */ ggml_backend_kompute_device_i,
                    /* .reg     = */ reg,
                    /* .context = */ ctx,
                });
            }
            initialized = true;
        }
    }

    GGML_ASSERT(device < devices.size());
    return devices[device];
}
|
| 2127 |
+
|
| 2128 |
+
static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
|
| 2129 |
+
/* .get_name = */ ggml_backend_kompute_reg_get_name,
|
| 2130 |
+
/* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
|
| 2131 |
+
/* .get_device = */ ggml_backend_kompute_reg_get_device,
|
| 2132 |
+
/* .get_proc_address = */ NULL,
|
| 2133 |
+
};
|
| 2134 |
+
|
| 2135 |
+
// Returns the Kompute backend registry object: a function-local static
// ggml_backend_reg built from ggml_backend_kompute_reg_i with a null context.
// Every call returns the address of the same object.
ggml_backend_reg_t ggml_backend_kompute_reg() {
    static ggml_backend_reg reg = {
        /* .iface   = */ ggml_backend_kompute_reg_i,
        /* .context = */ nullptr,
    };

    // The scraped text had "return ®" here: "&reg;" was rendered as the
    // registered-trademark HTML entity.
    return &reg;
}
|