Spaces:
Running
Running
vulkan: select only one device for single gpu with multiple drivers (llama/7582)
Browse files- ggml-vulkan.cpp +78 -4
ggml-vulkan.cpp
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#include "ggml-vulkan.h"
|
| 2 |
-
|
| 3 |
#ifdef GGML_VULKAN_RUN_TESTS
|
| 4 |
#include <chrono>
|
| 5 |
#endif
|
|
@@ -9,12 +9,13 @@
|
|
| 9 |
#include <algorithm>
|
| 10 |
#include <cmath>
|
| 11 |
#include <iostream>
|
| 12 |
-
#include <limits>
|
| 13 |
#include <tuple>
|
| 14 |
#include <vector>
|
| 15 |
#include <sstream>
|
| 16 |
#include <utility>
|
| 17 |
#include <memory>
|
|
|
|
|
|
|
| 18 |
|
| 19 |
#include "ggml.h"
|
| 20 |
#include "ggml-backend-impl.h"
|
|
@@ -1555,8 +1556,10 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
|
| 1555 |
vk::PhysicalDeviceProperties2 props2;
|
| 1556 |
vk::PhysicalDeviceMaintenance3Properties props3;
|
| 1557 |
vk::PhysicalDeviceSubgroupProperties subgroup_props;
|
|
|
|
| 1558 |
props2.pNext = &props3;
|
| 1559 |
props3.pNext = &subgroup_props;
|
|
|
|
| 1560 |
physical_device.getProperties2(&props2);
|
| 1561 |
|
| 1562 |
const size_t subgroup_size = subgroup_props.subgroupSize;
|
|
@@ -1600,7 +1603,7 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
|
| 1600 |
fp16 = fp16 && vk12_features.shaderFloat16;
|
| 1601 |
|
| 1602 |
std::string device_name = props2.properties.deviceName.data();
|
| 1603 |
-
std::cerr << GGML_VK_NAME << idx << ": " << device_name << " | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl;
|
| 1604 |
|
| 1605 |
if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
|
| 1606 |
std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl;
|
|
@@ -1696,7 +1699,78 @@ void ggml_vk_instance_init() {
|
|
| 1696 |
vk::PhysicalDeviceProperties props = devices[i].getProperties();
|
| 1697 |
|
| 1698 |
if (props.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) {
|
| 1699 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1700 |
}
|
| 1701 |
}
|
| 1702 |
|
|
|
|
| 1 |
#include "ggml-vulkan.h"
|
| 2 |
+
#include <vulkan/vulkan_core.h>
|
| 3 |
#ifdef GGML_VULKAN_RUN_TESTS
|
| 4 |
#include <chrono>
|
| 5 |
#endif
|
|
|
|
| 9 |
#include <algorithm>
|
| 10 |
#include <cmath>
|
| 11 |
#include <iostream>
|
|
|
|
| 12 |
#include <tuple>
|
| 13 |
#include <vector>
|
| 14 |
#include <sstream>
|
| 15 |
#include <utility>
|
| 16 |
#include <memory>
|
| 17 |
+
#include <limits>
|
| 18 |
+
#include <map>
|
| 19 |
|
| 20 |
#include "ggml.h"
|
| 21 |
#include "ggml-backend-impl.h"
|
|
|
|
| 1556 |
vk::PhysicalDeviceProperties2 props2;
|
| 1557 |
vk::PhysicalDeviceMaintenance3Properties props3;
|
| 1558 |
vk::PhysicalDeviceSubgroupProperties subgroup_props;
|
| 1559 |
+
vk::PhysicalDeviceDriverProperties driver_props;
|
| 1560 |
props2.pNext = &props3;
|
| 1561 |
props3.pNext = &subgroup_props;
|
| 1562 |
+
subgroup_props.pNext = &driver_props;
|
| 1563 |
physical_device.getProperties2(&props2);
|
| 1564 |
|
| 1565 |
const size_t subgroup_size = subgroup_props.subgroupSize;
|
|
|
|
| 1603 |
fp16 = fp16 && vk12_features.shaderFloat16;
|
| 1604 |
|
| 1605 |
std::string device_name = props2.properties.deviceName.data();
|
| 1606 |
+
std::cerr << GGML_VK_NAME << idx << ": " << device_name << " (" << driver_props.driverName << ") | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl;
|
| 1607 |
|
| 1608 |
if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
|
| 1609 |
std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl;
|
|
|
|
| 1699 |
vk::PhysicalDeviceProperties props = devices[i].getProperties();
|
| 1700 |
|
| 1701 |
if (props.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) {
|
| 1702 |
+
// Check if there are two physical devices corresponding to the same GPU
|
| 1703 |
+
auto old_device = std::find_if(
|
| 1704 |
+
vk_instance.device_indices.begin(),
|
| 1705 |
+
vk_instance.device_indices.end(),
|
| 1706 |
+
[&devices, &props](const size_t k){ return devices[k].getProperties().deviceID == props.deviceID; }
|
| 1707 |
+
);
|
| 1708 |
+
if (old_device == vk_instance.device_indices.end()) {
|
| 1709 |
+
vk_instance.device_indices.push_back(i);
|
| 1710 |
+
} else {
|
| 1711 |
+
// There can be two physical devices corresponding to the same GPU if there are 2 different drivers
|
| 1712 |
+
// This can cause errors when splitting layers across the devices, so we need to keep only one of them
|
| 1713 |
+
#ifdef GGML_VULKAN_DEBUG
|
| 1714 |
+
std::cerr << "Device " << i << " and device " << *old_device << " have the same device id" << std::endl;
|
| 1715 |
+
#endif
|
| 1716 |
+
|
| 1717 |
+
vk::PhysicalDeviceProperties2 old_prop;
|
| 1718 |
+
vk::PhysicalDeviceDriverProperties old_driver;
|
| 1719 |
+
old_prop.pNext = &old_driver;
|
| 1720 |
+
devices[*old_device].getProperties2(&old_prop);
|
| 1721 |
+
|
| 1722 |
+
vk::PhysicalDeviceProperties2 new_prop;
|
| 1723 |
+
vk::PhysicalDeviceDriverProperties new_driver;
|
| 1724 |
+
new_prop.pNext = &new_driver;
|
| 1725 |
+
devices[i].getProperties2(&new_prop);
|
| 1726 |
+
|
| 1727 |
+
std::map<vk::DriverId, int> driver_priorities {};
|
| 1728 |
+
int old_priority = std::numeric_limits<int>::max();
|
| 1729 |
+
int new_priority = std::numeric_limits<int>::max();
|
| 1730 |
+
|
| 1731 |
+
// See https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDriverId.html for the list of driver IDs
|
| 1732 |
+
// Smaller number -> higher priority
|
| 1733 |
+
switch (old_prop.properties.vendorID) {
|
| 1734 |
+
case VK_VENDOR_ID_AMD:
|
| 1735 |
+
driver_priorities[vk::DriverId::eMesaRadv] = 1;
|
| 1736 |
+
driver_priorities[vk::DriverId::eAmdOpenSource] = 2;
|
| 1737 |
+
driver_priorities[vk::DriverId::eAmdProprietary] = 3;
|
| 1738 |
+
break;
|
| 1739 |
+
case VK_VENDOR_ID_INTEL:
|
| 1740 |
+
driver_priorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
|
| 1741 |
+
driver_priorities[vk::DriverId::eIntelProprietaryWindows] = 2;
|
| 1742 |
+
break;
|
| 1743 |
+
case VK_VENDOR_ID_NVIDIA:
|
| 1744 |
+
driver_priorities[vk::DriverId::eNvidiaProprietary] = 1;
|
| 1745 |
+
#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235
|
| 1746 |
+
driver_priorities[vk::DriverId::eMesaNvk] = 2;
|
| 1747 |
+
#endif
|
| 1748 |
+
break;
|
| 1749 |
+
}
|
| 1750 |
+
|
| 1751 |
+
if (driver_priorities.count(old_driver.driverID)) {
|
| 1752 |
+
old_priority = driver_priorities[old_driver.driverID];
|
| 1753 |
+
}
|
| 1754 |
+
if (driver_priorities.count(new_driver.driverID)) {
|
| 1755 |
+
new_priority = driver_priorities[new_driver.driverID];
|
| 1756 |
+
}
|
| 1757 |
+
|
| 1758 |
+
if (new_priority < old_priority) {
|
| 1759 |
+
auto r = std::remove(vk_instance.device_indices.begin(), vk_instance.device_indices.end(), *old_device);
|
| 1760 |
+
vk_instance.device_indices.erase(r, vk_instance.device_indices.end());
|
| 1761 |
+
vk_instance.device_indices.push_back(i);
|
| 1762 |
+
|
| 1763 |
+
#ifdef GGML_VULKAN_DEBUG
|
| 1764 |
+
std::cerr << "Prioritize device " << i << " driver " << new_driver.driverName << " over device " << *old_device << " driver " << old_driver.driverName << std::endl;
|
| 1765 |
+
#endif
|
| 1766 |
+
}
|
| 1767 |
+
#ifdef GGML_VULKAN_DEBUG
|
| 1768 |
+
else {
|
| 1769 |
+
std::cerr << "Prioritize device " << *old_device << " driver " << old_driver.driverName << " over device " << i << " driver " << new_driver.driverName << std::endl;
|
| 1770 |
+
|
| 1771 |
+
}
|
| 1772 |
+
#endif
|
| 1773 |
+
}
|
| 1774 |
}
|
| 1775 |
}
|
| 1776 |
|