Spaces:
Running
Running
Dou Xinpeng
committed on
Commit
·
7cbca42
1
Parent(s):
f77ad34
cann: Add host buffer type for Ascend NPU (llama/9406)
Browse files
* feat: Add host buffer type for Ascend NPU (CANN backend)
* fix some checking errors
* Add a few comments
- ggml/include/ggml-cann.h +7 -0
- ggml/src/ggml-cann.cpp +110 -0
ggml/include/ggml-cann.h
CHANGED
|
@@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
|
|
| 80 |
*/
|
| 81 |
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
/**
|
| 84 |
* @brief Retrieves the description of a specific CANN device.
|
| 85 |
*
|
|
|
|
| 80 |
*/
|
| 81 |
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
| 82 |
|
| 83 |
+
/**
|
| 84 |
+
* @brief Returns the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
|
| 85 |
+
*
|
| 86 |
+
* @return A pointer to the host buffer type interface.
|
| 87 |
+
*/
|
| 88 |
+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
| 89 |
+
|
| 90 |
/**
|
| 91 |
* @brief Retrieves the description of a specific CANN device.
|
| 92 |
*
|
ggml/src/ggml-cann.cpp
CHANGED
|
@@ -1221,6 +1221,116 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|
| 1221 |
return &ggml_backend_cann_buffer_types[device];
|
| 1222 |
}
|
| 1223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1224 |
/**
|
| 1225 |
* @brief Computes the forward operation for a given tensor using CANN
|
| 1226 |
* operations.
|
|
|
|
| 1221 |
return &ggml_backend_cann_buffer_types[device];
|
| 1222 |
}
|
| 1223 |
|
| 1224 |
+
/**
|
| 1225 |
+
* @brief Retrieves the name associated with a CANN host buffer type.
|
| 1226 |
+
*
|
| 1227 |
+
* This function returns the descriptive name associated with the specified
|
| 1228 |
+
* CANN host buffer type context.
|
| 1229 |
+
*
|
| 1230 |
+
* @param buft Pointer to the host buffer type context.
|
| 1231 |
+
* @return Const pointer to the C-style string containing the name.
|
| 1232 |
+
*/
|
| 1233 |
+
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
| 1234 |
+
return "CANN_Host";
|
| 1235 |
+
|
| 1236 |
+
GGML_UNUSED(buft);
|
| 1237 |
+
}
|
| 1238 |
+
|
| 1239 |
+
/**
|
| 1240 |
+
* @brief Retrieves the name associated with a CANN host buffer.
|
| 1241 |
+
*
|
| 1242 |
+
* This function returns the descriptive name associated with the specified
|
| 1243 |
+
* CANN host buffer context.
|
| 1244 |
+
*
|
| 1245 |
+
* @param buffer Pointer to the host buffer context.
|
| 1246 |
+
* @return Const pointer to the C-style string containing the name.
|
| 1247 |
+
*/
|
| 1248 |
+
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
|
| 1249 |
+
return "CANN_Host";
|
| 1250 |
+
|
| 1251 |
+
GGML_UNUSED(buffer);
|
| 1252 |
+
}
|
| 1253 |
+
|
| 1254 |
+
/**
|
| 1255 |
+
* @brief Free resources associated with a CANN host buffer.
|
| 1256 |
+
*
|
| 1257 |
+
* This function frees the resources associated with a CANN host buffer, including
|
| 1258 |
+
* its context.
|
| 1259 |
+
*
|
| 1260 |
+
* @param buffer The CANN host buffer to free.
|
| 1261 |
+
*/
|
| 1262 |
+
GGML_CALL static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
|
| 1263 |
+
ACL_CHECK(aclrtFreeHost(buffer->context));
|
| 1264 |
+
}
|
| 1265 |
+
|
| 1266 |
+
/**
|
| 1267 |
+
* @brief Allocates a new CANN host buffer of the specified size.
|
| 1268 |
+
*
|
| 1269 |
+
* This function allocates a new CANN host buffer with the given size.
|
| 1270 |
+
* @param size Size in bytes of the host buffer to allocate.
|
| 1271 |
+
* @return Pointer to the allocated host buffer, or nullptr if allocation fails or the GGML_CANN_NO_PINNED environment variable is set.
|
| 1272 |
+
*/
|
| 1273 |
+
static void * ggml_cann_host_malloc(size_t size) {
|
| 1274 |
+
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
|
| 1275 |
+
return nullptr;
|
| 1276 |
+
}
|
| 1277 |
+
|
| 1278 |
+
void * hostPtr = nullptr;
|
| 1279 |
+
aclError err = aclrtMallocHost((void **) &hostPtr, size);
|
| 1280 |
+
if (err != ACL_SUCCESS) {
|
| 1281 |
+
|
| 1282 |
+
GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
|
| 1283 |
+
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
|
| 1284 |
+
return nullptr;
|
| 1285 |
+
}
|
| 1286 |
+
return hostPtr;
|
| 1287 |
+
}
|
| 1288 |
+
|
| 1289 |
+
/**
|
| 1290 |
+
* @brief Allocates a new CANN host buffer of the specified type and size.
|
| 1291 |
+
*
|
| 1292 |
+
* @param buft Pointer to the host buffer type context.
|
| 1293 |
+
* @param size Size in bytes of the host buffer to allocate.
|
| 1294 |
+
* @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
|
| 1295 |
+
*/
|
| 1296 |
+
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
| 1297 |
+
void * hostPtr = ggml_cann_host_malloc(size);
|
| 1298 |
+
|
| 1299 |
+
if (hostPtr == nullptr) {
|
| 1300 |
+
// fallback to cpu buffer
|
| 1301 |
+
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
| 1302 |
+
}
|
| 1303 |
+
|
| 1304 |
+
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
|
| 1305 |
+
buffer->buft = buft;
|
| 1306 |
+
buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
|
| 1307 |
+
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
|
| 1308 |
+
|
| 1309 |
+
return buffer;
|
| 1310 |
+
}
|
| 1311 |
+
|
| 1312 |
+
/**
|
| 1313 |
+
* @brief Interface for managing CANN host buffer types in the GGML backend.
|
| 1314 |
+
*
|
| 1315 |
+
* Provides function pointers for allocating, querying properties, and managing
|
| 1316 |
+
* memory for CANN host buffer types in the GGML backend.
|
| 1317 |
+
*/
|
| 1318 |
+
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
| 1319 |
+
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
|
| 1320 |
+
/* .iface = */ {
|
| 1321 |
+
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
|
| 1322 |
+
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
|
| 1323 |
+
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
| 1324 |
+
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
| 1325 |
+
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 1326 |
+
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 1327 |
+
},
|
| 1328 |
+
/* .context = */ nullptr,
|
| 1329 |
+
};
|
| 1330 |
+
|
| 1331 |
+
return &ggml_backend_cann_buffer_type_host;
|
| 1332 |
+
}
|
| 1333 |
+
|
| 1334 |
/**
|
| 1335 |
* @brief Computes the forward operation for a given tensor using CANN
|
| 1336 |
* operations.
|