Dou Xinpeng committed on
Commit
7cbca42
·
1 Parent(s): f77ad34

cann: Add host buffer type for Ascend NPU (llama/9406)

Browse files

* feat: Add host buffer type for Ascend NPU(CANN backend)

* fix some checking errors

* Add a few comments

Files changed (2) hide show
  1. ggml/include/ggml-cann.h +7 -0
  2. ggml/src/ggml-cann.cpp +110 -0
ggml/include/ggml-cann.h CHANGED
@@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
80
  */
81
  GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
82
 
 
 
 
 
 
 
 
83
  /**
84
  * @brief Retrieves the description of a specific CANN device.
85
  *
 
80
  */
81
  GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
82
 
83
+ /**
84
+ * @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
85
+ *
86
+ * @return A pointer to the host buffer type interface.
87
+ */
88
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
89
+
90
  /**
91
  * @brief Retrieves the description of a specific CANN device.
92
  *
ggml/src/ggml-cann.cpp CHANGED
@@ -1221,6 +1221,116 @@ ggml_backend_cann_buffer_type(int32_t device) {
1221
  return &ggml_backend_cann_buffer_types[device];
1222
  }
1223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224
  /**
1225
  * @brief Computes the forward operation for a given tensor using CANN
1226
  * operations.
 
1221
  return &ggml_backend_cann_buffer_types[device];
1222
  }
1223
 
1224
+ /**
1225
+ * @brief Retrieves the name associated with a CANN host buffer type.
1226
+ *
1227
+ * This function returns the descriptive name associated with the specified
1228
+ * CANN host buffer type context.
1229
+ *
1230
+ * @param buft Pointer to the host buffer type context.
1231
+ * @return Const pointer to the C-style string containing the name.
1232
+ */
1233
+ GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
1234
+ return "CANN_Host";
1235
+
1236
+ GGML_UNUSED(buft);
1237
+ }
1238
+
1239
+ /**
1240
+ * @brief Retrieves the name associated with a CANN host buffer.
1241
+ *
1242
+ * This function returns the descriptive name associated with the specified
1243
+ * CANN host buffer context.
1244
+ *
1245
+ * @param buffer Pointer to the host buffer context.
1246
+ * @return Const pointer to the C-style string containing the name.
1247
+ */
1248
+ GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
1249
+ return "CANN_Host";
1250
+
1251
+ GGML_UNUSED(buffer);
1252
+ }
1253
+
1254
+ /**
1255
+ * @brief Free resources associated with a CANN host buffer.
1256
+ *
1257
+ * This function releases a CANN host buffer by passing the pointer stored in
1258
+ * the buffer's context to aclrtFreeHost.
1259
+ *
1260
+ * @param buffer The CANN host buffer to free.
1261
+ */
1262
+ GGML_CALL static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
1263
+ ACL_CHECK(aclrtFreeHost(buffer->context));
1264
+ }
1265
+
1266
+ /**
1267
+ * @brief Allocates a new CANN host buffer of the specified size.
1268
+ *
1269
+ * This function allocates a new CANN host buffer with the given size.
1270
+ * @param size Size in bytes of the host buffer to allocate.
1271
+ * @return Pointer to the allocated host buffer, or nullptr if allocation fails or the GGML_CANN_NO_PINNED environment variable is set.
1272
+ */
1273
+ static void * ggml_cann_host_malloc(size_t size) {
1274
+ if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
1275
+ return nullptr;
1276
+ }
1277
+
1278
+ void * hostPtr = nullptr;
1279
+ aclError err = aclrtMallocHost((void **) &hostPtr, size);
1280
+ if (err != ACL_SUCCESS) {
1281
+
1282
+ GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
1283
+ size / 1024.0 / 1024.0, aclGetRecentErrMsg());
1284
+ return nullptr;
1285
+ }
1286
+ return hostPtr;
1287
+ }
1288
+
1289
+ /**
1290
+ * @brief Allocates a new CANN host buffer of the specified type and size.
1291
+ *
1292
+ * @param buft Pointer to the host buffer type context.
1293
+ * @param size Size in bytes of the host buffer to allocate.
1294
+ * @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
1295
+ */
1296
+ GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
1297
+ void * hostPtr = ggml_cann_host_malloc(size);
1298
+
1299
+ if (hostPtr == nullptr) {
1300
+ // fallback to cpu buffer
1301
+ return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
1302
+ }
1303
+
1304
+ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
1305
+ buffer->buft = buft;
1306
+ buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
1307
+ buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
1308
+
1309
+ return buffer;
1310
+ }
1311
+
1312
+ /**
1313
+ * @brief Interface for managing CANN host buffer types in the GGML backend.
1314
+ *
1315
+ * Provides function pointers for allocating, querying properties, and managing
1316
+ * memory for CANN host buffer types in the GGML backend.
1317
+ */
1318
+ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
1319
+ static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
1320
+ /* .iface = */ {
1321
+ /* .get_name = */ ggml_backend_cann_host_buffer_type_name,
1322
+ /* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
1323
+ /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
1324
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1325
+ /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1326
+ /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1327
+ },
1328
+ /* .context = */ nullptr,
1329
+ };
1330
+
1331
+ return &ggml_backend_cann_buffer_type_host;
1332
+ }
1333
+
1334
  /**
1335
  * @brief Computes the forward operation for a given tensor using CANN
1336
  * operations.