Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
e9f5612
1
Parent(s):
a687ec3
sched : fix multiple evaluations of the same graph with pipeline parallelism (llama/14855)
Browse files
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -647,6 +647,7 @@ struct ggml_backend_sched {
|
|
| 647 |
// pipeline parallelism support
|
| 648 |
int n_copies;
|
| 649 |
int cur_copy;
|
|
|
|
| 650 |
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
| 651 |
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
| 652 |
int n_graph_inputs;
|
|
@@ -1433,8 +1434,6 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|
| 1433 |
}
|
| 1434 |
}
|
| 1435 |
|
| 1436 |
-
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
|
| 1437 |
-
|
| 1438 |
return GGML_STATUS_SUCCESS;
|
| 1439 |
}
|
| 1440 |
|
|
@@ -1535,10 +1534,10 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
|
| 1535 |
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
| 1536 |
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
|
| 1537 |
|
| 1538 |
-
ggml_backend_sched_split_graph(sched, measure_graph);
|
| 1539 |
-
|
| 1540 |
ggml_backend_sched_synchronize(sched);
|
| 1541 |
|
|
|
|
|
|
|
| 1542 |
if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
| 1543 |
return false;
|
| 1544 |
}
|
|
@@ -1550,6 +1549,10 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
|
| 1550 |
|
| 1551 |
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1552 |
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1553 |
|
| 1554 |
ggml_backend_sched_split_graph(sched, graph);
|
| 1555 |
|
|
@@ -1590,7 +1593,7 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
|
| 1590 |
// if the graph is not already allocated, always use copy 0 after a synchronization
|
| 1591 |
// this ensures that during generation the same copy is used every time,
|
| 1592 |
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
| 1593 |
-
sched->cur_copy = 0;
|
| 1594 |
}
|
| 1595 |
}
|
| 1596 |
|
|
|
|
| 647 |
// pipeline parallelism support
|
| 648 |
int n_copies;
|
| 649 |
int cur_copy;
|
| 650 |
+
int next_copy;
|
| 651 |
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
| 652 |
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
| 653 |
int n_graph_inputs;
|
|
|
|
| 1434 |
}
|
| 1435 |
}
|
| 1436 |
|
|
|
|
|
|
|
| 1437 |
return GGML_STATUS_SUCCESS;
|
| 1438 |
}
|
| 1439 |
|
|
|
|
| 1534 |
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
| 1535 |
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
|
| 1536 |
|
|
|
|
|
|
|
| 1537 |
ggml_backend_sched_synchronize(sched);
|
| 1538 |
|
| 1539 |
+
ggml_backend_sched_split_graph(sched, measure_graph);
|
| 1540 |
+
|
| 1541 |
if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
| 1542 |
return false;
|
| 1543 |
}
|
|
|
|
| 1549 |
|
| 1550 |
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1551 |
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
| 1552 |
+
GGML_ASSERT(!sched->is_alloc);
|
| 1553 |
+
|
| 1554 |
+
sched->cur_copy = sched->next_copy;
|
| 1555 |
+
sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
|
| 1556 |
|
| 1557 |
ggml_backend_sched_split_graph(sched, graph);
|
| 1558 |
|
|
|
|
| 1593 |
// if the graph is not already allocated, always use copy 0 after a synchronization
|
| 1594 |
// this ensures that during generation the same copy is used every time,
|
| 1595 |
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
| 1596 |
+
sched->next_copy = 0;
|
| 1597 |
}
|
| 1598 |
}
|
| 1599 |
|