Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hungry feature #3949

Merged
merged 10 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hi.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
q
85 changes: 63 additions & 22 deletions taskvine/src/manager/vine_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -5112,7 +5112,7 @@ struct vine_task *vine_manager_no_wait(struct vine_manager *q, const char *tag,
// check if workers' resources are available to execute more tasks; the queue should
// have at least MAX(hungry_minimum, hungry_minimum_factor * number of workers) ready tasks
//@param: struct vine_manager* - pointer to manager
//@return: 1 if hungry, 0 otherwise
//@return: approximate number of additional tasks if hungry, 0 otherwise
int vine_hungry(struct vine_manager *q)
{
// check if manager is initialized
Expand All @@ -5124,9 +5124,9 @@ int vine_hungry(struct vine_manager *q)
struct vine_stats qstats;
vine_get_stats(q, &qstats);

// if number of ready tasks is less than minimum, then queue is hungry
if (qstats.tasks_waiting < MAX(q->hungry_minimum, q->hungry_minimum_factor * hash_table_size(q->worker_table))) {
return 1;
// if the number of ready tasks in the queue is less than the minimum, then it is hungry
if (qstats.tasks_waiting < q->hungry_minimum) {
return q->hungry_minimum - qstats.tasks_waiting;
}

// get total available resources consumption (cores, memory, disk, gpus) of all workers of this manager
Expand All @@ -5135,36 +5135,33 @@ int vine_hungry(struct vine_manager *q)
int64_t workers_total_avail_memory = 0;
int64_t workers_total_avail_disk = 0;
int64_t workers_total_avail_gpus = 0;

workers_total_avail_cores = overcommitted_resource_total(q, q->stats->total_cores) - q->stats->committed_cores;
workers_total_avail_memory = overcommitted_resource_total(q, q->stats->total_memory) - q->stats->committed_memory;
workers_total_avail_gpus = overcommitted_resource_total(q, q->stats->total_gpus) - q->stats->committed_gpus;
workers_total_avail_disk = q->stats->total_disk - q->stats->committed_disk; // never overcommit disk

// get required resources (cores, memory, disk, gpus) of one waiting task
// Find available resources (2 * total - committed)
workers_total_avail_cores = 2 * qstats.total_cores - qstats.committed_cores;
workers_total_avail_memory = 2 * qstats.total_memory - qstats.committed_memory;
workers_total_avail_gpus = 2 * qstats.total_gpus - qstats.committed_gpus;
workers_total_avail_disk = 2 * qstats.total_disk - qstats.committed_disk; // NOTE(review): 2*total overcommits disk, contradicting the prior "never overcommit disk" policy — confirm intended

// get required resources (cores, memory, disk, gpus) of all waiting tasks
// by iterating through every task counted as ready in the queue
int64_t ready_task_cores = 0;
int64_t ready_task_memory = 0;
int64_t ready_task_disk = 0;
int64_t ready_task_gpus = 0;

struct vine_task *t;

int count = task_state_count(q, NULL, VINE_TASK_READY);

while (count > 0) {
count--;
t = list_pop_head(q->ready_list);

LIST_ITERATE(q->ready_list, t)
{
ready_task_cores += MAX(1, t->resources_requested->cores);
ready_task_memory += t->resources_requested->memory;
ready_task_disk += t->resources_requested->disk;
ready_task_gpus += t->resources_requested->gpus;

list_push_tail(q->ready_list, t);
}

// check possible limiting factors
// return 0 (not hungry) if required resources exceed available resources
int count = task_state_count(q, NULL, VINE_TASK_READY);

int64_t avg_additional_tasks_cores, avg_additional_tasks_memory, avg_additional_tasks_disk, avg_additional_tasks_gpus;

if (ready_task_cores > workers_total_avail_cores) {
return 0;
}
Expand All @@ -5178,7 +5175,51 @@ int vine_hungry(struct vine_manager *q)
return 0;
}

return 1; // all good
if (ready_task_cores < 0)
ready_task_cores = 0;
if (ready_task_memory < 0)
ready_task_memory = 0;
if (ready_task_disk < 0)
ready_task_disk = 0;
if (ready_task_gpus < 0)
ready_task_gpus = 0;

if (count != 0) { // each branch takes the available resources (2*total - committed), subtracts the demand of the ready/in-queue tasks, then estimates how many more average-sized tasks would fit
if (ready_task_cores != 0) {
avg_additional_tasks_cores = (workers_total_avail_cores - ready_task_cores) / (ready_task_cores / count);
} else {
avg_additional_tasks_cores = workers_total_avail_cores;
}
if (ready_task_memory != 0) {
avg_additional_tasks_memory = (workers_total_avail_memory - ready_task_memory) / (ready_task_memory / count);
} else {
avg_additional_tasks_memory = workers_total_avail_cores;
}
if (ready_task_disk != 0) {
avg_additional_tasks_disk = (workers_total_avail_disk - ready_task_disk) / (ready_task_disk / count);
} else {
avg_additional_tasks_disk = workers_total_avail_cores;
}
if (ready_task_gpus != 0) {
avg_additional_tasks_gpus = (workers_total_avail_gpus - ready_task_gpus) / (ready_task_gpus / count);
} else {
avg_additional_tasks_gpus = workers_total_avail_cores;
}
} else {
return workers_total_avail_cores; // this returns number of cores if no tasks in queue and we have resources.
}
// find the limiting factor
int64_t min = avg_additional_tasks_cores;
if (avg_additional_tasks_memory < min)
min = avg_additional_tasks_memory;
if (avg_additional_tasks_disk < min)
min = avg_additional_tasks_disk;
if (avg_additional_tasks_gpus < min)
min = avg_additional_tasks_gpus;
if (min < 0)
min = 0; // if for some reason we have a negative, just make it 0

return min;
}

int vine_workers_shutdown(struct vine_manager *q, int n)
Expand Down
Loading