fix: more overhead memory by default; better look-ahead vram unload
tazlin committed Oct 4, 2023
1 parent b1e0a5f commit 02e8b67
Showing 1 changed file with 16 additions and 9 deletions.
horde_worker_regen/process_management/process_manager.py
```diff
@@ -402,7 +402,7 @@ def __init__(
         *,
         ctx: BaseContext,
         bridge_data: reGenBridgeData,
-        target_ram_overhead_bytes: int = 8 * 1024 * 1024 * 1024,
+        target_ram_overhead_bytes: int = 10 * 1024 * 1024 * 1024,
         target_vram_overhead_bytes_map: Mapping[int, int] | None = None,  # FIXME
         max_inference_processes: int = 4,
         max_safety_processes: int = 1,
```
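The first hunk raises the default RAM overhead the worker reserves from 8 GiB to 10 GiB, leaving more headroom for the OS and other processes. Below is a minimal sketch of how such a target can gate model loads, assuming a psutil-based free-memory check; `can_load_model` and `estimated_model_ram_bytes` are hypothetical illustrations, not the worker's actual API:

```python
import psutil

# Commit default: keep 10 GiB of system RAM free (was 8 GiB).
TARGET_RAM_OVERHEAD_BYTES = 10 * 1024 * 1024 * 1024


def can_load_model(estimated_model_ram_bytes: int) -> bool:
    """Hypothetical check: load only if the overhead target stays free afterwards."""
    available = psutil.virtual_memory().available
    return available - estimated_model_ram_bytes >= TARGET_RAM_OVERHEAD_BYTES
```

Writing the constant as `10 * 1024 * 1024 * 1024` keeps the GiB arithmetic explicit rather than hard-coding 10737418240.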
```diff
@@ -935,15 +935,22 @@ def start_inference(self) -> None:
             if process_info.loaded_horde_model_name is None:
                 continue
 
-            next_n_models = self.get_next_n_models(self.max_inference_processes)
+            next_n_models = list(self.get_next_n_models(self.max_inference_processes))
 
-            if process_info.loaded_horde_model_name not in next_n_models:
-                process_info.pipe_connection.send(
-                    HordeControlModelMessage(
-                        control_flag=HordeControlFlag.UNLOAD_MODELS_FROM_VRAM,
-                        horde_model_name=process_info.loaded_horde_model_name,
-                    ),
-                )
+            # If the model would be used by another process soon, don't unload it
+            if (
+                self.max_concurrent_inference_processes > 1
+                and process_info.loaded_horde_model_name
+                in next_n_models[: self.max_concurrent_inference_processes - 1]
+            ):
+                continue
+
+            process_info.pipe_connection.send(
+                HordeControlModelMessage(
+                    control_flag=HordeControlFlag.UNLOAD_MODELS_FROM_VRAM,
+                    horde_model_name=process_info.loaded_horde_model_name,
+                ),
+            )
             time.sleep(0.1)
 
         logger.info(f"Starting inference for job {next_job.id_} on process {process_with_model.process_id}")
```
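The second hunk reworks the VRAM unload policy in `start_inference`. Previously an idle process unloaded its model whenever that model was absent from the look-ahead queue; now the process also keeps the model resident when it appears among the next `max_concurrent_inference_processes - 1` queued models, since another process will want it shortly. Wrapping `get_next_n_models(...)` in `list(...)` is what makes the new slice legal: slicing a generator raises `TypeError`, and a list can also be scanned repeatedly without being exhausted. A standalone sketch of the decision, with an invented queue of illustrative model names:

```python
# Hypothetical values for illustration; the real worker derives these at runtime.
max_concurrent_inference_processes = 2
loaded_model = "Deliberate"

# get_next_n_models() may not return a list, hence the list(...) in the commit:
# a generator would raise TypeError on the slice below.
next_n_models = ["Deliberate", "stable_diffusion", "Anything Diffusion"]

# Keep the model in VRAM if another process will need it imminently.
will_be_used_soon = (
    max_concurrent_inference_processes > 1
    and loaded_model in next_n_models[: max_concurrent_inference_processes - 1]
)

if not will_be_used_soon:
    print(f"Would unload {loaded_model} from VRAM")
else:
    print(f"Keeping {loaded_model} loaded; it is needed within the next "
          f"{max_concurrent_inference_processes - 1} jobs")
```

With `max_concurrent_inference_processes = 2`, only the single next queued model is protected from unloading; larger concurrency widens the protected window.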
