Skip to content

Commit

Permalink
fix: don't count time with any job popped as "no job" time; add way to suppress job time warnings (#301)
Browse files Browse the repository at this point in the history

* fix: don't count no-job-time if a job is already popped

* fix: add way to suppress no-job time messages

* fix: missing type annotation for `minutes_allowed_without_jobs`

* fix: clarify the meaning of no-jobs-time message

* chore: version bump
  • Loading branch information
tazlin authored Sep 29, 2024
1 parent 78c79ad commit da894e8
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 20 deletions.
2 changes: 1 addition & 1 deletion bridgeData_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ models_to_skip:
# Suppress speed warnings if jobs are taking too long.
# Note: If you are getting these messages, you are serving jobs much slower than ideal.
# Lower your max_power for more kudos/hr.
suppress_speed_warnings: false # Currently unused in reGen
suppress_speed_warnings: false

# Exit if an unhandled fault occurs. Useful for setting up the worker as a system service.
exit_on_unhandled_faults: false
Expand Down
2 changes: 1 addition & 1 deletion horde_worker_regen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

ASSETS_FOLDER_PATH = Path(__file__).parent / "assets"

__version__ = "9.0.6"
__version__ = "9.0.7"


import pkg_resources # noqa: E402
Expand Down
2 changes: 1 addition & 1 deletion horde_worker_regen/_version_meta.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"recommended_version": "9.0.6",
"recommended_version": "9.0.7",
"required_min_version": "9.0.2",
"required_min_version_update_date": "2024-09-26",
"required_min_version_info": {
Expand Down
2 changes: 2 additions & 0 deletions horde_worker_regen/bridge_data/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class reGenBridgeData(CombinedHordeBridgeData):
download_timeout: int = Field(default=TOTAL_LORA_DOWNLOAD_TIMEOUT + 1)
preload_timeout: int = Field(default=60, ge=15)

minutes_allowed_without_jobs: int = Field(default=30, ge=0, lt=60 * 60)

horde_model_stickiness: float = Field(default=0.0, le=1.0, ge=0.0, alias="model_stickiness")
"""
A percent chance (expressed as a decimal between 0 and 1) that the currently loaded models will
Expand Down
42 changes: 26 additions & 16 deletions horde_worker_regen/process_management/process_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3408,15 +3408,14 @@ async def api_job_pop(self) -> None:
info_string += f"(Skipped reasons: {job_pop_response.skipped.model_dump(exclude_defaults=True)})"

if job_pop_response.id_ is None:
self._last_pop_no_jobs_available = True
logger.info(info_string)
cur_time = time.time()
if self._last_pop_no_jobs_available_time == 0.0:
self._last_pop_no_jobs_available_time = cur_time
if len(self.job_deque) == 0:
if self._last_pop_no_jobs_available_time == 0.0:
self._last_pop_no_jobs_available_time = cur_time

self._time_spent_no_jobs_available += cur_time - self._last_pop_no_jobs_available_time
self._last_pop_no_jobs_available_time = cur_time

self._last_pop_no_jobs_available = True
self._time_spent_no_jobs_available += cur_time - self._last_pop_no_jobs_available_time
self._last_pop_no_jobs_available_time = cur_time
return

self.job_faults[job_pop_response.id_] = []
Expand Down Expand Up @@ -3889,7 +3888,8 @@ def _print_deadlock_info() -> None:

def print_status_method(self) -> None:
"""Print the status of the worker if it's time to do so."""
if time.time() - self._last_status_message_time > self._status_message_frequency:
cur_time = time.time()
if cur_time - self._last_status_message_time > self._status_message_frequency:
process_info_strings = self._process_map.get_process_info_strings()
logger.info("Process info:")
for process_info_string in process_info_strings:
Expand Down Expand Up @@ -4023,7 +4023,7 @@ def print_status_method(self) -> None:
)

if self._too_many_consecutive_failed_jobs:
time_since_failure = time.time() - self._too_many_consecutive_failed_jobs_time
time_since_failure = cur_time - self._too_many_consecutive_failed_jobs_time
logger.error(
"Too many consecutive failed jobs. This may be due to a misconfiguration or other issue. "
"Please check your logs and configuration.",
Expand All @@ -4033,17 +4033,27 @@ def print_status_method(self) -> None:
f"{self._too_many_consecutive_failed_jobs_wait_time} seconds must pass before resuming.",
)

if self._time_spent_no_jobs_available > 60 * 5:
logger.warning(
"Your worker spent more than 5 minutes without jobs. This may be due to low demand. "
"However, offering more models or increasing your max_power may help increase the number of jobs "
"you receive.",
)
minutes_allowed_without_jobs = self.bridge_data.minutes_allowed_without_jobs
seconds_allowed_without_jobs = minutes_allowed_without_jobs * 60
cur_session_minutes = (cur_time - self.session_start_time) / 60
if self._time_spent_no_jobs_available > seconds_allowed_without_jobs:
if not self.bridge_data.suppress_speed_warnings:
logger.warning(
f"Your worker spent more than {minutes_allowed_without_jobs} minutes combined throughout this "
f"session ({cur_session_minutes:.2f} minutes) "
"without jobs. This may be due to low demand. However, offering more models or increasing "
"your max_power may help increase the number of jobs you receive and reduce downtime.",
)
else:
logger.debug(
"Suppressed warning about time spent without jobs "
f"for {minutes_allowed_without_jobs} minutes",
)

if self._shutting_down:
logger.warning("Shutting down after current jobs are finished...")

self._last_status_message_time = time.time()
self._last_status_message_time = cur_time

_bridge_data_loop_interval = 1.0
_last_bridge_data_reload_time = 0.0
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "horde_worker_regen"
version = "9.0.6"
version = "9.0.7"
description = "Allows you to connect to the AI Horde and generate images for users."
authors = [
{name = "tazlin", email = "[email protected]"},
Expand Down

0 comments on commit da894e8

Please sign in to comment.