From 4143534346b85688c50d2dcf94552311fdccd3c3 Mon Sep 17 00:00:00 2001 From: fubuloubu <3859395+fubuloubu@users.noreply.github.com> Date: Wed, 31 Jul 2024 21:34:24 -0400 Subject: [PATCH] refactor: lots of changes; env -> vg, bot -> bots, api updates, hacks, etc. --- silverback/_cli.py | 315 +++++++++++++++++++---------------- silverback/cluster/client.py | 92 +++++++--- silverback/cluster/types.py | 229 +++++++++++++++++-------- 3 files changed, 391 insertions(+), 245 deletions(-) diff --git a/silverback/_cli.py b/silverback/_cli.py index 301a1885..5e0dbcea 100644 --- a/silverback/_cli.py +++ b/silverback/_cli.py @@ -249,35 +249,36 @@ def new_cluster( # TODO: Test payment w/ Signature validation of extra data -@cluster.command(name="status") -def cluster_status(client: ClusterClient): - """ - Get Status information about a CLUSTER +@cluster.command(name="info") +def cluster_info(client: ClusterClient): + """Get Configuration information about a CLUSTER""" - For clusters on the Silverback Platform, please provide a name for the cluster to access using - your platform authentication obtained via `silverback login` in `workspace/cluster-name` format + # NOTE: This actually doesn't query the cluster's routes, which are protected + click.echo(f"Cluster Version: v{client.version}") - NOTE: Connecting directly to clusters is supported, but is an advanced use case. 
- """ - click.echo(render_dict_as_yaml(client.build_display_fields())) + if config := client.state.configuration: + click.echo(yaml.safe_dump(config.model_dump())) + + else: + click.secho("No Cluster Configuration detected", fg="yellow", bold=True) -@cluster.group(cls=OrderedCommands) -def env(): - """Commands for managing environment variables in CLUSTER""" +@cluster.group() +def vars(): + """Manage groups of environment variables in a CLUSTER""" def parse_envvars(ctx, name, value: list[str]) -> dict[str, str]: def parse_envar(item: str): if not ("=" in item and len(item.split("=")) == 2): - raise click.UsageError("Value '{item}' must be in form `NAME=VAL`") + raise click.UsageError(f"Value '{item}' must be in form `NAME=VAL`") return item.split("=") return dict(parse_envar(item) for item in value) -@env.command(name="new") +@vars.command(name="new") @click.option( "-e", "--env", @@ -289,42 +290,40 @@ def parse_envar(item: str): help="Environment variable key and value to add (Multiple allowed)", ) @click.argument("name") -def new_env(client: ClusterClient, variables: dict, name: str): - """Create a new GROUP of environment variables in CLUSTER""" +def new_vargroup(client: ClusterClient, variables: dict, name: str): + """Create a new group of environment variables in a CLUSTER""" + if len(variables) == 0: raise click.UsageError("Must supply at least one var via `-e`") - click.echo(render_dict_as_yaml(client.new_env(name=name, variables=variables))) + vg = client.new_variable_group(name=name, variables=variables) + click.echo(yaml.safe_dump(vg.model_dump(exclude={"id"}))) # NOTE: Skip machine `.id` - click.echo(yaml.safe_dump(vg.model_dump(exclude={"id"}))) # NOTE: Skip machine `.id` +@vars.command(name="list") +def list_vargroups(client: ClusterClient): + """List latest revisions of all variable groups in a CLUSTER""" -@env.command(name="list") -def list_envs(client: ClusterClient): - """List latest revisions of all variable groups in CLUSTER""" - if all_envs := 
render_dict_as_yaml(client.envs): - click.echo(all_envs) + if group_names := list(client.variable_groups): + click.echo(yaml.safe_dump(group_names)) else: - click.secho("No envs in this cluster", bold=True, fg="red") + click.secho("No Variable Groups present in this cluster", bold=True, fg="red") -@env.command() +@vars.command(name="info") @click.argument("name") -@click.argument("new_name") -def change_name(client: ClusterClient, name: str, new_name: str): - """Change the display name of a variable GROUP in CLUSTER""" - if not (env := client.envs.get(name)): +def vargroup_info(client: ClusterClient, name: str): + """Show latest revision of a variable GROUP in a CLUSTER""" + + if not (vg := client.variable_groups.get(name)): raise click.UsageError(f"Unknown Variable Group '{name}'") - click.echo( - yaml.safe_dump( - env.update(name=new_slug).model_dump(exclude={"id"}) # NOTE: Skip machine `.id` - ) - ) + click.echo(yaml.safe_dump(vg.model_dump(exclude={"id", "name"}))) -@env.command(name="set") +@vars.command(name="update") +@click.option("--new-name", "new_name") # NOTE: No `-n` to match `bots update` @click.option( "-e", "--env", @@ -345,78 +344,73 @@ def change_name(client: ClusterClient, name: str, new_name: str): help="Environment variable name to delete (Multiple allowed)", ) @click.argument("name") -def set_env( +def update_vargroup( client: ClusterClient, name: str, + new_name: str, updated_vars: dict[str, str], deleted_vars: tuple[str], ): - """Create a new revision of GROUP in CLUSTER with updated values""" - if dup := "', '".join(set(updated_vars) & set(deleted_vars)): - raise click.UsageError(f"Cannot update and delete vars at the same time: '{dup}'") + """Update a variable GROUP in CLUSTER - if not (env := client.envs.get(name)): + NOTE: Changing the values of variables in GROUP will create a new revision, since variable groups + are immutable. 
New revisions do not automatically update bot configuration.""" + + if not (vg := client.variable_groups.get(name)): raise click.UsageError(f"Unknown Variable Group '{name}'") - if missing := "', '".join(set(deleted_vars) - set(env.variables)): - raise click.UsageError(f"Cannot delete vars not in env: '{missing}'") + if dup := "', '".join(set(updated_vars) & set(deleted_vars)): + raise click.UsageError(f"Cannot update and delete vars at the same time: '{dup}'") + + if missing := "', '".join(set(deleted_vars) - set(vg.variables)): + raise click.UsageError(f"Cannot delete vars not in group: '{missing}'") click.echo( - render_dict_as_yaml( - env.add_revision(dict(**updated_vars, **{v: None for v in deleted_vars})) + yaml.safe_dump( + vg.update( + name=new_name, + # NOTE: Do not update variables if no updates are provided + variables=dict(**updated_vars, **{v: None for v in deleted_vars}) or None, + ).model_dump( + exclude={"id"} + ) # NOTE: Skip machine `.id` ) ) -@env.command(name="show") +@vars.command(name="remove") @click.argument("name") -@click.option("-r", "--revision", type=int, help="Revision of GROUP to show (Defaults to latest)") -def show_env(client: ClusterClient, name: str, revision: int | None): - """Show all variables in latest revision of GROUP in CLUSTER""" - if not (env := client.envs.get(name)): - raise click.UsageError(f"Unknown Variable Group '{name}'") - - for env_info in env.revisions: - if revision is None or env_info.revision == revision: - click.echo(render_dict_as_yaml(env_info)) - return - - raise click.UsageError(f"Revision {revision} of '{name}' not found") - - -@env.command(name="rm") -@click.argument("name") -def remove_env(client: ClusterClient, name: str): +def remove_vargroup(client: ClusterClient, name: str): """ Remove a variable GROUP from a CLUSTER NOTE: Cannot delete if any bots reference any revision of GROUP """ - if not (env := client.envs.get(name)): + if not (vg := client.variable_groups.get(name)): raise 
click.UsageError(f"Unknown Variable Group '{name}'") - env.remove() - click.secho(f"Variable Group '{env.name}' removed.", fg="green", bold=True) + vg.remove() # NOTE: No confirmation because can only delete if no references exist + click.secho(f"Variable Group '{vg.name}' removed.", fg="green", bold=True) -@cluster.group(cls=OrderedCommands) -def bot(): - """Commands for managing bots in a CLUSTER""" +@cluster.group() +def bots(): + """Manage bots in a CLUSTER""" -@bot.command(name="new") -@click.option("-n", "--name", required=True) +@bots.command(name="new", section="Configuration Commands") @click.option("-i", "--image", required=True) @click.option("-n", "--network", required=True) @click.option("-a", "--account") @click.option("-g", "--group", "groups", multiple=True) +@click.argument("name") def new_bot( client: ClusterClient, - name: str, image: str, network: str, account: str | None, groups: list[str], + name: str, ): """Create a new bot in a CLUSTER with the given configuration""" @@ -424,132 +418,159 @@ def new_bot( raise click.UsageError(f"Cannot use name '{name}' to create bot") environment = list() - rendered_environment = dict() - for env_id in groups: - if "/" in env_id: - env_name, revision = env_id.split("/") - env = client.envs[env_name].revisions[int(revision)] + for vg_id in groups: + if "/" in vg_id: + vg_name, revision = vg_id.split("/") + vg = client.variable_groups[vg_name].get_revision(int(revision)) else: - env = client.envs[env_id] - - environment.append(env) + vg = client.variable_groups[vg_id] - for var_name in env.variables: - rendered_environment[var_name] = f"{env.name}/{env.revision}" + environment.append(vg) - display = render_dict_as_yaml(rendered_environment, prepend="\n ") - click.echo(f"Environment:\n {display}") + click.echo(f"Name: {name}") + click.echo(f"Image: {image}") + click.echo(f"Network: {network}") + if environment: + click.echo("Environment:") + click.echo(yaml.safe_dump([var for vg in environment for var in 
vg.variables])) - if not click.confirm("Do you want to create this bot?"): - return + if click.confirm("Do you want to create this bot?"): + bot = client.new_bot(name, image, network, account=account, environment=environment) + click.secho(f"Bot '{bot.name}' ({bot.id}) deploying...", fg="green", bold=True) - bot = client.new_bot(name, image, network, account=account, environment=environment) - click.secho(f"Bot ({bot.id}) deploying...", fg="green", bold=True) - -@bot.command(name="list") +@bots.command(name="list", section="Configuration Commands") def list_bots(client: ClusterClient): """List all bots in a CLUSTER (Regardless of status)""" - For clusters on the Silverback Platform, please provide a name for the cluster to access using - your platform authentication obtained via `silverback login` in `workspace/cluster-name` format - - NOTE: Connecting directly to clusters is supported, but is an advanced use case. - """ - if bot_display := render_dict_as_yaml(client.bots): - click.echo(bot_display) + if bot_names := list(client.bots): + click.echo(yaml.safe_dump(bot_names)) else: click.secho("No bots in this cluster", bold=True, fg="red") -@bot.command(name="status") +@bots.command(name="info", section="Configuration Commands") @click.argument("bot_name", metavar="BOT") -def show_bot_status(client: ClusterClient, bot_name: str): - """Show status of BOT in CLUSTER""" +def bot_info(client: ClusterClient, bot_name: str): + """Get configuration information of a BOT in a CLUSTER""" if not (bot := client.bots.get(bot_name)): raise click.UsageError(f"Unknown bot '{bot_name}'.") - click.echo(render_dict_as_yaml(bot)) + # NOTE: Skip machine `.id`, and we already know it is `.name` + click.echo(yaml.safe_dump(bot.model_dump(exclude={"id", "name", "environment"}))) + if bot.environment: + click.echo("environment:") + click.echo(yaml.safe_dump([var.name for var in bot.environment])) -@bot.command(name="update") -@click.option("-n", "--name", "new_name") +@bots.command(name="update", section="Configuration Commands") 
+@click.option("--new-name", "new_name") # NOTE: No shorthand, because conflicts w/ `--network` @click.option("-i", "--image") @click.option("-n", "--network") @click.option("-a", "--account") @click.option("-g", "--group", "groups", multiple=True) -@click.argument("bot_name", metavar="BOT") +@click.argument("name", metavar="BOT") def update_bot( client: ClusterClient, - bot_name: str, - new_name: str, - image: str, - network: str, + new_name: str | None, + image: str | None, + network: str | None, account: str | None, groups: list[str], + name: str, ): """Update configuration of BOT in CLUSTER NOTE: Some configuration updates will trigger a redeploy""" if new_name in client.bots: - raise click.UsageError(f"Cannot use name '{new_name}' to update bot '{bot_name}'") + raise click.UsageError(f"Cannot use name '{new_name}' to update bot '{name}'") - if not (bot := client.bots.get(bot_name)): - raise click.UsageError(f"Unknown bot '{bot_name}'.") + if not (bot := client.bots.get(name)): + raise click.UsageError(f"Unknown bot '{name}'.") + + if new_name: + click.echo(f"Name:\n old: {name}\n new: {new_name}") + + if network: + click.echo(f"Network:\n old: {bot.network}\n new: {network}") + + redeploy_required = False + if image: + redeploy_required = True + click.echo(f"Image:\n old: {bot.image}\n new: {image}") environment = list() - rendered_environment = dict() - for env_id in groups: - if "/" in env_id: - env_name, revision = env_id.split("/") - env = client.envs[env_name].revisions[int(revision)] + for vg_id in groups: + if "/" in vg_id: + vg_name, revision = vg_id.split("/") + vg = client.variable_groups[vg_name].get_revision(int(revision)) else: - env = client.envs[env_id] - - environment.append(env) + vg = client.variable_groups[vg_id] - for var_name in env.variables: - rendered_environment[var_name] = f"{env.name}/{env.revision}" + environment.append(vg) set_environment = True - if len(environment) == 0: + if len(environment) == 0 and bot.environment: 
set_environment = click.confirm("Do you want to clear all environment variables?") - else: - display = render_dict_as_yaml(rendered_environment, prepend="\n ") - click.echo(f"Environment:\n {display}") - - if not click.confirm("Do you want to create this bot?"): - return - - bot.update( - name=new_name, - image=image, - network=network, - account=account, - environment=environment if set_environment else None, - ) + elif environment != bot.environment: + click.echo("old-environment:") + click.echo(yaml.safe_dump([var.name for var in bot.environment])) + click.echo("new-environment:") + click.echo(yaml.safe_dump([var for vg in environment for var in vg.variables])) + + redeploy_required |= set_environment + + if click.confirm( + f"Do you want to update '{name}'?" + if not redeploy_required + else f"Do you want to update and redeploy '{name}'?" + ): + bot = bot.update( + name=new_name, + image=image, + network=network, + account=account, + environment=environment if set_environment else None, + ) + # NOTE: Skip machine `.id` + click.echo(yaml.safe_dump(bot.model_dump(exclude={"id", "environment"}))) + if bot.environment: + click.echo("environment:") + click.echo(yaml.safe_dump([var.name for var in bot.environment])) -@bot.command(name="rm") +@bots.command(name="remove", section="Configuration Commands") @click.argument("name", metavar="BOT") -def rm_bot(client: ClusterClient, name: str): - """Remove BOT from CLUSTER""" +def remove_bot(client: ClusterClient, name: str): + """Remove BOT from CLUSTER (Shutdown if running)""" if not (bot := client.bots.get(name)): raise click.UsageError(f"Unknown bot '{name}'.") - bot.remove() - click.secho(f"Bot '{bot.name}' removed.", fg="green", bold=True) + elif click.confirm(f"Do you want to shutdown and delete '{name}'?"): + bot.remove() + click.secho(f"Bot '{bot.name}' removed.", fg="green", bold=True) + + +@bots.command(name="health", section="Bot Operation Commands") +@click.argument("bot_name", metavar="BOT") +def 
bot_health(client: ClusterClient, bot_name: str): + """Show current health of BOT in a CLUSTER""" + + if not (bot := client.bots.get(bot_name)): + raise click.UsageError(f"Unknown bot '{bot_name}'.") + + click.echo(yaml.safe_dump(bot.health.model_dump(exclude={"bot_id"}))) -@bot.command(name="start") +@bots.command(name="start", section="Bot Operation Commands") @click.argument("name", metavar="BOT") def start_bot(client: ClusterClient, name: str): """Start BOT running in CLUSTER (if stopped or terminated)""" @@ -557,10 +578,12 @@ def start_bot(client: ClusterClient, name: str): if not (bot := client.bots.get(name)): raise click.UsageError(f"Unknown bot '{name}'.") - bot.start() + elif click.confirm(f"Do you want to start running '{name}'?"): + bot.start() + click.secho(f"Bot '{bot.name}' starting...", fg="green", bold=True) -@bot.command(name="stop") +@bots.command(name="stop", section="Bot Operation Commands") @click.argument("name", metavar="BOT") def stop_bot(client: ClusterClient, name: str): """Stop BOT from running in CLUSTER (if running)""" @@ -568,10 +591,12 @@ def stop_bot(client: ClusterClient, name: str): if not (bot := client.bots.get(name)): raise click.UsageError(f"Unknown bot '{name}'.") - bot.stop() + elif click.confirm(f"Do you want to stop '{name}' from running?"): + bot.stop() + click.secho(f"Bot '{bot.name}' stopping...", fg="green", bold=True) -@bot.command(name="logs") +@bots.command(name="logs", section="Bot Operation Commands") @click.argument("name", metavar="BOT") def show_bot_logs(client: ClusterClient, name: str): """Show runtime logs for BOT in CLUSTER""" @@ -583,7 +608,7 @@ def show_bot_logs(client: ClusterClient, name: str): click.echo(log) -@bot.command(name="errors") +@bots.command(name="errors", section="Bot Operation Commands") @click.argument("name", metavar="BOT") def show_bot_errors(client: ClusterClient, name: str): """Show unacknowledged errors for BOT in CLUSTER""" diff --git a/silverback/cluster/client.py 
b/silverback/cluster/client.py index 817385d8..73aba9d5 100644 --- a/silverback/cluster/client.py +++ b/silverback/cluster/client.py @@ -5,7 +5,16 @@ from silverback.version import version -from .types import BotInfo, ClusterConfiguration, ClusterInfo, ClusterState, EnvInfo, WorkspaceInfo +from .types import ( + BotHealth, + BotInfo, + ClusterConfiguration, + ClusterHealth, + ClusterInfo, + ClusterState, + VariableGroupInfo, + WorkspaceInfo, +) DEFAULT_HEADERS = {"User-Agent": f"Silverback SDK/{version}"} @@ -44,25 +53,34 @@ def render_error(error: dict): assert response.status_code < 300, "Should follow redirects, so not sure what the issue is" -class Env(EnvInfo): +class VariableGroup(VariableGroupInfo): # NOTE: Client used only for this SDK # NOTE: DI happens in `ClusterClient.__init__` cluster: ClassVar["ClusterClient"] - def update(self, name: str | None = None): - response = self.cluster.put(f"/variables/{self.id}", json=dict(name=name)) - handle_error_with_response(response) - - @property - def revisions(self) -> list[EnvInfo]: - response = self.cluster.get(f"/variables/{self.id}") - handle_error_with_response(response) - return [EnvInfo.model_validate(env_info) for env_info in response.json()] + def __hash__(self) -> int: + return int(self.id) - def add_revision(self, variables: dict[str, str | None]) -> "Env": - response = self.cluster.post(f"/variables/{self.id}", json=dict(variables=variables)) + def update( + self, name: str | None = None, variables: dict[str, str | None] | None = None + ) -> "VariableGroup": + if name is not None: + # Update metadata + response = self.cluster.put(f"/variables/{self.id}", json=dict(name=name)) + handle_error_with_response(response) + + if variables is not None: + # Create a new revision + response = self.cluster.post(f"/variables/{self.id}", json=dict(variables=variables)) + handle_error_with_response(response) + return VariableGroup.model_validate(response.json()) + + return self + + def get_revision(self, revision: 
int) -> VariableGroupInfo: + response = self.cluster.get(f"/variables/{self.id}/{revision}") handle_error_with_response(response) - return Env.model_validate(response.json()) + return VariableGroupInfo.model_validate(response.json()) def remove(self): response = self.cluster.delete(f"/variables/{self.id}") @@ -80,13 +98,13 @@ def update( image: str | None = None, network: str | None = None, account: str | None = None, - environment: list[EnvInfo] | None = None, - ): + environment: list[VariableGroupInfo] | None = None, + ) -> "Bot": form: dict = dict( name=name, - network=network, account=account, image=image, + network=network, ) if environment: @@ -96,13 +114,24 @@ def update( response = self.cluster.put(f"/bots/{self.id}", json=form) handle_error_with_response(response) + return Bot.model_validate(response.json()) + + @property + def health(self) -> BotHealth: + response = self.cluster.get("/health") # TODO: Migrate this endpoint + # response = self.cluster.get(f"/bots/{self.id}/health") + handle_error_with_response(response) + raw_health = next(bot for bot in response.json()["bots"] if bot["bot_id"] == str(self.id)) + return BotHealth.model_validate(raw_health) # response.json()) TODO: Migrate this endpoint def stop(self): response = self.cluster.post(f"/bots/{self.id}/stop") handle_error_with_response(response) def start(self): - response = self.cluster.post(f"/bots/{self.id}/start") + # response = self.cluster.post(f"/bots/{self.id}/start") TODO: Add `/start` + # NOTE: Currently, a noop PUT request will trigger a start + response = self.cluster.put(f"/bots/{self.id}", json=dict(name=self.name)) handle_error_with_response(response) @property @@ -128,7 +157,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # DI for other client classes - Env.cluster = self # Connect to cluster client + VariableGroup.cluster = self # Connect to cluster client Bot.cluster = self # Connect to cluster client def send(self, request, *args, **kwargs): @@ 
-143,6 +172,11 @@ def send(self, request, *args, **kwargs): def openapi_schema(self) -> dict: return self.get("/openapi.json").json() + @property + def version(self) -> str: + # NOTE: Does not call routes + return self.openapi_schema["info"]["version"] + @property def state(self) -> ClusterState: response = self.get("/") @@ -150,21 +184,27 @@ def state(self) -> ClusterState: return ClusterState.model_validate(response.json()) @property - def envs(self) -> dict[str, Env]: + def health(self) -> ClusterHealth: + response = self.get("/health") + handle_error_with_response(response) + return ClusterHealth.model_validate(response.json()) + + @property + def variable_groups(self) -> dict[str, VariableGroup]: response = self.get("/variables") handle_error_with_response(response) - return {env.name: env for env in map(Env.model_validate, response.json())} + return {vg.name: vg for vg in map(VariableGroup.model_validate, response.json())} - def new_env(self, name: str, variables: dict[str, str]) -> EnvInfo: + def new_variable_group(self, name: str, variables: dict[str, str]) -> VariableGroup: response = self.post("/variables", json=dict(name=name, variables=variables)) handle_error_with_response(response) - return EnvInfo.model_validate(response.json()) + return VariableGroup.model_validate(response.json()) @property def bots(self) -> dict[str, Bot]: - response = self.get("/bots") # TODO: rename `/bots` + response = self.get("/bots") handle_error_with_response(response) - return {bot.slug: bot for bot in map(Bot.model_validate, response.json())} + return {bot.name: bot for bot in map(Bot.model_validate, response.json())} def new_bot( self, @@ -172,7 +212,7 @@ def new_bot( image: str, network: str, account: str | None = None, - environment: list[EnvInfo] | None = None, + environment: list[VariableGroupInfo] | None = None, ) -> Bot: form: dict = dict( name=name, diff --git a/silverback/cluster/types.py b/silverback/cluster/types.py index 075f4c50..7d9754e2 100644 --- 
a/silverback/cluster/types.py +++ b/silverback/cluster/types.py @@ -2,13 +2,9 @@ import math import uuid from datetime import datetime -from hashlib import blake2s from typing import Annotated -from pydantic import BaseModel, Field, field_validator, model_validator - -# NOTE: All configuration settings must be uint8 integer values -UINT8_MAX = 2**8 - 1 +from pydantic import BaseModel, Field, computed_field, field_validator class WorkspaceInfo(BaseModel): @@ -21,31 +17,42 @@ class WorkspaceInfo(BaseModel): class ClusterConfiguration(BaseModel): """Configuration of the cluster (represented as 16 byte value)""" + # NOTE: This configuration must be encode-able to a uint64 value for db storage + # and on-chain processing through ApePay + # NOTE: All defaults should be the minimal end of the scale, # so that `__or__` works right - # Version byte (Byte 1) + # Version byte (Byte 0) + # NOTE: Just in-case we change this after release version: int = 1 - # Bot Worker Configuration (Bytes 2-3) + # Bot Worker Configuration (Bytes 1-2) cpu: Annotated[int, Field(ge=0, le=16)] = 0 # 0.25 vCPU """Allocated vCPUs per bot: 0.25 vCPU (0) to 16 vCPU (6)""" memory: Annotated[int, Field(ge=0, le=120)] = 0 # 512 MiB """Total memory per bot (in GB)""" - # Runner configuration (Bytes 4-6) + # NOTE: Configure # of workers based on cpu & memory settings + + # Runner configuration (Bytes 3-5) networks: Annotated[int, Field(ge=1, le=20)] = 1 """Maximum number of concurrent network runners""" bots: Annotated[int, Field(ge=1, le=250)] = 1 """Maximum number of concurrent bots running""" - triggers: Annotated[int, Field(ge=5, le=1000, multiple_of=5)] = 30 + triggers: Annotated[int, Field(ge=50, le=1000, multiple_of=5)] = 50 """Maximum number of task triggers across all running bots""" - # TODO: Recorder configuration - # NOTE: Bytes 7-15 empty + # Recorder configuration (Byte 6) + storage: Annotated[int, Field(ge=0, le=250)] = 0 # 512 GB + """Total task results and metrics parquet storage (in 
TB)""" + + # Cluster general configuration (Byte 7) + secrets: Annotated[int, Field(ge=10, le=100)] = 10 + """Total managed secrets""" @field_validator("cpu", mode="before") def parse_cpu_value(cls, value: str | int) -> int: @@ -67,32 +74,58 @@ def parse_memory_value(cls, value: str | int) -> int: assert units.lower() == "gb" return int(mem) + @field_validator("storage", mode="before") + def parse_storage_value(cls, value: str | int) -> int: + if not isinstance(value, str): + return value + + storage, units = value.split(" ") + if units.lower() == "gb": + assert storage == "512" + return 0 + + assert units.lower() == "tb" + return int(storage) + + @staticmethod + def _decode_byte(value: int, byte: int) -> int: + # NOTE: All configuration settings must be uint8 integer values when encoded + return (value >> (8 * byte)) & (2**8 - 1) # NOTE: max uint8 + @classmethod def decode(cls, value: int) -> "ClusterConfiguration": - """Decode the configuration from 16 byte integer value""" + """Decode the configuration from 8 byte integer value""" if isinstance(value, ClusterConfiguration): return value # TODO: Something weird with SQLModel # NOTE: Do not change the order of these, these are not forwards compatible return cls( - version=value & UINT8_MAX, - cpu=(value >> 8) & UINT8_MAX, - memory=(value >> 16) & UINT8_MAX, - networks=(value >> 24) & UINT8_MAX, - bots=(value >> 32) & UINT8_MAX, - triggers=5 * ((value >> 40) & UINT8_MAX), + version=cls._decode_byte(value, 0), + cpu=cls._decode_byte(value, 1), + memory=cls._decode_byte(value, 2), + networks=cls._decode_byte(value, 3), + bots=cls._decode_byte(value, 4), + triggers=5 * cls._decode_byte(value, 5), + storage=cls._decode_byte(value, 6), + secrets=cls._decode_byte(value, 7), ) + @staticmethod + def _encode_byte(value: int, byte: int) -> int: + return value << (8 * byte) + def encode(self) -> int: - """Encode configuration as 16 byte integer value""" + """Encode configuration as 8 byte integer value""" # NOTE: Do not 
change the order of these, these are not forwards compatible return ( - self.version - + (self.cpu << 8) - + (self.memory << 16) - + (self.networks << 24) - + (self.bots << 32) - + (self.triggers // 5 << 40) + self._encode_byte(self.version, 0) + + self._encode_byte(self.cpu, 1) + + self._encode_byte(self.memory, 2) + + self._encode_byte(self.networks, 3) + + self._encode_byte(self.bots, 4) + + self._encode_byte(self.triggers // 5, 5) + + self._encode_byte(self.storage, 6) + + self._encode_byte(self.secrets, 7) ) @@ -104,27 +137,60 @@ class ClusterTier(enum.IntEnum): memory="512 MiB", networks=3, bots=5, - triggers=30, + triggers=50, + storage="512 GB", + secrets=10, ).encode() PROFESSIONAL = ClusterConfiguration( cpu="1 vCPU", memory="2 GB", networks=10, bots=20, - triggers=120, + triggers=400, + storage="5 TB", + secrets=25, ).encode() def configuration(self) -> ClusterConfiguration: return ClusterConfiguration.decode(int(self)) -class ClusterStatus(enum.IntEnum): - # NOTE: Selected integer values with some space for other steps - CREATED = 0 # User record created, but not paid for yet - STANDUP = 3 # Payment received, provisioning infrastructure - RUNNING = 5 # Paid for and fully deployed by payment handler - TEARDOWN = 6 # User triggered shutdown or payment expiration recorded - REMOVED = 9 # Infrastructure de-provisioning complete +class ResourceStatus(enum.IntEnum): + """ + Generic enum that represents the status of any associated resource or service. + + ```{note} + Calling `str(...)` on this will produce a human-readable status for display. 
+ ``` + """ + + CREATED = 0 + """Resource record created, but not provisioning yet (likely awaiting payment)""" + + # NOTE: `1` is reserved + + PROVISIONING = 2 + """Resource is provisioning infrastructure (on payment received)""" + + STARTUP = 3 + """Resource is being put into the RUNNING state""" + + RUNNING = 4 + """Resource is in good health (Resource itself should be reporting status now)""" + + # NOTE: `5` is reserved + + SHUTDOWN = 6 + """Resource is being put into the STOPPED state""" + + STOPPED = 7 + """Resource has stopped (due to errors, user action, or resource constraints)""" + + DEPROVISIONING = 8 + """User removal action or payment expiration event triggered""" + + REMOVED = 9 + """Infrastructure de-provisioning complete (Cannot change from this state)""" def __str__(self) -> str: return self.name.capitalize() @@ -133,16 +199,17 @@ def __str__(self) -> str: class ClusterInfo(BaseModel): # NOTE: Raw API object (gets exported) id: uuid.UUID # NOTE: Keep this private, used as a temporary secret key for payment - name: str - slug: str - configuration: ClusterConfiguration + version: str | None # NOTE: Unprovisioned clusters have no known version yet + configuration: ClusterConfiguration | None = None # NOTE: self-hosted clusters have no config - created: datetime - status: ClusterStatus - last_updated: datetime + name: str # User-friendly display name + slug: str # Shorthand name, for CLI and URI usage + + created: datetime # When the resource was first created + status: ResourceStatus + last_updated: datetime # Last time the resource was changed (upgrade, provisioning, etc.) 
-# TODO: Merge `/health` with `/` class ClusterState(BaseModel): """ Cluster Build Information and Configuration, direct from cluster control service @@ -150,51 +217,65 @@ class ClusterState(BaseModel): version: str = Field(alias="cluster_version") # TODO: Rename in cluster configuration: ClusterConfiguration | None = None # TODO: Add to cluster - # TODO: Add other useful summary fields for frontend use (`bots: int`, `errors: int`, etc.) + # TODO: Add other useful summary fields for frontend use -class EnvInfo(BaseModel): - id: uuid.UUID +class ServiceHealth(BaseModel): + healthy: bool + - @model_validator(mode="before") - def set_expected_fields(cls, data: dict) -> dict: - name: str = data["name"] - instance_id: str = f"{name}.{data['revision']}" - name_hash = blake2s(instance_id.encode("utf-8")) - data["id"] = uuid.UUID(bytes=name_hash.digest()[:16]) - data["variables"] = list(data["variables"]) - return data +class ClusterHealth(BaseModel): + ars: ServiceHealth = Field(exclude=True) # TODO: Replace w/ cluster + ccs: ServiceHealth = Field(exclude=True) # TODO: Replace w/ cluster + bots: dict[str, ServiceHealth] = {} + @field_validator("bots", mode="before") # TODO: Fix so this is default + def convert_bot_health(cls, bots): + return {b["instance_id"]: ServiceHealth.model_validate(b) for b in bots} + + @computed_field + def cluster(self) -> ServiceHealth: + return ServiceHealth(healthy=self.ars.healthy and self.ccs.healthy) + + +class VariableGroupInfo(BaseModel): + id: uuid.UUID name: str revision: int - variables: list[str] # TODO: Change to list + variables: list[str] created: datetime +class EnvironmentVariable(BaseModel): + name: str + group_id: uuid.UUID + group_revision: int -class BotInfo(BaseModel): - id: uuid.UUID # TODO: Change `.instance_id` field to `id: UUID` - # TODO: Add `.network`, `.slug`, `.network` fields to cluster model - @model_validator(mode="before") - def set_expected_fields(cls, data: dict) -> dict: - instance_id: str = 
data.get("instance_id", "random:network:") - name_hash = blake2s(instance_id.encode("utf-8")) - data["id"] = uuid.UUID(bytes=name_hash.digest()[:16]) - ecosystem, network, name = instance_id.split(":") - data["slug"] = name - data["name"] = name.capitalize() - data["network"] = f"{ecosystem}:{network}" - return data +class BotTaskStatus(BaseModel): + last_status: str + exit_code: int | None + reason: str | None + started_at: datetime | None + stop_code: str | None + stopped_at: datetime | None + stopped_reason: str | None + + +class BotHealth(BaseModel): + bot_id: uuid.UUID + task_status: BotTaskStatus | None + healthy: bool - slug: str - name: str - network: str - # TODO: More config fields (`.description`, `.image`, `.account`, `.environment`) +class BotInfo(BaseModel): + id: uuid.UUID # TODO: Change `.instance_id` field to `id: UUID` + name: str + created: datetime - # Other fields that are currently in there (TODO: Remove) - config_set_name: str - config_set_revision: int + image: str + network: str + account: str | None revision: int - terminated: bool + + environment: list[EnvironmentVariable] = []