Skip to content

Commit

Permalink
Merge pull request #87 from NimbleBoxAI/v1
Browse files Browse the repository at this point in the history
0.13.0rc18
  • Loading branch information
yashbonde authored Jun 13, 2023
2 parents 9d03779 + 697d08f commit 319f000
Show file tree
Hide file tree
Showing 37 changed files with 551 additions and 4,205 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,6 @@ notebooks/
# no need to add a trailing / for a symlink
.nbx
ex_jobs
scripts
scripts
.nboxignore
stories/requirements.txt
18 changes: 13 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,32 @@ Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%2

## 🧐 What is Nbox?

`nbox` provides first class API support for all NimbleBox.ai infrastructure (NBX-Build, Jobs, Deploy) and services (NBX-Workspaces) components. Write jobs using `nbox.Operators`
`nbox` provides first-class CLI and Python package support for all NimbleBox infrastructure and services. You can use it from the command line or import it as a library in your Python code.

## Installation

```bash
# on macos find the correct wheel file based on python version: https://github.com/pietrodn/grpcio-mac-arm-build/releases/tag/1.51.1
pip install <wheel_url>
pip install nbox

# on linux
pip install nbox
```

Next you need to authenticate yourself with the CLI:

```bash
nbx login
```

# Stability and Compatibility

Status: The library is currently undergoing heavy development.

☝️ Important Note: The current major version is zero (v0.x.x) to accommodate rapid development and fast iteration while getting early feedback from users (feedback on the APIs is appreciated!). **The public API may change** without a major version update before the v1.0.0 release.
- `nbx projects`:
- `nbx projects - artifacts --help` stable ✅
- `nbx projects - run --help` stable ✅
- `nbx jobs --help`: mostly stable 🟡
- `nbx serve --help`: mostly stable 🟡


# 🤷Why NimbleBox

Expand Down
1 change: 0 additions & 1 deletion nbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from nbox.jobs import Job, Serve, Schedule
from nbox.instance import Instance
from nbox.relics import Relics
from nbox.lmao import Lmao, LmaoLive
from nbox.network import zip_to_nbox_folder
from nbox.version import __version__
from nbox.hyperloop.common.common_pb2 import Resource
Expand Down
8 changes: 2 additions & 6 deletions nbox/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@
from nbox.instance import Instance
from nbox.sub_utils.ssh import tunnel
from nbox.relics import Relics
from nbox.lmao import LmaoCLI
from nbox.lmao_v4 import LmaoCLI as Lmaov4CLI
from nbox.version import __version__ as V
from nbox.projects import Project
from nbox.utils import logger, lo
from nbox.plugins.base import PluginCLI
# from nbox.plugins.base import PluginCLI

# from nbox.jobs import Job, Serve
from nbox.jd_core import JobsCli, ServeCli
Expand Down Expand Up @@ -181,16 +180,13 @@ def main():
"build" : Instance,
"config" : Config,
"get" : get,
# "jobs" : Job,
"jobs" : JobsCli,
# "lmao" : LmaoCLI,
"lmao" : Lmaov4CLI,
"login" : login,
"open" : open_home,
"plugins" : PluginCLI,
# "plugins" : PluginCLI,
"projects" : Project,
"relics" : Relics,
# "serve" : Serve,
"serve" : ServeCli,
"tunnel" : tunnel,
"version" : version,
Expand Down
84 changes: 48 additions & 36 deletions nbox/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,25 @@ class Instance():
status = staticmethod(print_status)

# each instance has a lot of data against it, we need to store only a few as attributes
useful_keys = ["project_id", "project_name", "size_used", "size", "state"]
useful_keys = ["instance_id", "project_name", "size_used", "size", "state"]

def __init__(self, i: str, *, workspace_id: str = ""):
"""NBX-Build Instance class manages the both individual instance, but provides webserver functionality using
`nbox_ws_v1`, such as starting and stopping, deletion and more.
Args:
i (str): name or `project_id` of the instance
i (str): name or `instance_id` of the instance
"""
if not i:
raise ValueError("Instance id must be provided, try --i='1023'")

# if user provided a number we assume that they gave an instance ID, this is a weak assumption because
# people usually use names.
_instance_id = isinstance(i, int)
i = str(i)

# simply add useful keys to the instance
self.project_id: str = None
self.instance_id: str = None
self.project_name: str = None
self.workspace_id: str = workspace_id or secret.workspace_id
self.size_used: float = None
Expand All @@ -113,31 +117,36 @@ def __init__(self, i: str, *, workspace_id: str = ""):
stub_ws_instance = create_webserver_subway("v1", sess)
stub_projects = stub_ws_instance.instances

# filter and get the data
project_details = stub_projects()["project_details"]
# print(project_details)
if i not in project_details:
by_name = list(filter(lambda x: x[1]['project_name'] == i, list(project_details.items())))
if len(by_name) == 0:
raise ValueError(f"Instance '{i}' not found")
elif len(by_name) > 1:
raise ValueError(f"Multiple instances with name '{i}' found")
data = by_name[0]
project_id = data[0]
data = data[1]
if _instance_id:
# if user provided an instance id, we can directly get the data
data = stub_projects.u(i)()
instance_id = i
else:
data = project_details[i]
project_id = i
data["project_id"] = project_id
logger.info(f"Found instance '{data['project_name']}' ({data['project_id']})")
# else filter and get the data
project_details = stub_projects()["project_details"]
if i not in project_details:
by_name = list(filter(lambda x: x[1]['project_name'] == i, list(project_details.items())))
if len(by_name) == 0:
raise ValueError(f"Instance '{i}' not found")
elif len(by_name) > 1:
raise ValueError(f"Multiple instances with name '{i}' found")
data = by_name[0]
instance_id = data[0]
data = data[1]
else:
data = project_details[i]
instance_id = i

data["instance_id"] = instance_id
logger.info(f"Found instance '{data['project_name']}' ({data['instance_id']})")
# print(data)
for x in self.useful_keys:
for x in Instance.useful_keys:
setattr(self, x, data[x])

# some data points require extra processing before usage
self.custom_ports: Dict[str, int] = loads(data["custom_ports"]) if data["custom_ports"] is not None else {}
self.exposed_ports: Dict[str, int] = loads(data["exposed_ports"]) if data["exposed_ports"] is not None else {}
self.stub_ws_instance = stub_projects.u(self.project_id)
self.stub_ws_instance = stub_projects.u(self.instance_id)
logger.debug(f"WS: {self.stub_ws_instance}")

# set values
Expand Down Expand Up @@ -257,13 +266,13 @@ def is_running(self) -> bool:

def refresh(self):
"""Update the data, get latest state"""
self.data = self.stub_ws_instance() # GET /user/projects/{project_id}
self.data = self.stub_ws_instance() # GET /user/projects/{instance_id}
for k in self.useful_keys:
setattr(self, k, self.data[k])

def _start(self, cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone):
"""Turn on the the unserlying compute"""
logger.info(f"Starting instance {self.project_name} ({self.project_id})")
logger.info(f"Starting instance {self.project_name} ({self.instance_id})")
hw_config = {
"cpu":f"n1-standard-{cpu}"
}
Expand All @@ -283,21 +292,21 @@ def _start(self, cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone):
region = zone
)

logger.info(f"Waiting for instance {self.project_name} ({self.project_id}) to start ...")
logger.info(f"Waiting for instance {self.project_name} ({self.instance_id}) to start ...")
_i = 0
while self.state != "RUNNING":
time.sleep(5)
self.refresh()
_i += 1
if _i > TIMEOUT_CALLS:
raise TimeoutError("Instance did not start within timeout, please check dashboard")
logger.info(f"Instance {self.project_name} ({self.project_id}) started")
logger.info(f"Instance {self.project_name} ({self.instance_id}) started")

def _open(self):
# now the instance is running, we can open it, opening will assign a bunch of cookies and
# then get us the exact location of the instance
if not self.__opened:
logger.debug(f"Opening instance {self.project_name} ({self.project_id})")
logger.debug(f"Opening instance {self.project_name} ({self.instance_id})")
launch_data = self.stub_ws_instance.launch(_method = "post")
base_domain = launch_data['base_domain']
self.open_data = {
Expand Down Expand Up @@ -340,7 +349,7 @@ def start(
self._start(cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone)
else:
# TODO: @yashbonde: inform user in case of hardware mismatch?
logger.info(f"Instance {self.project_name} ({self.project_id}) is already running")
logger.info(f"Instance {self.project_name} ({self.instance_id}) is already running")

# prevent rate limiting
if not self.__opened:
Expand All @@ -349,31 +358,34 @@ def start(
def stop(self):
"""Stop the Instance"""
if self.state == "STOPPED":
logger.info(f"Instance {self.project_name} ({self.project_id}) is already stopped")
logger.info(f"Instance {self.project_name} ({self.instance_id}) is already stopped")
return

logger.debug(f"Stopping instance {self.project_name} ({self.project_id})")
message = self.stub_ws_instance.stop_instance("post", data = {"instance_id":self.project_id})["msg"]
logger.debug(f"Stopping instance {self.project_name} ({self.instance_id})")
message = self.stub_ws_instance.stop(
"post",
data = {"workspace_id": secret.workspace_id, "instance_id": self.instance_id}
)["msg"]
if not message == "success":
raise ValueError(message)

logger.debug(f"Waiting for instance {self.project_name} ({self.project_id}) to stop")
logger.debug(f"Waiting for instance {self.project_name} ({self.instance_id}) to stop")
_i = 0 # timeout call counter
while self.state != "STOPPED":
time.sleep(5)
self.refresh()
_i += 1
if _i > TIMEOUT_CALLS:
raise TimeoutError("Instance did not stop within timeout, please check dashboard")
logger.debug(f"Instance {self.project_name} ({self.project_id}) stopped")
logger.debug(f"Instance {self.project_name} ({self.instance_id}) stopped")

self.__opened = False

def delete(self, force = False):
"""With great power comes great responsibility."""
if self.__opened and not force:
raise ValueError("Instance is still opened, please call .stop() first")
logger.warning(f"Deleting instance {self.project_name} ({self.project_id})")
logger.warning(f"Deleting instance {self.project_name} ({self.instance_id})")
if input(f"> Are you sure you want to delete '{self.project_name}'? (y/N): ") == "y":
self.stub_ws_instance("delete")
else:
Expand All @@ -390,8 +402,8 @@ def delete(self, force = False):
def _unopened_error(self):
if not self.__opened:
logger.error(f"You are trying to move files to a {self.state} instance, you will have to start the instance first:")
logger.error(f' - nbox.Instance("{self.project_id}", "{self.workspace_id}").start(...)')
logger.error(f' - python3 -m nbox build --i "{self.project_id}" --workspace_id "{self.workspace_id}" start --help')
logger.error(f' - nbox.Instance("{self.instance_id}", "{self.workspace_id}").start(...)')
logger.error(f' - python3 -m nbox build --i "{self.instance_id}" --workspace_id "{self.workspace_id}" start --help')
raise ValueError("Instance is not opened, please call .open() first")

def __create_connection(self, *, port: int = 6174):
Expand All @@ -401,7 +413,7 @@ def __create_connection(self, *, port: int = 6174):
# create logging for RSock
folder = U.join(U.env.NBOX_HOME_DIR(), "tunnel_logs")
os.makedirs(folder, exist_ok=True)
filepath = U.join(folder, f"tunnel_{self.project_id}.log") # consistency with IDs instead of names
filepath = U.join(folder, f"tunnel_{self.instance_id}.log") # consistency with IDs instead of names
file_logger = FileLogger(filepath)
logger.debug(f"Logging RSock server to {filepath}")

Expand Down
2 changes: 1 addition & 1 deletion nbox/jd_core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from nbox.jd_core.schedule import Schedule
from nbox.jd_core.jobs import Job, _get_job_data, get_job_list
from nbox.jd_core.jobs import Job, _get_job_data, print_job_list, new_job
from nbox.jd_core.serving import Serve, _get_deployment_data, print_serving_list
from nbox.jd_core.upload import upload_job_folder
from nbox.jd_core.cli import JobsCli, ServeCli
10 changes: 6 additions & 4 deletions nbox/jd_core/cli.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from functools import partial

from nbox.jd_core.upload import upload_job_folder
from nbox.jd_core.jobs import get_job_list, Job
from nbox.jd_core.jobs import print_job_list, Job, new_job, get_job_list
from nbox.jd_core.serving import print_serving_list, Serve

JobsCli = {
"status": get_job_list,
"status": print_job_list,
"list": print_job_list,
"upload": partial(upload_job_folder, "job"),
"get": Job,
"pick": Job,
"new": new_job,
}


ServeCli = {
"status": print_serving_list,
"upload": partial(upload_job_folder, "serving"),
"get": Serve,
"pick": Serve,
}
Loading

0 comments on commit 319f000

Please sign in to comment.