Skip to content

Commit

Permalink
feat: add tabby-eval package to run e2e evaluation on modal (#893)
Browse files Browse the repository at this point in the history
* updating predict.py, adding tabby_python_client package

* update predict.py

* move to tabby-eval directory

* delete old files

* update predict.py

* delete other folder files
  • Loading branch information
yan91083 authored Nov 25, 2023
1 parent 1883058 commit 39962c7
Show file tree
Hide file tree
Showing 34 changed files with 2,436 additions and 0 deletions.
212 changes: 212 additions & 0 deletions python/tabby-eval/modal/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
import asyncio
import json
import modal
import os
import pandas as pd

from collections import namedtuple
from datetime import datetime
from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
from pathlib import Path
from typing import Union, List, Optional, Any, Tuple


GPU_CONFIG = gpu.A10G()

MODEL_ID = os.environ.get("MODEL_ID")
LAUNCH_FLAGS = ["serve", "--model", MODEL_ID, "--port", "8000", "--device", "cuda"]


def download_model():
    """Pre-download the Tabby model during the image build step.

    Reads MODEL_ID from the environment (baked into the image via
    ``.env()``) and invokes the tabby CLI so the model weights are cached
    inside the image.

    Raises:
        RuntimeError: if MODEL_ID is not set in the environment.
        subprocess.CalledProcessError: if the download command fails.
    """
    import subprocess
    import os

    model_id = os.environ.get("MODEL_ID")
    print(f'MODEL_ID={model_id}')
    if not model_id:
        raise RuntimeError("MODEL_ID environment variable must be set")
    # check=True: fail the image build loudly instead of baking a broken
    # image with no model weights (original ignored the exit status).
    subprocess.run(
        [
            "/opt/tabby/bin/tabby",
            "download",
            "--model",
            model_id,
        ],
        check=True,
    )


# Build the Modal container image: start from the official Tabby release,
# bake the model id into the environment, clear the upstream ENTRYPOINT so
# Modal can exec its own commands, vendor the generated API client, and
# pre-download the model weights at build time via download_model().
image = (
    Image.from_registry(
        "tabbyml/tabby:0.5.5",
        add_python="3.11",
    )
    # NOTE(review): os.environ.get("MODEL_ID") may be None when this module
    # is imported — confirm MODEL_ID is exported before `modal run`.
    .env({"MODEL_ID": os.environ.get("MODEL_ID")})
    .dockerfile_commands("ENTRYPOINT []")
    .copy_local_dir(local_path='./modal/tabby_python_client/tabby_python_client', remote_path='/root/tabby_python_client')
    .pip_install(
        "httpx",
        "pandas"
    )
    .run_function(download_model)
)

# One Modal stub per model, e.g. "tabby-StarCoderBase-1B".
stub = Stub("tabby-" + MODEL_ID.split("/")[-1], image=image)


@stub.cls(
    gpu=GPU_CONFIG,
    concurrency_limit=10,
    allow_concurrent_inputs=2,
    container_idle_timeout=60 * 10,
    timeout=600,
)
class Model:
    """Modal container class that runs a Tabby server and proxies calls to it.

    The container lifecycle starts the server subprocess on entry and
    terminates it on exit; `health` and `complete` are remote methods.
    """

    def __enter__(self):
        """Start the Tabby server subprocess and block until it accepts connections."""
        import socket
        import subprocess, os
        import time

        from tabby_python_client import Client


        my_env = os.environ.copy()
        # Opt out of telemetry inside the evaluation container.
        my_env["TABBY_DISABLE_USAGE_COLLECTION"] = "1"
        MODEL_ID = os.environ.get("MODEL_ID")
        print(f'MODEL_ID={MODEL_ID}')

        LAUNCH_FLAGS = ["serve", "--model", MODEL_ID, "--port", "8000", "--device", "cuda"]
        self.launcher = subprocess.Popen(["/opt/tabby/bin/tabby"] + LAUNCH_FLAGS, env=my_env)
        self.client = Client("http://127.0.0.1:8000", timeout=240)

        # Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs.
        def webserver_ready():
            try:
                socket.create_connection(("127.0.0.1", 8000), timeout=1).close()
                return True
            except (socket.timeout, ConnectionRefusedError):
                # Check if launcher webserving process has exited.
                # If so, a connection can never be made — fail fast instead
                # of polling forever.
                retcode = self.launcher.poll()
                if retcode is not None:
                    raise RuntimeError(
                        f"launcher exited unexpectedly with code {retcode}"
                    )
                return False

        while not webserver_ready():
            time.sleep(1.0)

        print("Tabby server ready!")

    def __exit__(self, _exc_type, _exc_value, _traceback):
        """Stop the Tabby server subprocess when the container winds down."""
        self.launcher.terminate()

    @method()
    async def health(self):
        """Return the server's /v1/health response as a plain dict."""
        from tabby_python_client.api.v1 import health

        resp = await health.asyncio(client=self.client)
        return resp.to_dict()

    @method()
    async def complete(self, language: str, index: int, prompt: str) -> Tuple[int, Optional[str], Optional[str]]:
        """Request one completion for `prompt` from the local Tabby server.

        Returns:
            (index, prediction, error): exactly one of prediction / error is
            non-None; `index` echoes the caller's row index so results can
            be matched up after asyncio.gather.
        """
        # BUGFIX: `errors` was referenced in the except clause below but
        # never imported, which raised NameError at exception-match time.
        from tabby_python_client import errors
        from tabby_python_client.api.v1 import completion
        from tabby_python_client.models import (
            CompletionRequest,
            DebugOptions,
        )
        from tabby_python_client.types import Response


        request = CompletionRequest(
            language=language, debug_options=DebugOptions(raw_prompt=prompt)
        )

        try:
            resp: Response = await completion.asyncio_detailed(
                client=self.client, json_body=request
            )

            if resp.parsed is not None:
                return index, resp.parsed.choices[0].text, None
            else:
                return index, None, f"<{resp.status_code}>"
        except errors.UnexpectedStatus as e:
            return index, None, f"error: code={e.status_code} content={e.content} error={e}"
        except Exception as e:
            return index, None, f"error type: {type(e)}"

def write_log(log: str):
    """Append a timestamped line to the shared run log at ./modal/log.txt."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open('./modal/log.txt', 'a') as log_file:
        log_file.write(f"{timestamp} : {log}\n")

def chunker(seq, size):
    """Yield successive slices of `seq` of length `size` (last may be shorter).

    Works on anything sliceable with a length (list, str, DataFrame, ...).
    Note: returns a lazy generator — the original `-> List` annotation was
    incorrect and has been dropped.
    """
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def read_dataframe_from_file(language: str, file: str) -> pd.DataFrame:
    """Load a JSONL evaluation file into a DataFrame.

    Each line is one JSON object; a 'raw_prompt' column is derived by
    prepending the cross-file context (when present) to the prompt.

    Args:
        language: language subdirectory under ./data/<model>/.
        file: JSONL file name within that directory.

    Returns:
        DataFrame with one row per JSONL line, plus the 'raw_prompt' column.
    """
    whole_path_file = f"./data/{MODEL_ID.split('/')[-1]}/{language}/{file}"
    objs = []
    with open(whole_path_file) as fin:
        for line in fin:
            obj = json.loads(line)
            # Repo-level eval sets carry extra cross-file context that must
            # be prepended to the per-file prompt.
            if 'crossfile_context' in obj:
                obj['raw_prompt'] = obj['crossfile_context']['text'] + obj['prompt']
            else:
                obj['raw_prompt'] = obj['prompt']
            objs.append(obj)

    return pd.DataFrame(objs)

@stub.local_entrypoint()
async def main(language: str, files: str):
    """Run completions for every row of the given JSONL evaluation files.

    Args:
        language: language of the evaluation set (e.g. "python").
        files: one or more file names separated by ','.
    """
    model = Model()

    health_resp = model.health.remote()
    print(f'model info:\n{health_resp}')
    # Explicit check instead of `assert` (asserts are stripped under -O).
    if health_resp['model'] != MODEL_ID:
        raise RuntimeError(
            f"server is running model {health_resp['model']!r}, expected {MODEL_ID!r}"
        )

    for file in (f.strip() for f in files.split(',')):

        df = read_dataframe_from_file(language, file)

        write_log(f'model: {MODEL_ID}; language: {language}; file: {file}: length = {len(df)}')

        # Resume support: only re-run rows without an existing prediction.
        if 'prediction' in df.columns:
            df_no_prediction = df[df['prediction'].isna()]
        else:
            df_no_prediction = df

        skipped = len(df) - len(df_no_prediction)
        success = 0
        error = 0

        # Fan out in batches of 30 concurrent remote completion calls.
        for group in chunker(df_no_prediction, 30):
            outputs = await asyncio.gather(*[model.complete.remote.aio(language, index, row['raw_prompt']) for index, row in group.iterrows()])

            for index, prediction, error_msg in outputs:
                if prediction is not None:
                    df.loc[index, 'prediction'] = prediction
                    success += 1
                else:
                    df.loc[index, 'error'] = error_msg
                    error += 1

        write_log(f"Skipped {skipped} rows, {success} rows with predictions, {error} rows with errors")

        # Write results back to the same JSONL file, one object per line.
        whole_path_file = f"./data/{MODEL_ID.split('/')[-1]}/{language}/{file}"
        with open(whole_path_file, 'w') as fout:
            for _, row in df.iterrows():
                json.dump(row.to_dict(), fout)
                fout.write('\n')

        write_log(f"model: {MODEL_ID}; language: {language}; file: {file}: end!\n")
23 changes: 23 additions & 0 deletions python/tabby-eval/modal/tabby_python_client/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
__pycache__/
build/
dist/
*.egg-info/
.pytest_cache/

# pyenv
.python-version

# Environments
.env
.venv

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# JetBrains
.idea/

/coverage.xml
/.coverage
89 changes: 89 additions & 0 deletions python/tabby-eval/modal/tabby_python_client/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# tabby-python-client
A client library for accessing Tabby Server

## Usage
First, create a client:

```python
from tabby_python_client import Client

client = Client(base_url="https://api.example.com")
```

If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:

```python
from tabby_python_client import AuthenticatedClient

client = AuthenticatedClient(base_url="https://api.example.com", token="SuperSecretToken")
```

Now call your endpoint and use your models:

```python
from tabby_python_client.models import MyDataModel
from tabby_python_client.api.my_tag import get_my_data_model
from tabby_python_client.types import Response

my_data: MyDataModel = get_my_data_model.sync(client=client)
# or if you need more info (e.g. status_code)
response: Response[MyDataModel] = get_my_data_model.sync_detailed(client=client)
```

Or do the same thing with an async version:

```python
from tabby_python_client.models import MyDataModel
from tabby_python_client.api.my_tag import get_my_data_model
from tabby_python_client.types import Response

my_data: MyDataModel = await get_my_data_model.asyncio(client=client)
response: Response[MyDataModel] = await get_my_data_model.asyncio_detailed(client=client)
```

By default, when you're calling an HTTPS API it will attempt to verify that SSL is working correctly. Using certificate verification is highly recommended most of the time, but sometimes you may need to authenticate to a server (especially an internal server) using a custom certificate bundle.

```python
client = AuthenticatedClient(
base_url="https://internal_api.example.com",
token="SuperSecretToken",
verify_ssl="/path/to/certificate_bundle.pem",
)
```

You can also disable certificate validation altogether, but beware that **this is a security risk**.

```python
client = AuthenticatedClient(
base_url="https://internal_api.example.com",
token="SuperSecretToken",
verify_ssl=False
)
```

There are more settings on the generated `Client` class which let you control more runtime behavior, check out the docstring on that class for more info.

Things to know:
1. Every path/method combo becomes a Python module with four functions:
1. `sync`: Blocking request that returns parsed data (if successful) or `None`
1. `sync_detailed`: Blocking request that always returns a `Response`, optionally with `parsed` set if the request was successful.
1. `asyncio`: Like `sync` but async instead of blocking
1. `asyncio_detailed`: Like `sync_detailed` but async instead of blocking

1. All path/query params, and bodies become method arguments.
1. If your endpoint had any tags on it, the first tag will be used as a module name for the function (my_tag above)
1. Any endpoint which did not have a tag will be in `tabby_python_client.api.default`

## Building / publishing this Client
This project uses [Poetry](https://python-poetry.org/) to manage dependencies and packaging. Here are the basics:
1. Update the metadata in pyproject.toml (e.g. authors, version)
1. If you're using a private repository, configure it with Poetry
1. `poetry config repositories.<your-repository-name> <url-to-your-repository>`
1. `poetry config http-basic.<your-repository-name> <username> <password>`
1. Publish the client with `poetry publish --build -r <your-repository-name>` or, if for public PyPI, just `poetry publish --build`

If you want to install this client into another project without publishing it (e.g. for development) then:
1. If that project **is using Poetry**, you can simply do `poetry add <path-to-this-client>` from that project
1. If that project is not using Poetry:
1. Build a wheel with `poetry build -f wheel`
1. Install that wheel from the other project `pip install <path-to-wheel>`
16 changes: 16 additions & 0 deletions python/tabby-eval/modal/tabby_python_client/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Formatter / import-sorter configuration for the generated client package.

[tool.black]
line-length = 120
target_version = ['py38', 'py39', 'py310', 'py311']
exclude = '''
(
/(
| \.git
| \.venv
| \.mypy_cache
)/
)
'''

# Keep isort output compatible with black's formatting.
[tool.isort]
line_length = 120
profile = "black"
18 changes: 18 additions & 0 deletions python/tabby-eval/modal/tabby_python_client/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pathlib

from setuptools import find_packages, setup

# The package README doubles as the PyPI long description.
project_root = pathlib.Path(__file__).parent.resolve()
readme_text = (project_root / "README.md").read_text(encoding="utf-8")

setup(
    name="tabby-python-client",
    version="0.4.0-dev",
    description="A client library for accessing Tabby Server",
    long_description=readme_text,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    python_requires=">=3.8, <4",
    install_requires=["httpx >= 0.15.0, < 0.25.0", "attrs >= 21.3.0", "python-dateutil >= 2.8.0, < 3"],
    # Ship the py.typed marker so type checkers pick up inline annotations.
    package_data={"tabby_python_client": ["py.typed"]},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
""" A client library for accessing Tabby Server """
from .client import AuthenticatedClient, Client

__all__ = (
"AuthenticatedClient",
"Client",
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
""" Contains methods for accessing the API """
Empty file.
Loading

0 comments on commit 39962c7

Please sign in to comment.