Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refresh request map for every process #51

Merged
merged 5 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llm/cleanup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Clean up Kubernetes resources associated with a deployment.
"""

import argparse
import sys
import requests
Expand Down
1 change: 1 addition & 0 deletions llm/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Downloads model files, generates Model Archive (MAR)
and config.properties file
"""

import os
import argparse
import json
Expand Down
14 changes: 7 additions & 7 deletions llm/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The handler provides functions to preprocess input data, make predictions using the model,
and post-process the output for a particular use case.
"""

import logging
import os
from abc import ABC
Expand Down Expand Up @@ -73,16 +74,12 @@ class LLMHandler(BaseHandler, ABC):
def __init__(self):
super().__init__()
self.initialized = False
self.request = {
"request_list": defaultdict(int),
"request_ids": defaultdict(int),
"request_type": defaultdict(int),
}
self.tokenizer = None
self.map_location = None
self.device = None
self.device_map = None
self.model = None
self.request = None

def initialize(self, context: ts.context.Context):
"""
Expand Down Expand Up @@ -155,6 +152,11 @@ def preprocess(self, data: List) -> torch.Tensor:
Tensor: Tokenized input data
"""
input_list = []
self.request = {
"request_list": defaultdict(int),
"request_ids": defaultdict(int),
"request_type": defaultdict(int),
}

for idx, input_data in enumerate(data):
# Pre-process for Kserve v2 format
Expand Down Expand Up @@ -182,7 +184,6 @@ def preprocess(self, data: List) -> torch.Tensor:
self.request["request_type"][idx] = "raw"
input_list.append(row_input)

logger.info("Received text: %s", ", ".join(map(str, input_list)))
encoded_input = self.tokenizer(input_list, padding=True, return_tensors="pt")[
"input_ids"
].to(self.device)
Expand Down Expand Up @@ -225,7 +226,6 @@ def inference(self, data: torch.Tensor, *args, **kwargs) -> List:

inference = []
inference = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
logger.info("Generated text is: %s", ", ".join(map(str, inference)))
return inference

def postprocess(self, data: List) -> List:
Expand Down
1 change: 1 addition & 0 deletions llm/kubeflow_inference_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
in a Kubernetes cluster by performing tasks such as creating
persistent storage, registering the model, and running inference.
"""

import argparse
import sys
import os
Expand Down
1 change: 1 addition & 0 deletions llm/tests/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
MODEL_CONFIG_PATH: Path to model_config.json file.
MODEL_TEMP_CONFIG_PATH: Path to backup model_config.json file.
"""

import os
import argparse
import json
Expand Down
1 change: 1 addition & 0 deletions llm/utils/generate_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
function set_values that sets the GenerateDataModel attributes and
function set_model_files_and_mar that sets model path and mar output values.
"""

import os
import dataclasses
import argparse
Expand Down
1 change: 1 addition & 0 deletions llm/utils/hf_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utility functions for using the HuggingFace API
"""

import sys
from typing import List
from huggingface_hub import HfApi
Expand Down
1 change: 1 addition & 0 deletions llm/utils/marsgen.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Generate a Model Archive (MAR) file for a specified LLM.
"""

import json
import os
import sys
Expand Down
2 changes: 1 addition & 1 deletion llm/utils/model_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
torch==2.0.1
tokenizers==0.15.0
transformers==4.36.0
transformers==4.38.1
accelerate==0.22.0
einops==0.6.1
bitsandbytes==0.41.1
Expand Down
1 change: 1 addition & 0 deletions llm/utils/system_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utility functions to handle file and folder operations
"""

import os
import sys
import shutil
Expand Down
1 change: 1 addition & 0 deletions llm/utils/tsutils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Utility functions for running inference and getting model parameters
"""

import os
import json
import collections
Expand Down
Loading