nutanix · johnugeorge · Feb 28, 2024 · Feb 26, 2024 · Feb 26, 2024 · Feb 26, 2024
diff --git a/llm/cleanup.py b/llm/cleanup.py
@@ -1,6 +1,7 @@
 """
 Clean up Kubernetes resources associated with a deployment.
 """
+
 import argparse
 import sys
 import requests

diff --git a/llm/generate.py b/llm/generate.py
@@ -2,6 +2,7 @@
 Downloads model files, generates Model Archive (MAR) 
 and config.properties file
 """
+
 import os
 import argparse
 import json

diff --git a/llm/handler.py b/llm/handler.py
@@ -3,6 +3,7 @@
 The handler provides functions to preprocess input data, make predictions using the model,
 and post-process the output for a particular use case.
 """
+
 import logging
 import os
 from abc import ABC
@@ -73,16 +74,12 @@ class LLMHandler(BaseHandler, ABC):
  def __init__(self):
  super().__init__()
  self.initialized = False
- self.request = {
- "request_list": defaultdict(int),
- "request_ids": defaultdict(int),
- "request_type": defaultdict(int),
- }
  self.tokenizer = None
  self.map_location = None
  self.device = None
  self.device_map = None
  self.model = None
+ self.request = None
 
  def initialize(self, context: ts.context.Context):
  """
@@ -155,6 +152,11 @@ def preprocess(self, data: List) -> torch.Tensor:
  Tensor: Tokenized input data
  """
  input_list = []
+ self.request = {
+ "request_list": defaultdict(int),
+ "request_ids": defaultdict(int),
+ "request_type": defaultdict(int),
+ }
 
  for idx, input_data in enumerate(data):
  # Pre-process for Kserve v2 format
@@ -182,7 +184,6 @@ def preprocess(self, data: List) -> torch.Tensor:
  self.request["request_type"][idx] = "raw"
  input_list.append(row_input)
 
- logger.info("Received text: %s", ", ".join(map(str, input_list)))
  encoded_input = self.tokenizer(input_list, padding=True, return_tensors="pt")[
  "input_ids"
  ].to(self.device)
@@ -225,7 +226,6 @@ def inference(self, data: torch.Tensor, *args, **kwargs) -> List:
 
  inference = []
  inference = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
- logger.info("Generated text is: %s", ", ".join(map(str, inference)))
  return inference
 
  def postprocess(self, data: List) -> List:

diff --git a/llm/kubeflow_inference_run.py b/llm/kubeflow_inference_run.py
@@ -3,6 +3,7 @@
 in a Kubernetes cluster by performing tasks such as creating 
 persistent storage, registering the model, and running inference.
 """
+
 import argparse
 import sys
 import os

diff --git a/llm/tests/test_generate.py b/llm/tests/test_generate.py
@@ -7,6 +7,7 @@
  MODEL_CONFIG_PATH: Path to model_config.json file.
  MODEL_TEMP_CONFIG_PATH: Path to backup model_config.json file.
 """
+
 import os
 import argparse
 import json

diff --git a/llm/utils/generate_data_model.py b/llm/utils/generate_data_model.py
@@ -3,6 +3,7 @@
 function set_values that sets the GenerateDataModel attributes and
 function set_model_files_and_mar that sets model path and mar output values.
 """
+
 import os
 import dataclasses
 import argparse

diff --git a/llm/utils/hf_utils.py b/llm/utils/hf_utils.py
@@ -1,6 +1,7 @@
 """
 Utility functions for using HuggingFace Api
 """
+
 import sys
 from typing import List
 from huggingface_hub import HfApi

diff --git a/llm/utils/marsgen.py b/llm/utils/marsgen.py
@@ -1,6 +1,7 @@
 """
 Generate a Model Archive (MAR) file for a specified LLM.
 """
+
 import json
 import os
 import sys

diff --git a/llm/utils/model_requirements.txt b/llm/utils/model_requirements.txt
@@ -1,6 +1,6 @@
 torch==2.0.1
 tokenizers==0.15.0
-transformers==4.36.0
+transformers==4.38.1
 accelerate==0.22.0
 einops==0.6.1
 bitsandbytes==0.41.1

diff --git a/llm/utils/system_utils.py b/llm/utils/system_utils.py
@@ -1,6 +1,7 @@
 """
 Utility functions to handle file and folder operations
 """
+
 import os
 import sys
 import shutil

diff --git a/llm/utils/tsutils.py b/llm/utils/tsutils.py
@@ -1,6 +1,7 @@
 """
 Utility functions for running inference and getting model parameters
 """
+
 import os
 import json
 import collections