Fix garbled Chinese characters (mojibake) in streamed output; add some packages to requirements.txt and bump transformers to 4.40.0

hliuca · Jul 25, 2024 · a51da18 · a51da18
1 parent c9a9932
commit a51da18
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 3 deletions.
diff --git a/byte_infer_perf/llm_perf/requirements.txt b/byte_infer_perf/llm_perf/requirements.txt
@@ -5,7 +5,9 @@ isort
 sentencepiece
 pandas
 google-api-python-client
-transformers==4.33.2
+transformers==4.40.0
 tqdm
 matplotlib
-backoff
+backoff
+psutil
+accelerate
diff --git a/byte_infer_perf/llm_perf/server/endpoint.py b/byte_infer_perf/llm_perf/server/endpoint.py
@@ -140,6 +140,8 @@ async def streaming_inference(
             prompt_tokens = len(req.input_ids)
             completion_tokens = 0
 
+            tokens_buffer = []
+
             async for gen_res in self.scheduler.generate(req):
                 result = gen_res["result"]
                 if result is not None:
@@ -157,7 +159,14 @@ async def streaming_inference(
                 }
 
                 if result is not None:
-                    text = self.tokenizer.decode([result.token_id], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+                    tokens_buffer.append(result.token_id)
+
+                    text = self.tokenizer.decode(tokens_buffer, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+                    if text == " �" or text == "�":
+                        text = ""
+                    else:
+                        tokens_buffer = []
+
                     infer_outputs["choice"].update(
                         {
                             "message": text,