diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py index 5854f32e8..4bc60ab6d 100755 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py @@ -372,9 +372,24 @@ def _parse_requests(self, requests: dict) -> LLMMetrics: num_generated_tokens, ) + def _remove_leading_invalid_chars(self, text: str): + for i, char in enumerate(text): + # There will be 3 or 4 chars + # (but sometimes the first char looks valid, so don't stop until we've seen at least 3) + if char.isprintable() and i > 2: + break + + return text[i:] + def _preprocess_response( self, res_timestamps: list[int], res_outputs: list[dict[str, str]] ) -> None: + # FIXME -- remove this triton code once it is properly fixed in PA + # (PA/triton will add junk to the start of the BYTES array. Remove it here) + if self._service_kind == "triton": + for d in res_outputs: + d["text_output"] = self._remove_leading_invalid_chars(d["text_output"]) + """Helper function to preprocess responses of a request.""" if self._service_kind == "openai": openai_final_response = res_outputs[-1]["response"] diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py index 477731b43..f0728156e 100644 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py @@ -33,7 +33,7 @@ def remove_sse_prefix(msg: str) -> str: def load_json(filename: str): - with open(filename) as f: + with open(filename, encoding="utf-8", errors="ignore") as f: return json.load(f)