diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py
index 9c90bc75b..a215a2251 100755
--- a/model_analyzer/config/input/config_command_profile.py
+++ b/model_analyzer/config/input/config_command_profile.py
@@ -66,10 +66,6 @@ def __init__(self):
         super().__init__()
         self._fill_config()
 
-    # FIXME: placeholder until branch is merged
-    def is_llm_model(self):
-        return False
-
     def _resolve_protobuf_field(self, field: FieldDescriptor) -> ConfigSweep:
         """
         Recursively resolve protobuf fields.
diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py
index c8077dfd1..fe77f6eb8 100755
--- a/model_analyzer/record/metrics_manager.py
+++ b/model_analyzer/record/metrics_manager.py
@@ -69,8 +69,8 @@ class MetricsManager:
         "gpu_power_usage",
         "cpu_available_ram",
         "cpu_used_ram",
-        "avg_first_latency",
-        "avg_token_latency",
+        "avg_first_token_latency",
+        "avg_token_to_token_latency",
     ]
 
     def __init__(self, config, client, server, gpus, result_manager, state_manager):
diff --git a/model_analyzer/record/types/avg_first_token_latency.py b/model_analyzer/record/types/avg_first_token_latency.py
index e012254b1..72d539633 100755
--- a/model_analyzer/record/types/avg_first_token_latency.py
+++ b/model_analyzer/record/types/avg_first_token_latency.py
@@ -22,10 +22,10 @@
 @total_ordering
 class AvgFirstTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg first token to token latency metric
+    A record for perf_analyzer average first token latency metric
     """
 
-    tag = "avg_first_latency"
+    tag = "avg_first_token_latency"
 
     def __init__(self, value, timestamp=0):
         """
diff --git a/model_analyzer/record/types/avg_token_latency.py b/model_analyzer/record/types/avg_token_latency.py
deleted file mode 100755
index 93937cafd..000000000
--- a/model_analyzer/record/types/avg_token_latency.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from functools import total_ordering
-
-from model_analyzer.record.record import DecreasingRecord
-
-
-@total_ordering
-class AvgTokenLatency(DecreasingRecord):
-    """
-    A record for perf_analyzer avg token-to-token latency metric
-    """
-
-    tag = "avg_token_latency"
-
-    def __init__(self, value, timestamp=0):
-        """
-        Parameters
-        ----------
-        value : float
-            the latency extracted from the perf analyzer output
-        timestamp : float
-            Elapsed avg time for token-to-token latency
-        """
-
-        super().__init__(value, timestamp)
-
-    @classmethod
-    def header(cls, aggregation_tag=False):
-        """
-        Parameters
-        ----------
-        aggregation_tag: bool
-            An optional tag that may be displayed
-            as part of the header indicating that
-            this record has been aggregated using
-            max, min or average etc.
-
-        Returns
-        -------
-        str
-            The full name of the
-            metric.
-        """
-
-        return "avg token-to-token latency (ms)"
-
-    def __eq__(self, other):
-        """
-        Allows checking for
-        equality between two records
-        """
-
-        return self.value() == other.value()
-
-    def __lt__(self, other):
-        """
-        Allows checking if
-        this record is less than
-        the other
-        """
-
-        return self.value() > other.value()
-
-    def __add__(self, other):
-        """
-        Allows adding two records together
-        to produce a brand new record.
-        """
-
-        return self.__class__(value=(self.value() + other.value()))
-
-    def __sub__(self, other):
-        """
-        Allows subbing two records together
-        to produce a brand new record.
-
-        ** Note this does reverse subtraction because
-        of the inverted nature of latency (lower is better)
-        """
-
-        return self.__class__(value=(other.value() - self.value()))
diff --git a/model_analyzer/record/types/avg_token_to_token_latency.py b/model_analyzer/record/types/avg_token_to_token_latency.py
index 2941da39b..66c93b6fc 100755
--- a/model_analyzer/record/types/avg_token_to_token_latency.py
+++ b/model_analyzer/record/types/avg_token_to_token_latency.py
@@ -22,7 +22,7 @@
 @total_ordering
 class AvgTokenToTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg token-to-token latency metric
+    A record for perf_analyzer average token-to-token latency metric
     """
 
     tag = "avg_token_to_token_latency"
diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py
index 33db04f30..9e48d0e9d 100755
--- a/tests/common/test_utils.py
+++ b/tests/common/test_utils.py
@@ -287,12 +287,12 @@ def construct_perf_analyzer_config(
 
     if request_rate:
         pa_config._args["request-rate-range"] = request_rate
-    elif llm_search_mode:
+    elif is_llm_model:
         pa_config._args["periodic-concurrency-range"] = concurrency
     else:
         pa_config._args["concurrency-range"] = concurrency
 
-    if llm_search_mode:
+    if is_llm_model:
         pa_config._args["request-parameter"] = (
             "max_token:" + str(max_token_count) + ":int"
         )
diff --git a/tests/test_perf_analyzer_config_generator.py b/tests/test_perf_analyzer_config_generator.py
index 69e42ef8d..4b99aa87c 100755
--- a/tests/test_perf_analyzer_config_generator.py
+++ b/tests/test_perf_analyzer_config_generator.py
@@ -578,7 +578,7 @@ def test_llm_search_max_token_count(self):
         max_token_counts = utils.generate_doubled_list(1, 256)
 
         expected_configs = [
-            construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
+            construct_perf_analyzer_config(max_token_count=mtc, is_llm_model=True)
             for mtc in max_token_counts
         ]
@@ -612,7 +612,7 @@ def test_llm_search_text_input_length(self):
         text_input_lengths = utils.generate_doubled_list(1, 1024)
 
         expected_configs = [
-            construct_perf_analyzer_config(llm_search_mode=True)
+            construct_perf_analyzer_config(is_llm_model=True)
             for pl in text_input_lengths
         ]