diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py
index 017b67511..34f44e7c7 100644
--- a/pyxform/entities/entities_parsing.py
+++ b/pyxform/entities/entities_parsing.py
@@ -3,7 +3,8 @@
 from pyxform import constants as const
 from pyxform.aliases import yes_no
 from pyxform.errors import PyXFormError
-from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag
+from pyxform.parsing.expression import is_xml_tag
+from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings
 
 EC = const.EntityColumns
 
@@ -73,7 +74,7 @@ def get_validated_dataset_name(entity):
             f"Invalid entity list name: '{dataset}'. Names may not include periods."
         )
 
-    if not is_valid_xml_tag(dataset):
+    if not is_xml_tag(dataset):
         if isinstance(dataset, bytes):
             dataset = dataset.decode("utf-8")
 
@@ -118,7 +119,7 @@ def validate_entity_saveto(
             f"{error_start} the entity property name '{save_to}' starts with reserved prefix {const.ENTITIES_RESERVED_PREFIX}."
         )
 
-    if not is_valid_xml_tag(save_to):
+    if not is_xml_tag(save_to):
         if isinstance(save_to, bytes):
             save_to = save_to.decode("utf-8")
 
diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py
index af9198594..29df4fa1d 100644
--- a/pyxform/parsing/expression.py
+++ b/pyxform/parsing/expression.py
@@ -37,9 +37,9 @@ def get_expression_lexer() -> re.Scanner:
         "TIME": time_regex,
         "NUMBER": r"-?\d+\.\d*|-?\.\d+|-?\d+",
         # https://www.w3.org/TR/1999/REC-xpath-19991116/#exprlex
-        "OPS_MATH": r"[\*\+\-]|mod|div",
+        "OPS_MATH": r"[\*\+\-]| mod | div ",
         "OPS_COMP": r"\=|\!\=|\<|\>|\<=|>=",
-        "OPS_BOOL": r"and|or",
+        "OPS_BOOL": r" and | or ",
         "OPS_UNION": r"\|",
         "OPEN_PAREN": r"\(",
         "CLOSE_PAREN": r"\)",
@@ -107,3 +107,17 @@ def is_single_token_expression(expression: str, token_types: Iterable[str]) -> b
         return True
     else:
         return False
+
+
+def is_pyxform_reference(value: str) -> bool:
+    """
+    Does the input string contain only a valid Pyxform reference? e.g. ${my_question}
+    """
+    return is_single_token_expression(expression=value, token_types=("PYXFORM_REF",))
+
+
+def is_xml_tag(value: str) -> bool:
+    """
+    Does the input string contain only a valid XML tag / element name?
+    """
+    return is_single_token_expression(expression=value, token_types=("NAME",))
diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py
index 504e83d0b..3468f92f9 100644
--- a/pyxform/survey_element.py
+++ b/pyxform/survey_element.py
@@ -11,6 +11,7 @@
 from pyxform import aliases as alias
 from pyxform import constants as const
 from pyxform.errors import PyXFormError
+from pyxform.parsing.expression import is_xml_tag
 from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
 from pyxform.utils import (
     BRACKETED_TAG_REGEX,
@@ -19,7 +20,6 @@
     node,
 )
 from pyxform.xls2json import print_pyobj_to_json
-from pyxform.xlsparseutils import is_valid_xml_tag
 
 if TYPE_CHECKING:
     from pyxform.utils import DetachableElement
@@ -140,7 +140,7 @@ def add_children(self, children):
     SUPPORTED_MEDIA = ("image", "big-image", "audio", "video")
 
     def validate(self):
-        if not is_valid_xml_tag(self.name):
+        if not is_xml_tag(self.name):
             invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name)
             raise PyXFormError(
                 f"The name '{self.name}' contains an invalid character '{invalid_char.group(0)}'. Names {const.XML_IDENTIFIER_ERROR_MESSAGE}"
diff --git a/pyxform/validators/pyxform/choices.py b/pyxform/validators/pyxform/choices.py
new file mode 100644
index 000000000..c97298e75
--- /dev/null
+++ b/pyxform/validators/pyxform/choices.py
@@ -0,0 +1,59 @@
+from pyxform import constants
+from pyxform.errors import PyXFormError
+
+INVALID_NAME = (
+    "[row : {row}] On the 'choices' sheet, the 'name' value is invalid. "
+    "Choices must have a name. "
+    "Learn more: https://xlsform.org/en/#setting-up-your-worksheets"
+)
+INVALID_LABEL = (
+    "[row : {row}] On the 'choices' sheet, the 'label' value is invalid. "
+    "Choices should have a label. "
+    "Learn more: https://xlsform.org/en/#setting-up-your-worksheets"
+)
+INVALID_HEADER = (
+    "[row : 1] On the 'choices' sheet, the '{column}' value is invalid. "
+    "Column headers must not be empty and must not contain spaces. "
+    "Learn more: https://xlsform.org/en/#setting-up-your-worksheets"
+)
+INVALID_DUPLICATE = (
+    "[row : {row}] On the 'choices' sheet, the 'name' value is invalid. "
+    "Choice names must be unique for each choice list. "
+    "If this is intentional, use the setting 'allow_choice_duplicates'. "
+    "Learn more: https://xlsform.org/#choice-names."
+)
+
+
+def validate_headers(
+    headers: tuple[tuple[str, ...], ...], warnings: list[str]
+) -> list[str]:
+    def check():
+        for header in headers:
+            header = header[0]
+            if header != constants.LIST_NAME_S and (" " in header or header == ""):
+                warnings.append(INVALID_HEADER.format(column=header))
+                yield header
+
+    return list(check())
+
+
+def validate_choices(
+    options: list[dict], warnings: list[str], allow_duplicates: bool = False
+) -> None:
+    seen_options = set()
+    duplicate_errors = []
+    for option in options:
+        if "name" not in option:
+            raise PyXFormError(INVALID_NAME.format(row=option["__row"]))
+        elif "label" not in option:
+            warnings.append(INVALID_LABEL.format(row=option["__row"]))
+
+        if not allow_duplicates:
+            name = option["name"]
+            if name in seen_options:
+                duplicate_errors.append(INVALID_DUPLICATE.format(row=option["__row"]))
+            else:
+                seen_options.add(name)
+
+    if 0 < len(duplicate_errors):
+        raise PyXFormError("\n".join(duplicate_errors))
diff --git a/pyxform/validators/pyxform/question_types.py b/pyxform/validators/pyxform/question_types.py
index 7ec18edde..3c62673f9 100644
--- a/pyxform/validators/pyxform/question_types.py
+++ b/pyxform/validators/pyxform/question_types.py
@@ -3,7 +3,7 @@
 """
 
 from pyxform.errors import PyXFormError
-from pyxform.parsing.expression import is_single_token_expression
+from pyxform.parsing.expression import is_pyxform_reference
 from pyxform.utils import PYXFORM_REFERENCE_REGEX
 
 BACKGROUND_GEOPOINT_CALCULATION = "[row : {r}] For 'background-geopoint' questions, the 'calculation' column must be empty."
@@ -25,9 +25,7 @@ def validate_background_geopoint_calculation(row: dict, row_num: int) -> bool:
 
 def validate_background_geopoint_trigger(row: dict, row_num: int) -> bool:
     """A background-geopoint must have a trigger."""
-    if not row.get("trigger", False) or not is_single_token_expression(
-        expression=row["trigger"], token_types=["PYXFORM_REF"]
-    ):
+    if not row.get("trigger", False) or not is_pyxform_reference(value=row["trigger"]):
         raise PyXFormError(TRIGGER_INVALID.format(r=row_num, t=row["type"]))
     return True
 
diff --git a/pyxform/xlsparseutils.py b/pyxform/validators/pyxform/sheet_misspellings.py
similarity index 76%
rename from pyxform/xlsparseutils.py
rename to pyxform/validators/pyxform/sheet_misspellings.py
index 280f706a0..c83fef313 100644
--- a/pyxform/xlsparseutils.py
+++ b/pyxform/validators/pyxform/sheet_misspellings.py
@@ -1,14 +1,8 @@
-import re
 from collections.abc import KeysView
 
 from pyxform import constants
 from pyxform.utils import levenshtein_distance
 
-# http://www.w3.org/TR/REC-xml/
-TAG_START_CHAR = r"[a-zA-Z:_]"
-TAG_CHAR = r"[a-zA-Z:_0-9\-.]"
-XFORM_TAG_REGEXP = re.compile(rf"^{TAG_START_CHAR}{TAG_CHAR}*$")
-
 
 def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None":
     """
@@ -36,10 +30,3 @@ def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None":
         return msg
     else:
         return None
-
-
-def is_valid_xml_tag(tag):
-    """
-    Use a regex to see if there are any invalid characters (i.e. spaces).
-    """
-    return re.search(XFORM_TAG_REGEXP, tag)
diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
index 3b260e22b..7e4e6ebe8 100644
--- a/pyxform/xls2json.py
+++ b/pyxform/xls2json.py
@@ -6,7 +6,6 @@
 import os
 import re
 import sys
-from collections import Counter
 from typing import IO, Any
 
 from pyxform import aliases, constants
@@ -21,15 +20,16 @@
     validate_entity_saveto,
 )
 from pyxform.errors import PyXFormError
-from pyxform.parsing.expression import is_single_token_expression
+from pyxform.parsing.expression import is_pyxform_reference, is_xml_tag
 from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic
+from pyxform.validators.pyxform import choices as vc
 from pyxform.validators.pyxform import parameters_generic, select_from_file
 from pyxform.validators.pyxform import question_types as qt
 from pyxform.validators.pyxform.android_package_name import validate_android_package_name
 from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax
+from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings
 from pyxform.validators.pyxform.translations_checks import SheetTranslations
 from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict
-from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag
 
 SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
 RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES))
@@ -175,7 +175,12 @@ def dealias_types(dict_array):
     return dict_array
 
 
-def clean_text_values(sheet_name: str, data: list[dict], strip_whitespace: bool = False):
+def clean_text_values(
+    sheet_name: str,
+    data: list[dict],
+    strip_whitespace: bool = False,
+    add_row_number: bool = False,
+) -> list[dict]:
     """
     Go though the dict array and strips all text values.
     Also replaces multiple spaces with single spaces.
@@ -192,6 +197,8 @@ def clean_text_values(sheet_name: str, data: list[dict], strip_whitespace: bool
                 validate_pyxform_reference_syntax(
                     value=value, sheet_name=sheet_name, row_number=row_number, key=key
                 )
+        if add_row_number:
+            row["__row"] = row_number
     return data
 
 
@@ -513,7 +520,11 @@ def workbook_to_json(
 
     # ########## Choices sheet ##########
     choices_sheet = workbook_dict.get(constants.CHOICES, [])
-    choices_sheet = clean_text_values(sheet_name=constants.CHOICES, data=choices_sheet)
+    choices_sheet = clean_text_values(
+        sheet_name=constants.CHOICES,
+        data=choices_sheet,
+        add_row_number=True,
+    )
     choices_sheet = dealias_and_group_headers(
         dict_array=choices_sheet,
         header_aliases=aliases.list_header,
@@ -523,73 +534,27 @@ def workbook_to_json(
     choices = group_dictionaries_by_key(
         list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S
     )
-    if 0 < len(choices):
-        json_dict[constants.CHOICES] = choices
     # To combine the warning into one message, the check for missing choices translation
     # columns is run with Survey sheet below.
 
-    # Make sure all the options have the required properties:
-    warnedabout = set()
-    for list_name, options in choices.items():
+    # Warn and remove invalid headers in case the form uses headers for notes.
+    invalid_headers = vc.validate_headers(choices_sheet.headers, warnings)
+    allow_duplicates = aliases.yes_no.get(
+        settings.get("allow_choice_duplicates", False), False
+    )
+    for options in choices.values():
+        vc.validate_choices(
+            options=options,
+            warnings=warnings,
+            allow_duplicates=allow_duplicates,
+        )
         for option in options:
-            if "name" not in option:
-                info = "[list_name : " + list_name + "]"
-                raise PyXFormError(
-                    "On the choices sheet there is a option with no name. " + info
-                )
-            if "label" not in option:
-                info = "[list_name : " + list_name + "]"
-                warnings.append(
-                    "On the choices sheet there is a option with no label. " + info
-                )
-            # chrislrobert's fix for a cryptic error message:
-            # see: https://code.google.com/p/opendatakit/issues/detail?id=833&start=200
-            option_keys = list(option.keys())
-            for headername in option_keys:
-                # Using warnings and removing the bad columns
-                # instead of throwing errors because some forms
-                # use choices column headers for notes.
-                if " " in headername:
-                    if headername not in warnedabout:
-                        warnedabout.add(headername)
-                        warnings.append(
-                            "On the choices sheet there is "
-                            + 'a column ("'
-                            + headername
-                            + '") with an illegal header. '
-                            + "Headers cannot include spaces."
-                        )
-                    del option[headername]
-                elif headername == "":
-                    warnings.append(
-                        "On the choices sheet there is a value"
-                        + " in a column with no header."
-                    )
-                    del option[headername]
-        list_name_choices = [option.get("name") for option in options]
-        if len(list_name_choices) != len(set(list_name_choices)):
-            duplicate_setting = settings.get("allow_choice_duplicates")
-            for v in Counter(list_name_choices).values():
-                if v > 1:
-                    if not duplicate_setting or duplicate_setting.capitalize() != "Yes":
-                        choice_duplicates = [
-                            item
-                            for item, count in Counter(list_name_choices).items()
-                            if count > 1
-                        ]
-
-                        if choice_duplicates:
-                            raise PyXFormError(
-                                "The name column for the '{}' choice list contains these duplicates: {}. Duplicate names "
-                                "will be impossible to identify in analysis unless a previous value in a cascading "
-                                "select differentiates them. If this is intentional, you can set the "
-                                "allow_choice_duplicates setting to 'yes'. Learn more: https://xlsform.org/#choice-names.".format(
-                                    list_name,
-                                    ", ".join(
-                                        [f"'{dupe}'" for dupe in choice_duplicates]
-                                    ),
-                                )
-                            )
+            for invalid_header in invalid_headers:
+                option.pop(invalid_header, None)
+            del option["__row"]
+
+    if 0 < len(choices):
+        json_dict[constants.CHOICES] = choices
 
     # ########## Entities sheet ###########
     entities_sheet = workbook_dict.get(constants.ENTITIES, [])
@@ -945,7 +910,7 @@ def workbook_to_json(
                     ROW_FORMAT_STRING % row_number + " Question or group with no name."
                 )
         question_name = str(row[constants.NAME])
-        if not is_valid_xml_tag(question_name):
+        if not is_xml_tag(question_name):
             if isinstance(question_name, bytes):
                 question_name = question_name.decode("utf-8")
 
@@ -1022,10 +987,7 @@ def workbook_to_json(
                 repeat_count_expression = new_json_dict.get("control", {}).get("jr:count")
                 if repeat_count_expression:
                     # Simple expressions don't require a new node, they can reference directly.
-                    simple_expression = is_single_token_expression(
-                        expression=repeat_count_expression, token_types=["PYXFORM_REF"]
-                    )
-                    if not simple_expression:
+                    if not is_pyxform_reference(value=repeat_count_expression):
                         generated_node_name = new_json_dict["name"] + "_count"
                         parent_children_array.append(
                             {
diff --git a/tests/parsing/__init__.py b/tests/parsing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/parsing/test_expression.py b/tests/parsing/test_expression.py
new file mode 100644
index 000000000..1fe3ad42c
--- /dev/null
+++ b/tests/parsing/test_expression.py
@@ -0,0 +1,49 @@
+from pyxform.parsing.expression import is_xml_tag
+
+from tests.pyxform_test_case import PyxformTestCase
+
+positive = [
+    ("A", "Single uppercase letter"),
+    ("ab", "Lowercase letters"),
+    ("_u", "Leading underscore"),
+    ("A12", "Leading uppercase letter with digit"),
+    ("A-1.23", "Leading uppercase letter with hyphen, period, and digit"),
+    ("Name123-456", "Mixed case, digits, hyphen"),
+    ("𐐀n", "Leading unicode"),
+    ("Αλ", "Following unicode"),
+    ("name:name", "NCName, colon, NCName"),
+    ("name_with_colon:_and_extras", "NCName, colon, NCName (non-letter characters)"),
+    # Other special character tokens are excluded by ncname_regex.
+    ("nameor", "Contains another parser token (or)"),
+    ("nameand", "Contains another parser token (and)"),
+    ("namemod", "Contains another parser token (mod)"),
+    ("namediv", "Contains another parser token (div)"),
+]
+
+negative = [
+    ("", "Empty string"),
+    (" ", "Space"),
+    ("123name", "Leading digit"),
+    ("-name", "Leading hyphen"),
+    (".name", "Leading period"),
+    (":name", "Leading colon"),
+    ("name$", "Invalid character ($)"),
+    ("name with space", "Invalid character (space)"),
+    ("na@me", "Invalid character (@)"),
+    ("na#me", "Invalid character (#)"),
+    ("name:name:name", "Invalid structure (multiple colons)"),
+]
+
+
+class TestExpression(PyxformTestCase):
+    def test_is_xml_tag__positive(self):
+        """Should accept positive match cases i.e. valid xml tag names."""
+        for case, description in positive:
+            with self.subTest(case=case, description=description):
+                self.assertTrue(is_xml_tag(case))
+
+    def test_is_xml_tag__negative(self):
+        """Should reject negative match cases i.e. invalid xml tag names."""
+        for case, description in negative:
+            with self.subTest(case=case, description=description):
+                self.assertFalse(is_xml_tag(case))
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 6c44fc9eb..555915352 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -2,6 +2,8 @@
 Test duplicate survey question field name.
 """
 
+from pyxform.validators.pyxform import choices as vc
+
 from tests.pyxform_test_case import PyxformTestCase
 from tests.xpath_helpers.choices import xpc
 from tests.xpath_helpers.questions import xpq
@@ -57,9 +59,7 @@ def test_duplicate_choices_without_setting(self):
             |         | list            | b        | option c |
             """,
             errored=True,
-            error__contains=[
-                "The name column for the 'list' choice list contains these duplicates: 'b'"
-            ],
+            error__contains=[vc.INVALID_DUPLICATE.format(row=4)],
         )
 
     def test_multiple_duplicate_choices_without_setting(self):
@@ -77,8 +77,10 @@ def test_multiple_duplicate_choices_without_setting(self):
             """,
             errored=True,
             error__contains=[
-                "The name column for the 'list' choice list contains these duplicates: 'a', 'b'"
+                vc.INVALID_DUPLICATE.format(row=3),
+                vc.INVALID_DUPLICATE.format(row=5),
             ],
+            debug=True,
         )
 
     def test_duplicate_choices_with_setting_not_set_to_yes(self):
@@ -97,9 +99,7 @@ def test_duplicate_choices_with_setting_not_set_to_yes(self):
             |          | Duplicates   | Bob                       |
             """,
             errored=True,
-            error__contains=[
-                "The name column for the 'list' choice list contains these duplicates: 'b'"
-            ],
+            error__contains=[vc.INVALID_DUPLICATE.format(row=4)],
         )
 
     def test_duplicate_choices_with_allow_choice_duplicates_setting(self):
diff --git a/tests/test_sheet_columns.py b/tests/test_sheet_columns.py
index a5b85a42f..3a7e708d1 100644
--- a/tests/test_sheet_columns.py
+++ b/tests/test_sheet_columns.py
@@ -2,6 +2,8 @@
 Test XLSForm sheet names.
 """
 
+from pyxform.validators.pyxform import choices as vc
+
 from tests.pyxform_test_case import PyxformTestCase
 from tests.utils import prep_for_xml_contains
 from tests.xpath_helpers.choices import xpc
@@ -156,7 +158,7 @@ def test_invalid_choices_sheet_fails(self):
                 ]
             ),
             errored=True,
-            error__contains=["option with no name"],
+            error__contains=[vc.INVALID_NAME.format(row=2)],
         )
 
     def test_missing_list_name(self):
diff --git a/tests/test_xlsform_spec.py b/tests/test_xlsform_spec.py
index c3442fae2..891c591b5 100644
--- a/tests/test_xlsform_spec.py
+++ b/tests/test_xlsform_spec.py
@@ -1,3 +1,5 @@
+from pyxform.validators.pyxform import choices as vc
+
 from tests.pyxform_test_case import PyxformTestCase
 
 
@@ -61,10 +63,10 @@ def test_warnings__count(self):
         )
         self.maxDiff = 2000
         expected = [
-            "On the choices sheet there is a option with no label. [list_name : a_b]",
-            "On the choices sheet there is a option with no label. [list_name : a_b]",
-            "On the choices sheet there is a option with no label. [list_name : animals]",
-            "On the choices sheet there is a option with no label. [list_name : animals]",
+            vc.INVALID_LABEL.format(row=4),
+            vc.INVALID_LABEL.format(row=5),
+            vc.INVALID_LABEL.format(row=6),
+            vc.INVALID_LABEL.format(row=7),
             "[row : 9] Repeat has no label: {'name': 'repeat_test', 'type': 'begin repeat'}",
             "[row : 10] Group has no label: {'name': 'group_test', 'type': 'begin group'}",
             "[row : 17] Group has no label: {'name': 'name', 'type': 'begin group'}",
diff --git a/tests/xform_test_case/test_bugs.py b/tests/xform_test_case/test_bugs.py
index c022a8121..c5db20d1f 100644
--- a/tests/xform_test_case/test_bugs.py
+++ b/tests/xform_test_case/test_bugs.py
@@ -10,6 +10,7 @@
 from pyxform.errors import PyXFormError
 from pyxform.utils import has_external_choices
 from pyxform.validators.odk_validate import ODKValidateError, check_xform
+from pyxform.validators.pyxform import choices as vc
 from pyxform.xls2json import SurveyReader, parse_file_to_workbook_dict
 from pyxform.xls2json_backends import xlsx_to_dict
 from pyxform.xls2xform import convert
@@ -79,7 +80,11 @@ def test_conversion(self):
         )
         # The "column with no header" warning is probably not reachable since XLS/X
         # pre-processing ignores any columns without a header.
-        observed = [w for w in warnings if "Headers cannot include spaces" in w]
+        observed = [
+            w
+            for w in warnings
+            if w == vc.INVALID_HEADER.format(column="header with spaces")
+        ]
         self.assertEqual(1, len(observed), warnings)
 
     def test_values_with_spaces_are_cleaned(self):