Merge pull request #697 from lindsay-stevens/pyxform-694

694: search and pulldata secondary instance conflict
XLSForm · Feb 28, 2024 · e57a3fd · e57a3fd
2 parents a7027ea + e008ffd
commit e57a3fd
Show file tree

Hide file tree

Showing 20 changed files with 943 additions and 408 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -8,6 +8,6 @@ Closes #
 
 #### Before submitting this PR, please make sure you have:
 - [ ] included test cases for core behavior and edge cases in `tests`
-- [ ] run `nosetests` and verified all tests pass
-- [ ] run `black pyxform tests` to format code
+- [ ] run `python -m unittest` and verified all tests pass
+- [ ] run `ruff format pyxform tests` and `ruff check pyxform tests` to lint code
 - [ ] verified that any code or assets from external sources are properly credited in comments
diff --git a/pyxform/aliases.py b/pyxform/aliases.py
@@ -73,7 +73,7 @@
     "SMS Response": constants.SMS_RESPONSE,
     "compact_tag": "instance::odk:tag",  # used for compact representation
     "Type": "type",
-    "List_name": "list_name",
+    "List_name": constants.LIST_NAME_U,
     # u"repeat_count": u"jr:count",  duplicate key
     "read_only": "bind::readonly",
     "readonly": "bind::readonly",
@@ -111,7 +111,7 @@
     constants.ENTITIES_SAVETO: "bind::entities:saveto",
 }
 
-entities_header = {"list_name": "dataset"}
+entities_header = {constants.LIST_NAME_U: "dataset"}
 
 # Key is the pyxform internal name, Value is the name used in error/warning messages.
 TRANSLATABLE_SURVEY_COLUMNS = {
@@ -135,7 +135,7 @@
 }
 list_header = {
     "caption": constants.LABEL,
-    "list_name": constants.LIST_NAME,
+    constants.LIST_NAME_U: constants.LIST_NAME_S,
     "value": constants.NAME,
     "image": "media::image",
     "big-image": "media::big-image",

diff --git a/pyxform/constants.py b/pyxform/constants.py
@@ -67,7 +67,8 @@
 CHOICES = "choices"
 
 # XLS Specific constants
-LIST_NAME = "list name"
+LIST_NAME_S = "list name"
+LIST_NAME_U = "list_name"
 CASCADING_SELECT = "cascading_select"
 TABLE_LIST = "table-list"  # hyphenated because it goes in appearance, and convention for appearance column is dashes
 FIELD_LIST = "field-list"
@@ -155,3 +156,12 @@ class EntityColumns(StrEnum):
     "constraint",
     "calculate",
 )
+NSMAP = {
+    "xmlns": "http://www.w3.org/2002/xforms",
+    "xmlns:h": "http://www.w3.org/1999/xhtml",
+    "xmlns:ev": "http://www.w3.org/2001/xml-events",
+    "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
+    "xmlns:jr": "http://openrosa.org/javarosa",
+    "xmlns:orx": "http://openrosa.org/xforms",
+    "xmlns:odk": "http://www.opendatakit.org/xforms",
+}
diff --git a/pyxform/survey.py b/pyxform/survey.py
@@ -11,7 +11,7 @@
 from typing import Generator, Iterator, List, Optional, Tuple
 
 from pyxform import aliases, constants
-from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS
+from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS, NSMAP
 from pyxform.errors import PyXFormError, ValidationError
 from pyxform.external_instance import ExternalInstance
 from pyxform.instance import SurveyInstance
@@ -23,7 +23,6 @@
     BRACKETED_TAG_REGEX,
     LAST_SAVED_INSTANCE_NAME,
     LAST_SAVED_REGEX,
-    NSMAP,
     DetachableElement,
     PatchedText,
     get_languages_with_bad_tags,
@@ -40,7 +39,7 @@
     r"(instance\(.*\)\/root\/item\[.*?(\$\{.*\})\]\/.*?)\s"
 )
 RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),")
-SEARCH_APPEARANCE_REGEX = re.compile(r"search\(.*?\)")
+SEARCH_FUNCTION_REGEX = re.compile(r"search\(.*?\)")
 
 
 class InstanceInfo:
@@ -216,7 +215,7 @@ class Survey(Section):
             "namespaces": str,
             constants.ENTITY_FEATURES: list,
         }
-    )  # yapf: disable
+    )
 
     def validate(self):
         if self.id_string in [None, "None"]:
@@ -385,7 +384,7 @@ def _validate_external_instances(instances) -> None:
         errors = []
         for element, copies in seen.items():
             if len(copies) > 1:
-                contexts = ", ".join(x.context for x in copies)
+                contexts = ", ".join(f"{x.context}({x.type})" for x in copies)
                 errors.append(
                     "Instance names must be unique within a form. "
                     f"The name '{element}' was found {len(copies)} time(s), "
@@ -655,34 +654,44 @@ def _add_to_nested_dict(self, dicty, path, value):
             dicty[path[0]] = {}
         self._add_to_nested_dict(dicty[path[0]], path[1:], value)
 
-    @staticmethod
-    def _redirect_is_search_itext(element: SurveyElement) -> None:
+    def _redirect_is_search_itext(self, element: SurveyElement) -> bool:
         """
-        For selects using the "search()" appearance, redirect itext for in-line items.
+        For selects using the "search()" function, redirect itext for in-line items.
 
-        External selects from a "search" appearance alone don't work in Enketo. In Collect
+        External selects from a "search" function alone don't work in Enketo. In Collect
         they must have the "item" elements in the body, rather than in an "itemset".
 
         The "itemset" reference is cleared below, so that the element will get in-line
         items instead of an itemset reference to a secondary instance. The itext ref is
         passed to the options/choices so they can use the generated translations. This
-        accounts for questions with and without a "search()" appearance sharing choices.
+        accounts for questions with and without a "search()" function sharing choices.
 
         :param element: A select type question.
-        :return: None, the question/children are modified in-place.
+        :return: If True, the element uses the search function.
         """
         try:
             is_search = bool(
-                SEARCH_APPEARANCE_REGEX.search(
+                SEARCH_FUNCTION_REGEX.search(
                     element[constants.CONTROL][constants.APPEARANCE]
                 )
             )
         except (KeyError, TypeError):
             is_search = False
         if is_search:
+            file_id, ext = os.path.splitext(element[constants.ITEMSET])
+            if ext in EXTERNAL_INSTANCE_EXTENSIONS:
+                msg = (
+                    f"Question '{element[constants.NAME]}' is a select from file type, "
+                    "using 'search()'. This combination is not supported. "
+                    "Remove the 'search()' usage, or change the select type."
+                )
+                raise PyXFormError(msg)
+            itemset = element[constants.ITEMSET]
+            self.choices.pop(itemset, None)
             element[constants.ITEMSET] = ""
             for i, opt in enumerate(element.get(constants.CHILDREN, [])):
-                opt["_choice_itext_id"] = f"{element['list_name']}-{i}"
+                opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}"
+        return is_search
 
     def _setup_translations(self):
         """
@@ -745,6 +754,8 @@ def get_choices():
             self._add_to_nested_dict(self._translations, path, leaf_value)
 
         select_types = set(aliases.select.keys())
+        search_lists = set()
+        non_search_lists = set()
         for element in self.iter_descendants():
             itemset = element.get("itemset")
             if itemset is not None:
@@ -753,7 +764,12 @@ def get_choices():
                 element._itemset_dyn_label = itemset in itemsets_has_dyn_label
 
             if element[constants.TYPE] in select_types:
-                self._redirect_is_search_itext(element=element)
+                select_ref = (element[constants.NAME], element[constants.LIST_NAME_U])
+                if self._redirect_is_search_itext(element=element):
+                    search_lists.add(select_ref)
+                else:
+                    non_search_lists.add(select_ref)
+
             # Skip creation of translations for choices in selects. The creation of these
             # translations is done above in this function.
             parent = element.get("parent")
@@ -780,6 +796,20 @@ def get_choices():
                         }
                     )
 
+        for q_name, list_name in search_lists:
+            choice_refs = [f"'{q}'" for q, c in non_search_lists if c == list_name]
+            if len(choice_refs) > 0:
+                refs_str = ", ".join(choice_refs)
+                msg = (
+                    f"Question '{q_name}' uses 'search()', and its select type references"
+                    f" the choice list name '{list_name}'. This choice list name is "
+                    f"referenced by at least one other question that is not using "
+                    f"'search()', which will not work: {refs_str}. Either 1) use "
+                    f"'search()' for all questions using this choice list name, or 2) "
+                    f"use a different choice list name for the question using 'search()'."
+                )
+                raise PyXFormError(msg)
+
     def _add_empty_translations(self):
         """
         Adds translations so that every itext element has the same elements across every

diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py
@@ -51,7 +51,7 @@
     "autoplay": str,
     "flat": lambda: False,
     "action": str,
-    "list_name": str,
+    const.LIST_NAME_U: str,
     "trigger": str,
 }
 

diff --git a/pyxform/utils.py b/pyxform/utils.py
@@ -29,17 +29,6 @@
 NODE_TYPE_TEXT = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
 
 
-NSMAP = {
-    "xmlns": "http://www.w3.org/2002/xforms",
-    "xmlns:h": "http://www.w3.org/1999/xhtml",
-    "xmlns:ev": "http://www.w3.org/2001/xml-events",
-    "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
-    "xmlns:jr": "http://openrosa.org/javarosa",
-    "xmlns:orx": "http://openrosa.org/xforms",
-    "xmlns:odk": "http://www.opendatakit.org/xforms",
-}
-
-
 class DetachableElement(Element):
     """
     Element classes are not meant to be instantiated directly.   This

diff --git a/pyxform/xform2json.py b/pyxform/xform2json.py
@@ -13,8 +13,8 @@
 from defusedxml.ElementTree import ParseError, XMLParser, fromstring, parse
 
 from pyxform import builder
+from pyxform.constants import NSMAP
 from pyxform.errors import PyXFormError
-from pyxform.utils import NSMAP
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())

diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
@@ -370,7 +370,7 @@ def add_choices_info_to_question(
             question["query"] = list_name
         elif choices.get(list_name):
             # Reference to list name for data dictionary tools (ilri/odktools).
-            question["list_name"] = list_name
+            question[constants.LIST_NAME_U] = list_name
             # Copy choices for data export tools (onaio/onadata).
             # TODO: could onadata use the list_name to look up the list for
             #  export, instead of pyxform internally duplicating choices data?
@@ -386,7 +386,7 @@ def add_choices_info_to_question(
         # Select from previous answers e.g. type = "select_one ${q1}".
         or bool(PYXFORM_REFERENCE_REGEX.search(list_name))
     ):
-        question["list_name"] = list_name
+        question[constants.LIST_NAME_U] = list_name
         question[constants.CHOICES] = choices[list_name]
 
 
@@ -529,7 +529,7 @@ def workbook_to_json(
         default_language=default_language,
     )
     external_choices = group_dictionaries_by_key(
-        list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME
+        list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME_S
     )
 
     # ########## Choices sheet ##########
@@ -543,7 +543,7 @@ def workbook_to_json(
         default_language=default_language,
     )
     combined_lists = group_dictionaries_by_key(
-        list_of_dicts=choices_sheet.data, key=constants.LIST_NAME
+        list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S
     )
     # To combine the warning into one message, the check for missing choices translation
     # columns is run with Survey sheet below.
@@ -654,7 +654,7 @@ def workbook_to_json(
         use_double_colons=True,
     )
     osm_tags = group_dictionaries_by_key(
-        list_of_dicts=osm_sheet.data, key=constants.LIST_NAME
+        list_of_dicts=osm_sheet.data, key=constants.LIST_NAME_S
     )
     # #################################
 
@@ -1025,13 +1025,13 @@ def workbook_to_json(
                 child_list = []
                 new_json_dict[constants.CHILDREN] = child_list
                 if control_type is constants.LOOP:
-                    if not parse_dict.get("list_name"):
+                    if not parse_dict.get(constants.LIST_NAME_U):
                         # TODO: Perhaps warn and make repeat into a group?
                         raise PyXFormError(
                             ROW_FORMAT_STRING % row_number
                             + " Repeat loop without list name."
                         )
-                    list_name = parse_dict["list_name"]
+                    list_name = parse_dict[constants.LIST_NAME_U]
                     if list_name not in choices:
                         raise PyXFormError(
                             ROW_FORMAT_STRING % row_number
@@ -1127,7 +1127,7 @@ def workbook_to_json(
                         + " select one external is only meant for"
                         " filtered selects."
                     )
-                list_name = parse_dict["list_name"]
+                list_name = parse_dict[constants.LIST_NAME_U]
                 file_extension = os.path.splitext(list_name)[1]
                 if (
                     select_type == constants.SELECT_ONE_EXTERNAL
@@ -1323,8 +1323,8 @@ def workbook_to_json(
             new_dict = row.copy()
             new_dict["type"] = constants.OSM
 
-            if parse_dict.get("list_name") is not None:
-                tags = osm_tags.get(parse_dict.get("list_name"))
+            if parse_dict.get(constants.LIST_NAME_U) is not None:
+                tags = osm_tags.get(parse_dict.get(constants.LIST_NAME_U))
                 for tag in tags:
                     if osm_tags.get(tag.get("name")):
                         tag["choices"] = osm_tags.get(tag.get("name"))

diff --git a/tests/example_xls/settings.xls b/tests/example_xls/settings.xls
diff --git a/tests/pyxform_test_case.py b/tests/pyxform_test_case.py
@@ -14,8 +14,9 @@
 # noinspection PyProtectedMember
 from lxml.etree import _Element
 from pyxform.builder import create_survey_element_from_dict
+from pyxform.constants import NSMAP
 from pyxform.errors import PyXFormError
-from pyxform.utils import NSMAP, coalesce
+from pyxform.utils import coalesce
 from pyxform.validators.odk_validate import ODKValidateError, check_xform
 from pyxform.xls2json import workbook_to_json
 
@@ -113,13 +114,17 @@ def _ss_structure_to_pyxform_survey(
     ):
         # using existing methods from the builder
         imported_survey_json = workbook_to_json(
-            workbook_dict=ss_structure, warnings=warnings
+            workbook_dict=ss_structure, form_name=name, warnings=warnings
         )
         # ideally, when all these tests are working, this would be refactored as well
         survey = create_survey_element_from_dict(imported_survey_json)
-        survey.name = coalesce(name, "data")
-        survey.title = title
-        survey.id_string = id_string
+        # Due to str(name) in workbook_to_json
+        if survey.name is None or survey.name == "None":
+            survey.name = coalesce(name, "data")
+        if survey.title is None:
+            survey.title = title
+        if survey.id_string is None:
+            survey.id_string = id_string
 
         return survey
 

diff --git a/tests/test_builder.py b/tests/test_builder.py
@@ -545,7 +545,7 @@ def test_style_column(self):
     STRIP_NS_FROM_TAG_RE = re.compile(r"\{.+\}")
 
     def test_style_not_added_to_body_if_not_present(self):
-        survey = utils.create_survey_from_fixture("settings", filetype=FIXTURE_FILETYPE)
+        survey = utils.create_survey_from_fixture("widgets", filetype=FIXTURE_FILETYPE)
         xml = survey.to_xml()
         # find the body tag
         root_elm = ETree.fromstring(xml.encode("utf-8"))

diff --git a/tests/test_custom_xml_namespaces.py b/tests/test_custom_xml_namespaces.py