diff --git a/akm_tools/validation/data_context_validators.py b/akm_tools/validation/data_context_validators.py index fac0b5b..e864988 100644 --- a/akm_tools/validation/data_context_validators.py +++ b/akm_tools/validation/data_context_validators.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Dict, List, Any +from typing import Dict, List, Any, Tuple from .global_debug_config import GlobalDebugConfig from .custom_exceptions import IDConflictException, BaseInstanceOverwiteException, InvalidReferentIDException @@ -21,15 +21,15 @@ def create_instance_dict(self, all_data): # Populate the instance_dict dictionary instance_dict = {} for instance in all_data: - if "id" in instance: - instance_id = instance["id"] - if instance_id not in instance_dict: - # Initialize the ID key with a list containing the current instance - instance_dict[instance_id] = {"count": 1, "instances": [instance]} + if "id" in instance and "entityTypeID" in instance: + instance_key = (instance["id"], instance["entityTypeID"]) + if instance_key not in instance_dict: + # Initialize the composite key with a list containing the current instance + instance_dict[instance_key] = {"count": 1, "instances": [instance]} else: # Append the current instance to the list and increment the count - instance_dict[instance_id]["instances"].append(instance) - instance_dict[instance_id]["count"] += 1 + instance_dict[instance_key]["instances"].append(instance) + instance_dict[instance_key]["count"] += 1 return instance_dict def _handle_error(self, exception_type, *args): @@ -42,7 +42,7 @@ def _handle_error(self, exception_type, *args): class ExtendedInstanceContentValidator(AllDataContextValidators): """ - For Instances with duplicate "id", where one extends the other, + For Instances with duplicate ("id", "entityTypeID"), where one extends the other, check if the extended Instance does not overwrite content of base instance """ @@ -53,12 +53,12 @@ def validate_data_contexts(self, all_data: List[Dict[str, Any]]): valid_data = [] instance_dict = self.__class__.create_instance_dict(all_data) - # Handle instances with same ids and prepare valid_data - for instance_id, instance_content in instance_dict.items(): + # Handle instances with same composite keys and prepare valid_data + for instance_key, instance_content in instance_dict.items(): if len(instance_content) > 2: self._handle_multiple_id_conflicts(instance_content) if instance_content["count"] == 2: - # check if the insances are not overriding , but only extending existing data. + # check if the instances are not overriding, but only extending existing data. is_valid_extension, base_instance, extended_instance = self.__class__.check_data_is_extended_not_overwritten( instance_content["instances"] ) @@ -100,44 +100,48 @@ def __init__(self): self.id_set = set() def validate_data_contexts(self, all_data): - # Create a dictionary mapping IDs to data instances - id_to_instance = {instance["id"]: instance for instance in all_data if "id" in instance} - - # Create a dictionary mapping IDs to their validity - id_to_validity = {id: None for id in id_to_instance} - - def is_valid(id): - # If the ID is not in the dictionary, it's invalid - if id not in id_to_instance: + # Create a dictionary mapping composite keys to data instances + id_to_instance = { + (instance["id"], instance["entityTypeID"]): instance + for instance in all_data if "id" in instance and "entityTypeID" in instance + } + + # Create a dictionary mapping composite keys to their validity + id_to_validity = {key: None for key in id_to_instance} + + def is_valid(key: Tuple[Any, Any]): + # If the composite key is not in the dictionary, it's invalid + if key not in id_to_instance: return False # If the validity has already been determined, return it - if id_to_validity[id] is not None: - return id_to_validity[id] + if id_to_validity[key] is not None: + return id_to_validity[key] - # Mark the ID as being checked to handle circular references - id_to_validity[id] = False + # Mark the composite key as being checked to handle circular references + id_to_validity[key] = False - instance = id_to_instance[id] - for key, value in instance.items(): + instance = id_to_instance[key] + for value in instance.values(): if ( isinstance(value, dict) and "referentEntityTypeID" in value ## this is hard dependency to schema for akm.Reference and "referentID" in value ): - if not is_valid(value["referentID"]): + referent_key = (value["referentID"], value["referentEntityTypeID"]) + if not is_valid(referent_key): return False # If all references are valid, the instance is valid - id_to_validity[id] = True + id_to_validity[key] = True return True # Validate the references - for id in id_to_instance: - is_valid(id) + for key in id_to_instance: + is_valid(key) # Collect the valid data - valid_data = [instance for id, instance in id_to_instance.items() if id_to_validity[id]] + valid_data = [instance for key, instance in id_to_instance.items() if id_to_validity[key]] return valid_data diff --git a/akm_tools/validation/data_instance_validators.py b/akm_tools/validation/data_instance_validators.py index d729738..cc3f8a6 100644 --- a/akm_tools/validation/data_instance_validators.py +++ b/akm_tools/validation/data_instance_validators.py @@ -83,8 +83,8 @@ def validate(self, instance: dict, **kwargs): self.object_validators_dict[instance["entityTypeID"]].iter_errors(instance), key=lambda e: e.path, ) - base_error_msg += "\n".join(x.message for x in additioanl_error_info) - base_error_msg += "\n" + base_error_msg += "\n".join(x.message for x in additioanl_error_info) + base_error_msg += "\n" return False, base_error_msg except Exception as e: raise e diff --git a/tests/conftest.py b/tests/conftest.py index c71549e..e161503 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,8 +12,9 @@ def simple_schema(): "type": {"type": "string"}, "age": {"type": "number"}, "id": {"type": "string"}, + "entityTypeID": {"type": "string"}, }, - "required": ["id"], + "required": ["id", "entityTypeID"], "additionalProperties": False, } return schema @@ -22,8 +23,8 @@ def simple_schema(): @pytest.fixture def simple_data(): data = [ - {"type": "John", "age": 30, "id": "unique_id_1"}, - {"type": "Jane", "age": 25, "id": "unique_id_2"}, + {"type": "John", "age": 30, "id": "unique_id_1", "entityTypeID": "type1"}, + {"type": "Jane", "age": 25, "id": "unique_id_2", "entityTypeID": "type2"}, ] return data @@ -31,15 +32,18 @@ def simple_data(): @pytest.fixture def simple_data_with_more_attributes(): data = [ - {"type": "John", "age": 30, "id": "unique_id_1", "extra_attribute": "wild"}, - {"type": "Jane", "age": 25, "id": "unique_id_2", "extra_attribute": "grass"}, + {"type": "John", "age": 30, "id": "unique_id_1", "entityTypeID": "type1", "extra_attribute": "wild"}, + {"type": "Jane", "age": 25, "id": "unique_id_2", "entityTypeID": "type2", "extra_attribute": "grass"}, ] return data @pytest.fixture def simple_data_without_required_attribute(): - data = [{"type": "John", "age": 30}, {"type": "Jane", "age": 25}] + data = [ + {"type": "John", "age": 30, "entityTypeID": "type1"}, + {"type": "Jane", "age": 25, "entityTypeID": "type2"} + ] return data @@ -58,9 +62,10 @@ def complex_schema_with_defs(): "type": "object", "properties": { "id": {"type": "string"}, + "entityTypeID": {"type": "string"}, "definition": {"type": "string"}, }, - "required": ["id"], + "required": ["id", "entityTypeID"], }, "ObjectType1": { "$id": "complexSchema.ObjectType1", @@ -69,9 +74,8 @@ def complex_schema_with_defs(): "properties": { "name": {"type": "string"}, "description": {"type": "string"}, - "type": {"type": "string", "const": "ObjectType1"}, }, - "required": ["name", "type"], + "required": ["name"], "unevaluatedProperties": False, }, "ObjectType2": { @@ -80,9 +84,8 @@ def complex_schema_with_defs(): "allOf": [{"$ref": "complexSchema.BaseClass"}], "properties": { "age": {"type": "number"}, - "type": {"type": "string", "const": "ObjectType2"}, }, - "required": ["type"], + "required": ["age"], "unevaluatedProperties": False, }, }, @@ -97,27 +100,25 @@ def complex_data(): data = [ { "id": "unique_id_1", + "entityTypeID": "type1", "definition": "Some def1", "name": "AttributeName", - "type": "ObjectType1", "description": "some desc", }, - {"id": "unique_id_2", "type": "ObjectType2", "age": 10}, + {"id": "unique_id_2", "entityTypeID": "type2", "age": 10}, ] return data @pytest.fixture -def complex_data_missing_required_attributes(): ## id/type is missing. +def complex_data_missing_required_attributes(): ## id/entityTypeID is missing. data = [ { "definition": "Some def1", "name": "AttributeName", - "type": "ObjectType1", "description": "some desc", }, { - "type": "ObjectType2", "age": 10, }, ] @@ -129,15 +130,15 @@ def complex_data_with_additional_attributes(): data = [ { "id": "unique_id_1", + "entityTypeID": "typObjectType1e1", "definition": "Some def1", "name": "AttributeName", - "type": "ObjectType1", "description": "some desc", "extra_attribute": "wild", }, { "id": "unique_id_2", - "type": "ObjectType2", + "entityTypeID": "ObjectType2", "age": 10, "extra_attribute": "grass", }, @@ -150,16 +151,16 @@ def data_with_duplicate_ids(): data = [ { "id": "unique_id_1", + "entityTypeID": "type1", "definition": "Some def1", "name": "AttributeName", - "type": "ObjectType1", "description": "some desc", }, { "id": "unique_id_1", + "entityTypeID": "type1", "definition": "Some def2", "name": "AttributeName2", - "type": "ObjectType2", "description": "some desc2", }, ] @@ -167,7 +168,7 @@ def data_with_duplicate_ids(): @pytest.fixture -def scehma_with_extensions(): +def schema_with_extensions(): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "schema_with_extensions", @@ -178,9 +179,9 @@ def scehma_with_extensions(): "properties": { "id": {"type": "string"}, "description": {"type": "string"}, - "entityType": {"type": "string", "const": "ObjectType3"}, + "entityTypeID": {"type": "string", "const": "ObjectType3"}, }, - "required": ["entityType"], + "required": ["id","entityTypeID"], "unevaluatedProperties": False, } schema_extension = { @@ -201,7 +202,7 @@ def data_with_extended_properties(): data = [ { "id": "unique_id1", - "entityType": "ObjectType3", + "entityTypeID": "ObjectType3", "extended_property": "any string", } ] @@ -213,11 +214,11 @@ def overlay_existing_data_with_addional_properties(): data = [ { "id": "unique_id1", - "entityType": "ObjectType3", + "entityTypeID": "ObjectType3", }, { "id": "unique_id1", - "entityType": "ObjectType3", + "entityTypeID": "ObjectType3", "extended_property": "any string", }, ] @@ -225,16 +226,16 @@ def overlay_existing_data_with_addional_properties(): @pytest.fixture -def ovewrite_existing_data(): +def overwrite_existing_data(): data = [ { "id": "unique_id1", + "entityTypeID": "ObjectType3", "description": "description for unique_id1", - "entityType": "ObjectType3", }, { "id": "unique_id1", - "entityType": "CHANGED", + "entityTypeID": "ObjectType3", "description": "description CHANGED", "extended_property": "any string", }, diff --git a/tests/test_AllDataContextValidators.py b/tests/test_AllDataContextValidators.py index 2b8e91a..06552ca 100644 --- a/tests/test_AllDataContextValidators.py +++ b/tests/test_AllDataContextValidators.py @@ -1,19 +1,22 @@ import pytest from akm_tools.validation.data_context_validators import AllDataContextValidators - def test_create_instance_dict(): """ Test case for the create_instance_dict method of the AllDataContextValidators class. This test checks if the create_instance_dict method correctly creates a dictionary - that maps instance id's to a dictionary containing the count of instances with that ID - and a list of the instances themselves. + that maps instance composite keys (id, entityTypeID) to a dictionary containing the count + of instances with that composite key and a list of the instances themselves. """ - all_data = [{"id": "1a", "name": "test1"}, {"id": "2b", "name": "test2"}, {"id": "1a", "name": "test3"}] + all_data = [ + {"id": "1a", "entityTypeID": "type1", "name": "test1"}, + {"id": "2b", "entityTypeID": "type2", "name": "test2"}, + {"id": "1a", "entityTypeID": "type1", "name": "test3"} + ] instance_dict = AllDataContextValidators.create_instance_dict(all_data) expected_dict = { - "1a": {"count": 2, "instances": [{"id": "1a", "name": "test1"}, {"id": "1a", "name": "test3"}]}, - "2b": {"count": 1, "instances": [{"id": "2b", "name": "test2"}]}, + ("1a", "type1"): {"count": 2, "instances": [{"id": "1a", "entityTypeID": "type1", "name": "test1"}, {"id": "1a", "entityTypeID": "type1", "name": "test3"}]}, + ("2b", "type2"): {"count": 1, "instances": [{"id": "2b", "entityTypeID": "type2", "name": "test2"}]}, } assert instance_dict == expected_dict, "The instance dictionary was not created correctly." diff --git a/tests/test_CoreJsonSchemaValidator.py b/tests/test_CoreJsonSchemaValidator.py index a84d678..2b50551 100644 --- a/tests/test_CoreJsonSchemaValidator.py +++ b/tests/test_CoreJsonSchemaValidator.py @@ -68,8 +68,8 @@ def test_complex_data_validator_with_invalid_attribute(complex_schema_with_defs, assert all(valid_data) == False -def test_complex_data_validator_with_extended_data(scehma_with_extensions, data_with_extended_properties): - schema, registry = scehma_with_extensions +def test_complex_data_validator_with_extended_data(schema_with_extensions, data_with_extended_properties): + schema, registry = schema_with_extensions complex_data_validator = CoreJsonSchemaValidator(schema=schema, extended_schema_dir=None) complex_data_validator.configure_registry(registry) valid_data = [] @@ -79,8 +79,8 @@ def test_complex_data_validator_with_extended_data(scehma_with_extensions, data_ assert all(valid_data) == True -def test_complex_data_validator_with_extended_data(scehma_with_extensions, overlay_existing_data_with_addional_properties): - schema, registry = scehma_with_extensions +def test_complex_data_validator_with_overlaid_data(schema_with_extensions, overlay_existing_data_with_addional_properties): + schema, registry = schema_with_extensions complex_data_validator = CoreJsonSchemaValidator(schema=schema, extended_schema_dir=None) complex_data_validator.configure_registry(registry) valid_data = [] @@ -88,3 +88,14 @@ def test_complex_data_validator_with_extended_data(scehma_with_extensions, overl is_valid, _ = complex_data_validator.validate(instance=instance) valid_data.append(is_valid) assert all(valid_data) == True + + +def test_complex_data_validator_with_overwritten_data(schema_with_extensions, overwrite_existing_data): + schema, registry = schema_with_extensions + complex_data_validator = CoreJsonSchemaValidator(schema=schema, extended_schema_dir=None) + complex_data_validator.configure_registry(registry) + valid_data = [] + for instance in overwrite_existing_data: + is_valid, _ = complex_data_validator.validate(instance=instance) + valid_data.append(is_valid) + assert all(valid_data) == True diff --git a/tests/test_CrossReferenceValidator.py b/tests/test_CrossReferenceValidator.py index 3abd88a..18b7a63 100644 --- a/tests/test_CrossReferenceValidator.py +++ b/tests/test_CrossReferenceValidator.py @@ -51,21 +51,22 @@ def circular_references(): def test_invalid_chain_of_references(invalid_chain_of_references): validator = CrossReferenceValidator() valid_data = validator.validate_data_contexts(invalid_chain_of_references) - assert len(valid_data) == 0, "The validator should return False for all instances" + assert len(valid_data) == 0, "The validator should return an empty list for invalid references" def test_valid_chain_of_reference(valid_chain_of_reference): validator = CrossReferenceValidator() valid_data = validator.validate_data_contexts(valid_chain_of_reference) - assert len(valid_data) == 3, "The validator should return True for valid cross-references" + assert len(valid_data) == 3, "The validator should return all instances for valid cross-references" def test_reference_not_present(reference_not_present): validator = CrossReferenceValidator() valid_data = validator.validate_data_contexts(reference_not_present) - assert len(valid_data) == 0, "The validator should return False for all instances" + assert len(valid_data) == 0, "The validator should return an empty list for missing references" def test_circular_references(circular_references): validator = CrossReferenceValidator() - assert validator.validate_data_contexts(circular_references) == [] + valid_data = validator.validate_data_contexts(circular_references) + assert len(valid_data) == 0, "The validator should return an empty list for circular references" diff --git a/tests/test_ExtendedInstanceContentValidator.py b/tests/test_ExtendedInstanceContentValidator.py index e516f11..d2a7028 100644 --- a/tests/test_ExtendedInstanceContentValidator.py +++ b/tests/test_ExtendedInstanceContentValidator.py @@ -15,30 +15,30 @@ def test_extended_data_is_used(overlay_existing_data_with_addional_properties): valid_data = validator.validate_data_contexts(overlay_existing_data_with_addional_properties) assert valid_data[0] == { "id": "unique_id1", - "entityType": "ObjectType3", + "entityTypeID": "ObjectType3", "extended_property": "any string", } assert len(validator.warning_messages) == 1 assert len(validator.error_messages) == 0 -def test_overriding_base_data_not_allowed(ovewrite_existing_data): +def test_overriding_base_data_not_allowed(overwrite_existing_data): validator = ExtendedInstanceContentValidator() - valid_data = validator.validate_data_contexts(ovewrite_existing_data) + valid_data = validator.validate_data_contexts(overwrite_existing_data) assert valid_data[0] == { "id": "unique_id1", + "entityTypeID": "ObjectType3", "description": "description for unique_id1", - "entityType": "ObjectType3", } assert len(validator.warning_messages) == 0 assert len(validator.error_messages) == 1 -def test_overriding_base_data_in_debug_mode_raises_exception(ovewrite_existing_data): +def test_overriding_base_data_in_debug_mode_raises_exception(overwrite_existing_data): GlobalDebugConfig.set_debug_mode() validator = ExtendedInstanceContentValidator() try: - valid_data = validator.validate_data_contexts(ovewrite_existing_data) + valid_data = validator.validate_data_contexts(overwrite_existing_data) pytest.fail("BaseInstanceOverwiteException was not raised when expected.") except BaseInstanceOverwiteException as e: assert True