Skip to content

Commit

Permalink
Primary key is now (id,entityTypeID) for context validators
Browse files Browse the repository at this point in the history
adapted test data to reflect the changed requirements
  • Loading branch information
MohitYadav-codes committed Jun 27, 2024
1 parent 1c44e46 commit 001dadd
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 83 deletions.
68 changes: 36 additions & 32 deletions akm_tools/validation/data_context_validators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Dict, List, Any
from typing import Dict, List, Any, Tuple
from .global_debug_config import GlobalDebugConfig
from .custom_exceptions import IDConflictException, BaseInstanceOverwiteException, InvalidReferentIDException

Expand All @@ -21,15 +21,15 @@ def create_instance_dict(self, all_data):
# Populate the instance_dict dictionary
instance_dict = {}
for instance in all_data:
if "id" in instance:
instance_id = instance["id"]
if instance_id not in instance_dict:
# Initialize the ID key with a list containing the current instance
instance_dict[instance_id] = {"count": 1, "instances": [instance]}
if "id" in instance and "entityTypeID" in instance:
instance_key = (instance["id"], instance["entityTypeID"])
if instance_key not in instance_dict:
# Initialize the composite key with a list containing the current instance
instance_dict[instance_key] = {"count": 1, "instances": [instance]}
else:
# Append the current instance to the list and increment the count
instance_dict[instance_id]["instances"].append(instance)
instance_dict[instance_id]["count"] += 1
instance_dict[instance_key]["instances"].append(instance)
instance_dict[instance_key]["count"] += 1
return instance_dict

def _handle_error(self, exception_type, *args):
Expand All @@ -42,7 +42,7 @@ def _handle_error(self, exception_type, *args):

class ExtendedInstanceContentValidator(AllDataContextValidators):
"""
For Instances with duplicate "id", where one extends the other,
For Instances with duplicate ("id", "entityTypeID"), where one extends the other,
check if the extended Instance does not overwrite content of base instance
"""

Expand All @@ -53,12 +53,12 @@ def validate_data_contexts(self, all_data: List[Dict[str, Any]]):
valid_data = []
instance_dict = self.__class__.create_instance_dict(all_data)

# Handle instances with same ids and prepare valid_data
for instance_id, instance_content in instance_dict.items():
# Handle instances with same composite keys and prepare valid_data
for instance_key, instance_content in instance_dict.items():
if len(instance_content) > 2:
self._handle_multiple_id_conflicts(instance_content)
if instance_content["count"] == 2:
# check if the insances are not overriding , but only extending existing data.
# check if the instances are not overriding, but only extending existing data.
is_valid_extension, base_instance, extended_instance = self.__class__.check_data_is_extended_not_overwritten(
instance_content["instances"]
)
Expand Down Expand Up @@ -100,44 +100,48 @@ def __init__(self):
self.id_set = set()

def validate_data_contexts(self, all_data):
# Create a dictionary mapping IDs to data instances
id_to_instance = {instance["id"]: instance for instance in all_data if "id" in instance}

# Create a dictionary mapping IDs to their validity
id_to_validity = {id: None for id in id_to_instance}

def is_valid(id):
# If the ID is not in the dictionary, it's invalid
if id not in id_to_instance:
# Create a dictionary mapping composite keys to data instances
id_to_instance = {
(instance["id"], instance["entityTypeID"]): instance
for instance in all_data if "id" in instance and "entityTypeID" in instance
}

# Create a dictionary mapping composite keys to their validity
id_to_validity = {key: None for key in id_to_instance}

def is_valid(key: Tuple[Any, Any]):
# If the composite key is not in the dictionary, it's invalid
if key not in id_to_instance:
return False

# If the validity has already been determined, return it
if id_to_validity[id] is not None:
return id_to_validity[id]
if id_to_validity[key] is not None:
return id_to_validity[key]

# Mark the ID as being checked to handle circular references
id_to_validity[id] = False
# Mark the composite key as being checked to handle circular references
id_to_validity[key] = False

instance = id_to_instance[id]
for key, value in instance.items():
instance = id_to_instance[key]
for value in instance.values():
if (
isinstance(value, dict)
and "referentEntityTypeID" in value ## this is hard dependency to schema for akm.Reference
and "referentID" in value
):
if not is_valid(value["referentID"]):
referent_key = (value["referentID"], value["referentEntityTypeID"])
if not is_valid(referent_key):
return False

# If all references are valid, the instance is valid
id_to_validity[id] = True
id_to_validity[key] = True
return True

# Validate the references
for id in id_to_instance:
is_valid(id)
for key in id_to_instance:
is_valid(key)

# Collect the valid data
valid_data = [instance for id, instance in id_to_instance.items() if id_to_validity[id]]
valid_data = [instance for key, instance in id_to_instance.items() if id_to_validity[key]]

return valid_data

Expand Down
4 changes: 2 additions & 2 deletions akm_tools/validation/data_instance_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def validate(self, instance: dict, **kwargs):
self.object_validators_dict[instance["entityTypeID"]].iter_errors(instance),
key=lambda e: e.path,
)
base_error_msg += "\n".join(x.message for x in additioanl_error_info)
base_error_msg += "\n"
base_error_msg += "\n".join(x.message for x in additioanl_error_info)
base_error_msg += "\n"
return False, base_error_msg
except Exception as e:
raise e
59 changes: 30 additions & 29 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ def simple_schema():
"type": {"type": "string"},
"age": {"type": "number"},
"id": {"type": "string"},
"entityTypeID": {"type": "string"},
},
"required": ["id"],
"required": ["id", "entityTypeID"],
"additionalProperties": False,
}
return schema
Expand All @@ -22,24 +23,27 @@ def simple_schema():
@pytest.fixture
def simple_data():
data = [
{"type": "John", "age": 30, "id": "unique_id_1"},
{"type": "Jane", "age": 25, "id": "unique_id_2"},
{"type": "John", "age": 30, "id": "unique_id_1", "entityTypeID": "type1"},
{"type": "Jane", "age": 25, "id": "unique_id_2", "entityTypeID": "type2"},
]
return data


@pytest.fixture
def simple_data_with_more_attributes():
data = [
{"type": "John", "age": 30, "id": "unique_id_1", "extra_attribute": "wild"},
{"type": "Jane", "age": 25, "id": "unique_id_2", "extra_attribute": "grass"},
{"type": "John", "age": 30, "id": "unique_id_1", "entityTypeID": "type1", "extra_attribute": "wild"},
{"type": "Jane", "age": 25, "id": "unique_id_2", "entityTypeID": "type2", "extra_attribute": "grass"},
]
return data


@pytest.fixture
def simple_data_without_required_attribute():
data = [{"type": "John", "age": 30}, {"type": "Jane", "age": 25}]
data = [
{"type": "John", "age": 30, "entityTypeID": "type1"},
{"type": "Jane", "age": 25, "entityTypeID": "type2"}
]
return data


Expand All @@ -58,9 +62,10 @@ def complex_schema_with_defs():
"type": "object",
"properties": {
"id": {"type": "string"},
"entityTypeID": {"type": "string"},
"definition": {"type": "string"},
},
"required": ["id"],
"required": ["id", "entityTypeID"],
},
"ObjectType1": {
"$id": "complexSchema.ObjectType1",
Expand All @@ -69,9 +74,8 @@ def complex_schema_with_defs():
"properties": {
"name": {"type": "string"},
"description": {"type": "string"},
"type": {"type": "string", "const": "ObjectType1"},
},
"required": ["name", "type"],
"required": ["name"],
"unevaluatedProperties": False,
},
"ObjectType2": {
Expand All @@ -80,9 +84,8 @@ def complex_schema_with_defs():
"allOf": [{"$ref": "complexSchema.BaseClass"}],
"properties": {
"age": {"type": "number"},
"type": {"type": "string", "const": "ObjectType2"},
},
"required": ["type"],
"required": ["age"],
"unevaluatedProperties": False,
},
},
Expand All @@ -97,27 +100,25 @@ def complex_data():
data = [
{
"id": "unique_id_1",
"entityTypeID": "type1",
"definition": "Some def1",
"name": "AttributeName",
"type": "ObjectType1",
"description": "some desc",
},
{"id": "unique_id_2", "type": "ObjectType2", "age": 10},
{"id": "unique_id_2", "entityTypeID": "type2", "age": 10},
]
return data


@pytest.fixture
def complex_data_missing_required_attributes(): ## id/type is missing.
def complex_data_missing_required_attributes(): ## id/entityTypeID is missing.
data = [
{
"definition": "Some def1",
"name": "AttributeName",
"type": "ObjectType1",
"description": "some desc",
},
{
"type": "ObjectType2",
"age": 10,
},
]
Expand All @@ -129,15 +130,15 @@ def complex_data_with_additional_attributes():
data = [
{
"id": "unique_id_1",
"entityTypeID": "typObjectType1e1",
"definition": "Some def1",
"name": "AttributeName",
"type": "ObjectType1",
"description": "some desc",
"extra_attribute": "wild",
},
{
"id": "unique_id_2",
"type": "ObjectType2",
"entityTypeID": "ObjectType2",
"age": 10,
"extra_attribute": "grass",
},
Expand All @@ -150,24 +151,24 @@ def data_with_duplicate_ids():
data = [
{
"id": "unique_id_1",
"entityTypeID": "type1",
"definition": "Some def1",
"name": "AttributeName",
"type": "ObjectType1",
"description": "some desc",
},
{
"id": "unique_id_1",
"entityTypeID": "type1",
"definition": "Some def2",
"name": "AttributeName2",
"type": "ObjectType2",
"description": "some desc2",
},
]
return data


@pytest.fixture
def scehma_with_extensions():
def schema_with_extensions():
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "schema_with_extensions",
Expand All @@ -178,9 +179,9 @@ def scehma_with_extensions():
"properties": {
"id": {"type": "string"},
"description": {"type": "string"},
"entityType": {"type": "string", "const": "ObjectType3"},
"entityTypeID": {"type": "string", "const": "ObjectType3"},
},
"required": ["entityType"],
"required": ["id","entityTypeID"],
"unevaluatedProperties": False,
}
schema_extension = {
Expand All @@ -201,7 +202,7 @@ def data_with_extended_properties():
data = [
{
"id": "unique_id1",
"entityType": "ObjectType3",
"entityTypeID": "ObjectType3",
"extended_property": "any string",
}
]
Expand All @@ -213,28 +214,28 @@ def overlay_existing_data_with_addional_properties():
data = [
{
"id": "unique_id1",
"entityType": "ObjectType3",
"entityTypeID": "ObjectType3",
},
{
"id": "unique_id1",
"entityType": "ObjectType3",
"entityTypeID": "ObjectType3",
"extended_property": "any string",
},
]
return data


@pytest.fixture
def ovewrite_existing_data():
def overwrite_existing_data():
data = [
{
"id": "unique_id1",
"entityTypeID": "ObjectType3",
"description": "description for unique_id1",
"entityType": "ObjectType3",
},
{
"id": "unique_id1",
"entityType": "CHANGED",
"entityTypeID": "ObjectType3",
"description": "description CHANGED",
"extended_property": "any string",
},
Expand Down
15 changes: 9 additions & 6 deletions tests/test_AllDataContextValidators.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import pytest
from akm_tools.validation.data_context_validators import AllDataContextValidators


def test_create_instance_dict():
"""
Test case for the create_instance_dict method of the AllDataContextValidators class.
This test checks if the create_instance_dict method correctly creates a dictionary
that maps instance id's to a dictionary containing the count of instances with that ID
and a list of the instances themselves.
that maps instance composite keys (id, entityTypeID) to a dictionary containing the count
of instances with that composite key and a list of the instances themselves.
"""
all_data = [{"id": "1a", "name": "test1"}, {"id": "2b", "name": "test2"}, {"id": "1a", "name": "test3"}]
all_data = [
{"id": "1a", "entityTypeID": "type1", "name": "test1"},
{"id": "2b", "entityTypeID": "type2", "name": "test2"},
{"id": "1a", "entityTypeID": "type1", "name": "test3"}
]
instance_dict = AllDataContextValidators.create_instance_dict(all_data)
expected_dict = {
"1a": {"count": 2, "instances": [{"id": "1a", "name": "test1"}, {"id": "1a", "name": "test3"}]},
"2b": {"count": 1, "instances": [{"id": "2b", "name": "test2"}]},
("1a", "type1"): {"count": 2, "instances": [{"id": "1a", "entityTypeID": "type1", "name": "test1"}, {"id": "1a", "entityTypeID": "type1", "name": "test3"}]},
("2b", "type2"): {"count": 1, "instances": [{"id": "2b", "entityTypeID": "type2", "name": "test2"}]},
}
assert instance_dict == expected_dict, "The instance dictionary was not created correctly."
Loading

0 comments on commit 001dadd

Please sign in to comment.