Skip to content

Commit

Permalink
deprecates skip_complex_types Pydantic config, updates trace contract
Browse files Browse the repository at this point in the history
  • Loading branch information
rudolfix committed Sep 10, 2024
1 parent 98f9ad7 commit 249725a
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 36 deletions.
22 changes: 16 additions & 6 deletions dlt/common/libs/pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
is_subclass,
is_union_type,
)
from dlt.common.warnings import Dlt100DeprecationWarning

try:
from pydantic import BaseModel, ValidationError, Json, create_model
Expand Down Expand Up @@ -69,11 +70,12 @@ class DltConfig(TypedDict, total=False):
>>> class ItemModel(BaseModel):
>>> b: bool
>>> nested: Dict[str, Any]
>>> dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
>>> dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}
"""

skip_complex_types: bool
skip_nested_types: bool
"""If True, columns of complex types (`dict`, `list`, `BaseModel`) will be excluded from dlt schema generated from the model"""
skip_complex_types: bool # deprecated


def pydantic_to_table_schema_columns(
Expand All @@ -90,9 +92,17 @@ def pydantic_to_table_schema_columns(
Returns:
TTableSchemaColumns: table schema columns dict
"""
skip_complex_types = False
skip_nested_types = False
if hasattr(model, "dlt_config"):
skip_complex_types = model.dlt_config.get("skip_complex_types", False)
if "skip_complex_types" in model.dlt_config:
warnings.warn(
"`skip_complex_types` is deprecated, use `skip_nested_types` instead.",
Dlt100DeprecationWarning,
stacklevel=2,
)
skip_nested_types = model.dlt_config["skip_complex_types"]
else:
skip_nested_types = model.dlt_config.get("skip_nested_types", False)

result: TTableSchemaColumns = {}

Expand Down Expand Up @@ -136,7 +146,7 @@ def pydantic_to_table_schema_columns(
# try to coerce unknown type to text
data_type = "text"

if is_inner_type_pydantic_model and not skip_complex_types:
if is_inner_type_pydantic_model and not skip_nested_types:
result[name] = {
"name": name,
"data_type": "json",
Expand All @@ -154,7 +164,7 @@ def pydantic_to_table_schema_columns(
**hints,
"name": snake_case_naming_convention.make_path(name, hints["name"]),
}
elif data_type == "json" and skip_complex_types:
elif data_type == "json" and skip_nested_types:
continue
else:
result[name] = {
Expand Down
4 changes: 2 additions & 2 deletions docs/website/docs/general-usage/resource.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ from typing import ClassVar
from dlt.common.libs.pydantic import DltConfig

class UserWithNesting(User):
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

@dlt.resource(name="user", columns=UserWithNesting)
def get_users():
...
```

`"skip_complex_types"` omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default
`"skip_nested_types"` omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default
behavior of creating child tables for these fields.

We do not support `RootModel` that validate simple types. You can add such a validator yourself, see [data filtering section](#filter-transform-and-pivot-data).
Expand Down
2 changes: 1 addition & 1 deletion tests/common/normalizers/test_json_relational.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_preserve_json_value_with_hint(norm: RelationalNormalizer) -> None:
assert "value__json" not in flattened_row


def test_child_table_linking(norm: RelationalNormalizer) -> None:
def test_nested_table_linking(norm: RelationalNormalizer) -> None:
row = {"f": [{"l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}]}
# request _dlt_root_id propagation
add_dlt_root_id_propagation(norm)
Expand Down
2 changes: 1 addition & 1 deletion tests/libs/test_deltalake.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def arrow_data( # type: ignore[return]
assert dt.to_pyarrow_table().shape == (arrow_table.num_rows, arrow_table.num_columns)

# the previous table version should still exist
dt.load_version(1)
dt.load_as_version(1)
assert dt.to_pyarrow_table().shape == (arrow_table.num_rows * 2, arrow_table.num_columns)

# `merge` should resolve to `append` behavior
Expand Down
30 changes: 16 additions & 14 deletions tests/libs/test_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class User(BaseModel):
final_location: Final[Annotated[Union[str, int], None]] # type: ignore[misc]
final_optional: Final[Annotated[Optional[str], None]] # type: ignore[misc]

dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}


USER_INSTANCE_DATA = dict(
Expand Down Expand Up @@ -260,9 +260,9 @@ def test_pydantic_model_to_columns_annotated() -> None:
assert schema_from_user_class["final_optional"]["nullable"] is True


def test_pydantic_model_skip_complex_types() -> None:
def test_pydantic_model_skip_nested_types() -> None:
class SkipNestedModel(Model):
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

result = pydantic_to_table_schema_columns(SkipNestedModel)

Expand Down Expand Up @@ -393,7 +393,7 @@ class UserPipe(BaseModel):
final_location: Final[Annotated[Union[str, int], None]] # type: ignore[misc, syntax, unused-ignore]
final_optional: Final[Annotated[str | None, None]] # type: ignore[misc, syntax, unused-ignore]

dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

# TODO: move to separate test
model_freeze = apply_schema_contract_to_model(UserPipe, "evolve", "freeze")
Expand Down Expand Up @@ -426,7 +426,7 @@ def test_item_list_validation() -> None:
class ItemModel(BaseModel):
b: bool
opt: Optional[int] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

# non validating items removed from the list (both extra and declared)
discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")
Expand Down Expand Up @@ -563,7 +563,7 @@ class ItemModel(BaseModel):
def test_item_validation() -> None:
class ItemModel(BaseModel):
b: bool
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

# non validating items removed from the list (both extra and declared)
discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")
Expand Down Expand Up @@ -648,9 +648,10 @@ class Parent(BaseModel):
optional_parent_attribute: Optional[str] = None


def test_pydantic_model_flattened_when_skip_complex_types_is_true():
@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_pydantic_model_flattened_when_skip_nested_types_is_true(config_attr: str):
class MyParent(Parent):
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {config_attr: True} # type: ignore

schema = pydantic_to_table_schema_columns(MyParent)

Expand All @@ -673,10 +674,11 @@ class MyParent(Parent):
}


def test_considers_model_as_complex_when_skip_complex_types_is_false():
@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_considers_model_as_complex_when_skip_nested_types_is_false(config_attr: str):
class MyParent(Parent):
data_dictionary: Dict[str, Any] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}
dlt_config: ClassVar[DltConfig] = {config_attr: False} # type: ignore

schema = pydantic_to_table_schema_columns(MyParent)

Expand All @@ -691,11 +693,11 @@ class MyParent(Parent):
}


def test_considers_dictionary_as_complex_when_skip_complex_types_is_false():
def test_considers_dictionary_as_complex_when_skip_nested_types_is_false():
class MyParent(Parent):
data_list: List[str] = []
data_dictionary: Dict[str, Any] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

schema = pydantic_to_table_schema_columns(MyParent)

Expand All @@ -712,11 +714,11 @@ class MyParent(Parent):
}


def test_skip_json_types_when_skip_complex_types_is_true_and_field_is_not_pydantic_model():
def test_skip_json_types_when_skip_nested_types_is_true_and_field_is_not_pydantic_model():
class MyParent(Parent):
data_list: List[str] = []
data_dictionary: Dict[str, Any] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

schema = pydantic_to_table_schema_columns(MyParent)

Expand Down
4 changes: 2 additions & 2 deletions tests/load/pipeline/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class EventDetail(BaseModel):
is_complete: bool

class EventV1(BaseModel):
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

ver: int
id: str # noqa
Expand Down Expand Up @@ -184,7 +184,7 @@ class EventDetailV2(BaseModel):
time: Optional[datetime]

class EventV2(BaseModel):
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

ver: int
id: str # noqa
Expand Down
32 changes: 31 additions & 1 deletion tests/pipeline/cases/contracts/trace.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,21 @@ tables:
data_type:
data_type: text
nullable: true
precision:
data_type: bigint
nullable: true
scale:
data_type: bigint
nullable: true
timezone:
data_type: bool
nullable: true
nullable:
data_type: bool
nullable: true
variant:
data_type: bool
nullable: true
primary_key:
data_type: bool
nullable: true
Expand Down Expand Up @@ -666,7 +678,25 @@ tables:
unique:
data_type: bool
nullable: true
foreign_key:
row_key:
data_type: bool
nullable: true
parent_key:
data_type: bool
nullable: true
root_key:
data_type: bool
nullable: true
merge_key:
data_type: bool
nullable: true
partition:
data_type: bool
nullable: true
cluster:
data_type: bool
nullable: true
sort:
data_type: bool
nullable: true
parent: trace__steps__step_info__load_packages__tables
Expand Down
5 changes: 3 additions & 2 deletions tests/pipeline/test_dlt_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dlt.common.schema.typing import (
LOADS_TABLE_NAME,
PIPELINE_STATE_TABLE_NAME,
SCHEMA_ENGINE_VERSION,
VERSION_TABLE_NAME,
TStoredSchema,
)
Expand Down Expand Up @@ -274,7 +275,7 @@ def assert_github_pipeline_end_state(
pipeline.sync_destination()
# print(pipeline.working_dir)
# we have updated schema
assert pipeline.default_schema.ENGINE_VERSION == 9
assert pipeline.default_schema.ENGINE_VERSION == SCHEMA_ENGINE_VERSION
# make sure that schema hash retrieved from the destination is exactly the same as the schema hash that was in storage before the schema was wiped
assert pipeline.default_schema.stored_version_hash == orig_schema["version_hash"]

Expand Down Expand Up @@ -333,7 +334,7 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None:
)
pipeline = pipeline.drop()
pipeline.sync_destination()
assert pipeline.default_schema.ENGINE_VERSION == 9
assert pipeline.default_schema.ENGINE_VERSION == SCHEMA_ENGINE_VERSION
# schema version does not match; `dlt.attach` does not update to the right schema by itself
assert pipeline.default_schema.stored_version_hash != github_schema["version_hash"]
# state has hash
Expand Down
14 changes: 7 additions & 7 deletions tests/pipeline/test_pipeline_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class User(BaseModel):
user_label: UserLabel
user_labels: List[UserLabel]

dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

user = User(
user_id=1,
Expand Down Expand Up @@ -289,11 +289,11 @@ class Child(BaseModel):
optional_child_attribute: Optional[str] = None


def test_flattens_model_when_skip_complex_types_is_set() -> None:
def test_flattens_model_when_skip_nested_types_is_set() -> None:
class Parent(BaseModel):
child: Child
optional_parent_attribute: Optional[str] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

example_data = {
"optional_parent_attribute": None,
Expand Down Expand Up @@ -351,12 +351,12 @@ class Parent(BaseModel):
}


def test_considers_model_as_complex_when_skip_complex_types_is_not_set():
def test_considers_model_as_complex_when_skip_nested_types_is_not_set():
class Parent(BaseModel):
child: Child
optional_parent_attribute: Optional[str] = None
data_dictionary: Dict[str, Any] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

example_data = {
"optional_parent_attribute": None,
Expand Down Expand Up @@ -412,11 +412,11 @@ class Parent(BaseModel):
}


def test_skips_complex_fields_when_skip_complex_types_is_true_and_field_is_not_a_pydantic_model():
def test_skips_complex_fields_when_skip_nested_types_is_true_and_field_is_not_a_pydantic_model():
class Parent(BaseModel):
data_list: List[int] = []
data_dictionary: Dict[str, Any] = None
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

example_data = {
"optional_parent_attribute": None,
Expand Down

0 comments on commit 249725a

Please sign in to comment.