bumps to 1.0.0 + docs cleanup (#1809)
* removes blog files

* updates schema docs for nested references

* updates docs to use nested instead of parent child

* adds more migration tests

* bumps to 1.0.0

* adds scd2 tests
rudolfix authored Sep 16, 2024
1 parent c056b83 commit 866bce3
Showing 97 changed files with 455 additions and 10,665 deletions.
2 changes: 1 addition & 1 deletion dlt/common/normalizers/naming/snake_case.py
@@ -21,7 +21,7 @@ class NamingConvention(BaseNamingConvention):
- Replaces all trailing `_` with `x`
- Replaces `+` and `*` with `x`, `-` with `_`, `@` with `a` and `|` with `l`
-    Uses __ as patent-child separator for tables and flattened column names.
+    Uses __ as parent-child separator for tables and flattened column names.
"""

RE_UNDERSCORES: ClassVar[REPattern] = RE_UNDERSCORES
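For context, a minimal sketch of the convention this docstring describes, assuming dlt's public `NamingConvention` class and its `normalize_identifier`/`make_path` methods; outputs are illustrative, not asserted:

```python
from dlt.common.normalizers.naming.snake_case import NamingConvention

naming = NamingConvention()
# camelCase identifiers fold to snake_case, special characters are substituted
print(naming.normalize_identifier("UserName"))    # e.g. "user_name"
print(naming.normalize_identifier("rate|value"))  # e.g. "ratelvalue" ("|" -> "l")
# "__" joins table and flattened column path segments
print(naming.make_path("orders", "items"))        # e.g. "orders__items"
```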
3 changes: 2 additions & 1 deletion dlt/common/schema/migrations.py
@@ -34,7 +34,8 @@ def migrate_schema(schema_dict: DictStrAny, from_engine: int, to_engine: int) ->
# current version of the schema
current = cast(TStoredSchema, schema_dict)
# add default normalizers and root hash propagation
-    normalizers = explicit_normalizers()
+    # use explicit None to get default settings. ignore any naming conventions
+    normalizers = explicit_normalizers(naming=None, json_normalizer=None)
current["normalizers"], _, _ = import_normalizers(normalizers, normalizers)
current["normalizers"]["json"]["config"] = {
"propagation": {"root": {"_dlt_id": "_dlt_root_id"}}
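The propagation block this migration writes has the shape below (a sketch of the config value taken straight from the hunk above): the root table's `_dlt_id` is carried into every nested table as `_dlt_root_id`.

```python
# config written into schema["normalizers"]["json"]["config"]
config = {
    "propagation": {
        # source column in the root table -> column name in nested rows
        "root": {"_dlt_id": "_dlt_root_id"},
    }
}
```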
2 changes: 1 addition & 1 deletion dlt/common/schema/typing.py
Expand Up @@ -138,6 +138,7 @@ class TColumnPropInfo(NamedTuple):

class TColumnType(TypedDict, total=False):
data_type: Optional[TDataType]
+    nullable: Optional[bool]
precision: Optional[int]
scale: Optional[int]
timezone: Optional[bool]
@@ -147,7 +148,6 @@ class TColumnSchemaBase(TColumnType, total=False):
"""TypedDict that defines basic properties of a column: name, data type and nullable"""

name: Optional[str]
-    nullable: Optional[bool]


class TColumnSchema(TColumnSchemaBase, total=False):
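With `nullable` moved from `TColumnSchemaBase` into `TColumnType`, nullability now travels with the data type itself. A hedged sketch of a column dict under the new layout:

```python
from dlt.common.schema.typing import TColumnSchema

column: TColumnSchema = {
    "name": "amount",
    "data_type": "decimal",
    "precision": 38,
    "scale": 9,
    "nullable": False,  # part of TColumnType after this change
}
```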
2 changes: 1 addition & 1 deletion dlt/destinations/sql_jobs.py
@@ -782,7 +782,7 @@ def gen_scd2_sql(
# insert list elements for new active records in nested tables
nested_tables = table_chain[1:]
if nested_tables:
-        # TODO: - based on deterministic child hashes (OK)
+        # TODO: - based on deterministic nested hashes (OK)
# - if row hash changes all is right
# - if it does not we only capture new records, while we should replace existing with those in stage
# - this write disposition is way more similar to regular merge (how root tables are handled is different, other tables handled same)
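For context, a hedged sketch of a resource that exercises `gen_scd2_sql`: the scd2 merge strategy on records whose nested list produces the nested tables the comments above refer to.

```python
import dlt

@dlt.resource(write_disposition={"disposition": "merge", "strategy": "scd2"})
def customers():
    # the nested "addresses" list becomes a nested table handled by gen_scd2_sql
    yield {"id": 1, "name": "Alice", "addresses": [{"city": "Berlin"}]}
```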
2 changes: 1 addition & 1 deletion dlt/extract/source.py
@@ -232,7 +232,7 @@ def max_table_nesting(self, value: int) -> None:

@property
def root_key(self) -> bool:
"""Enables merging on all resources by propagating root foreign key to child tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge"""
"""Enables merging on all resources by propagating root foreign key to nested tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge"""
# this also check the normalizer type
config = RelationalNormalizer.get_normalizer_config(self._schema).get("propagation")
data_normalizer = self._schema.data_item_normalizer
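A minimal sketch of the pattern the docstring describes, with a hypothetical `demo_source`: enable `root_key` first, then flip the resource to merge.

```python
import dlt

@dlt.source
def demo_source():  # hypothetical source for illustration
    @dlt.resource(primary_key="id")
    def items():
        yield {"id": 1, "tags": [{"value": "a"}]}

    return items

source = demo_source()
source.root_key = True  # propagate the root foreign key to nested tables
source.items.apply_hints(write_disposition="merge")
```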
8 changes: 4 additions & 4 deletions dlt/load/utils.py
@@ -27,7 +27,7 @@ def get_completed_table_chain(
For append and merge write disposition, tables without jobs will be included, providing they have seen data (and were created in the destination)
Optionally `being_completed_job_id` can be passed that is considered to be completed before job itself moves in storage
"""
-    # returns ordered list of tables from parent to child leaf tables
+    # returns ordered list of tables from parent to nested leaf tables
table_chain: List[TTableSchema] = []
# allow for jobless tables for those write disposition
skip_jobless_table = top_merged_table["write_disposition"] not in (
@@ -99,7 +99,7 @@ def init_client(
# get all tables that actually have load jobs with data
tables_with_jobs = set(job.table_name for job in new_jobs) - tables_no_data

-    # get tables to truncate by extending tables with jobs with all their child tables
+    # get tables to truncate by extending tables with jobs with all their nested tables
initial_truncate_names = set(t["name"] for t in truncate_tables) if truncate_tables else set()
truncate_table_names = set(
_extend_tables_with_table_chain(
@@ -198,13 +198,13 @@ def _extend_tables_with_table_chain(
haven't seen data or are not included by `include_table_filter`.
Note that for root tables with replace and merge, the filter for tables that do not have jobs
-    Returns an unordered set of table names and their child tables
+    Returns an unordered set of table names and their nested tables
"""
result: Set[str] = set()
for table_name in tables:
top_job_table = get_root_table(schema.tables, table_name)
# for replace and merge write dispositions we should include tables
-        # without jobs in the table chain, because child tables may need
+        # without jobs in the table chain, because nested tables may need
# processing due to changes in the root table
skip_jobless_table = top_job_table["write_disposition"] not in (
"replace",
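The chain extension above can be pictured with a simplified, self-contained sketch (not dlt's actual implementation): nested tables reference their parent via a `parent` key, so extending a set of tables means collecting all transitive descendants.

```python
from typing import Dict, Set

def extend_with_nested(tables: Dict[str, dict], names: Set[str]) -> Set[str]:
    """Return `names` plus every table that transitively points at them via 'parent'."""
    result = set(names)
    changed = True
    while changed:
        changed = False
        for name, table in tables.items():
            if name not in result and table.get("parent") in result:
                result.add(name)
                changed = True
    return result

tables = {
    "orders": {},
    "orders__items": {"parent": "orders"},
    "orders__items__discounts": {"parent": "orders__items"},
}
print(extend_with_nested(tables, {"orders"}))
# {'orders', 'orders__items', 'orders__items__discounts'}
```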
10 changes: 5 additions & 5 deletions docs/examples/nested_data/nested_data.py
@@ -35,11 +35,11 @@
CHUNK_SIZE = 10000


-# You can limit how deep dlt goes when generating child tables.
-# By default, the library will descend and generate child tables
+# You can limit how deep dlt goes when generating nested tables.
+# By default, the library will descend and generate nested tables
# for all nested lists, without a limit.
-# In this example, we specify that we only want to generate child tables up to level 2,
-# so there will be only one level of child tables within child tables.
+# In this example, we specify that we only want to generate nested tables up to level 2,
+# so there will be only one level of nested tables within nested tables.
@dlt.source(max_table_nesting=2)
def mongodb_collection(
connection_url: str = dlt.secrets.value,
@@ -149,7 +149,7 @@ def convert_mongo_objs(value: Any) -> Any:
# The third method involves applying data type hints to specific columns in the data.
# In this case, we tell dlt that column 'cast' (containing a list of actors)
# in 'movies' table should have type 'json' which means
-# that it will be loaded as JSON/struct and not as child table.
+# that it will be loaded as JSON/struct and not as nested table.
pipeline = dlt.pipeline(
pipeline_name="mongodb_pipeline",
destination="duckdb",
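Both knobs from this example in one hedged sketch: `max_table_nesting` caps nested table generation at two levels, and a `columns` hint keeps the `cast` list as a JSON column instead of a nested table.

```python
import dlt

@dlt.source(max_table_nesting=2)
def movies_source():
    # hypothetical resource; "cast" stays a JSON column, not a nested table
    @dlt.resource(columns={"cast": {"data_type": "json"}})
    def movies():
        yield {"title": "Heat", "cast": [{"name": "Al Pacino"}]}

    return movies
```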

This file was deleted.

File renamed without changes.
@@ -1,12 +1,11 @@
"""
---
-title: Load parent table records into child table
-description: Learn how to integrate custom parent keys into child records
-keywords: [parent child relationship, parent key]
+title: Propagate primary_key from root to nested tables
+description: Learn how to propagate any column to nested tables
+keywords: [root table, nested reference, parent key]
---
This example demonstrates handling data with parent-child relationships using the `dlt` library.
-You learn how to integrate specific fields (e.g., primary, foreign keys) from a parent record into each child record.
+You learn how to propagate specific fields (e.g., primary, foreign keys) from a parent record into each child record.
In this example, we'll explore how to:
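A hedged sketch of what the renamed example now demonstrates, assuming the relational normalizer's `update_normalizer_config` helper and a hypothetical `customers` resource: the root table's `id` is propagated into nested rows as `customer_id`.

```python
import dlt
from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer

@dlt.source
def customers_source():  # hypothetical source for illustration
    @dlt.resource(primary_key="id")
    def customers():
        yield {"id": 1, "name": "Alice", "orders": [{"order_id": 10}]}

    return customers

source = customers_source()
# propagate the root table's "id" into nested rows as "customer_id"
RelationalNormalizer.update_normalizer_config(
    source.schema,
    {"propagation": {"tables": {"customers": {"id": "customer_id"}}}},
)
```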
60 changes: 0 additions & 60 deletions docs/technical/customization_and_hacking.md

This file was deleted.

28 changes: 0 additions & 28 deletions docs/website/blog/2023-02-16-dlthub-mission.md

This file was deleted.

22 changes: 0 additions & 22 deletions docs/website/blog/2023-02-22-dlthub-who-we-serve.md

This file was deleted.
