bumps to 1.0.0 + docs cleanup (#1809)
* removes blog files

* updates schema docs for nested references

* updates docs to use nested instead of parent child

* adds more migration tests

* bumps to 1.0.0

* adds scd2 tests
rudolfix authored Sep 16, 2024
1 parent c056b83 commit 866bce3
Showing 97 changed files with 455 additions and 10,665 deletions.
2 changes: 1 addition & 1 deletion dlt/common/normalizers/naming/snake_case.py
@@ -21,7 +21,7 @@ class NamingConvention(BaseNamingConvention):
- Replaces all trailing `_` with `x`
- Replaces `+` and `*` with `x`, `-` with `_`, `@` with `a` and `|` with `l`
-    Uses __ as patent-child separator for tables and flattened column names.
+    Uses __ as parent-child separator for tables and flattened column names.
"""

RE_UNDERSCORES: ClassVar[REPattern] = RE_UNDERSCORES
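For context, a minimal sketch of the convention this docstring describes, assuming dlt's public `NamingConvention` class and its `normalize_identifier`/`make_path` methods; outputs are illustrative, not asserted:

```python
from dlt.common.normalizers.naming.snake_case import NamingConvention

naming = NamingConvention()
# camelCase identifiers fold to snake_case, special characters are substituted
print(naming.normalize_identifier("UserName"))    # e.g. "user_name"
print(naming.normalize_identifier("rate|value"))  # e.g. "ratelvalue" ("|" -> "l")
# "__" joins table and flattened column path segments
print(naming.make_path("orders", "items"))        # e.g. "orders__items"
```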
3 changes: 2 additions & 1 deletion dlt/common/schema/migrations.py
@@ -34,7 +34,8 @@ def migrate_schema(schema_dict: DictStrAny, from_engine: int, to_engine: int) ->
# current version of the schema
current = cast(TStoredSchema, schema_dict)
# add default normalizers and root hash propagation
-    normalizers = explicit_normalizers()
+    # use explicit None to get default settings. ignore any naming conventions
+    normalizers = explicit_normalizers(naming=None, json_normalizer=None)
current["normalizers"], _, _ = import_normalizers(normalizers, normalizers)
current["normalizers"]["json"]["config"] = {
"propagation": {"root": {"_dlt_id": "_dlt_root_id"}}
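The propagation block this migration writes has the shape below (a sketch of the config value taken straight from the hunk above): the root table's `_dlt_id` is carried into every nested table as `_dlt_root_id`.

```python
# config written into schema["normalizers"]["json"]["config"]
config = {
    "propagation": {
        # source column in the root table -> column name in nested rows
        "root": {"_dlt_id": "_dlt_root_id"},
    }
}
```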
2 changes: 1 addition & 1 deletion dlt/common/schema/typing.py
Expand Up @@ -138,6 +138,7 @@ class TColumnPropInfo(NamedTuple):

class TColumnType(TypedDict, total=False):
data_type: Optional[TDataType]
+    nullable: Optional[bool]
precision: Optional[int]
scale: Optional[int]
timezone: Optional[bool]
@@ -147,7 +148,6 @@ class TColumnSchemaBase(TColumnType, total=False):
"""TypedDict that defines basic properties of a column: name, data type and nullable"""

name: Optional[str]
-    nullable: Optional[bool]


class TColumnSchema(TColumnSchemaBase, total=False):
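With `nullable` moved from `TColumnSchemaBase` into `TColumnType`, nullability now travels with the data type itself. A hedged sketch of a column dict under the new layout:

```python
from dlt.common.schema.typing import TColumnSchema

column: TColumnSchema = {
    "name": "amount",
    "data_type": "decimal",
    "precision": 38,
    "scale": 9,
    "nullable": False,  # part of TColumnType after this change
}
```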
2 changes: 1 addition & 1 deletion dlt/destinations/sql_jobs.py
@@ -782,7 +782,7 @@ def gen_scd2_sql(
# insert list elements for new active records in nested tables
nested_tables = table_chain[1:]
if nested_tables:
-        # TODO: - based on deterministic child hashes (OK)
+        # TODO: - based on deterministic nested hashes (OK)
# - if row hash changes all is right
# - if it does not we only capture new records, while we should replace existing with those in stage
# - this write disposition is way more similar to regular merge (how root tables are handled is different, other tables handled same)
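For context, a hedged sketch of a resource that exercises `gen_scd2_sql`: the scd2 merge strategy on records whose nested list produces the nested tables the comments above refer to.

```python
import dlt

@dlt.resource(write_disposition={"disposition": "merge", "strategy": "scd2"})
def customers():
    # the nested "addresses" list becomes a nested table handled by gen_scd2_sql
    yield {"id": 1, "name": "Alice", "addresses": [{"city": "Berlin"}]}
```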
2 changes: 1 addition & 1 deletion dlt/extract/source.py
@@ -232,7 +232,7 @@ def max_table_nesting(self, value: int) -> None:

@property
def root_key(self) -> bool:
"""Enables merging on all resources by propagating root foreign key to child tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge"""
"""Enables merging on all resources by propagating root foreign key to nested tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge"""
# this also check the normalizer type
config = RelationalNormalizer.get_normalizer_config(self._schema).get("propagation")
data_normalizer = self._schema.data_item_normalizer
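A minimal sketch of the pattern the docstring describes, with a hypothetical `demo_source`: enable `root_key` first, then flip the resource to merge.

```python
import dlt

@dlt.source
def demo_source():  # hypothetical source for illustration
    @dlt.resource(primary_key="id")
    def items():
        yield {"id": 1, "tags": [{"value": "a"}]}

    return items

source = demo_source()
source.root_key = True  # propagate the root foreign key to nested tables
source.items.apply_hints(write_disposition="merge")
```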
8 changes: 4 additions & 4 deletions dlt/load/utils.py
@@ -27,7 +27,7 @@ def get_completed_table_chain(
For append and merge write disposition, tables without jobs will be included, providing they have seen data (and were created in the destination)
Optionally `being_completed_job_id` can be passed that is considered to be completed before job itself moves in storage
"""
-    # returns ordered list of tables from parent to child leaf tables
+    # returns ordered list of tables from parent to nested leaf tables
table_chain: List[TTableSchema] = []
# allow for jobless tables for those write disposition
skip_jobless_table = top_merged_table["write_disposition"] not in (
@@ -99,7 +99,7 @@ def init_client(
# get all tables that actually have load jobs with data
tables_with_jobs = set(job.table_name for job in new_jobs) - tables_no_data

-    # get tables to truncate by extending tables with jobs with all their child tables
+    # get tables to truncate by extending tables with jobs with all their nested tables
initial_truncate_names = set(t["name"] for t in truncate_tables) if truncate_tables else set()
truncate_table_names = set(
_extend_tables_with_table_chain(
@@ -198,13 +198,13 @@ def _extend_tables_with_table_chain(
haven't seen data or are not included by `include_table_filter`.
Note that for root tables with replace and merge, the filter for tables that do not have jobs
-    Returns an unordered set of table names and their child tables
+    Returns an unordered set of table names and their nested tables
"""
result: Set[str] = set()
for table_name in tables:
top_job_table = get_root_table(schema.tables, table_name)
# for replace and merge write dispositions we should include tables
-        # without jobs in the table chain, because child tables may need
+        # without jobs in the table chain, because nested tables may need
# processing due to changes in the root table
skip_jobless_table = top_job_table["write_disposition"] not in (
"replace",
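The chain extension above can be pictured with a simplified, self-contained sketch (not dlt's actual implementation): nested tables reference their parent via a `parent` key, so extending a set of tables means collecting all transitive descendants.

```python
from typing import Dict, Set

def extend_with_nested(tables: Dict[str, dict], names: Set[str]) -> Set[str]:
    """Return `names` plus every table that transitively points at them via 'parent'."""
    result = set(names)
    changed = True
    while changed:
        changed = False
        for name, table in tables.items():
            if name not in result and table.get("parent") in result:
                result.add(name)
                changed = True
    return result

tables = {
    "orders": {},
    "orders__items": {"parent": "orders"},
    "orders__items__discounts": {"parent": "orders__items"},
}
print(extend_with_nested(tables, {"orders"}))
# {'orders', 'orders__items', 'orders__items__discounts'}
```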
10 changes: 5 additions & 5 deletions docs/examples/nested_data/nested_data.py
@@ -35,11 +35,11 @@
CHUNK_SIZE = 10000


-# You can limit how deep dlt goes when generating child tables.
-# By default, the library will descend and generate child tables
+# You can limit how deep dlt goes when generating nested tables.
+# By default, the library will descend and generate nested tables
# for all nested lists, without a limit.
-# In this example, we specify that we only want to generate child tables up to level 2,
-# so there will be only one level of child tables within child tables.
+# In this example, we specify that we only want to generate nested tables up to level 2,
+# so there will be only one level of nested tables within nested tables.
@dlt.source(max_table_nesting=2)
def mongodb_collection(
connection_url: str = dlt.secrets.value,
@@ -149,7 +149,7 @@ def convert_mongo_objs(value: Any) -> Any:
# The third method involves applying data type hints to specific columns in the data.
# In this case, we tell dlt that column 'cast' (containing a list of actors)
# in 'movies' table should have type 'json' which means
-# that it will be loaded as JSON/struct and not as child table.
+# that it will be loaded as JSON/struct and not as nested table.
pipeline = dlt.pipeline(
pipeline_name="mongodb_pipeline",
destination="duckdb",
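Both knobs from this example in one hedged sketch: `max_table_nesting` caps nested table generation at two levels, and a `columns` hint keeps the `cast` list as a JSON column instead of a nested table.

```python
import dlt

@dlt.source(max_table_nesting=2)
def movies_source():
    # hypothetical resource; "cast" stays a JSON column, not a nested table
    @dlt.resource(columns={"cast": {"data_type": "json"}})
    def movies():
        yield {"title": "Heat", "cast": [{"name": "Al Pacino"}]}

    return movies
```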

This file was deleted.

File renamed without changes.
@@ -1,12 +1,11 @@
"""
---
-title: Load parent table records into child table
-description: Learn how to integrate custom parent keys into child records
-keywords: [parent child relationship, parent key]
+title: Propagate primary_key from root to nested tables
+description: Learn how to propagate any column to nested tables
+keywords: [root table, nested reference, parent key]
---
This example demonstrates handling data with parent-child relationships using the `dlt` library.
-You learn how to integrate specific fields (e.g., primary, foreign keys) from a parent record into each child record.
+You learn how to propagate specific fields (e.g., primary, foreign keys) from a parent record into each child record.
In this example, we'll explore how to:
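A hedged sketch of what the renamed example now demonstrates, assuming the relational normalizer's `update_normalizer_config` helper and a hypothetical `customers` resource: the root table's `id` is propagated into nested rows as `customer_id`.

```python
import dlt
from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer

@dlt.source
def customers_source():  # hypothetical source for illustration
    @dlt.resource(primary_key="id")
    def customers():
        yield {"id": 1, "name": "Alice", "orders": [{"order_id": 10}]}

    return customers

source = customers_source()
# propagate the root table's "id" into nested rows as "customer_id"
RelationalNormalizer.update_normalizer_config(
    source.schema,
    {"propagation": {"tables": {"customers": {"id": "customer_id"}}}},
)
```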
60 changes: 0 additions & 60 deletions docs/technical/customization_and_hacking.md

This file was deleted.

28 changes: 0 additions & 28 deletions docs/website/blog/2023-02-16-dlthub-mission.md

This file was deleted.

22 changes: 0 additions & 22 deletions docs/website/blog/2023-02-22-dlthub-who-we-serve.md

This file was deleted.
