diff --git a/.github/workflows/maven-sonar-build.yml b/.github/workflows/maven-sonar-build.yml
index e0295d0cb1c6..5e5a03e86159 100644
--- a/.github/workflows/maven-sonar-build.yml
+++ b/.github/workflows/maven-sonar-build.yml
@@ -16,7 +16,7 @@ on:
push:
branches:
- main
- - '0.[0-9]+.[0-9]+'
+ - '[0-9]+.[0-9]+.[0-9]+'
paths:
- "openmetadata-service/**"
- "openmetadata-ui/**"
@@ -116,4 +116,4 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
fail_on_test_failures: true
- report_paths: 'openmetadata-service/target/surefire-reports/TEST-*.xml'
\ No newline at end of file
+ report_paths: 'openmetadata-service/target/surefire-reports/TEST-*.xml'
diff --git a/.github/workflows/sync-docs-v1.yml b/.github/workflows/sync-docs-v1.yml
index cde9663804e1..15e7d76f514e 100644
--- a/.github/workflows/sync-docs-v1.yml
+++ b/.github/workflows/sync-docs-v1.yml
@@ -39,7 +39,7 @@ jobs:
destination-repository-name: 'docs-v1'
user-email: openmetadata@getcollate.io
commit-message: See ORIGIN_COMMIT from $GITHUB_REF
- target-branch: publish
+ target-branch: main
- name: Push images
id: push_images
@@ -54,7 +54,7 @@ jobs:
destination-repository-name: 'docs-v1'
user-email: openmetadata@getcollate.io
commit-message: See ORIGIN_COMMIT from $GITHUB_REF
- target-branch: publish
+ target-branch: main
- name: Prepare Collate content and partials
id: prepare_collate
diff --git a/.run/Template JUnit.run.xml b/.run/Template JUnit.run.xml
index e9df4c41e253..a76cd50c6bd1 100644
--- a/.run/Template JUnit.run.xml
+++ b/.run/Template JUnit.run.xml
@@ -5,7 +5,7 @@
-
+
diff --git a/SECURITY.md b/SECURITY.md
index 943cb78397d0..d2ecf1e9121b 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -7,8 +7,8 @@ currently being supported with security updates.
| Version | Supported |
| ------- | ------------------ |
+| 1.5.x | :white_check_mark: |
| 1.4.x | :white_check_mark: |
-| 1.3.x | :white_check_mark: |
## Reporting a Vulnerability
diff --git a/bootstrap/sql/migrations/native/1.5.7/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.5.7/mysql/postDataMigrationSQLScript.sql
new file mode 100644
index 000000000000..0aaa75e38767
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.5.7/mysql/postDataMigrationSQLScript.sql
@@ -0,0 +1,17 @@
+UPDATE test_definition
+SET json = JSON_MERGE_PRESERVE(
+ json,
+ JSON_OBJECT(
+ 'parameterDefinition',
+ JSON_ARRAY(
+ JSON_OBJECT(
+ 'name', 'caseSensitiveColumns',
+ 'dataType', 'BOOLEAN',
+ 'required', false,
+ 'description', 'Use case sensitivity when comparing the columns.',
+ 'displayName', 'Case sensitive columns'
+ )
+ )
+ )
+ )
+WHERE name = 'tableDiff';
diff --git a/bootstrap/sql/migrations/native/1.5.7/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.5.7/postgres/postDataMigrationSQLScript.sql
new file mode 100644
index 000000000000..32afd14731f4
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.5.7/postgres/postDataMigrationSQLScript.sql
@@ -0,0 +1,8 @@
+UPDATE test_definition
+SET json = jsonb_set(
+ json,
+ '{parameterDefinition}',
+ (json->'parameterDefinition')::jsonb ||
+ '{"name": "caseSensitiveColumns", "dataType": "BOOLEAN", "required": false, "description": "Use case sensitivity when comparing the columns.", "displayName": "Case sensitive columns"}'::jsonb
+ )
+WHERE name = 'tableDiff';
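Both the MySQL and Postgres scripts above add the same parameter to the tableDiff test definition: MySQL merges a fresh parameterDefinition array via JSON_MERGE_PRESERVE, while Postgres appends the object to the existing array with the jsonb || operator. For reference, a sketch of the entry both migrations produce (values copied from the SQL literals above):

    # Shape of the parameterDefinition entry appended by the 1.5.7 migrations.
    case_sensitive_param = {
        "name": "caseSensitiveColumns",
        "dataType": "BOOLEAN",
        "required": False,
        "description": "Use case sensitivity when comparing the columns.",
        "displayName": "Case sensitive columns",
    }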
diff --git a/bootstrap/sql/migrations/native/1.6.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.6.0/mysql/postDataMigrationSQLScript.sql
index 04bf3b4dd173..5445e9b31c96 100644
--- a/bootstrap/sql/migrations/native/1.6.0/mysql/postDataMigrationSQLScript.sql
+++ b/bootstrap/sql/migrations/native/1.6.0/mysql/postDataMigrationSQLScript.sql
@@ -43,3 +43,8 @@ UPDATE installed_apps SET json = JSON_SET(json, '$.supportsInterrupt', true) whe
UPDATE apps_marketplace SET json = JSON_SET(json, '$.supportsInterrupt', true) where name = 'SearchIndexingApplication';
ALTER TABLE apps_extension_time_series ADD COLUMN appName VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.appName') STORED NOT NULL;
+
+-- Add supportsDataDiff for Athena, BigQuery, Mssql, Mysql, Oracle, Postgres, Redshift, SapHana, Snowflake, Trino
+UPDATE dbservice_entity
+SET json = JSON_SET(json, '$.connection.config.supportsDataDiff', 'true')
+WHERE serviceType IN ('Athena','BigQuery','Mssql','Mysql','Oracle','Postgres','Redshift','SapHana','Snowflake','Trino');
diff --git a/bootstrap/sql/migrations/native/1.6.0/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.6.0/mysql/schemaChanges.sql
index 6ddfd9369954..2f737915fad0 100644
--- a/bootstrap/sql/migrations/native/1.6.0/mysql/schemaChanges.sql
+++ b/bootstrap/sql/migrations/native/1.6.0/mysql/schemaChanges.sql
@@ -5,4 +5,11 @@ ALTER TABLE apps_extension_time_series MODIFY COLUMN extension VARCHAR(255) NOT
CREATE INDEX apps_extension_time_series_extension ON apps_extension_time_series(extension);
-- Clean dangling workflows not removed after test connection
-truncate automations_workflow;
\ No newline at end of file
+truncate automations_workflow;
+
+-- App Data Store
+CREATE TABLE IF NOT EXISTS apps_data_store (
+ identifier VARCHAR(256) NOT NULL,
+ type VARCHAR(256) NOT NULL,
+ json JSON NOT NULL
+);
\ No newline at end of file
diff --git a/bootstrap/sql/migrations/native/1.6.0/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.6.0/postgres/postDataMigrationSQLScript.sql
index e7de73cc09b6..799659bd1403 100644
--- a/bootstrap/sql/migrations/native/1.6.0/postgres/postDataMigrationSQLScript.sql
+++ b/bootstrap/sql/migrations/native/1.6.0/postgres/postDataMigrationSQLScript.sql
@@ -60,3 +60,8 @@ SET json = jsonb_set(
where name = 'SearchIndexingApplication';
ALTER TABLE apps_extension_time_series ADD COLUMN appName VARCHAR(256) GENERATED ALWAYS AS (json ->> 'appName') STORED NOT NULL;
+
+-- Add supportsDataDiff for Athena, BigQuery, Mssql, Mysql, Oracle, Postgres, Redshift, SapHana, Snowflake, Trino
+UPDATE dbservice_entity
+SET json = jsonb_set(json::jsonb, '{connection,config,supportsDataDiff}', 'true'::jsonb)
+WHERE serviceType IN ('Athena','BigQuery','Mssql','Mysql','Oracle','Postgres','Redshift','SapHana','Snowflake','Trino');
diff --git a/bootstrap/sql/migrations/native/1.6.0/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.6.0/postgres/schemaChanges.sql
index 6cd2d24b6dd6..6987056dcc48 100644
--- a/bootstrap/sql/migrations/native/1.6.0/postgres/schemaChanges.sql
+++ b/bootstrap/sql/migrations/native/1.6.0/postgres/schemaChanges.sql
@@ -5,4 +5,11 @@ ALTER TABLE apps_extension_time_series ALTER COLUMN extension SET NOT NULL;
CREATE INDEX IF NOT EXISTS apps_extension_time_series_extension ON apps_extension_time_series(extension);
-- Clean dangling workflows not removed after test connection
-truncate automations_workflow;
\ No newline at end of file
+truncate automations_workflow;
+
+-- App Data Store
+CREATE TABLE IF NOT EXISTS apps_data_store (
+ identifier VARCHAR(256) NOT NULL,
+ type VARCHAR(256) NOT NULL,
+ json JSON NOT NULL
+);
\ No newline at end of file
diff --git a/docker/development/docker-compose-gcp.yml b/docker/development/docker-compose-gcp.yml
index 169cee1a5914..2e8c31421e21 100644
--- a/docker/development/docker-compose-gcp.yml
+++ b/docker/development/docker-compose-gcp.yml
@@ -46,7 +46,7 @@ services:
- ./docker-volume/db-data:/var/lib/mysql
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.11.4
container_name: openmetadata_elasticsearch
environment:
- discovery.type=single-node
diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml
index ba05ead6d895..b5509137a521 100644
--- a/docker/development/docker-compose.yml
+++ b/docker/development/docker-compose.yml
@@ -42,7 +42,7 @@ services:
- ./docker-volume/db-data:/var/lib/mysql
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.11.4
container_name: openmetadata_elasticsearch
environment:
- discovery.type=single-node
diff --git a/docker/docker-compose-quickstart/docker-compose-postgres.yml b/docker/docker-compose-quickstart/docker-compose-postgres.yml
index 66ac68bb7660..4b279df6cb28 100644
--- a/docker/docker-compose-quickstart/docker-compose-postgres.yml
+++ b/docker/docker-compose-quickstart/docker-compose-postgres.yml
@@ -41,7 +41,7 @@ services:
elasticsearch:
container_name: openmetadata_elasticsearch
- image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.11.4
environment:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
diff --git a/docker/docker-compose-quickstart/docker-compose.yml b/docker/docker-compose-quickstart/docker-compose.yml
index 7ce4f1479b7e..a66b57ddfb63 100644
--- a/docker/docker-compose-quickstart/docker-compose.yml
+++ b/docker/docker-compose-quickstart/docker-compose.yml
@@ -39,7 +39,7 @@ services:
elasticsearch:
container_name: openmetadata_elasticsearch
- image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.11.4
environment:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
diff --git a/docker/run_local_docker.sh b/docker/run_local_docker.sh
index f1f2a948e378..183b2f491e3d 100755
--- a/docker/run_local_docker.sh
+++ b/docker/run_local_docker.sh
@@ -50,8 +50,8 @@ echo "Running local docker using mode [$mode] database [$database] and skipping
cd ../
echo "Stopping any previous Local Docker Containers"
-docker compose -f docker/development/docker-compose-postgres.yml down
-docker compose -f docker/development/docker-compose.yml down
+docker compose -f docker/development/docker-compose-postgres.yml down --remove-orphans
+docker compose -f docker/development/docker-compose.yml down --remove-orphans
if [[ $skipMaven == "false" ]]; then
if [[ $mode == "no-ui" ]]; then
diff --git a/ingestion/examples/sample_data/api_service/service.json b/ingestion/examples/sample_data/api_service/service.json
index ded28abd509d..22cda4eba763 100644
--- a/ingestion/examples/sample_data/api_service/service.json
+++ b/ingestion/examples/sample_data/api_service/service.json
@@ -1,9 +1,9 @@
{
- "type": "REST",
+ "type": "rest",
"serviceName": "sample_api_service",
"serviceConnection": {
"config": {
- "type": "REST",
+ "type": "Rest",
"openAPISchemaURL": "https://petstore3.swagger.io/",
"token":"mock_token"
}
diff --git a/ingestion/examples/sample_data/ometa_api_service/service.json b/ingestion/examples/sample_data/ometa_api_service/service.json
index 6e3c672e9601..9552e9d04eae 100644
--- a/ingestion/examples/sample_data/ometa_api_service/service.json
+++ b/ingestion/examples/sample_data/ometa_api_service/service.json
@@ -1,9 +1,9 @@
{
- "type": "REST",
+ "type": "rest",
"serviceName": "ometa_api_service",
"serviceConnection": {
"config": {
- "type": "REST",
+ "type": "Rest",
"openAPISchemaURL": "https://docs.open-metadata.org/swagger.html",
"token":"token"
}
diff --git a/ingestion/pyproject.toml b/ingestion/pyproject.toml
index 65555975686a..3df3755aff9a 100644
--- a/ingestion/pyproject.toml
+++ b/ingestion/pyproject.toml
@@ -73,6 +73,9 @@ markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')"
]
+[project.entry-points.pytest11]
+pytest_openmetadata = "_openmetadata_testutils.pytest_openmetadata.plugin"
+
[tool.pylint.BASIC]
# W1203: logging-fstring-interpolation - f-string brings better readability and unifies style
# W1202: logging-format-interpolation - lazy formatting in logging functions
diff --git a/ingestion/setup.py b/ingestion/setup.py
index 560c62625419..65b9fd521cc2 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -46,7 +46,7 @@
"databricks-sdk": "databricks-sdk>=0.18.0,<0.20.0",
"trino": "trino[sqlalchemy]",
"spacy": "spacy<3.8",
- "looker-sdk": "looker-sdk>=22.20.0",
+ "looker-sdk": "looker-sdk>=22.20.0,!=24.18.0",
"lkml": "lkml~=1.3",
"tableau": "tableau-api-lib~=0.1",
"pyhive": "pyhive[hive_pure_sasl]~=0.7",
@@ -183,7 +183,7 @@
VERSIONS["azure-storage-blob"],
VERSIONS["azure-identity"],
},
- "db2": {"ibm-db-sa~=0.3"},
+ "db2": {"ibm-db-sa~=0.4.1", "ibm-db>=2.0.0"},
"db2-ibmi": {"sqlalchemy-ibmi~=0.9.3"},
"databricks": {
VERSIONS["sqlalchemy-databricks"],
@@ -236,7 +236,7 @@
"impyla~=0.18.0",
},
"iceberg": {
- "pyiceberg>=0.5",
+ "pyiceberg==0.5.1",
# Forcing the version of a few packages so it plays nicely with other requirements.
VERSIONS["pydantic"],
VERSIONS["adlfs"],
diff --git a/ingestion/src/_openmetadata_testutils/pytest_openmetadata/__init__.py b/ingestion/src/_openmetadata_testutils/pytest_openmetadata/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/src/_openmetadata_testutils/pytest_openmetadata/plugin.py b/ingestion/src/_openmetadata_testutils/pytest_openmetadata/plugin.py
new file mode 100644
index 000000000000..eb021de0ce55
--- /dev/null
+++ b/ingestion/src/_openmetadata_testutils/pytest_openmetadata/plugin.py
@@ -0,0 +1,86 @@
+from typing import Type
+
+import pytest
+
+from _openmetadata_testutils.ometa import int_admin_ometa
+from metadata.generated.schema.entity.teams.user import AuthenticationMechanism, User
+from metadata.generated.schema.metadataIngestion.workflow import LogLevels
+from metadata.ingestion.api.common import Entity
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.workflow.ingestion import IngestionWorkflow
+
+
+@pytest.fixture(scope="session")
+def metadata():
+ return int_admin_ometa()
+
+
+@pytest.fixture(scope="session")
+def sink_config(metadata):
+ return {
+ "type": "metadata-rest",
+ "config": {},
+ }
+
+
+@pytest.fixture(scope="session")
+def workflow_config(metadata):
+ return {
+ "loggerLevel": LogLevels.DEBUG.value,
+ "openMetadataServerConfig": metadata.config.model_dump(),
+ }
+
+
+@pytest.fixture(scope="session")
+def ingestion_bot_wokflow_config(metadata, workflow_config):
+ ingestion_bot: User = metadata.get_by_name(
+ entity=User, fqn="ingestion-bot", nullable=False
+ )
+ ingestion_bot_auth: AuthenticationMechanism = metadata.get_by_id(
+ entity=AuthenticationMechanism, entity_id=ingestion_bot.id, nullable=False
+ )
+ workflow_config = workflow_config.copy()
+ workflow_config["openMetadataServerConfig"]["securityConfig"][
+ "jwtToken"
+ ] = ingestion_bot_auth.config.JWTToken
+ return workflow_config
+
+
+@pytest.fixture()
+def clean_up_fqn(metadata):
+ fqns = []
+
+ def inner(entity_type: Type[Entity], fqn: str):
+ fqns.append((entity_type, fqn))
+
+ yield inner
+ for entity_type, fqn in fqns:
+ entity = metadata.get_by_name(entity_type, fqn, fields=["*"])
+ if entity:
+ metadata.delete(entity_type, entity.id, recursive=True, hard_delete=True)
+
+
+@pytest.fixture(scope="session")
+def ingestion_bot_workflow_config(metadata: OpenMetadata):
+ ingestion_bot: User = metadata.get_by_name(entity=User, fqn="ingestion-bot")
+ ingestion_bot_auth: AuthenticationMechanism = metadata.get_by_id(
+ entity=AuthenticationMechanism, entity_id=ingestion_bot.id
+ )
+ config = metadata.config.model_dump()
+ config["securityConfig"]["jwtToken"] = ingestion_bot_auth.config.JWTToken
+ return {
+ "loggerLevel": LogLevels.DEBUG.value,
+ "openMetadataServerConfig": config,
+ }
+
+
+@pytest.fixture(scope="module")
+def run_workflow():
+ def _run(workflow_type: Type[IngestionWorkflow], config, raise_from_status=True):
+ workflow: IngestionWorkflow = workflow_type.create(config)
+ workflow.execute()
+ if raise_from_status:
+ workflow.raise_from_status()
+ return workflow
+
+ return _run
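A minimal usage sketch for the fixtures this plugin exposes, assuming the pytest11 entry point registered in ingestion/pyproject.toml makes them available without an explicit import; MetadataWorkflow and the empty source config are illustrative placeholders, not part of this diff:

    # Hypothetical test relying on the session-scoped plugin fixtures.
    from metadata.workflow.metadata import MetadataWorkflow  # assumed workflow class

    def test_sample_ingestion(run_workflow, sink_config, workflow_config):
        config = {
            "source": {},  # placeholder: fill in a real source config
            "sink": sink_config,
            "workflowConfig": workflow_config,
        }
        run_workflow(MetadataWorkflow, config)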
diff --git a/ingestion/src/_openmetadata_testutils/pytest_openmetadata/test_utils.py b/ingestion/src/_openmetadata_testutils/pytest_openmetadata/test_utils.py
new file mode 100644
index 000000000000..386488955af9
--- /dev/null
+++ b/ingestion/src/_openmetadata_testutils/pytest_openmetadata/test_utils.py
@@ -0,0 +1,99 @@
+from collections import deque
+from typing import List, Union
+
+from pydantic import BaseModel
+
+
+def assert_equal_pydantic_objects(
+ expected: Union[BaseModel, List[BaseModel]],
+ actual: Union[BaseModel, List[BaseModel]],
+ ignore_none=True,
+):
+ """Compare 2 pydantic objects recursively and raise an AssertionError if they are not equal along with all
+ the differences by field. If `ignore_none` is set to True, expected None values will be ignored. This can be
+ useful when comparing objects that are partially filled.
+
+ Example:
+ >>> from pydantic import BaseModel
+ >>> class A(BaseModel):
+ ... a: int
+ >>> class B(BaseModel):
+ ... b: A
+ >>> a1 = A(a=1)
+ >>> a2 = A(a=2)
+ >>> b1 = B(b=a1)
+ >>> b2 = B(b=a2)
+ >>> assert_equal_pydantic_objects(a1, b1)
+ Traceback (most recent call last):
+ ```
+ AssertionError: objects mismatched on type at : expected: [A], actual: [B]
+ >>> assert_equal_pydantic_objects(a1, a2)
+ Traceback (most recent call last):
+ ```
+ AssertionError: objects mismatched on field: [a], expected: [1], actual: [2]
+ >>> assert_equal_pydantic_objects(b1, b2)
+ Traceback (most recent call last):
+ ```
+ AssertionError: objects mismatched on field: [b.a], expected: [1], actual: [2]
+ >>> assert_equal_pydantic_objects([a1, a2], [a2, a1])
+ Traceback (most recent call last):
+ ```
+ AssertionError: objects mismatched on field: [0].a, expected: [1], actual: [2]
+
+ Args:
+ expected (BaseModel): The expected pydantic object.
+ actual (BaseModel): The actual pydantic object.
+ ignore_none (bool, optional): Whether to ignore None values. Defaults to True.
+
+ Raises:
+ AssertionError: If the objects are not equal. The error message will contain all the differences.
+ """
+ errors = []
+ queue = deque([(expected, actual, "")])
+ while queue:
+ expected, actual, current_key_prefix = queue.popleft()
+ if not isinstance(expected, actual.__class__):
+ errors.append(
+ f"objects mismatched on type at {current_key_prefix}: "
+ f"expected: [{type(expected).__name__}], actual: [{type(actual).__name__}]"
+ )
+ continue
+ if issubclass(expected.__class__, BaseModel) and isinstance(
+ expected.model_dump(), dict
+ ):
+ for key, expected_value in expected.model_dump().items():
+ if expected_value is None and ignore_none:
+ continue
+ actual_value = actual.model_dump().get(key)
+ new_key_prefix = (
+ f"{current_key_prefix}.{key}" if current_key_prefix else key
+ )
+ if issubclass(getattr(expected, key).__class__, BaseModel):
+ queue.append(
+ (getattr(expected, key), getattr(actual, key), new_key_prefix)
+ )
+ elif expected_value != actual_value:
+ errors.append(
+ f"objects mismatched on field: [{new_key_prefix}], expected: [{expected_value}], actual: [{actual_value}]"
+ )
+ elif isinstance(expected, list):
+ if not isinstance(actual, list):
+ errors.append(
+ f"validation error on field: [{current_key_prefix}], expected: [list], actual: [{type(actual).__name__}]"
+ )
+ elif len(expected) != len(actual):
+ errors.append(
+ f"mismatch length at {current_key_prefix}: expected: [{len(expected)}], actual: [{len(actual)}]"
+ )
+ else:
+ for i, (expected_item, actual_item) in enumerate(zip(expected, actual)):
+ queue.append(
+ (expected_item, actual_item, f"{current_key_prefix}[{i}]")
+ )
+ else:
+ if expected != actual:
+ errors.append(
+ f"mismatch at {current_key_prefix}: expected: [{expected}], actual: [{actual}]"
+ )
+ if errors:
+ raise AssertionError("\n".join(errors))
diff --git a/ingestion/src/metadata/data_quality/builders/i_validator_builder.py b/ingestion/src/metadata/data_quality/builders/i_validator_builder.py
index 22466873ce03..8cfabb96d5d4 100644
--- a/ingestion/src/metadata/data_quality/builders/i_validator_builder.py
+++ b/ingestion/src/metadata/data_quality/builders/i_validator_builder.py
@@ -15,7 +15,7 @@
"""
from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timezone
from typing import TYPE_CHECKING, Optional, Type, Union
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
@@ -23,6 +23,7 @@
RuntimeParameterSetter,
)
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
+from metadata.generated.schema.type.basic import Timestamp
from metadata.profiler.processor.runner import QueryRunner
from metadata.utils.importer import import_test_case_class
@@ -91,7 +92,9 @@ def reset(self):
self._validator = self.validator_cls(
self.runner,
test_case=self.test_case,
- execution_date=int(datetime.now().timestamp() * 1000),
+ execution_date=Timestamp(
+ int(datetime.now(tz=timezone.utc).timestamp() * 1000)
+ ),
)
@abstractmethod
diff --git a/ingestion/src/metadata/data_quality/validations/base_test_handler.py b/ingestion/src/metadata/data_quality/validations/base_test_handler.py
index be93fdc3832b..3e1363c93864 100644
--- a/ingestion/src/metadata/data_quality/validations/base_test_handler.py
+++ b/ingestion/src/metadata/data_quality/validations/base_test_handler.py
@@ -17,9 +17,9 @@
import reprlib
from abc import ABC, abstractmethod
-from datetime import datetime
from typing import TYPE_CHECKING, Callable, List, Optional, Type, TypeVar, Union
+from metadata.data_quality.validations import utils
from metadata.data_quality.validations.runtime_param_setter.param_setter import (
RuntimeParameterSetter,
)
@@ -29,6 +29,7 @@
TestResultValue,
)
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
+from metadata.generated.schema.type.basic import Timestamp
from metadata.profiler.processor.runner import QueryRunner
if TYPE_CHECKING:
@@ -50,7 +51,7 @@ def __init__(
self,
runner: Union[QueryRunner, List["DataFrame"]],
test_case: TestCase,
- execution_date: Union[datetime, float],
+ execution_date: Timestamp,
) -> None:
self.runner = runner
self.test_case = test_case
@@ -65,40 +66,21 @@ def run_validation(self) -> TestCaseResult:
"""
raise NotImplementedError
+ @staticmethod
def get_test_case_param_value(
- self,
- test_case_param_vals: list[TestCaseParameterValue],
+ test_case_param_vals: List[TestCaseParameterValue],
name: str,
type_: T,
default: Optional[R] = None,
pre_processor: Optional[Callable] = None,
) -> Optional[Union[R, T]]:
- """Give a column and a type return the value with the appropriate type casting for the
- test case definition.
-
- Args:
- test_case: the test case
- type_ (Union[float, int, str]): type for the value
- name (str): column name
- default (_type_, optional): Default value to return if column is not found
- pre_processor: pre processor function/type to use against the value before casting to type_
- """
- value = next(
- (param.value for param in test_case_param_vals if param.name == name), None
+ return utils.get_test_case_param_value(
+ test_case_param_vals, name, type_, default, pre_processor
)
- if not value:
- return default if default is not None else None
-
- if not pre_processor:
- return type_(value)
-
- pre_processed_value = pre_processor(value)
- return type_(pre_processed_value)
-
def get_test_case_result_object( # pylint: disable=too-many-arguments
self,
- execution_date: Union[datetime, float],
+ execution_date: Timestamp,
status: TestCaseStatus,
result: str,
test_result_value: List[TestResultValue],
diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py
index 6b8dfa207224..a587bb986983 100644
--- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py
+++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py
@@ -20,6 +20,7 @@
from sqlalchemy import Column
+from metadata.data_quality.validations import utils
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
from metadata.generated.schema.tests.basic import (
TestCaseResult,
@@ -50,11 +51,8 @@ def run_validation(self) -> TestCaseResult:
literal_eval,
)
- match_enum = self.get_test_case_param_value(
- self.test_case.parameterValues, # type: ignore
- "matchEnum",
- bool,
- default=False,
+ match_enum = utils.get_bool_test_case_param(
+ self.test_case.parameterValues, "matchEnum"
)
try:
diff --git a/ingestion/src/metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py b/ingestion/src/metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py
index 9ca775e39d74..ffadb0e0c6bd 100644
--- a/ingestion/src/metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py
+++ b/ingestion/src/metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py
@@ -13,6 +13,7 @@
from typing import List, Optional
from urllib.parse import urlparse
+from metadata.data_quality.validations import utils
from metadata.data_quality.validations.models import (
Column,
TableDiffRuntimeParameters,
@@ -27,6 +28,7 @@
from metadata.ingestion.source.connections import get_connection
from metadata.profiler.orm.registry import Dialects
from metadata.utils import fqn
+from metadata.utils.collections import CaseInsensitiveList
class TableDiffParamsSetter(RuntimeParameterSetter):
@@ -49,15 +51,26 @@ def __init__(self, *args, **kwargs):
}
def get_parameters(self, test_case) -> TableDiffRuntimeParameters:
+ service1_url = (
+ str(get_connection(self.service_connection_config).url)
+ if self.service_connection_config
+ else None
+ )
service1: DatabaseService = self.ometa_client.get_by_id(
DatabaseService, self.table_entity.service.id, nullable=False
)
table2_fqn = self.get_parameter(test_case, "table2")
+ case_sensitive_columns: bool = utils.get_bool_test_case_param(
+ test_case.parameterValues, "caseSensitiveColumns"
+ )
if table2_fqn is None:
raise ValueError("table2 not set")
table2: Table = self.ometa_client.get_by_name(
Table, fqn=table2_fqn, nullable=False
)
+ service2_url = (
+ service1_url if table2.service == self.table_entity.service else None
+ )
service2: DatabaseService = self.ometa_client.get_by_id(
DatabaseService, table2.service.id, nullable=False
)
@@ -69,10 +82,15 @@ def get_parameters(self, test_case) -> TableDiffRuntimeParameters:
self.table_entity.fullyQualifiedName.root
),
serviceUrl=self.get_data_diff_url(
- service1, self.table_entity.fullyQualifiedName.root
+ service1,
+ self.table_entity.fullyQualifiedName.root,
+ override_url=service1_url,
),
columns=self.filter_relevant_columns(
- self.table_entity.columns, key_columns, extra_columns
+ self.table_entity.columns,
+ key_columns,
+ extra_columns,
+ case_sensitive=case_sensitive_columns,
),
),
table2=TableParameter(
@@ -80,10 +98,14 @@ def get_parameters(self, test_case) -> TableDiffRuntimeParameters:
serviceUrl=self.get_data_diff_url(
service2,
table2_fqn,
- override_url=self.get_parameter(test_case, "service2Url"),
+ override_url=self.get_parameter(test_case, "service2Url")
+ or service2_url,
),
columns=self.filter_relevant_columns(
- table2.columns, key_columns, extra_columns
+ table2.columns,
+ key_columns,
+ extra_columns,
+ case_sensitive=case_sensitive_columns,
),
),
keyColumns=key_columns,
@@ -145,9 +167,17 @@ def get_key_columns(self, test_case) -> List[str]:
@staticmethod
def filter_relevant_columns(
- columns: List[Column], key_columns: List[str], extra_columns: List[str]
+ columns: List[Column],
+ key_columns: List[str],
+ extra_columns: List[str],
+ case_sensitive: bool,
) -> List[Column]:
- return [c for c in columns if c.name.root in [*key_columns, *extra_columns]]
+ validated_columns = (
+ [*key_columns, *extra_columns]
+ if case_sensitive
+ else CaseInsensitiveList([*key_columns, *extra_columns])
+ )
+ return [c for c in columns if c.name.root in validated_columns]
@staticmethod
def get_parameter(test_case: TestCase, key: str, default=None):
@@ -184,7 +214,7 @@ def get_data_diff_url(
if hasattr(db_service.connection.config, "supportsDatabase"):
kwargs["path"] = f"/{database}"
if kwargs["scheme"] in {Dialects.MSSQL, Dialects.Snowflake}:
- kwargs["path"] += f"/{schema}"
+ kwargs["path"] = f"/{database}/{schema}"
return url._replace(**kwargs).geturl()
@staticmethod
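filter_relevant_columns now takes a case_sensitive flag and wraps the key and extra columns in CaseInsensitiveList when it is False. The implementation of CaseInsensitiveList is not part of this diff; a sketch of the assumed behavior, namely that membership tests ignore case:

    # Assumed semantics of CaseInsensitiveList (defined in metadata.utils.collections).
    from metadata.utils.collections import CaseInsensitiveList

    validated = CaseInsensitiveList(["ID", "Name"])
    assert "id" in validated      # assumption: __contains__ lowercases both sides
    assert "NAME" in validated
    assert "email" not in validated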
diff --git a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py
index 3b59c59da7a2..39a04474a40d 100644
--- a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py
+++ b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py
@@ -11,17 +11,19 @@
# pylint: disable=missing-module-docstring
import logging
import traceback
+from decimal import Decimal
from itertools import islice
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, cast
from urllib.parse import urlparse
import data_diff
import sqlalchemy.types
from data_diff.diff_tables import DiffResultWrapper
from data_diff.errors import DataDiffMismatchingKeyTypesError
-from data_diff.utils import ArithAlphanumeric
+from data_diff.utils import ArithAlphanumeric, CaseInsensitiveDict
from sqlalchemy import Column as SAColumn
+from metadata.data_quality.validations import utils
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
from metadata.data_quality.validations.mixins.sqa_validator_mixin import (
SQAValidatorMixin,
@@ -75,6 +77,18 @@ def masked(s: str, mask: bool = True) -> str:
return "***" if mask else s
+def is_numeric(t: type) -> bool:
+ """Check if a type is numeric.
+
+ Args:
+ t: type to check
+
+ Returns:
+ True if the type is numeric otherwise False
+ """
+ return t in [int, float, Decimal]
+
+
class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
"""
Compare two tables and fail if the number of differences exceeds a threshold
@@ -167,12 +181,14 @@ def get_incomparable_columns(self) -> List[str]:
self.runtime_params.table1.path,
self.runtime_params.keyColumns,
extra_columns=self.runtime_params.extraColumns,
+ case_sensitive=self.get_case_sensitive(),
).with_schema()
table2 = data_diff.connect_to_table(
self.runtime_params.table2.serviceUrl,
self.runtime_params.table2.path,
self.runtime_params.keyColumns,
extra_columns=self.runtime_params.extraColumns,
+ case_sensitive=self.get_case_sensitive(),
).with_schema()
result = []
for column in table1.key_columns + table1.extra_columns:
@@ -185,7 +201,8 @@ def get_incomparable_columns(self) -> List[str]:
col2_type = self._get_column_python_type(
table2._schema[column] # pylint: disable=protected-access
)
-
+ if is_numeric(col1_type) and is_numeric(col2_type):
+ continue
if col1_type != col2_type:
result.append(column)
return result
@@ -228,11 +245,13 @@ def get_table_diff(self) -> DiffResultWrapper:
self.runtime_params.table1.serviceUrl,
self.runtime_params.table1.path,
self.runtime_params.keyColumns, # type: ignore
+ case_sensitive=self.get_case_sensitive(),
)
table2 = data_diff.connect_to_table(
self.runtime_params.table2.serviceUrl,
self.runtime_params.table2.path,
self.runtime_params.keyColumns, # type: ignore
+ case_sensitive=self.get_case_sensitive(),
)
data_diff_kwargs = {
"key_columns": self.runtime_params.keyColumns,
@@ -308,7 +327,9 @@ def _validate_dialects(self):
def get_column_diff(self) -> Optional[TestCaseResult]:
"""Get the column diff between the two tables. If there are no differences, return None."""
removed, added = self.get_changed_added_columns(
- self.runtime_params.table1.columns, self.runtime_params.table2.columns
+ self.runtime_params.table1.columns,
+ self.runtime_params.table2.columns,
+ self.get_case_sensitive(),
)
changed = self.get_incomparable_columns()
if removed or added or changed:
@@ -321,7 +342,7 @@ def get_column_diff(self) -> Optional[TestCaseResult]:
@staticmethod
def get_changed_added_columns(
- left: List[Column], right: List[Column]
+ left: List[Column], right: List[Column], case_sensitive: bool
) -> Optional[Tuple[List[str], List[str]]]:
"""Given a list of columns from two tables, return the columns that are removed and added.
@@ -335,6 +356,10 @@ def get_changed_added_columns(
removed: List[str] = []
added: List[str] = []
right_columns_dict: Dict[str, Column] = {c.name.root: c for c in right}
+ if not case_sensitive:
+ right_columns_dict = cast(
+ Dict[str, Column], CaseInsensitiveDict(right_columns_dict)
+ )
for column in left:
table2_column = right_columns_dict.get(column.name.root)
if table2_column is None:
@@ -345,7 +370,10 @@ def get_changed_added_columns(
return removed, added
def column_validation_result(
- self, removed: List[str], added: List[str], changed: List[str]
+ self,
+ removed: List[str],
+ added: List[str],
+ changed: List[str],
) -> TestCaseResult:
"""Build the result for a column validation result. Messages will only be added
for non-empty categories. Values will be populated reported for all categories.
@@ -367,13 +395,18 @@ def column_validation_result(
message += f"\n Added columns: {','.join(added)}\n"
if changed:
message += "\n Changed columns:"
+ table1_columns = {
+ c.name.root: c for c in self.runtime_params.table1.columns
+ }
+ table2_columns = {
+ c.name.root: c for c in self.runtime_params.table2.columns
+ }
+ if not self.get_case_sensitive():
+ table1_columns = CaseInsensitiveDict(table1_columns)
+ table2_columns = CaseInsensitiveDict(table2_columns)
for col in changed:
- col1 = next(
- c for c in self.runtime_params.table1.columns if c.name.root == col
- )
- col2 = next(
- c for c in self.runtime_params.table2.columns if c.name.root == col
- )
+ col1 = table1_columns[col]
+ col2 = table2_columns[col]
message += (
f"\n {col}: {col1.dataType.value} -> {col2.dataType.value}"
)
@@ -432,3 +465,8 @@ def safe_table_diff_iterator(self) -> DiffResultWrapper:
if str(ex) == "2":
# This is a known issue in data_diff where the diff object is closed
pass
+
+ def get_case_sensitive(self):
+ return utils.get_bool_test_case_param(
+ self.test_case.parameterValues, "caseSensitiveColumns"
+ )
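The is_numeric helper added above means get_incomparable_columns no longer flags a column pair as changed when both sides are numeric but of different Python types (for example int on one side and Decimal on the other). A quick check of the helper as defined in this file:

    from decimal import Decimal

    from metadata.data_quality.validations.table.sqlalchemy.tableDiff import is_numeric

    assert is_numeric(int) and is_numeric(float) and is_numeric(Decimal)
    assert not is_numeric(str)  # non-numeric pairs still go through the type comparison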
diff --git a/ingestion/src/metadata/data_quality/validations/utils.py b/ingestion/src/metadata/data_quality/validations/utils.py
new file mode 100644
index 000000000000..77f2037731eb
--- /dev/null
+++ b/ingestion/src/metadata/data_quality/validations/utils.py
@@ -0,0 +1,56 @@
+"""
+Data quality validation utility functions.
+"""
+
+from typing import Callable, List, Optional, TypeVar, Union
+
+from metadata.generated.schema.tests.testCase import TestCaseParameterValue
+
+T = TypeVar("T", bound=Callable)
+R = TypeVar("R")
+
+
+def get_test_case_param_value(
+ test_case_param_vals: List[TestCaseParameterValue],
+ name: str,
+ type_: T,
+ default: Optional[R] = None,
+ pre_processor: Optional[Callable] = None,
+) -> Optional[Union[R, T]]:
+ """Return a test case parameter value with the appropriate type casting for the test case definition.
+
+ Args:
+ test_case_param_vals: list of test case parameter values
+ type_ (Union[float, int, str]): type for the value
+ name (str): parameter name
+ default (_type_, optional): Default value to return if the parameter is not found
+ pre_processor: pre processor function/type to use against the value before casting to type_
+ """
+ value = next(
+ (param.value for param in test_case_param_vals if param.name == name), None
+ )
+
+ if not value:
+ return default if default is not None else None
+
+ if not pre_processor:
+ return type_(value)
+
+ pre_processed_value = pre_processor(value)
+ return type_(pre_processed_value)
+
+
+def get_bool_test_case_param(
+ test_case_param_vals: List[TestCaseParameterValue],
+ name: str,
+) -> bool:
+ """Return a test case parameter value as a boolean. Defaults to False when the parameter is not set.
+
+ Args:
+ test_case_param_vals: list of test case parameter values
+ name (str): parameter name
+ """
+ str_val: Optional[str] = get_test_case_param_value(test_case_param_vals, name, str, None)
+ if str_val is None:
+ return False
+ return str_val.lower() == "true"
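A short usage sketch of the boolean helper, matching how the tableDiff validator resolves the new caseSensitiveColumns parameter; the parameter values below are illustrative:

    from metadata.data_quality.validations.utils import get_bool_test_case_param
    from metadata.generated.schema.tests.testCase import TestCaseParameterValue

    params = [TestCaseParameterValue(name="caseSensitiveColumns", value="True")]
    assert get_bool_test_case_param(params, "caseSensitiveColumns") is True
    assert get_bool_test_case_param([], "caseSensitiveColumns") is False  # unset defaults to False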
diff --git a/ingestion/src/metadata/examples/workflows/rest.yaml b/ingestion/src/metadata/examples/workflows/rest.yaml
index d155eaea5dbc..3469a02ba58f 100644
--- a/ingestion/src/metadata/examples/workflows/rest.yaml
+++ b/ingestion/src/metadata/examples/workflows/rest.yaml
@@ -3,9 +3,9 @@ source:
serviceName: openapi_rest
serviceConnection:
config:
- type: REST
+ type: Rest
openAPISchemaURL: https://docs.open-metadata.org/swagger.json
- token:
+ # token:
sourceConfig:
config:
type: ApiMetadata
diff --git a/ingestion/src/metadata/ingestion/api/parser.py b/ingestion/src/metadata/ingestion/api/parser.py
index 84aa898844cd..08db6f2bf228 100644
--- a/ingestion/src/metadata/ingestion/api/parser.py
+++ b/ingestion/src/metadata/ingestion/api/parser.py
@@ -19,8 +19,8 @@
Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.apiService import (
- ApiServiceConnection,
- APIServiceType,
+ ApiConnection,
+ ApiServiceType,
)
from metadata.generated.schema.entity.services.dashboardService import (
DashboardConnection,
@@ -135,7 +135,7 @@
# Build a service type map dynamically from JSON Schema covered types
SERVICE_TYPE_MAP = {
"Backend": PipelineConnection, # For Airflow backend
- **{service: ApiServiceConnection for service in APIServiceType.__members__},
+ **{service: ApiConnection for service in ApiServiceType.__members__},
**{service: DatabaseConnection for service in DatabaseServiceType.__members__},
**{service: DashboardConnection for service in DashboardServiceType.__members__},
**{service: MessagingConnection for service in MessagingServiceType.__members__},
@@ -183,7 +183,7 @@ class InvalidWorkflowException(Exception):
def get_service_type(
source_type: str,
) -> Union[
- Type[ApiServiceConnection],
+ Type[ApiConnection],
Type[DashboardConnection],
Type[DatabaseConnection],
Type[MessagingConnection],
@@ -233,7 +233,7 @@ def get_source_config_class(
def get_connection_class(
source_type: str,
service_type: Union[
- Type[ApiServiceConnection],
+ Type[ApiConnection],
Type[DashboardConnection],
Type[DatabaseConnection],
Type[MessagingConnection],
@@ -557,6 +557,7 @@ def parse_automation_workflow_gracefully(
message="Error parsing the service connection",
)
+ #
raise ParsingConfigurationError(
"Uncaught error when parsing the Ingestion Pipeline!"
)
diff --git a/ingestion/src/metadata/ingestion/connections/test_connections.py b/ingestion/src/metadata/ingestion/connections/test_connections.py
index e98b67dbe32b..c71b04a89d4e 100644
--- a/ingestion/src/metadata/ingestion/connections/test_connections.py
+++ b/ingestion/src/metadata/ingestion/connections/test_connections.py
@@ -40,6 +40,7 @@
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import kill_active_connections
from metadata.profiler.orm.functions.conn_test import ConnTestFn
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import cli_logger
from metadata.utils.timeout import timeout
@@ -92,25 +93,24 @@ def _test_connection_steps(
metadata: OpenMetadata,
steps: List[TestConnectionStep],
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+) -> TestConnectionResult:
"""
Run all the function steps and raise any errors
"""
if automation_workflow:
- _test_connection_steps_automation_workflow(
+ return _test_connection_steps_automation_workflow(
metadata=metadata, steps=steps, automation_workflow=automation_workflow
)
- else:
- _test_connection_steps_during_ingestion(steps=steps)
+ return _test_connection_steps_during_ingestion(steps=steps)
def _test_connection_steps_automation_workflow(
metadata: OpenMetadata,
steps: List[TestConnectionStep],
automation_workflow: AutomationWorkflow,
-) -> None:
+) -> TestConnectionResult:
"""
Run the test connection as part of the automation workflow
We need to update the automation workflow in each step
@@ -187,33 +187,40 @@ def _test_connection_steps_automation_workflow(
)
)
+ return test_connection_result
-def _test_connection_steps_during_ingestion(steps: List[TestConnectionStep]) -> None:
- """
- Run the test connection as part of the ingestion workflow
- Raise an exception if something fails
- """
- test_connection_result = TestConnectionIngestionResult()
+
+def _test_connection_steps_during_ingestion(
+ steps: List[TestConnectionStep],
+) -> TestConnectionResult:
+ """Run the test connection steps during ingestion"""
+ test_connection_result = TestConnectionResult(
+ status=StatusType.Running,
+ steps=[],
+ )
for step in steps:
try:
+ logger.info(f"Running {step.name}...")
step.function()
- test_connection_result.success.append(f"'{step.name}': Pass")
-
- except Exception as exc:
- logger.debug(traceback.format_exc())
- logger.warning(f"{step.name}-{exc}")
- if step.mandatory:
- test_connection_result.failed.append(
- f"'{step.name}': This is a mandatory step and we won't be able to extract"
- f" necessary metadata. Failed due to: {exc}"
+ test_connection_result.steps.append(
+ TestConnectionStepResult(
+ name=step.name,
+ mandatory=step.mandatory,
+ passed=True,
)
-
- else:
- test_connection_result.warning.append(
- f"'{step.name}': This is a optional and the ingestion will continue to work as expected."
- f"Failed due to: {exc}"
+ )
+ except Exception as err:
+ logger.debug(traceback.format_exc())
+ logger.error(f"{step.name}-{err}")
+ test_connection_result.steps.append(
+ TestConnectionStepResult(
+ name=step.name,
+ mandatory=step.mandatory,
+ passed=False,
+ message=step.error_message,
+ errorLog=str(err),
)
-
+ )
if step.short_circuit:
# break the workflow if the step is a short circuit step
break
@@ -221,10 +228,20 @@ def _test_connection_steps_during_ingestion(steps: List[TestConnectionStep]) ->
logger.info("Test connection results:")
logger.info(test_connection_result)
- if test_connection_result.failed:
- raise SourceConnectionException(
- f"Some steps failed when testing the connection: [{test_connection_result}]"
- )
+ return test_connection_result
+
+
+def raise_test_connection_exception(result: TestConnectionResult) -> None:
+ """Raise if needed an exception for the test connection"""
+ for step in result.steps:
+ if not step.passed and step.mandatory:
+ raise SourceConnectionException(
+ f"Failed to run the test connection step: {step.name}"
+ )
+ if not step.passed:
+ logger.warning(
+ f"You might be missing metadata in: {step.name} due to {step.message}"
+ )
def test_connection_steps(
@@ -232,8 +249,8 @@ def test_connection_steps(
service_type: str,
test_fn: dict,
automation_workflow: Optional[AutomationWorkflow] = None,
- timeout_seconds: int = 3 * 60,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test the connection steps with a given timeout
@@ -268,9 +285,12 @@ def test_connection_steps(
for step in test_connection_definition.steps
]
- return timeout(timeout_seconds)(_test_connection_steps)(
- metadata, steps, automation_workflow
- )
+ if timeout_seconds:
+ return timeout(timeout_seconds)(_test_connection_steps)(
+ metadata, steps, automation_workflow
+ )
+
+ return _test_connection_steps(metadata, steps, automation_workflow)
def test_connection_engine_step(connection: Engine) -> None:
@@ -289,8 +309,8 @@ def test_connection_db_common(
service_connection,
automation_workflow: Optional[AutomationWorkflow] = None,
queries: dict = None,
- timeout_seconds: int = 3 * 60,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -322,7 +342,7 @@ def test_connection_db_common(
for key, query in queries.items():
test_fn[key] = partial(test_query, statement=query, engine=engine)
- test_connection_steps(
+ result = test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
@@ -332,6 +352,8 @@ def test_connection_db_common(
kill_active_connections(engine)
+ return result
+
def test_connection_db_schema_sources(
metadata: OpenMetadata,
@@ -339,7 +361,8 @@ def test_connection_db_schema_sources(
service_connection,
automation_workflow: Optional[AutomationWorkflow] = None,
queries: dict = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -388,15 +411,18 @@ def custom_executor(engine_: Engine, inspector_fn_str: str):
for key, query in queries.items():
test_fn[key] = partial(test_query, statement=query, engine=engine)
- test_connection_steps(
+ result = test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
kill_active_connections(engine)
+ return result
+
def test_query(engine: Engine, statement: str):
"""
diff --git a/ingestion/src/metadata/ingestion/source/api/api_service.py b/ingestion/src/metadata/ingestion/source/api/api_service.py
index 38bddd260829..6b5e7380c785 100644
--- a/ingestion/src/metadata/ingestion/source/api/api_service.py
+++ b/ingestion/src/metadata/ingestion/source/api/api_service.py
@@ -26,8 +26,8 @@
from metadata.generated.schema.entity.data.apiCollection import APICollection
from metadata.generated.schema.entity.data.apiEndpoint import APIEndpoint
from metadata.generated.schema.entity.services.apiService import (
+ ApiConnection,
ApiService,
- ApiServiceConnection,
)
from metadata.generated.schema.metadataIngestion.apiServiceMetadataPipeline import (
ApiServiceMetadataPipeline,
@@ -39,6 +39,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.topology import (
NodeStage,
@@ -112,7 +115,7 @@ class ApiServiceSource(TopologyRunnerMixin, Source, ABC):
source_config: ApiServiceMetadataPipeline
config: WorkflowSource
# Big union of types we want to fetch dynamically
- service_connection: ApiServiceConnection.model_fields["config"].annotation
+ service_connection: ApiConnection.model_fields["config"].annotation
topology = ApiServiceTopology()
context = TopologyContextManager(topology)
@@ -175,7 +178,10 @@ def close(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def mark_api_collections_as_deleted(self) -> Iterable[Either[DeleteEntity]]:
"""Method to mark the api collection as deleted"""
diff --git a/ingestion/src/metadata/ingestion/source/api/rest/connection.py b/ingestion/src/metadata/ingestion/source/api/rest/connection.py
index 055ecbb0c0ab..2aa2f09c165c 100644
--- a/ingestion/src/metadata/ingestion/source/api/rest/connection.py
+++ b/ingestion/src/metadata/ingestion/source/api/rest/connection.py
@@ -20,11 +20,15 @@
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
-from metadata.generated.schema.entity.services.connections.apiService.restConnection import (
- RESTConnection,
+from metadata.generated.schema.entity.services.connections.api.restConnection import (
+ RestConnection,
+)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
class SchemaURLError(Exception):
@@ -39,7 +43,7 @@ class InvalidOpenAPISchemaError(Exception):
"""
-def get_connection(connection: RESTConnection) -> Response:
+def get_connection(connection: RestConnection) -> Response:
"""
Create connection
"""
@@ -52,9 +56,10 @@ def get_connection(connection: RESTConnection) -> Response:
def test_connection(
metadata: OpenMetadata,
client: Response,
- service_connection: RESTConnection,
+ service_connection: RestConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -79,9 +84,10 @@ def custom_schema_exec():
test_fn = {"CheckURL": custom_url_exec, "CheckSchema": custom_schema_exec}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/api/rest/metadata.py b/ingestion/src/metadata/ingestion/source/api/rest/metadata.py
index bd2776258fae..925c0f1e943f 100644
--- a/ingestion/src/metadata/ingestion/source/api/rest/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/api/rest/metadata.py
@@ -23,8 +23,8 @@
)
from metadata.generated.schema.entity.data.apiCollection import APICollection
from metadata.generated.schema.entity.data.apiEndpoint import ApiRequestMethod
-from metadata.generated.schema.entity.services.connections.apiService.restConnection import (
- RESTConnection,
+from metadata.generated.schema.entity.services.connections.api.restConnection import (
+ RestConnection,
)
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
@@ -62,10 +62,10 @@ def create(
cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
):
config: WorkflowSource = WorkflowSource.model_validate(config_dict)
- connection: RESTConnection = config.serviceConnection.root.config
- if not isinstance(connection, RESTConnection):
+ connection: RestConnection = config.serviceConnection.root.config
+ if not isinstance(connection, RestConnection):
raise InvalidSourceException(
- f"Expected RESTConnection, but got {connection}"
+ f"Expected RestConnection, but got {connection}"
)
return cls(config, metadata)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/dashboard_service.py b/ingestion/src/metadata/ingestion/source/dashboard/dashboard_service.py
index 5a30430c4eec..96df9f912f78 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/dashboard_service.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/dashboard_service.py
@@ -55,6 +55,9 @@
from metadata.ingestion.api.models import Either, Entity
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import C, TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.lineage.sql_lineage import get_column_fqn
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
@@ -554,7 +557,10 @@ def get_dashboard(self) -> Any:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def prepare(self):
"""By default, nothing to prepare"""
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/domodashboard/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/domodashboard/connection.py
index 0b6cf700026a..7bc32cdf63d0 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/domodashboard/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/domodashboard/connection.py
@@ -12,7 +12,6 @@
"""
Source connection handler
"""
-
from typing import Optional
from pydomo import Domo
@@ -24,11 +23,15 @@
from metadata.generated.schema.entity.services.connections.dashboard.domoDashboardConnection import (
DomoDashboardConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DomoDashboardConnection) -> OMPyDomoClient:
@@ -57,7 +60,8 @@ def test_connection(
client: OMPyDomoClient,
service_connection: DomoDashboardConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -72,9 +76,10 @@ def custom_test_page_list():
"GetCharts": client.custom.test_list_cards,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/lightdash/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/lightdash/connection.py
index db653b1b02b3..1d0cb137eb0c 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/lightdash/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/lightdash/connection.py
@@ -12,7 +12,6 @@
"""
Source connection handler
"""
-
from typing import Optional
from metadata.generated.schema.entity.automations.workflow import (
@@ -21,12 +20,16 @@
from metadata.generated.schema.entity.services.connections.dashboard.lightdashConnection import (
LightdashConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.lightdash.client import LightdashApiClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -49,7 +52,8 @@ def test_connection(
client: LightdashApiClient,
service_connection: LightdashConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -60,9 +64,10 @@ def custom_executor():
test_fn = {"GetDashboards": custom_executor}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/looker/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/looker/connection.py
index c5b235ec4cd0..cee7c9f2d68a 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/looker/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/looker/connection.py
@@ -24,8 +24,12 @@
from metadata.generated.schema.entity.services.connections.dashboard.lookerConnection import (
LookerConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: LookerConnection) -> Looker40SDK:
@@ -49,7 +53,8 @@ def test_connection(
client: Looker40SDK,
service_connection: LookerConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -76,9 +81,10 @@ def validate_api_version():
"ListLookMLModels": list_datamodels_test,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/metabase/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/metabase/connection.py
index 1d8000ea439e..f8c09bc85992 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/metabase/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/metabase/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.dashboard.metabaseConnection import (
MetabaseConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.metabase.client import MetabaseClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MetabaseConnection) -> MetabaseClient:
@@ -37,7 +41,8 @@ def test_connection(
client: MetabaseClient,
service_connection: MetabaseConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -49,9 +54,10 @@ def custom_executor():
test_fn = {"GetDashboards": custom_executor}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/mode/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/mode/connection.py
index 339931b4c840..96a670538a4c 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/mode/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/mode/connection.py
@@ -21,9 +21,13 @@
from metadata.generated.schema.entity.services.connections.dashboard.modeConnection import (
ModeConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.mode.client import ModeApiClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: ModeConnection) -> ModeApiClient:
@@ -38,7 +42,8 @@ def test_connection(
client: ModeApiClient,
service_connection: ModeConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -50,9 +55,10 @@ def test_connection(
)
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/mstr/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/mstr/connection.py
index df6efbde2a67..156022a8aaca 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/mstr/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/mstr/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.dashboard.mstrConnection import (
MstrConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.mstr.client import MSTRClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MstrConnection) -> MSTRClient:
@@ -37,7 +41,8 @@ def test_connection(
client: MSTRClient,
service_connection: MstrConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetProjects": client.get_projects_list}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/connection.py
index c9bae539f9f0..a7a73e7f406a 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/connection.py
@@ -20,6 +20,9 @@
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
PowerBIConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.powerbi.client import (
@@ -27,6 +30,7 @@
PowerBiClient,
)
from metadata.ingestion.source.dashboard.powerbi.file_client import PowerBiFileClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: PowerBIConnection) -> PowerBiApiClient:
@@ -46,16 +50,18 @@ def test_connection(
client: PowerBiClient,
service_connection: PowerBIConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
test_fn = {"GetDashboards": client.api_client.fetch_dashboards}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/connection.py
index 38eef1660863..784d5088f2a0 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/qlikcloud/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.dashboard.qlikCloudConnection import (
QlikCloudConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.qlikcloud.client import QlikCloudClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: QlikCloudConnection) -> QlikCloudClient:
@@ -37,7 +41,8 @@ def test_connection(
client: QlikCloudClient,
service_connection: QlikCloudConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetDashboards": client.get_dashboards_list_test_conn}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/qliksense/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/qliksense/connection.py
index 94ed95e10d51..6ef88f4066a7 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/qliksense/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/qliksense/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.dashboard.qlikSenseConnection import (
QlikSenseConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.qliksense.client import QlikSenseClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: QlikSenseConnection) -> QlikSenseClient:
@@ -37,7 +41,8 @@ def test_connection(
client: QlikSenseClient,
service_connection: QlikSenseConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetDashboards": client.get_dashboard_for_test_connection}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/connection.py
index 4d2d70dd8ba2..6926a140eb8e 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.dashboard.quickSightConnection import (
QuickSightConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: QuickSightConnection):
@@ -43,7 +47,8 @@ def test_connection(
client: AWSClient,
service_connection: QuickSightConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -55,9 +60,10 @@ def test_connection(
)
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py
index 11d16866200d..fe091527d083 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/metadata.py
@@ -247,6 +247,7 @@ def yield_dashboard_lineage_details( # pylint: disable=too-many-locals
**data_source["RelationalTable"]
)
except (KeyError, ValidationError) as err:
+ data_source_resp = None
yield Either(
left=StackTraceError(
name="Lineage",
@@ -257,62 +258,70 @@ def yield_dashboard_lineage_details( # pylint: disable=too-many-locals
stackTrace=traceback.format_exc(),
)
)
+ if data_source_resp:
+ schema_name = data_source_resp.schema_name
+ table_name = data_source_resp.table_name
- schema_name = data_source_resp.schema_name
- table_name = data_source_resp.table_name
-
- list_data_source_func = lambda kwargs: self.client.list_data_sources( # pylint: disable=unnecessary-lambda-assignment
- **kwargs
- )
+ list_data_source_func = lambda kwargs: self.client.list_data_sources( # pylint: disable=unnecessary-lambda-assignment
+ **kwargs
+ )
- data_source_summary_list = self._check_pagination(
- listing_method=list_data_source_func,
- entity_key="DataSources",
- )
+ data_source_summary_list = self._check_pagination(
+ listing_method=list_data_source_func,
+ entity_key="DataSources",
+ )
- data_source_ids = [
- data_source_arn["DataSourceId"]
- for data_source_arn in data_source_summary_list or []
- if data_source_arn["Arn"] in data_source_resp.datasource_arn
- ]
+ data_source_ids = [
+ data_source_arn["DataSourceId"]
+ for data_source_arn in data_source_summary_list or []
+ if data_source_arn["Arn"] in data_source_resp.datasource_arn
+ ]
- for data_source_id in data_source_ids or []:
- data_source_resp = DescribeDataSourceResponse(
- **self.client.describe_data_source(
- AwsAccountId=self.aws_account_id,
- DataSourceId=data_source_id,
- )
- ).DataSource
- if data_source_resp and data_source_resp.DataSourceParameters:
- data_source_dict = data_source_resp.DataSourceParameters
- for db in data_source_dict.keys() or []:
- from_fqn = fqn.build(
- self.metadata,
- entity_type=Table,
- service_name=db_service_name,
- database_name=data_source_dict[db].get("Database"),
- schema_name=schema_name,
- table_name=table_name,
- skip_es_search=True,
+ for data_source_id in data_source_ids or []:
+ data_source_resp = DescribeDataSourceResponse(
+ **self.client.describe_data_source(
+ AwsAccountId=self.aws_account_id,
+ DataSourceId=data_source_id,
)
- from_entity = self.metadata.get_by_name(
- entity=Table,
- fqn=from_fqn,
- )
- to_fqn = fqn.build(
- self.metadata,
- entity_type=Dashboard,
- service_name=self.config.serviceName,
- dashboard_name=dashboard_details.DashboardId,
- )
- to_entity = self.metadata.get_by_name(
- entity=Dashboard,
- fqn=to_fqn,
- )
- if from_entity is not None and to_entity is not None:
- yield self._get_add_lineage_request(
- to_entity=to_entity, from_entity=from_entity
+ ).DataSource
+ if (
+ data_source_resp
+ and data_source_resp.DataSourceParameters
+ ):
+ data_source_dict = data_source_resp.DataSourceParameters
+ for db in data_source_dict.keys() or []:
+ from_fqn = fqn.build(
+ self.metadata,
+ entity_type=Table,
+ service_name=db_service_name,
+ database_name=data_source_dict[db].get(
+ "Database"
+ ),
+ schema_name=schema_name,
+ table_name=table_name,
+ skip_es_search=True,
+ )
+ from_entity = self.metadata.get_by_name(
+ entity=Table,
+ fqn=from_fqn,
+ )
+ to_fqn = fqn.build(
+ self.metadata,
+ entity_type=Dashboard,
+ service_name=self.config.serviceName,
+ dashboard_name=dashboard_details.DashboardId,
+ )
+ to_entity = self.metadata.get_by_name(
+ entity=Dashboard,
+ fqn=to_fqn,
)
+ if (
+ from_entity is not None
+ and to_entity is not None
+ ):
+ yield self._get_add_lineage_request(
+ to_entity=to_entity, from_entity=from_entity
+ )
except Exception as exc: # pylint: disable=broad-except
yield Either(
left=StackTraceError(
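
The restructure above guards against using data_source_resp after a failed parse: the except branch resets it to None and yields the error, and all downstream lineage work now sits under the `if data_source_resp:` gate. The distilled pattern, as a runnable sketch:

    def parse_rows(raw_rows, parse, on_error):
        for raw in raw_rows:
            parsed = None            # sentinel, mirrors data_source_resp = None
            try:
                parsed = parse(raw)
            except (KeyError, ValueError) as err:
                on_error(err)        # mirrors yield Either(left=StackTraceError(...))
            if parsed:               # mirrors the new `if data_source_resp:` gate
                yield parsed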
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/models.py b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/models.py
index 47a6661820cb..fa279af2dc57 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/quicksight/models.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/quicksight/models.py
@@ -50,12 +50,12 @@ class DashboardResp(BaseModel):
RequestId: Optional[str] = None
-class DataSource(BaseModel):
+class DataSourceModel(BaseModel):
DataSourceId: str
DataSourceParameters: Optional[dict] = None
class DescribeDataSourceResponse(BaseModel):
- DataSource: Optional[DataSource] = None
+ DataSource: Optional[DataSourceModel] = None
RequestId: Optional[str] = None
Status: Optional[int] = None
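
Renaming the class to DataSourceModel removes the ambiguity between the class name and the DataSource field on DescribeDataSourceResponse, whose annotation previously referred to a type of the same name at module scope. A self-contained sketch of the resulting models (the field keeps the AWS response key):

    from typing import Optional
    from pydantic import BaseModel

    class DataSourceModel(BaseModel):
        DataSourceId: str
        DataSourceParameters: Optional[dict] = None

    class DescribeDataSourceResponse(BaseModel):
        # Field name matches the AWS payload; it no longer shadows the class.
        DataSource: Optional[DataSourceModel] = None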
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/redash/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/redash/connection.py
index ae015da7e868..a0aa013aa01d 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/redash/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/redash/connection.py
@@ -21,12 +21,16 @@
from metadata.generated.schema.entity.services.connections.dashboard.redashConnection import (
RedashConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.redash.client import RedashApiClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: RedashConnection) -> RedashApiClient:
@@ -45,7 +49,8 @@ def test_connection(
client: RedashApiClient,
service_connection: RedashConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -53,9 +58,10 @@ def test_connection(
test_fn = {"GetDashboards": client.dashboards}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/sigma/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/sigma/connection.py
index b2e2204157ec..a6d33c67d930 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/sigma/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/sigma/connection.py
@@ -21,12 +21,16 @@
from metadata.generated.schema.entity.services.connections.dashboard.sigmaConnection import (
SigmaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.dashboard.sigma.client import SigmaApiClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SigmaConnection) -> SigmaApiClient:
@@ -45,7 +49,8 @@ def test_connection(
client: SigmaApiClient,
service_connection: SigmaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -53,9 +58,10 @@ def test_connection(
test_fn = {"GetToken": client.get_auth_token, "GetWorkbooks": client.get_dashboards}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py
index d1a3a5fa3b59..1aa87e343831 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py
@@ -29,6 +29,9 @@
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.generated.schema.entity.utils.supersetApiConnection import (
SupersetApiConnection,
)
@@ -49,6 +52,7 @@
from metadata.ingestion.source.database.postgres.connection import (
get_connection as pg_get_connection,
)
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SupersetConnection) -> SupersetAPIClient:
@@ -69,7 +73,8 @@ def test_connection(
client: Union[SupersetAPIClient, Engine],
service_connection: SupersetConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -84,11 +89,17 @@ def test_connection(
else:
test_fn["CheckAccess"] = partial(test_connection_engine_step, client)
test_fn["GetDashboards"] = partial(test_query, client, FETCH_DASHBOARDS_TEST)
- test_fn["GetCharts"] = partial(test_query, client, FETCH_ALL_CHARTS_TEST)
+ if isinstance(service_connection.connection, MysqlConnection):
+ test_fn["GetCharts"] = partial(
+ test_query, client, FETCH_ALL_CHARTS_TEST.replace('"', "`")
+ )
+ else:
+ test_fn["GetCharts"] = partial(test_query, client, FETCH_ALL_CHARTS_TEST)
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
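
The GetCharts branch rewrites the test query for MySQL-backed Superset installations: the query constants use ANSI double-quoted identifiers, while MySQL (without ANSI_QUOTES enabled) quotes identifiers with backticks. The switch, distilled into a sketch with illustrative names:

    def adapt_identifier_quoting(query: str, is_mysql: bool) -> str:
        # ANSI SQL double quotes become MySQL backticks; other dialects pass through.
        return query.replace('"', "`") if is_mysql else query

    result = adapt_identifier_quoting('SELECT "id" FROM "slices"', True)
    assert result == 'SELECT `id` FROM `slices`'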
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py
index 8be179f0b114..636641b61f8e 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py
@@ -26,6 +26,9 @@
from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.dashboardDataModel import DataModelType
from metadata.generated.schema.entity.data.table import Table
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+ MysqlConnection,
+)
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
@@ -81,7 +84,10 @@ def prepare(self):
the required information which is not available in fetch_charts_with_id api
"""
try:
- charts = self.engine.execute(FETCH_ALL_CHARTS)
+ if isinstance(self.service_connection.connection, MysqlConnection):
+ charts = self.engine.execute(FETCH_ALL_CHARTS.replace('"', "`"))
+ else:
+ charts = self.engine.execute(FETCH_ALL_CHARTS)
for chart in charts:
chart_detail = FetchChart(**chart)
self.all_charts[chart_detail.id] = chart_detail
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py
index 8feef66c0e0e..e3e976e62b8d 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py
@@ -24,6 +24,9 @@
from metadata.generated.schema.entity.services.connections.dashboard.tableauConnection import (
TableauConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.generated.schema.security.credentials.accessTokenAuth import (
AccessTokenAuth,
)
@@ -38,6 +41,7 @@
TABLEAU_GET_WORKBOOKS_PARAM_DICT,
)
from metadata.ingestion.source.dashboard.tableau.client import TableauClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
from metadata.utils.ssl_registry import get_verify_ssl_fn
@@ -70,7 +74,8 @@ def test_connection(
client: TableauClient,
service_connection: TableauConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -101,11 +106,12 @@ def test_connection(
"GetDataModels": client.test_get_datamodels,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/athena/connection.py b/ingestion/src/metadata/ingestion/source/database/athena/connection.py
index c151be6fd69f..59baec43250e 100644
--- a/ingestion/src/metadata/ingestion/source/database/athena/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/athena/connection.py
@@ -27,6 +27,9 @@
from metadata.generated.schema.entity.services.connections.database.athenaConnection import (
AthenaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -37,6 +40,7 @@
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: AthenaConnection) -> str:
@@ -94,7 +98,8 @@ def test_connection(
engine: Engine,
service_connection: AthenaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -121,9 +126,10 @@ def custom_executor_for_view():
"GetViews": custom_executor_for_view,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py b/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
index 7256e8507916..fe01c321b157 100644
--- a/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
@@ -28,6 +28,9 @@
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
MssqlConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -35,6 +38,7 @@
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: Union[AzureSQLConnection, MssqlConnection]) -> str:
@@ -105,14 +109,16 @@ def test_connection(
engine: Engine,
service_connection: AzureSQLConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/bigquery/connection.py b/ingestion/src/metadata/ingestion/source/database/bigquery/connection.py
index 06bc25a031aa..132f0671953d 100644
--- a/ingestion/src/metadata/ingestion/source/database/bigquery/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/bigquery/connection.py
@@ -27,6 +27,9 @@
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
BigQueryConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.generated.schema.security.credentials.gcpCredentials import (
GcpCredentialsPath,
)
@@ -47,6 +50,7 @@
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.bigquery.queries import BIGQUERY_TEST_STATEMENT
+from metadata.utils.constants import THREE_MIN
from metadata.utils.credentials import set_google_credentials
from metadata.utils.logger import ingestion_logger
@@ -109,7 +113,8 @@ def test_connection(
engine: Engine,
service_connection: BigQueryConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -163,14 +168,15 @@ def test_connection_inner(engine):
),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
- test_connection_inner(engine)
+ return test_connection_inner(engine)
def get_table_view_names(connection, schema=None):
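
BigQuery assembles its test steps inside test_connection_inner so the step map can bind the engine; the fix threads the result back through both calls so the outer test_connection honours the new return contract. The shape of the change, as a sketch with stand-in logic:

    # Both returns are required: dropping either one reverts to returning None.
    def test_connection_outer(engine: str) -> dict:
        def test_connection_inner(engine: str) -> dict:
            # stand-in for building test_fn and calling test_connection_steps
            return {"engine": engine, "status": "Successful"}
        return test_connection_inner(engine)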
diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py b/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py
index 990842604c52..4665e1004124 100644
--- a/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py
@@ -19,6 +19,9 @@
from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
BigTableConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.generated.schema.security.credentials.gcpValues import (
GcpCredentialsValues,
SingleProjectId,
@@ -29,6 +32,7 @@
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.bigtable.client import MultiProjectClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.credentials import set_google_credentials
from metadata.utils.logger import ingestion_logger
@@ -95,7 +99,8 @@ def test_connection(
client: MultiProjectClient,
service_connection: BigTableConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -108,9 +113,10 @@ def test_connection(
"GetRows": tester.get_row,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/clickhouse/connection.py b/ingestion/src/metadata/ingestion/source/database/clickhouse/connection.py
index b0c17fded9d6..020f8ab3df0b 100644
--- a/ingestion/src/metadata/ingestion/source/database/clickhouse/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/clickhouse/connection.py
@@ -23,6 +23,9 @@
from metadata.generated.schema.entity.services.connections.database.clickhouseConnection import (
ClickhouseConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -35,6 +38,7 @@
from metadata.ingestion.source.database.clickhouse.queries import (
CLICKHOUSE_SQL_STATEMENT_TEST,
)
+from metadata.utils.constants import THREE_MIN
HTTPS_PROTOCOL = "https"
@@ -67,7 +71,8 @@ def test_connection(
engine: Engine,
service_connection: ClickhouseConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -75,10 +80,11 @@ def test_connection(
queries = {"GetQueries": CLICKHOUSE_SQL_STATEMENT_TEST}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py b/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py
index 7f988a920707..8a6a8f67547e 100644
--- a/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/couchbase/connection.py
@@ -23,8 +23,12 @@
from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
CouchbaseConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: CouchbaseConnection):
@@ -49,7 +53,8 @@ def test_connection(
client: Any,
service_connection: CouchbaseConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -80,9 +85,10 @@ def test_get_collections(client: Cluster, holder: SchemaHolder):
"GetCollections": partial(test_get_collections, client, holder),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/couchbase/metadata.py b/ingestion/src/metadata/ingestion/source/database/couchbase/metadata.py
index 7201971da504..67f90c472e68 100644
--- a/ingestion/src/metadata/ingestion/source/database/couchbase/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/couchbase/metadata.py
@@ -27,10 +27,7 @@
SAMPLE_SIZE,
CommonNoSQLSource,
)
-from metadata.ingestion.source.database.couchbase.queries import (
- COUCHBASE_GET_DATA,
- COUCHBASE_SQL_STATEMENT,
-)
+from metadata.ingestion.source.database.couchbase.queries import COUCHBASE_GET_DATA
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -111,18 +108,14 @@ def get_table_columns_dict(self, schema_name: str, table_name: str) -> List[Dict
"""
try:
database_name = self.context.get().database
- query = COUCHBASE_SQL_STATEMENT.format(table_name=table_name)
- result = self.couchbase.query(query)
- for row in result.rows():
- if len(row) > 0:
- query_coln = COUCHBASE_GET_DATA.format(
- database_name=database_name,
- schema_name=schema_name,
- table_name=table_name,
- sample_size=SAMPLE_SIZE,
- )
- query_iter = self.couchbase.query(query_coln)
- return list(query_iter.rows())
+ query_coln = COUCHBASE_GET_DATA.format(
+ database_name=database_name,
+ schema_name=schema_name,
+ table_name=table_name,
+ sample_size=SAMPLE_SIZE,
+ )
+ query_iter = self.couchbase.query(query_coln)
+ return list(query_iter.rows())
except Exception as exp:
logger.debug(f"Failed to list column names for table [{table_name}]: {exp}")
logger.debug(traceback.format_exc())
diff --git a/ingestion/src/metadata/ingestion/source/database/couchbase/queries.py b/ingestion/src/metadata/ingestion/source/database/couchbase/queries.py
index 40a783eb4c12..3a32d74d66f9 100644
--- a/ingestion/src/metadata/ingestion/source/database/couchbase/queries.py
+++ b/ingestion/src/metadata/ingestion/source/database/couchbase/queries.py
@@ -14,10 +14,6 @@
import textwrap
-COUCHBASE_SQL_STATEMENT = textwrap.dedent(
- """ SELECT * FROM system:indexes WHERE keyspace_id = '{table_name}' AND is_primary = TRUE """
-)
-
COUCHBASE_GET_DATA = textwrap.dedent(
""" select crc.* from `{database_name}`.`{schema_name}`.`{table_name}` crc limit {sample_size} """
)
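
With COUCHBASE_SQL_STATEMENT gone, get_table_columns_dict no longer gates sampling on the keyspace having a primary index in system:indexes; it formats and issues COUCHBASE_GET_DATA directly. A sketch of the remaining query as it is now used (the example values are illustrative, not from the source):

    COUCHBASE_GET_DATA = (
        "select crc.* from `{database_name}`.`{schema_name}`.`{table_name}` "
        "crc limit {sample_size}"
    )
    query = COUCHBASE_GET_DATA.format(
        database_name="travel-sample",   # assumed example keyspace
        schema_name="inventory",
        table_name="airline",
        sample_size=100,
    )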
diff --git a/ingestion/src/metadata/ingestion/source/database/database_service.py b/ingestion/src/metadata/ingestion/source/database/database_service.py
index aee239c55efe..600f211170fb 100644
--- a/ingestion/src/metadata/ingestion/source/database/database_service.py
+++ b/ingestion/src/metadata/ingestion/source/database/database_service.py
@@ -58,6 +58,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.life_cycle import OMetaLifeCycleData
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.topology import (
@@ -609,4 +612,7 @@ def yield_external_table_lineage(self) -> Iterable[Either[AddLineageRequest]]:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
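
The service-level test_connection now collects the TestConnectionResult and delegates failure handling to raise_test_connection_exception. A hedged sketch of that helper's contract; the real implementation lives in metadata.ingestion.connections.test_connections, and the step fields here are assumed from the testConnectionResult schema:

    from metadata.ingestion.connections.test_connections import (
        SourceConnectionException,
    )

    def raise_if_failed(result) -> None:
        # Raise only when a mandatory step failed; optional steps may fail freely.
        for step in result.steps or []:
            if step.mandatory and not step.passed:
                raise SourceConnectionException(
                    f"Mandatory step {step.name} failed: {step.message}"
                )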
diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/connection.py b/ingestion/src/metadata/ingestion/source/database/databricks/connection.py
index 7d7854bebf9f..14d3d3e392e9 100644
--- a/ingestion/src/metadata/ingestion/source/database/databricks/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/databricks/connection.py
@@ -25,6 +25,9 @@
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
DatabricksConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -39,6 +42,7 @@
from metadata.ingestion.source.database.databricks.queries import (
DATABRICKS_GET_CATALOGS,
)
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -71,7 +75,8 @@ def test_connection(
connection: Engine,
service_connection: DatabricksConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -104,10 +109,10 @@ def test_database_query(engine: Engine, statement: str):
"GetQueries": client.test_query_api_access,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
- timeout_seconds=service_connection.connectionTimeout,
+ timeout_seconds=service_connection.connectionTimeout or timeout_seconds,
)
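
Databricks is the one connector with a user-configurable timeout, so the shared default applies only as a fallback: connectionTimeout wins when set, otherwise the caller's timeout_seconds (or THREE_MIN) is used. The precedence, distilled:

    THREE_MIN = 3 * 60  # assumed value of metadata.utils.constants.THREE_MIN

    def effective_timeout(connection_timeout, timeout_seconds=THREE_MIN):
        # Explicit service-level setting takes priority over the shared default.
        return connection_timeout or timeout_seconds

    assert effective_timeout(None) == 180
    assert effective_timeout(30) == 30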
diff --git a/ingestion/src/metadata/ingestion/source/database/datalake/connection.py b/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
index 9ec0a8625287..fdc11f6021d0 100644
--- a/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
@@ -31,6 +31,9 @@
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
DatalakeConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.datalake.clients.azure_blob import (
@@ -38,6 +41,7 @@
)
from metadata.ingestion.source.database.datalake.clients.gcs import DatalakeGcsClient
from metadata.ingestion.source.database.datalake.clients.s3 import DatalakeS3Client
+from metadata.utils.constants import THREE_MIN
# Only import specific datalake dependencies if necessary
@@ -91,7 +95,8 @@ def test_connection(
connection: DatalakeClient,
service_connection: DatalakeConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -102,9 +107,10 @@ def test_connection(
),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/db2/connection.py b/ingestion/src/metadata/ingestion/source/database/db2/connection.py
index 6ea82c758c20..cadde6775f38 100644
--- a/ingestion/src/metadata/ingestion/source/database/db2/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/db2/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.db2Connection import (
Db2Connection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -29,6 +32,7 @@
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: Db2Connection) -> Engine:
@@ -47,14 +51,16 @@ def test_connection(
engine: Engine,
service_connection: Db2Connection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/constants.py b/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
index 63731473b72d..83c49c0724a4 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/constants.py
@@ -22,6 +22,61 @@
# Based on https://schemas.getdbt.com/dbt/catalog/v1.json
REQUIRED_CATALOG_KEYS = ["name", "type", "index"]
+REQUIRED_RESULTS_KEYS = {
+ "status",
+ "timing",
+ "thread_id",
+ "execution_time",
+ "message",
+ "adapter_response",
+ "unique_id",
+}
+
+REQUIRED_NODE_KEYS = {
+ "schema_",
+ "schema",
+ "freshness",
+ "name",
+ "resource_type",
+ "path",
+ "unique_id",
+ "source_name",
+ "source_description",
+ "source_meta",
+ "loader",
+ "identifier",
+ "relation_name",
+ "fqn",
+ "alias",
+ "checksum",
+ "config",
+ "column_name",
+ "test_metadata",
+ "original_file_path",
+ "root_path",
+ "database",
+ "tags",
+ "description",
+ "columns",
+ "meta",
+ "owner",
+ "created_at",
+ "group",
+ "sources",
+ "compiled",
+ "docs",
+ "version",
+ "latest_version",
+ "package_name",
+ "depends_on",
+ "compiled_code",
+ "compiled_sql",
+ "raw_code",
+ "raw_sql",
+ "language",
+}
+
+
NONE_KEYWORDS_LIST = ["none", "null"]
DBT_CATALOG_FILE_NAME = "catalog.json"
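
These allow-lists move the key sets out of dbt_service.py so manifest nodes, sources, and run results are all pruned the same way. The pruning itself reduces to an in-place dict filter:

    # Generic form of the pruning applied with REQUIRED_NODE_KEYS and
    # REQUIRED_RESULTS_KEYS: drop any key outside the allow-list, in place.
    def prune_to_required(node: dict, required: set) -> None:
        for key in [k for k in node if k.lower() not in required]:
            del node[key]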
diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
index 44307b6df755..aa2d65f4e2cf 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_service.py
@@ -13,7 +13,7 @@
"""
from abc import ABC, abstractmethod
-from typing import Iterable
+from typing import Iterable, List
from dbt_artifacts_parser.parser import parse_catalog, parse_manifest, parse_run_results
from pydantic import Field
@@ -37,6 +37,10 @@
TopologyNode,
)
from metadata.ingestion.source.database.database_service import DataModelLink
+from metadata.ingestion.source.database.dbt.constants import (
+ REQUIRED_NODE_KEYS,
+ REQUIRED_RESULTS_KEYS,
+)
from metadata.ingestion.source.database.dbt.dbt_config import get_dbt_details
from metadata.ingestion.source.database.dbt.models import (
DbtFiles,
@@ -169,51 +173,27 @@ def remove_manifest_non_required_keys(self, manifest_dict: dict):
}
)
- required_nodes_keys = {
- "schema_",
- "schema",
- "name",
- "resource_type",
- "path",
- "unique_id",
- "fqn",
- "alias",
- "checksum",
- "config",
- "column_name",
- "test_metadata",
- "original_file_path",
- "root_path",
- "database",
- "tags",
- "description",
- "columns",
- "meta",
- "owner",
- "created_at",
- "group",
- "sources",
- "compiled",
- "docs",
- "version",
- "latest_version",
- "package_name",
- "depends_on",
- "compiled_code",
- "compiled_sql",
- "raw_code",
- "raw_sql",
- "language",
- }
+ for field in ["nodes", "sources"]:
+ for node, value in (  # pylint: disable=unused-variable
+ manifest_dict.get(field) or {}
+ ).items():
+ keys_to_delete = [
+ key for key in value if key.lower() not in REQUIRED_NODE_KEYS
+ ]
+ for key in keys_to_delete:
+ del value[key]
- for node, value in manifest_dict.get( # pylint: disable=unused-variable
- "nodes"
- ).items():
- keys_to_delete = [
- key for key in value if key.lower() not in required_nodes_keys
- ]
- for key in keys_to_delete:
- del value[key]
+ def remove_run_result_non_required_keys(self, run_results: List[dict]):
+ """
+ Method to remove the non-required keys from the run results file
+ """
+ for run_result in run_results:
+ for result in run_result.get("results") or []:
+ keys_to_delete = [
+ key for key in result if key.lower() not in REQUIRED_RESULTS_KEYS
+ ]
+ for key in keys_to_delete:
+ del result[key]
def get_dbt_files(self) -> Iterable[DbtFiles]:
dbt_files = get_dbt_details(self.source_config.dbtConfigSource)
@@ -225,6 +205,10 @@ def get_dbt_objects(self) -> Iterable[DbtObjects]:
self.remove_manifest_non_required_keys(
manifest_dict=self.context.get().dbt_file.dbt_manifest
)
+ if self.context.get().dbt_file.dbt_run_results:
+ self.remove_run_result_non_required_keys(
+ run_results=self.context.get().dbt_file.dbt_run_results
+ )
dbt_objects = DbtObjects(
dbt_catalog=parse_catalog(self.context.get().dbt_file.dbt_catalog)
if self.context.get().dbt_file.dbt_catalog
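
Pruning run results before parse_run_results keeps fields outside REQUIRED_RESULTS_KEYS, which vary across dbt versions, from tripping the artifact parser's validation. A sketch of the pre-parse step mirroring get_dbt_objects above:

    from metadata.ingestion.source.database.dbt.constants import (
        REQUIRED_RESULTS_KEYS,
    )

    def prune_run_results(dbt_run_results: list) -> None:
        # dbt_run_results holds raw run_results.json payloads (dicts).
        for run_result in dbt_run_results:
            for result in run_result.get("results") or []:
                keys = [k for k in result if k.lower() not in REQUIRED_RESULTS_KEYS]
                for key in keys:
                    del result[key]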
diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py
index 8337f0f47bce..d312f1a186b0 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py
@@ -288,10 +288,7 @@ def yield_dbt_tags(
for tag_name in dbt_tags_list
]
yield from get_ometa_tag_and_classification(
- tags=[
- tag_label.split(fqn.FQN_SEPARATOR)[1]
- for tag_label in dbt_tag_labels
- ],
+ tags=[fqn.split(tag_label)[1] for tag_label in dbt_tag_labels],
classification_name=self.tag_classification_name,
tag_description="dbt Tags",
classification_description="dbt classification",
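
Tag labels here are fully qualified names such as dbtTags.tagName, and a tag name may itself contain the separator once quoted. fqn.split parses the FQN grammar instead of splitting on the raw dot, which is assumed to keep a quoted segment together as one part, consistent with the quoting fqn.build applies. Illustrative only:

    label = 'dbtTags."my.tag"'
    print(label.split(".")[1])   # '"my'  -> naive split truncates the tag name
    # fqn.split(label)[1] is expected to return the whole quoted segment instead.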
diff --git a/ingestion/src/metadata/ingestion/source/database/deltalake/connection.py b/ingestion/src/metadata/ingestion/source/database/deltalake/connection.py
index 5219e89595a0..20d799007a1e 100644
--- a/ingestion/src/metadata/ingestion/source/database/deltalake/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/deltalake/connection.py
@@ -31,8 +31,12 @@
from metadata.generated.schema.entity.services.connections.database.deltaLakeConnection import (
DeltaLakeConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
@dataclass
@@ -83,7 +87,8 @@ def test_connection(
connection: DeltalakeClient,
service_connection: DeltaLakeConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -97,9 +102,10 @@ def test_connection(
),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/domodatabase/connection.py b/ingestion/src/metadata/ingestion/source/database/domodatabase/connection.py
index 897c9dd767c5..3dbe0729e52f 100644
--- a/ingestion/src/metadata/ingestion/source/database/domodatabase/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/domodatabase/connection.py
@@ -23,11 +23,15 @@
from metadata.generated.schema.entity.services.connections.database.domoDatabaseConnection import (
DomoDatabaseConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DomoDatabaseConnection) -> Domo:
@@ -51,7 +55,8 @@ def test_connection(
domo: Domo,
service_connection: DomoDatabaseConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -65,9 +70,10 @@ def custom_executor():
"GetTables": custom_executor,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/doris/connection.py b/ingestion/src/metadata/ingestion/source/database/doris/connection.py
index d4eff8b228c1..4707ea926637 100644
--- a/ingestion/src/metadata/ingestion/source/database/doris/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/doris/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.dorisConnection import (
DorisConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -31,6 +34,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DorisConnection) -> Engine:
@@ -49,14 +53,16 @@ def test_connection(
engine: Engine,
service_connection: DorisConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/druid/connection.py b/ingestion/src/metadata/ingestion/source/database/druid/connection.py
index 4c81c12d0d66..a43b76df34c7 100644
--- a/ingestion/src/metadata/ingestion/source/database/druid/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/druid/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.druidConnection import (
DruidConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -29,6 +32,7 @@
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: DruidConnection) -> str:
@@ -52,14 +56,16 @@ def test_connection(
engine: Engine,
service_connection: DruidConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/dynamodb/connection.py b/ingestion/src/metadata/ingestion/source/database/dynamodb/connection.py
index 06dd5a338e25..085b37e366a0 100644
--- a/ingestion/src/metadata/ingestion/source/database/dynamodb/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/dynamodb/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.database.dynamoDBConnection import (
DynamoDBConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DynamoDBConnection):
@@ -48,7 +52,8 @@ def test_connection(
client: AWSClient,
service_connection: DynamoDBConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -58,9 +63,10 @@ def test_connection(
"ListTables": partial(check_list_tables, client),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/glue/connection.py b/ingestion/src/metadata/ingestion/source/database/glue/connection.py
index b718faa94083..d1f8f4649bb7 100644
--- a/ingestion/src/metadata/ingestion/source/database/glue/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/glue/connection.py
@@ -23,8 +23,12 @@
from metadata.generated.schema.entity.services.connections.database.glueConnection import (
GlueConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: GlueConnection) -> Engine:
@@ -39,7 +43,8 @@ def test_connection(
client: AWSClient,
service_connection: GlueConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -64,9 +69,10 @@ def custom_executor_for_table():
"GetTables": custom_executor_for_table,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/greenplum/connection.py b/ingestion/src/metadata/ingestion/source/database/greenplum/connection.py
index 2614bf4d2967..b9ad6eff9d01 100644
--- a/ingestion/src/metadata/ingestion/source/database/greenplum/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/greenplum/connection.py
@@ -23,6 +23,9 @@
from metadata.generated.schema.entity.services.connections.database.greenplumConnection import (
GreenplumConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -31,6 +34,7 @@
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.greenplum.queries import GREENPLUM_GET_DATABASE
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: GreenplumConnection) -> Engine:
@@ -49,7 +53,8 @@ def test_connection(
engine: Engine,
service_connection: GreenplumConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -57,10 +62,11 @@ def test_connection(
queries = {
"GetDatabases": GREENPLUM_GET_DATABASE,
}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/hive/connection.py b/ingestion/src/metadata/ingestion/source/database/hive/connection.py
index 1dd20e513905..0681f72c20ca 100644
--- a/ingestion/src/metadata/ingestion/source/database/hive/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/hive/connection.py
@@ -18,7 +18,7 @@
from typing import Any, Optional
from urllib.parse import quote_plus
-from pydantic import SecretStr
+from pydantic import SecretStr, ValidationError
from sqlalchemy.engine import Engine
from metadata.generated.schema.entity.automations.workflow import (
@@ -34,6 +34,9 @@
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -45,6 +48,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
HIVE_POSTGRES_SCHEME = "hive+postgres"
HIVE_MYSQL_SCHEME = "hive+mysql"
@@ -181,18 +185,35 @@ def test_connection(
engine: Engine,
service_connection: HiveConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- if service_connection.metastoreConnection:
+ if service_connection.metastoreConnection and isinstance(
+ service_connection.metastoreConnection, dict
+ ):
+ try:
+ service_connection.metastoreConnection = MysqlConnection.model_validate(
+ service_connection.metastoreConnection
+ )
+ except ValidationError:
+ try:
+ service_connection.metastoreConnection = (
+ PostgresConnection.model_validate(
+ service_connection.metastoreConnection
+ )
+ )
+ except ValidationError:
+ raise ValueError("Invalid metastore connection")
engine = get_metastore_connection(service_connection.metastoreConnection)
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
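When the Hive `metastoreConnection` arrives as a plain dict (e.g. deserialized from an automation workflow payload), the new branch validates it first as `MysqlConnection`, then as `PostgresConnection`, and only then gives up. The try-each-model pattern generalizes; a sketch assuming Pydantic v2 `model_validate`:

```python
from typing import Any, Sequence, Type

from pydantic import BaseModel, ValidationError


def validate_as_one_of(payload: Any, models: Sequence[Type[BaseModel]]) -> BaseModel:
    """Return the first candidate model that accepts the payload."""
    for model in models:
        try:
            return model.model_validate(payload)
        except ValidationError:
            continue  # try the next candidate
    raise ValueError("Invalid metastore connection")
```

With `validate_as_one_of(raw, (MysqlConnection, PostgresConnection))` the nested try/except above collapses into one call; order matters, since the first model to accept the payload wins.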
diff --git a/ingestion/src/metadata/ingestion/source/database/iceberg/connection.py b/ingestion/src/metadata/ingestion/source/database/iceberg/connection.py
index 6f315b807a24..6b8b804dc80b 100644
--- a/ingestion/src/metadata/ingestion/source/database/iceberg/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/iceberg/connection.py
@@ -23,9 +23,13 @@
from metadata.generated.schema.entity.services.connections.database.icebergConnection import (
IcebergConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.iceberg.catalog import IcebergCatalogFactory
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: IcebergConnection) -> Catalog:
@@ -42,7 +46,8 @@ def test_connection(
catalog: Catalog,
service_connection: IcebergConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -60,9 +65,10 @@ def custom_executor_for_tables():
"GetTables": custom_executor_for_tables,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/impala/connection.py b/ingestion/src/metadata/ingestion/source/database/impala/connection.py
index b28262d69469..4a984214ae27 100644
--- a/ingestion/src/metadata/ingestion/source/database/impala/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/impala/connection.py
@@ -24,6 +24,9 @@
from metadata.generated.schema.entity.services.connections.database.impalaConnection import (
ImpalaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -34,6 +37,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: ImpalaConnection) -> str:
@@ -108,14 +112,16 @@ def test_connection(
engine: Engine,
service_connection: ImpalaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/mariadb/connection.py b/ingestion/src/metadata/ingestion/source/database/mariadb/connection.py
index 87942ac961e3..1af77fab582e 100644
--- a/ingestion/src/metadata/ingestion/source/database/mariadb/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/mariadb/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.mariaDBConnection import (
MariaDBConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -31,6 +34,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MariaDBConnection) -> Engine:
@@ -49,14 +53,16 @@ def test_connection(
engine: Engine,
service_connection: MariaDBConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/mongodb/connection.py b/ingestion/src/metadata/ingestion/source/database/mongodb/connection.py
index 56611eb40c61..0d77e2c16568 100644
--- a/ingestion/src/metadata/ingestion/source/database/mongodb/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/mongodb/connection.py
@@ -26,9 +26,13 @@
from metadata.generated.schema.entity.services.connections.database.mongoDBConnection import (
MongoDBConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import get_connection_url_common
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MongoDBConnection):
@@ -44,7 +48,8 @@ def test_connection(
client: MongoClient,
service_connection: MongoDBConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -70,9 +75,10 @@ def test_get_collections(client_: MongoClient, holder_: SchemaHolder):
"GetCollections": partial(test_get_collections, client, holder),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/mssql/connection.py b/ingestion/src/metadata/ingestion/source/database/mssql/connection.py
index 9979e01fa632..92d8b1281226 100644
--- a/ingestion/src/metadata/ingestion/source/database/mssql/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/mssql/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
MssqlConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -36,6 +39,7 @@
MSSQL_GET_DATABASE,
MSSQL_TEST_GET_QUERIES,
)
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: MssqlConnection) -> str:
@@ -60,7 +64,8 @@ def test_connection(
engine: Engine,
service_connection: MssqlConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -69,10 +74,11 @@ def test_connection(
"GetQueries": MSSQL_TEST_GET_QUERIES,
"GetDatabases": MSSQL_GET_DATABASE,
}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/mysql/connection.py b/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
index d4a8b2d0c946..86dc523ef8b2 100644
--- a/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
@@ -26,6 +26,9 @@
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
MysqlConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -35,6 +38,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MysqlConnection) -> Engine:
@@ -63,14 +67,16 @@ def test_connection(
engine: Engine,
service_connection: MysqlConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
index ade912396452..83665684cbc2 100644
--- a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
@@ -31,6 +31,9 @@
OracleServiceName,
OracleTNSConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -39,6 +42,7 @@
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.oracle.queries import CHECK_ACCESS_TO_ALL
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
CX_ORACLE_LIB_VERSION = "8.3.0"
@@ -132,7 +136,8 @@ def test_connection(
engine: Engine,
service_connection: OracleConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -140,10 +145,11 @@ def test_connection(
test_conn_queries = {"CheckAccess": CHECK_ACCESS_TO_ALL}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=test_conn_queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/pinotdb/connection.py b/ingestion/src/metadata/ingestion/source/database/pinotdb/connection.py
index 2f25281d670f..8f148f3718a3 100644
--- a/ingestion/src/metadata/ingestion/source/database/pinotdb/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/pinotdb/connection.py
@@ -12,6 +12,7 @@
"""
Source connection handler
"""
+from copy import deepcopy
from typing import Optional
from sqlalchemy.engine import Engine
@@ -22,13 +23,18 @@
from metadata.generated.schema.entity.services.connections.database.pinotDBConnection import (
PinotDBConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
get_connection_url_common,
+ init_empty_connection_arguments,
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: PinotDBConnection) -> str:
@@ -41,8 +47,16 @@ def get_connection(connection: PinotDBConnection) -> Engine:
"""
Create connection
"""
+ # TODO: Rename database field to DatabaseSchema
+ # Pinot does not support the concept of multiple databases
+ if connection.database is not None:
+ if not connection.connectionArguments:
+ connection.connectionArguments = init_empty_connection_arguments()
+ connection.connectionArguments.root["database"] = connection.database
+ connection_copy = deepcopy(connection)
+ connection_copy.database = None
return create_generic_db_connection(
- connection=connection,
+ connection=connection_copy,
get_connection_url_fn=get_connection_url,
get_connection_args_fn=get_connection_args_common,
)
@@ -53,14 +67,16 @@ def test_connection(
engine: Engine,
service_connection: PinotDBConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
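Since Pinot has no multi-database concept, the connector now injects the configured `database` into the SQLAlchemy connection arguments and blanks the field on a deep copy, so the URL builder never sees it. A reduced sketch with a hypothetical dataclass standing in for the generated `PinotDBConnection` (the real code above injects the argument before copying):

```python
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class PinotConn:  # hypothetical stand-in for PinotDBConnection
    database: Optional[str] = None
    connection_arguments: dict = field(default_factory=dict)


def prepare_connection(conn: PinotConn) -> PinotConn:
    """Move `database` into the connection arguments on a copy so the
    caller's configured value survives."""
    conn_copy = deepcopy(conn)
    if conn_copy.database is not None:
        conn_copy.connection_arguments["database"] = conn_copy.database
        conn_copy.database = None  # keep the database out of the connection URL
    return conn_copy
```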
diff --git a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py
index fed715470986..b0767679b2dd 100644
--- a/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/pinotdb/metadata.py
@@ -9,7 +9,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""PinotDb source module"""
-from typing import Optional
+from typing import Iterable, Optional
+
+from pinotdb import sqlalchemy as pinot_sqlalchemy
+from sqlalchemy import types
from metadata.generated.schema.entity.services.connections.database.pinotDBConnection import (
PinotDBConnection,
@@ -22,6 +25,31 @@
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
+def get_type_custom(data_type, field_size):
+ type_map = {
+ "int": types.BigInteger,
+ "long": types.BigInteger,
+ "float": types.Float,
+ "double": types.Numeric,
+ # BOOLEAN was added after release 0.7.1.
+ # In release 0.7.1 and older, BOOLEAN is equivalent to STRING.
+ "boolean": types.Boolean,
+ "timestamp": types.TIMESTAMP,
+ "string": types.String,
+ "json": types.JSON,
+ "bytes": types.LargeBinary,
+ "big_decimal": types.DECIMAL,
+ # Complex types
+ "struct": types.BLOB,
+ "map": types.BLOB,
+ "array": types.ARRAY,
+ }
+ return type_map.get(data_type.lower())
+
+
+pinot_sqlalchemy.get_type = get_type_custom
+
+
class PinotdbSource(CommonDbSourceService):
"""
Implements the necessary methods to extract
@@ -39,3 +67,15 @@ def create(
f"Expected PinotdbConnection, but got {connection}"
)
return cls(config, metadata)
+
+ def get_database_names(self) -> Iterable[str]:
+ """
+ Default case with a single database.
+
+ The source may or may not provide a database name.
+
+ Sources with multiple databases should override this and
+ apply the necessary filters.
+ """
+ # TODO: Add databaseDisplayName field in PinotDBConnection
+ yield "default"
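`pinot_sqlalchemy.get_type` is patched at import time so Pinot's native type names resolve to richer SQLAlchemy types than the stock dialect provides. A quick sanity check of the mapping (assuming the module above has been imported, which installs the override):

```python
from sqlalchemy import types

assert get_type_custom("LONG", field_size=None) is types.BigInteger
assert get_type_custom("json", field_size=None) is types.JSON
# Unknown type names fall through to None instead of raising
assert get_type_custom("geometry", field_size=None) is None
```

Note the lookup is case-insensitive (`data_type.lower()`), and `field_size` is unused, kept only to match the signature of the function being replaced.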
diff --git a/ingestion/src/metadata/ingestion/source/database/postgres/connection.py b/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
index 4ea066a8e8bb..f447f13dfb9e 100644
--- a/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
@@ -27,6 +27,9 @@
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -42,6 +45,7 @@
from metadata.ingestion.source.database.postgres.utils import (
get_postgres_time_column_name,
)
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: PostgresConnection) -> Engine:
@@ -71,7 +75,8 @@ def test_connection(
engine: Engine,
service_connection: PostgresConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -84,10 +89,11 @@ def test_connection(
"GetDatabases": POSTGRES_GET_DATABASE,
"GetTags": POSTGRES_TEST_GET_TAGS,
}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/presto/connection.py b/ingestion/src/metadata/ingestion/source/database/presto/connection.py
index 1847c3207c40..b45499700912 100644
--- a/ingestion/src/metadata/ingestion/source/database/presto/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/presto/connection.py
@@ -25,6 +25,9 @@
from metadata.generated.schema.entity.services.connections.database.prestoConnection import (
PrestoConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -38,6 +41,7 @@
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.presto.queries import PRESTO_SHOW_CATALOGS
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: PrestoConnection) -> str:
@@ -84,7 +88,8 @@ def test_connection(
engine: Engine,
service_connection: PrestoConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -108,9 +113,10 @@ def custom_executor_for_table():
"GetTables": custom_executor_for_table,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/query_parser_source.py b/ingestion/src/metadata/ingestion/source/database/query_parser_source.py
index f502c576a3e3..20d8fbaec0dc 100644
--- a/ingestion/src/metadata/ingestion/source/database/query_parser_source.py
+++ b/ingestion/src/metadata/ingestion/source/database/query_parser_source.py
@@ -20,6 +20,9 @@
)
from metadata.generated.schema.type.tableQuery import TableQuery
from metadata.ingestion.api.steps import Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_test_connection_fn
from metadata.utils.helpers import get_start_and_end
@@ -121,4 +124,5 @@ def close(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.engine)
+ result = test_connection_fn(self.engine)
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/connection.py b/ingestion/src/metadata/ingestion/source/database/redshift/connection.py
index a9193d5be298..e904ba8a95db 100644
--- a/ingestion/src/metadata/ingestion/source/database/redshift/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/redshift/connection.py
@@ -24,6 +24,9 @@
from metadata.generated.schema.entity.services.connections.database.redshiftConnection import (
RedshiftConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -44,6 +47,7 @@
REDSHIFT_TEST_GET_QUERIES,
REDSHIFT_TEST_PARTITION_DETAILS,
)
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: RedshiftConnection) -> Engine:
@@ -62,7 +66,8 @@ def test_connection(
engine: Engine,
service_connection: RedshiftConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -96,11 +101,14 @@ def test_get_queries_permissions(engine_: Engine):
),
}
- test_connection_steps(
+ result = test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
kill_active_connections(engine)
+
+ return result
diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py b/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py
index 270eae010920..92267757dd91 100644
--- a/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py
@@ -96,6 +96,7 @@
)
from metadata.utils.filters import filter_by_database
from metadata.utils.helpers import get_start_and_end
+from metadata.utils.importer import import_side_effects
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import (
get_all_table_comments,
@@ -105,6 +106,9 @@
logger = ingestion_logger()
+import_side_effects(
+ "metadata.ingestion.source.database.redshift.profiler.system",
+)
STANDARD_TABLE_TYPES = {
"r": TableType.Regular,
diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/profiler/__init__.py b/ingestion/src/metadata/ingestion/source/database/redshift/profiler/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/profiler/system.py b/ingestion/src/metadata/ingestion/source/database/redshift/profiler/system.py
new file mode 100644
index 000000000000..8ea963e8ed4c
--- /dev/null
+++ b/ingestion/src/metadata/ingestion/source/database/redshift/profiler/system.py
@@ -0,0 +1,120 @@
+from typing import Dict, List
+
+from pydantic import TypeAdapter
+from sqlalchemy.orm import DeclarativeMeta, Session
+
+from metadata.generated.schema.entity.data.table import SystemProfile
+from metadata.ingestion.source.database.redshift.queries import (
+ STL_QUERY,
+ get_metric_result,
+ get_query_results,
+)
+from metadata.profiler.metrics.system.dml_operation import DatabaseDMLOperations
+from metadata.profiler.metrics.system.system import (
+ SYSTEM_QUERY_RESULT_CACHE,
+ get_system_metrics_for_dialect,
+)
+from metadata.profiler.orm.registry import Dialects
+from metadata.utils.logger import profiler_logger
+from metadata.utils.profiler_utils import get_value_from_cache, set_cache
+
+logger = profiler_logger()
+
+
+@get_system_metrics_for_dialect.register(Dialects.Redshift)
+def _(
+ dialect: str,
+ session: Session,
+ table: DeclarativeMeta,
+ *args,
+ **kwargs,
+) -> List[SystemProfile]:
+ """List all the DML operations for reshifts tables
+
+ Args:
+ dialect (str): redshift
+ session (Session): session object
+ table (DeclarativeMeta): orm table
+
+ Returns:
+ List[SystemProfile]: validated system profiles
+ """
+ logger.debug(f"Fetching system metrics for {dialect}")
+ database = session.get_bind().url.database
+ schema = table.__table_args__["schema"] # type: ignore
+
+ metric_results: List[Dict] = []
+
+ # get insert DML queries
+ inserts = get_value_from_cache(
+ SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.inserts"
+ )
+ if not inserts:
+ insert_query = STL_QUERY.format(
+ alias="si",
+ join_type="LEFT",
+ condition="sd.query is null",
+ database=database,
+ schema=schema,
+ )
+ inserts = get_query_results(
+ session,
+ insert_query,
+ DatabaseDMLOperations.INSERT.value,
+ )
+ set_cache(
+ SYSTEM_QUERY_RESULT_CACHE,
+ f"{Dialects.Redshift}.{database}.{schema}.inserts",
+ inserts,
+ )
+ metric_results.extend(get_metric_result(inserts, table.__tablename__))
+
+ # get delete DML queries
+ deletes = get_value_from_cache(
+ SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.deletes"
+ )
+ if not deletes:
+ delete_query = STL_QUERY.format(
+ alias="sd",
+ join_type="RIGHT",
+ condition="si.query is null",
+ database=database,
+ schema=schema,
+ )
+ deletes = get_query_results(
+ session,
+ delete_query,
+ DatabaseDMLOperations.DELETE.value,
+ )
+ set_cache(
+ SYSTEM_QUERY_RESULT_CACHE,
+ f"{Dialects.Redshift}.{database}.{schema}.deletes",
+ deletes,
+ )
+ metric_results.extend(get_metric_result(deletes, table.__tablename__)) # type: ignore
+
+ # get update DML queries
+ updates = get_value_from_cache(
+ SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.updates"
+ )
+ if not updates:
+ update_query = STL_QUERY.format(
+ alias="si",
+ join_type="INNER",
+ condition="sd.query is not null",
+ database=database,
+ schema=schema,
+ )
+ updates = get_query_results(
+ session,
+ update_query,
+ DatabaseDMLOperations.UPDATE.value,
+ )
+ set_cache(
+ SYSTEM_QUERY_RESULT_CACHE,
+ f"{Dialects.Redshift}.{database}.{schema}.updates",
+ updates,
+ )
+ metric_results.extend(get_metric_result(updates, table.__tablename__)) # type: ignore
+
+ return TypeAdapter(List[SystemProfile]).validate_python(metric_results)
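The `@get_system_metrics_for_dialect.register(Dialects.Redshift)` decorator registers this module's implementation under the Redshift dialect, which is exactly why the `import_side_effects` call above is needed: registration happens when the module is imported. The mechanism dispatches on the *value* of the first argument, similar in spirit to `functools.singledispatch` (which dispatches on type). A self-contained sketch of such a value dispatcher, with illustrative names rather than the profiler's actual API:

```python
from typing import Callable, Dict


def valuedispatch(default: Callable) -> Callable:
    """Dispatch on the value of the first positional argument."""
    registry: Dict[str, Callable] = {}

    def register(value: str):
        def decorator(impl: Callable) -> Callable:
            registry[value] = impl
            return impl
        return decorator

    def dispatcher(value: str, *args, **kwargs):
        return registry.get(value, default)(value, *args, **kwargs)

    dispatcher.register = register
    return dispatcher


@valuedispatch
def get_metrics(dialect: str, *args, **kwargs):
    raise NotImplementedError(f"No system metrics handler for {dialect}")


@get_metrics.register("redshift")
def _(dialect: str, *args, **kwargs):
    return ["INSERT", "UPDATE", "DELETE"]  # stands in for List[SystemProfile]
```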
diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py
index 6a2210626ea8..65b11a400fe6 100644
--- a/ingestion/src/metadata/ingestion/source/database/redshift/queries.py
+++ b/ingestion/src/metadata/ingestion/source/database/redshift/queries.py
@@ -13,6 +13,13 @@
"""
import textwrap
+from typing import List
+
+from sqlalchemy import text
+from sqlalchemy.orm.session import Session
+
+from metadata.utils.profiler_utils import QueryResult
+from metadata.utils.time_utils import datetime_to_timestamp
# Not able to use SYS_QUERY_HISTORY here. Some users were not getting any results
REDSHIFT_SQL_STATEMENT = textwrap.dedent(
@@ -375,3 +382,85 @@
and end_time >= '{start_date}'
ORDER BY end_time DESC
"""
+
+
+STL_QUERY = """
+ with data as (
+ select
+ {alias}.*
+ from
+ pg_catalog.stl_insert si
+ {join_type} join pg_catalog.stl_delete sd on si.query = sd.query
+ where
+ {condition}
+ )
+ SELECT
+ SUM(data."rows") AS "rows",
+ sti."database",
+ sti."schema",
+ sti."table",
+ DATE_TRUNC('second', data.starttime) AS starttime
+ FROM
+ data
+ INNER JOIN pg_catalog.svv_table_info sti ON data.tbl = sti.table_id
+ where
+ sti."database" = '{database}' AND
+ sti."schema" = '{schema}' AND
+ "rows" != 0 AND
+ DATE(data.starttime) >= CURRENT_DATE - 1
+ GROUP BY 2,3,4,5
+ ORDER BY 5 DESC
+"""
+
+
+def get_query_results(
+ session: Session,
+ query,
+ operation,
+) -> List[QueryResult]:
+ """get query results either from cache or from the database
+
+ Args:
+ session (Session): session
+ query (_type_): query
+ operation (_type_): operation
+
+ Returns:
+ List[QueryResult]:
+ """
+ cursor = session.execute(text(query))
+ results = [
+ QueryResult(
+ database_name=row.database,
+ schema_name=row.schema,
+ table_name=row.table,
+ query_text=None,
+ query_type=operation,
+ start_time=row.starttime,
+ rows=row.rows,
+ )
+ for row in cursor
+ ]
+
+ return results
+
+
+def get_metric_result(ddls: List[QueryResult], table_name: str) -> List:
+ """Given query results, retur the metric result
+
+ Args:
+ ddls (List[QueryResult]): list of query results
+ table_name (str): table name
+
+ Returns:
+ List:
+ """
+ return [
+ {
+ "timestamp": datetime_to_timestamp(ddl.start_time, milliseconds=True),
+ "operation": ddl.query_type,
+ "rowsAffected": ddl.rows,
+ }
+ for ddl in ddls
+ if ddl.table_name == table_name
+ ]
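Taken together, the new helpers split the work: `STL_QUERY` aggregates per-table row counts from `stl_insert`/`stl_delete` (the LEFT join isolates pure inserts, the RIGHT join pure deletes, the INNER join updates), `get_query_results` maps each row to a `QueryResult`, and `get_metric_result` filters down to one table and the profile's wire shape. A hedged wiring sketch, with placeholder database and schema names:

```python
# `session` is assumed to be an open SQLAlchemy session bound to Redshift.
insert_query = STL_QUERY.format(
    alias="si",
    join_type="LEFT",
    condition="sd.query is null",
    database="dev",     # placeholder values for illustration
    schema="public",
)
results = get_query_results(session, insert_query, "INSERT")
profile_rows = get_metric_result(results, table_name="orders")
# profile_rows -> [{"timestamp": ..., "operation": "INSERT", "rowsAffected": ...}]
```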
diff --git a/ingestion/src/metadata/ingestion/source/database/salesforce/connection.py b/ingestion/src/metadata/ingestion/source/database/salesforce/connection.py
index 9cfd79e08846..6a41e4389c47 100644
--- a/ingestion/src/metadata/ingestion/source/database/salesforce/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/salesforce/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.database.salesforceConnection import (
SalesforceConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SalesforceConnection) -> Salesforce:
@@ -48,16 +52,18 @@ def test_connection(
client: Salesforce,
service_connection: SalesforceConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
test_fn = {"CheckAccess": client.describe}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/salesforce/metadata.py b/ingestion/src/metadata/ingestion/source/database/salesforce/metadata.py
index 5e7e203fabdb..a40ac76fa694 100644
--- a/ingestion/src/metadata/ingestion/source/database/salesforce/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/salesforce/metadata.py
@@ -48,6 +48,9 @@
from metadata.generated.schema.type.basic import EntityName, FullyQualifiedEntityName
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection, get_test_connection_fn
@@ -203,15 +206,18 @@ def get_tables_name_and_type(self) -> Optional[Iterable[Tuple[str, str]]]:
)
)
- def get_table_description(self, table_name: str) -> Optional[str]:
+ def get_table_description(
+ self, table_name: str, object_label: Optional[str]
+ ) -> Optional[str]:
"""
Method to get the table description for salesforce with Tooling API
"""
+ table_description = None
try:
result = self.client.toolingexecute(
f"query/?q=SELECT+Description+FROM+EntityDefinition+WHERE+QualifiedApiName='{table_name}'"
)
- return result["records"][0]["Description"]
+ table_description = result["records"][0]["Description"]
except KeyError as err:
logger.warning(
f"Unable to get required key from Tooling API response for table [{table_name}]: {err}"
@@ -225,7 +231,7 @@ def get_table_description(self, table_name: str) -> Optional[str]:
logger.warning(
f"Unable to get description with Tooling API for table [{table_name}]: {exc}"
)
- return None
+ return table_description if table_description else object_label
def yield_table(
self, table_name_and_type: Tuple[str, TableType]
@@ -241,11 +247,13 @@ def yield_table(
f"sobjects/{table_name}/describe/",
params=None,
)
- columns = self.get_columns(salesforce_objects["fields"])
+ columns = self.get_columns(salesforce_objects.get("fields", []))
table_request = CreateTableRequest(
name=EntityName(table_name),
tableType=table_type,
- description=self.get_table_description(table_name),
+ description=self.get_table_description(
+ table_name, salesforce_objects.get("label")
+ ),
columns=columns,
tableConstraints=table_constraints,
databaseSchema=FullyQualifiedEntityName(
@@ -370,4 +378,5 @@ def close(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.client, self.service_connection)
+ result = test_connection_fn(self.client, self.service_connection)
+ raise_test_connection_exception(result)
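The Salesforce change threads the sObject `label` through as a description fallback: a description fetched via the Tooling API wins, otherwise the label from the `describe` payload (possibly `None`) is used, and `fields` is now read defensively with a default. The fallback reduces to:

```python
from typing import Optional


def describe_or_label(
    tooling_description: Optional[str], object_label: Optional[str]
) -> Optional[str]:
    """Hypothetical reduction of get_table_description's return logic:
    prefer the Tooling API description, fall back to the object label."""
    return tooling_description if tooling_description else object_label
```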
diff --git a/ingestion/src/metadata/ingestion/source/database/saperp/connection.py b/ingestion/src/metadata/ingestion/source/database/saperp/connection.py
index ee2b2bff902e..ba5bac40dbb1 100644
--- a/ingestion/src/metadata/ingestion/source/database/saperp/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/saperp/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.database.sapErpConnection import (
SapErpConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.saperp.client import SapErpClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -37,14 +41,16 @@ def test_connection(
client: SapErpClient,
service_connection: SapErpConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
test_fn = {
"GetTables": client.test_table_api,
"GetColumns": client.test_column_api,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/saphana/connection.py b/ingestion/src/metadata/ingestion/source/database/saphana/connection.py
index 18567eedb5ad..ae8fe0ce333c 100644
--- a/ingestion/src/metadata/ingestion/source/database/saphana/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/saphana/connection.py
@@ -30,6 +30,9 @@
from metadata.generated.schema.entity.services.connections.database.sapHanaConnection import (
SapHanaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -41,6 +44,7 @@
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_database_connection_url(connection: SapHanaConnection) -> str:
@@ -156,15 +160,17 @@ def test_connection(
engine: Engine,
service_connection: SapHanaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=_build_test_fn_dict(engine, service_connection),
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/saphana/lineage.py b/ingestion/src/metadata/ingestion/source/database/saphana/lineage.py
index 0c2bdf824adc..c474c4bfea6f 100644
--- a/ingestion/src/metadata/ingestion/source/database/saphana/lineage.py
+++ b/ingestion/src/metadata/ingestion/source/database/saphana/lineage.py
@@ -29,6 +29,9 @@
)
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException, Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_test_connection_fn
from metadata.ingestion.source.database.saphana.cdata_parser import (
@@ -156,4 +159,5 @@ def parse_cdata(
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.engine)
+ result = test_connection_fn(self.engine)
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/database/sas/connection.py b/ingestion/src/metadata/ingestion/source/database/sas/connection.py
index a11bd1204cd1..014c1286dd67 100644
--- a/ingestion/src/metadata/ingestion/source/database/sas/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/sas/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.database.sasConnection import (
SASConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.sas.client import SASClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -37,11 +41,13 @@ def test_connection(
client: SASClient,
service_connection: SASConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
test_fn = {"CheckAccess": client.check_connection}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/sas/metadata.py b/ingestion/src/metadata/ingestion/source/database/sas/metadata.py
index aadfc8c3c9fd..c6fbc8fb0c29 100644
--- a/ingestion/src/metadata/ingestion/source/database/sas/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/sas/metadata.py
@@ -71,6 +71,9 @@
from metadata.ingestion.api.common import Entity
from metadata.ingestion.api.models import Either, StackTraceError
from metadata.ingestion.api.steps import InvalidSourceException
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection, get_test_connection_fn
@@ -907,4 +910,7 @@ def close(self) -> None:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/database/singlestore/connection.py b/ingestion/src/metadata/ingestion/source/database/singlestore/connection.py
index fdd371ff9f86..c3aa666f4ad0 100644
--- a/ingestion/src/metadata/ingestion/source/database/singlestore/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/singlestore/connection.py
@@ -23,6 +23,9 @@
from metadata.generated.schema.entity.services.connections.database.singleStoreConnection import (
SingleStoreConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -32,6 +35,7 @@
test_connection_db_schema_sources,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SingleStoreConnection) -> Engine:
@@ -50,14 +54,16 @@ def test_connection(
engine: Engine,
service_connection: SingleStoreConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/connection.py b/ingestion/src/metadata/ingestion/source/database/snowflake/connection.py
index 599a3f3815e3..5cbc9479fbf8 100644
--- a/ingestion/src/metadata/ingestion/source/database/snowflake/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/snowflake/connection.py
@@ -28,6 +28,9 @@
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
SnowflakeConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -47,6 +50,7 @@
SNOWFLAKE_TEST_GET_TABLES,
SNOWFLAKE_TEST_GET_VIEWS,
)
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -145,7 +149,8 @@ def test_connection(
engine: Engine,
service_connection: SnowflakeConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow.
@@ -190,11 +195,12 @@ def test_connection(
),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system_metrics.py b/ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py
similarity index 100%
rename from ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system_metrics.py
rename to ingestion/src/metadata/ingestion/source/database/snowflake/profiler/system.py
diff --git a/ingestion/src/metadata/ingestion/source/database/sqlite/connection.py b/ingestion/src/metadata/ingestion/source/database/sqlite/connection.py
index 03b0e4b685b4..f7215cca7c6b 100644
--- a/ingestion/src/metadata/ingestion/source/database/sqlite/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/sqlite/connection.py
@@ -22,12 +22,16 @@
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
SQLiteConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: SQLiteConnection) -> str:
@@ -52,14 +56,16 @@ def test_connection(
engine: Engine,
service_connection: SQLiteConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/teradata/connection.py b/ingestion/src/metadata/ingestion/source/database/teradata/connection.py
index 014af8e288b3..be8d1e856bd9 100644
--- a/ingestion/src/metadata/ingestion/source/database/teradata/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/teradata/connection.py
@@ -24,6 +24,9 @@
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -32,6 +35,7 @@
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.teradata.queries import TERADATA_GET_DATABASE
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: TeradataConnection) -> str:
@@ -82,17 +86,19 @@ def test_connection(
engine: Engine,
service_connection: TeradataConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
queries = {"GetDatabases": TERADATA_GET_DATABASE}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/trino/connection.py b/ingestion/src/metadata/ingestion/source/database/trino/connection.py
index 1a7a3a5ebd59..6ee18950850a 100644
--- a/ingestion/src/metadata/ingestion/source/database/trino/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/trino/connection.py
@@ -29,6 +29,9 @@
from metadata.generated.schema.entity.services.connections.database.trinoConnection import (
TrinoConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -41,6 +44,7 @@
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.trino.queries import TRINO_GET_DATABASE
+from metadata.utils.constants import THREE_MIN
def get_connection_url(connection: TrinoConnection) -> str:
@@ -124,7 +128,8 @@ def test_connection(
engine: Engine,
service_connection: TrinoConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -133,10 +138,11 @@ def test_connection(
"GetDatabases": TRINO_GET_DATABASE,
}
- test_connection_db_schema_sources(
+ return test_connection_db_schema_sources(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py b/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
index 2ba24d3ac328..b17be4dd0222 100644
--- a/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
@@ -23,10 +23,14 @@
from metadata.generated.schema.entity.services.connections.database.unityCatalogConnection import (
UnityCatalogConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.unitycatalog.client import UnityCatalogClient
from metadata.ingestion.source.database.unitycatalog.models import DatabricksTable
+from metadata.utils.constants import THREE_MIN
from metadata.utils.db_utils import get_host_from_host_port
from metadata.utils.logger import ingestion_logger
@@ -54,7 +58,8 @@ def test_connection(
connection: WorkspaceClient,
service_connection: UnityCatalogConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -92,10 +97,10 @@ def get_tables(connection: WorkspaceClient, table_obj: DatabricksTable):
"GetQueries": client.test_query_api_access,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
- timeout_seconds=service_connection.connectionTimeout,
+ timeout_seconds=service_connection.connectionTimeout or timeout_seconds,
)
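Unity Catalog is the one connector with a user-configurable timeout, so the effective value becomes `connectionTimeout or timeout_seconds`. Because Python's `or` falls through on any falsy value, an explicit `0` also resolves to the default rather than meaning "no timeout":

```python
THREE_MIN = 3 * 60  # assumed to mirror metadata.utils.constants.THREE_MIN


def effective_timeout(connection_timeout, timeout_seconds=THREE_MIN):
    """`or`-based fallback as used above."""
    return connection_timeout or timeout_seconds


assert effective_timeout(None) == 180
assert effective_timeout(0) == 180   # falsy zero also falls back to the default
assert effective_timeout(30) == 30
```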
diff --git a/ingestion/src/metadata/ingestion/source/database/unitycatalog/lineage.py b/ingestion/src/metadata/ingestion/source/database/unitycatalog/lineage.py
index cafde0b443f5..3189102eec3e 100644
--- a/ingestion/src/metadata/ingestion/source/database/unitycatalog/lineage.py
+++ b/ingestion/src/metadata/ingestion/source/database/unitycatalog/lineage.py
@@ -30,6 +30,9 @@
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException, Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.lineage.sql_lineage import get_column_fqn
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_test_connection_fn
@@ -159,4 +162,7 @@ def _iter(self, *_, **__) -> Iterable[Either[AddLineageRequest]]:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/database/vertica/connection.py b/ingestion/src/metadata/ingestion/source/database/vertica/connection.py
index 93d515850ed3..945564d3ac7e 100644
--- a/ingestion/src/metadata/ingestion/source/database/vertica/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/vertica/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
VerticaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
@@ -33,6 +36,7 @@
VERTICA_LIST_DATABASES,
VERTICA_TEST_GET_QUERIES,
)
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: VerticaConnection) -> Engine:
@@ -51,7 +55,8 @@ def test_connection(
engine: Engine,
service_connection: VerticaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -60,10 +65,11 @@ def test_connection(
"GetQueries": VERTICA_TEST_GET_QUERIES,
"GetDatabases": VERTICA_LIST_DATABASES,
}
- test_connection_db_common(
+ return test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
queries=queries,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/messaging/kafka/connection.py b/ingestion/src/metadata/ingestion/source/messaging/kafka/connection.py
index ef8c4b0689ad..66c05a2d368e 100644
--- a/ingestion/src/metadata/ingestion/source/messaging/kafka/connection.py
+++ b/ingestion/src/metadata/ingestion/source/messaging/kafka/connection.py
@@ -29,8 +29,12 @@
from metadata.generated.schema.entity.services.connections.messaging.redpandaConnection import (
RedpandaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -120,7 +124,8 @@ def test_connection(
client: KafkaClient,
service_connection: Union[KafkaConnection, RedpandaConnection],
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -149,9 +154,10 @@ def schema_registry_test():
"CheckSchemaRegistry": schema_registry_test,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/messaging/kinesis/connection.py b/ingestion/src/metadata/ingestion/source/messaging/kinesis/connection.py
index dc5b3cc69a82..573dd986a65b 100644
--- a/ingestion/src/metadata/ingestion/source/messaging/kinesis/connection.py
+++ b/ingestion/src/metadata/ingestion/source/messaging/kinesis/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.messaging.kinesisConnection import (
KinesisConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -41,7 +45,8 @@ def test_connection(
client,
service_connection: KinesisConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -49,9 +54,10 @@ def test_connection(
test_fn = {"GetTopics": client.list_streams}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/messaging/messaging_service.py b/ingestion/src/metadata/ingestion/source/messaging/messaging_service.py
index faa033784a0f..b6d5f0d8eb1f 100644
--- a/ingestion/src/metadata/ingestion/source/messaging/messaging_service.py
+++ b/ingestion/src/metadata/ingestion/source/messaging/messaging_service.py
@@ -34,6 +34,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.topology import (
NodeStage,
@@ -198,7 +201,10 @@ def prepare(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def mark_topics_as_deleted(self) -> Iterable[Either[DeleteEntity]]:
"""Method to mark the topics as deleted"""
diff --git a/ingestion/src/metadata/ingestion/source/messaging/redpanda/connection.py b/ingestion/src/metadata/ingestion/source/messaging/redpanda/connection.py
index def0edd58848..34c01ceb9dcc 100644
--- a/ingestion/src/metadata/ingestion/source/messaging/redpanda/connection.py
+++ b/ingestion/src/metadata/ingestion/source/messaging/redpanda/connection.py
@@ -20,6 +20,9 @@
from metadata.generated.schema.entity.services.connections.messaging.redpandaConnection import (
RedpandaConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.messaging.kafka.connection import KafkaClient
from metadata.ingestion.source.messaging.kafka.connection import (
@@ -28,6 +31,7 @@
from metadata.ingestion.source.messaging.kafka.connection import (
test_connection as test_kafka_connection,
)
+from metadata.utils.constants import THREE_MIN
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@@ -45,15 +49,17 @@ def test_connection(
client: KafkaClient,
service_connection: RedpandaConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
- test_kafka_connection(
+ return test_kafka_connection(
metadata=metadata,
client=client,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/alationsink/connection.py b/ingestion/src/metadata/ingestion/source/metadata/alationsink/connection.py
index d3f6bfa4b743..64a6b1998925 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/alationsink/connection.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/alationsink/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.metadata.alationSinkConnection import (
AlationSinkConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.metadata.alationsink.client import AlationSinkClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: AlationSinkConnection) -> AlationSinkClient:
@@ -37,7 +41,8 @@ def test_connection(
client: AlationSinkClient,
service_connection: AlationSinkConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"CheckAccess": client.list_native_datasources}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/alationsink/metadata.py b/ingestion/src/metadata/ingestion/source/metadata/alationsink/metadata.py
index fa83c71ddd52..c29b81f7173b 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/alationsink/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/alationsink/metadata.py
@@ -33,6 +33,9 @@
)
from metadata.ingestion.api.models import Either, Entity
from metadata.ingestion.api.steps import InvalidSourceException, Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.ometa.utils import model_str
from metadata.ingestion.source.connections import get_connection, get_test_connection_fn
@@ -459,6 +462,7 @@ def close(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(
+ result = test_connection_fn(
self.metadata, self.alation_sink_client, self.service_connection
)
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/amundsen/connection.py b/ingestion/src/metadata/ingestion/source/metadata/amundsen/connection.py
index 2862421b8efe..8eabc792c96e 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/amundsen/connection.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/amundsen/connection.py
@@ -21,6 +21,9 @@
from metadata.generated.schema.entity.services.connections.metadata.amundsenConnection import (
AmundsenConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
@@ -30,6 +33,7 @@
from metadata.ingestion.source.metadata.amundsen.queries import (
NEO4J_AMUNDSEN_USER_QUERY,
)
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: AmundsenConnection) -> Neo4jHelper:
@@ -56,7 +60,8 @@ def test_connection(
client: Neo4jHelper,
service_connection: AmundsenConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -66,9 +71,10 @@ def test_connection(
"CheckAccess": partial(client.execute_query, query=NEO4J_AMUNDSEN_USER_QUERY)
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/amundsen/metadata.py b/ingestion/src/metadata/ingestion/source/metadata/amundsen/metadata.py
index a561aa7ab136..8c9d077e434d 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/amundsen/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/amundsen/metadata.py
@@ -56,6 +56,9 @@
from metadata.ingestion.api.common import Entity
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException, Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.user import OMetaUserProfile
from metadata.ingestion.ometa.client_utils import get_chart_entities_from_id
from metadata.ingestion.ometa.ometa_api import OpenMetadata
@@ -460,4 +463,7 @@ def get_database_service(self, service_name: str) -> DatabaseService:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/atlas/connection.py b/ingestion/src/metadata/ingestion/source/metadata/atlas/connection.py
index 66b0d259b78c..4a5f934a051b 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/atlas/connection.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/atlas/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.metadata.atlasConnection import (
AtlasConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.metadata.atlas.client import AtlasClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: AtlasConnection) -> AtlasClient:
@@ -37,7 +41,8 @@ def test_connection(
client: AtlasClient,
service_connection: AtlasConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"CheckAccess": client.list_entities}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/metadata/atlas/metadata.py b/ingestion/src/metadata/ingestion/source/metadata/atlas/metadata.py
index 1f967e35a921..2711c5ca1469 100644
--- a/ingestion/src/metadata/ingestion/source/metadata/atlas/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/metadata/atlas/metadata.py
@@ -41,6 +41,9 @@
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.api.models import Either, Entity, StackTraceError
from metadata.ingestion.api.steps import InvalidSourceException, Source
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection, get_test_connection_fn
@@ -477,4 +480,7 @@ def get_lineage_entity_ref(
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
diff --git a/ingestion/src/metadata/ingestion/source/mlmodel/mlflow/connection.py b/ingestion/src/metadata/ingestion/source/mlmodel/mlflow/connection.py
index b12c3a221767..3957227936ad 100644
--- a/ingestion/src/metadata/ingestion/source/mlmodel/mlflow/connection.py
+++ b/ingestion/src/metadata/ingestion/source/mlmodel/mlflow/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.mlmodel.mlflowConnection import (
MlflowConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: MlflowConnection) -> MlflowClient:
@@ -41,7 +45,8 @@ def test_connection(
client: MlflowClient,
service_connection: MlflowConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -49,9 +54,10 @@ def test_connection(
test_fn = {"GetModels": client.search_registered_models}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/mlmodel/mlmodel_service.py b/ingestion/src/metadata/ingestion/source/mlmodel/mlmodel_service.py
index f49baae8cf97..e9517f819895 100644
--- a/ingestion/src/metadata/ingestion/source/mlmodel/mlmodel_service.py
+++ b/ingestion/src/metadata/ingestion/source/mlmodel/mlmodel_service.py
@@ -38,6 +38,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.topology import (
NodeStage,
@@ -176,7 +179,10 @@ def close(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def mark_mlmodels_as_deleted(self) -> Iterable[Either[DeleteEntity]]:
"""Method to mark the mlmodels as deleted"""
diff --git a/ingestion/src/metadata/ingestion/source/mlmodel/sagemaker/connection.py b/ingestion/src/metadata/ingestion/source/mlmodel/sagemaker/connection.py
index f2dd565494c8..ddf5f308e5e8 100644
--- a/ingestion/src/metadata/ingestion/source/mlmodel/sagemaker/connection.py
+++ b/ingestion/src/metadata/ingestion/source/mlmodel/sagemaker/connection.py
@@ -21,8 +21,12 @@
from metadata.generated.schema.entity.services.connections.mlmodel.sageMakerConnection import (
SageMakerConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SageMakerConnection):
@@ -37,7 +41,8 @@ def test_connection(
client,
service_connection: SageMakerConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetModels": client.list_models}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/airbyte/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/airbyte/connection.py
index 099c84fa7557..03393d90fea1 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/airbyte/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/airbyte/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.airbyteConnection import (
AirbyteConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.airbyte.client import AirbyteClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: AirbyteConnection) -> AirbyteClient:
@@ -37,7 +41,8 @@ def test_connection(
client: AirbyteClient,
service_connection: AirbyteConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetPipelines": client.list_workspaces}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/airflow/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/airflow/connection.py
index 705030cb9de7..70dacd993d54 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/airflow/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/airflow/connection.py
@@ -36,12 +36,16 @@
from metadata.generated.schema.entity.services.connections.pipeline.backendConnection import (
BackendConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_engine_step,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
# Only import when needed
@@ -103,7 +107,8 @@ def test_connection(
engine: Engine,
service_connection: AirflowConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -111,9 +116,10 @@ def test_connection(
test_fn = {"CheckAccess": partial(test_connection_engine_step, engine)}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/airflow/metadata.py b/ingestion/src/metadata/ingestion/source/pipeline/airflow/metadata.py
index 3d20858099c7..9e3bd5d7541b 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/airflow/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/airflow/metadata.py
@@ -15,7 +15,7 @@
from collections import Counter
from datetime import datetime
from enum import Enum
-from typing import Iterable, List, Optional, cast
+from typing import Dict, Iterable, List, Optional, cast
from airflow.models import BaseOperator, DagRun, TaskInstance
from airflow.models.dag import DagModel
@@ -142,6 +142,16 @@ def session(self) -> Session:
return self._session
+ @staticmethod
+ def _extract_serialized_task(task: Dict) -> Dict:
+ """
+ Given the serialization changes introduced in Airflow 2.10,
+ ensure compatibility with all versions.
+ """
+ if task.keys() == {"__var", "__type"}:
+ return task["__var"]
+ return task
+
def get_pipeline_status(self, dag_id: str) -> List[DagRun]:
"""
Return the DagRuns of given dag
@@ -328,7 +338,9 @@ def get_pipelines_list(self) -> Iterable[AirflowDagDetails]:
max_active_runs=data.get("max_active_runs", None),
description=data.get("_description", None),
start_date=data.get("start_date", None),
- tasks=data.get("tasks", []),
+ tasks=list(
+ map(self._extract_serialized_task, data.get("tasks", []))
+ ),
schedule_interval=get_schedule_interval(data),
owner=self.fetch_dag_owners(data),
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/dagster/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/dagster/connection.py
index 021d14b935d9..b17564bd2fa1 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/dagster/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/dagster/connection.py
@@ -20,10 +20,14 @@
from metadata.generated.schema.entity.services.connections.pipeline.dagsterConnection import (
DagsterConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.dagster.client import DagsterClient
from metadata.ingestion.source.pipeline.dagster.queries import TEST_QUERY_GRAPHQL
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DagsterConnection) -> DagsterClient:
@@ -38,7 +42,8 @@ def test_connection(
client: DagsterClient,
service_connection: DagsterConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -49,9 +54,10 @@ def custom_executor_for_pipeline():
test_fn = {"GetPipelines": custom_executor_for_pipeline}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/databrickspipeline/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/databrickspipeline/connection.py
index 0ba178943ff9..adbb9707ddcf 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/databrickspipeline/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/databrickspipeline/connection.py
@@ -21,9 +21,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.databricksPipelineConnection import (
DatabricksPipelineConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.databricks.client import DatabricksClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DatabricksPipelineConnection) -> DatabricksClient:
@@ -38,7 +42,8 @@ def test_connection(
client: DatabricksClient,
service_connection: DatabricksPipelineConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -46,9 +51,10 @@ def test_connection(
test_fn = {"GetPipelines": client.list_jobs_test_connection}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/dbtcloud/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/dbtcloud/connection.py
index 04e3ff56d8b5..0c98d03bd84e 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/dbtcloud/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/dbtcloud/connection.py
@@ -22,9 +22,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.dbtCloudConnection import (
DBTCloudConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.dbtcloud.client import DBTCloudClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DBTCloudConnection) -> DBTCloudClient:
@@ -39,7 +43,8 @@ def test_connection(
client: DBTCloudClient,
service_connection: DBTCloudConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -52,9 +57,10 @@ def test_connection(
"GetRuns": partial(client.get_runs, job_id=job_id),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/domopipeline/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/domopipeline/connection.py
index e25a10cc767e..eed706734aeb 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/domopipeline/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/domopipeline/connection.py
@@ -24,11 +24,15 @@
from metadata.generated.schema.entity.services.connections.pipeline.domoPipelineConnection import (
DomoPipelineConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: DomoPipelineConnection) -> Domo:
@@ -47,7 +51,8 @@ def test_connection(
connection: Domo,
service_connection: DomoPipelineConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -59,9 +64,10 @@ def custom_executor():
test_fn = {"GetPipelines": custom_executor}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/fivetran/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/fivetran/connection.py
index 644e19377004..5096b62f0268 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/fivetran/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/fivetran/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.fivetranConnection import (
FivetranConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.fivetran.client import FivetranClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: FivetranConnection) -> FivetranClient:
@@ -37,7 +41,8 @@ def test_connection(
client: FivetranClient,
service_connection: FivetranConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -45,9 +50,10 @@ def test_connection(
test_fn = {"GetPipelines": client.list_groups}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/flink/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/flink/connection.py
index db5cc77b2b55..a633b3096571 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/flink/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/flink/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.flinkConnection import (
FlinkConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.flink.client import FlinkClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: FlinkConnection) -> FlinkClient:
@@ -37,16 +41,18 @@ def test_connection(
client: FlinkClient,
service_connection: FlinkConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
test_fn = {"GetPipelines": client.get_jobs}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/connection.py
index 725277bbc514..b953934147b8 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/connection.py
@@ -22,8 +22,12 @@
from metadata.generated.schema.entity.services.connections.pipeline.gluePipelineConnection import (
GluePipelineConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: GluePipelineConnection):
@@ -38,7 +42,8 @@ def test_connection(
client,
service_connection: GluePipelineConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -46,9 +51,10 @@ def test_connection(
test_fn = {"GetPipelines": client.list_workflows}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/metadata.py b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/metadata.py
index 94dc58f7c563..66ef26f27211 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/metadata.py
@@ -25,6 +25,7 @@
Task,
TaskStatus,
)
+from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.connections.pipeline.gluePipelineConnection import (
GluePipelineConnection,
)
@@ -40,14 +41,25 @@
SourceUrl,
Timestamp,
)
+from metadata.generated.schema.type.entityLineage import EntitiesEdge, LineageDetails
+from metadata.generated.schema.type.entityLineage import Source as LineageSource
+from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.ingestion.source.pipeline.gluepipeline.models import (
+ AmazonRedshift,
+ CatalogSource,
+ JDBCSource,
+ JobNodeResponse,
+ S3Source,
+ S3Target,
+)
from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
from metadata.utils import fqn
from metadata.utils.logger import ingestion_logger
-from metadata.utils.time_utils import convert_timestamp_to_milliseconds
+from metadata.utils.time_utils import datetime_to_timestamp
logger = ingestion_logger()
@@ -63,6 +75,28 @@
"incomplete": StatusType.Failed,
"pending": StatusType.Pending,
}
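+# Map Glue job node type names to the pydantic models used to parse their table coordinates.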
+TABLE_MODEL_MAP = {
+ "AmazonRedshiftSource": AmazonRedshift,
+ "AmazonRedshiftTarget": AmazonRedshift,
+ "AthenaConnectorSource": JDBCSource,
+ "JDBCConnectorSource": JDBCSource,
+ "JDBCConnectorTarget": JDBCSource,
+ "DirectJDBCSource": CatalogSource,
+ "RedshiftSource": CatalogSource,
+ "RedshiftTarget": CatalogSource,
+ "DirectJDBC": CatalogSource,
+}
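+# S3-backed node types resolve to storage containers rather than tables.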
+STORAGE_MODEL_MAP = {
+ "S3CsvSource": S3Source,
+ "S3JsonSource": S3Source,
+ "S3ParquetSource": S3Source,
+ "S3HudiSource": S3Source,
+ "S3DeltaSource": S3Source,
+ "S3DirectTarget": S3Target,
+ "S3DeltaDirectTarget": S3Target,
+ "S3GlueParquetTarget": S3Target,
+ "S3HudiDirectTarget": S3Target,
+}
class GluepipelineSource(PipelineServiceSource):
@@ -145,9 +179,88 @@ def get_downstream_tasks(self, task_unique_id, tasks):
downstream_tasks.append(self.task_id_mapping[edges["DestinationId"]])
return downstream_tasks
+ def get_lineage_details(self, job) -> dict:
+ """
+ Get the Lineage Details of the pipeline
+ """
+ lineage_details = {"sources": [], "targets": []}
+ try:
+ job_details = JobNodeResponse.model_validate(
+ self.glue.get_job(JobName=job)
+ ).Job
+ if job_details and job_details.config_nodes:
+ nodes = job_details.config_nodes
+ for _, node in nodes.items():
+ for key, entity in node.items():
+ table_model, storage_model = None, None
+ if key in TABLE_MODEL_MAP:
+ table_model = TABLE_MODEL_MAP[key].model_validate(entity)
+ elif "Catalog" in key:
+ table_model = CatalogSource.model_validate(entity)
+ elif key in STORAGE_MODEL_MAP:
+ storage_model = STORAGE_MODEL_MAP[key].model_validate(
+ entity
+ )
+ if table_model:
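+ # Resolve the table against each configured database service until a match is found.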
+ for db_service_name in self.get_db_service_names():
+ table_entity = self.metadata.get_entity_reference(
+ entity=Table,
+ fqn=fqn.build(
+ metadata=self.metadata,
+ entity_type=Table,
+ table_name=table_model.table_name,
+ database_name=table_model.database_name,
+ schema_name=table_model.schema_name,
+ service_name=db_service_name,
+ ),
+ )
+ if table_entity:
+ if key.endswith("Source"):
+ lineage_details["sources"].append(table_entity)
+ else:
+ lineage_details["targets"].append(table_entity)
+ break
+ if storage_model:
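+ # S3 sources carry a list of Paths; S3 targets carry a single Path (see models.py).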
+ for path in storage_model.Paths or [storage_model.Path]:
+ container = self.metadata.es_search_container_by_path(
+ full_path=path
+ )
+ if container and container[0]:
+ storage_entity = EntityReference(
+ id=container[0].id,
+ type="container",
+ name=container[0].name.root,
+ fullyQualifiedName=container[
+ 0
+ ].fullyQualifiedName.root,
+ )
+ if storage_entity:
+ if key.endswith("Source"):
+ lineage_details["sources"].append(
+ storage_entity
+ )
+ else:
+ lineage_details["targets"].append(
+ storage_entity
+ )
+ break
+
+ except Exception as exc:
+ logger.debug(traceback.format_exc())
+ logger.warning(
+ f"Failed to get lineage details for job : {job} due to : {exc}"
+ )
+ return lineage_details
+
def yield_pipeline_status(
self, pipeline_details: Any
) -> Iterable[Either[OMetaPipelineStatus]]:
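+ # Build the pipeline FQN once, outside the loop; it is also needed when reporting errors.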
+ pipeline_fqn = fqn.build(
+ metadata=self.metadata,
+ entity_type=Pipeline,
+ service_name=self.context.get().pipeline_service,
+ pipeline_name=self.context.get().pipeline,
+ )
for job in self.job_name_list:
try:
runs = self.glue.get_job_runs(JobName=job)
@@ -161,13 +274,13 @@ def yield_pipeline_status(
attempt["JobRunState"].lower(), StatusType.Pending
).value,
startTime=Timestamp(
- convert_timestamp_to_milliseconds(
- attempt["StartedOn"].timestamp()
+ datetime_to_timestamp(
+ attempt["StartedOn"], milliseconds=True
)
),
endTime=Timestamp(
- convert_timestamp_to_milliseconds(
- attempt["CompletedOn"].timestamp()
+ datetime_to_timestamp(
+ attempt["CompletedOn"], milliseconds=True
)
),
)
@@ -175,20 +288,14 @@ def yield_pipeline_status(
pipeline_status = PipelineStatus(
taskStatus=task_status,
timestamp=Timestamp(
- convert_timestamp_to_milliseconds(
- attempt["StartedOn"].timestamp()
+ datetime_to_timestamp(
+ attempt["StartedOn"], milliseconds=True
)
),
executionStatus=STATUS_MAP.get(
attempt["JobRunState"].lower(), StatusType.Pending
).value,
)
- pipeline_fqn = fqn.build(
- metadata=self.metadata,
- entity_type=Pipeline,
- service_name=self.context.get().pipeline_service,
- pipeline_name=self.context.get().pipeline,
- )
yield Either(
right=OMetaPipelineStatus(
pipeline_fqn=pipeline_fqn,
@@ -199,7 +306,7 @@ def yield_pipeline_status(
yield Either(
left=StackTraceError(
name=pipeline_fqn,
- error=f"Failed to yield pipeline status: {exc}",
+ error=f"Failed to yield pipeline status for job {job}: {exc}",
stackTrace=traceback.format_exc(),
)
)
@@ -210,3 +317,42 @@ def yield_pipeline_lineage_details(
"""
Get lineage between pipeline and data sources
"""
+ try:
+ pipeline_fqn = fqn.build(
+ metadata=self.metadata,
+ entity_type=Pipeline,
+ service_name=self.context.get().pipeline_service,
+ pipeline_name=self.context.get().pipeline,
+ )
+
+ pipeline_entity = self.metadata.get_by_name(
+ entity=Pipeline, fqn=pipeline_fqn
+ )
+
+ lineage_details = LineageDetails(
+ pipeline=EntityReference(id=pipeline_entity.id.root, type="pipeline"),
+ source=LineageSource.PipelineLineage,
+ )
+
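+ # Link every resolved source to every resolved target through this pipeline edge.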
+ for job in self.job_name_list:
+ lineage_entities = self.get_lineage_details(job)
+ for source in lineage_entities.get("sources"):
+ for target in lineage_entities.get("targets"):
+ yield Either(
+ right=AddLineageRequest(
+ edge=EntitiesEdge(
+ fromEntity=source,
+ toEntity=target,
+ lineageDetails=lineage_details,
+ )
+ )
+ )
+
+ except Exception as exc:
+ yield Either(
+ left=StackTraceError(
+ name=pipeline_details.get(NAME),
+ error=f"Wild error ingesting pipeline lineage {pipeline_details} - {exc}",
+ stackTrace=traceback.format_exc(),
+ )
+ )
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/models.py b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/models.py
new file mode 100644
index 000000000000..84090b1febe8
--- /dev/null
+++ b/ingestion/src/metadata/ingestion/source/pipeline/gluepipeline/models.py
@@ -0,0 +1,78 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Glue Pipeline Source Model module
+"""
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
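+# Field aliases below mirror the PascalCase keys returned by the Glue get_job API.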
+class EntityDetails(BaseModel):
+ Value: str
+
+
+class SourceDetails(BaseModel):
+ schema_details: EntityDetails = Field(alias="Schema")
+ table_details: EntityDetails = Field(alias="Table")
+
+
+class AmazonRedshift(BaseModel):
+ Name: str
+ Data: SourceDetails
+ database_name: Optional[str] = None
+
+ @property
+ def table_name(self):
+ if self.Data:
+ return self.Data.table_details.Value
+ return None
+
+ @property
+ def schema_name(self):
+ if self.Data:
+ return self.Data.schema_details.Value
+ return None
+
+
+class CatalogSource(BaseModel):
+ Name: str
+ database_name: str = Field(alias="Database")
+ schema_name: Optional[str] = None
+ table_name: str = Field(alias="Table")
+
+
+class JDBCSource(BaseModel):
+ Name: str
+ schema_name: Optional[str] = Field(default=None, alias="SchemaName")
+ database_name: Optional[str] = None
+ table_name: str = Field(alias="ConnectionTable")
+
+
+class S3Source(BaseModel):
+ Name: str
+ Paths: List[str]
+
+
+class S3Target(BaseModel):
+ Name: str
+ Path: str
+ Paths: Optional[List[str]] = None
+
+
+class JobNodes(BaseModel):
+ config_nodes: Optional[dict] = Field(default=None, alias="CodeGenConfigurationNodes")
+
+
+class JobNodeResponse(BaseModel):
+ Job: Optional[JobNodes] = None
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/kafkaconnect/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/kafkaconnect/connection.py
index 27fda4bf3de8..38ea63920f0b 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/kafkaconnect/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/kafkaconnect/connection.py
@@ -21,9 +21,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.kafkaConnectConnection import (
KafkaConnectConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.kafkaconnect.client import KafkaConnectClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: KafkaConnectConnection) -> KafkaConnectClient:
@@ -38,7 +42,8 @@ def test_connection(
client: KafkaConnectClient,
service_connection: KafkaConnectConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -50,9 +55,10 @@ def test_connection(
"GetPlugins": client.get_connector_plugins,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/nifi/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/nifi/connection.py
index dae1f25a524a..097b8cc1dbb4 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/nifi/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/nifi/connection.py
@@ -21,9 +21,13 @@
BasicAuthentication,
NifiConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.nifi.client import NifiClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: NifiConnection) -> NifiClient:
@@ -53,7 +57,8 @@ def test_connection(
client: NifiClient,
service_connection: NifiConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -64,9 +69,10 @@ def custom_executor():
test_fn = {"GetPipelines": custom_executor}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/openlineage/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/openlineage/connection.py
index 74d488d77430..ec7abe8b80a1 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/openlineage/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/openlineage/connection.py
@@ -26,11 +26,15 @@
from metadata.generated.schema.entity.services.connections.pipeline.openLineageConnection import (
SecurityProtocol as KafkaSecProtocol,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import (
SourceConnectionException,
test_connection_steps,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: OpenLineageConnection) -> KafkaConsumer:
@@ -76,7 +80,8 @@ def test_connection(
client: KafkaConsumer,
service_connection: OpenLineageConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -89,9 +94,10 @@ def custom_executor():
test_fn = {"GetWatermarkOffsets": custom_executor}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/pipeline_service.py b/ingestion/src/metadata/ingestion/source/pipeline/pipeline_service.py
index 40449f08bc1d..d9e8129aac91 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/pipeline_service.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/pipeline_service.py
@@ -35,6 +35,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.ometa_lineage import OMetaLineageRequest
@@ -251,7 +254,10 @@ def get_pipeline(self) -> Any:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def register_record(self, pipeline_request: CreatePipelineRequest) -> None:
"""Mark the pipeline record as scanned and update the pipeline_source_state"""
@@ -285,6 +291,16 @@ def get_db_service_names(self) -> List[str]:
else []
)
+ def get_storage_service_names(self) -> List[str]:
+ """
+ Get the list of storage service names
+ """
+ return (
+ self.source_config.lineageInformation.storageServiceNames or []
+ if self.source_config.lineageInformation
+ else []
+ )
+
def prepare(self):
"""
Method to implement any required logic before starting the ingestion process
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/spline/connection.py b/ingestion/src/metadata/ingestion/source/pipeline/spline/connection.py
index aeb1b81e67da..acee709f941f 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/spline/connection.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/spline/connection.py
@@ -20,9 +20,13 @@
from metadata.generated.schema.entity.services.connections.pipeline.splineConnection import (
SplineConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.spline.client import SplineClient
+from metadata.utils.constants import THREE_MIN
def get_connection(connection: SplineConnection) -> SplineClient:
@@ -38,7 +42,8 @@ def test_connection(
client: SplineClient,
service_connection: SplineConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -46,9 +51,10 @@ def test_connection(
test_fn = {"GetPipelines": client.get_pipelines_test_connection}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/search/elasticsearch/connection.py b/ingestion/src/metadata/ingestion/source/search/elasticsearch/connection.py
index 52df2e9e3d29..19c1b2e2c15a 100644
--- a/ingestion/src/metadata/ingestion/source/search/elasticsearch/connection.py
+++ b/ingestion/src/metadata/ingestion/source/search/elasticsearch/connection.py
@@ -40,10 +40,13 @@
from metadata.generated.schema.entity.services.connections.search.elasticSearchConnection import (
ElasticsearchConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.builders import init_empty_connection_arguments
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.utils.constants import UTF_8
+from metadata.utils.constants import THREE_MIN, UTF_8
from metadata.utils.helpers import init_staging_dir
CA_CERT_FILE_NAME = "root.pem"
@@ -185,7 +188,8 @@ def test_connection(
client: Elasticsearch,
service_connection: ElasticsearchConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -199,9 +203,10 @@ def test_get_search_indexes():
"GetSearchIndexes": test_get_search_indexes,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/search/search_service.py b/ingestion/src/metadata/ingestion/source/search/search_service.py
index ce49c3245a1f..6bf2eaadd374 100644
--- a/ingestion/src/metadata/ingestion/source/search/search_service.py
+++ b/ingestion/src/metadata/ingestion/source/search/search_service.py
@@ -41,6 +41,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.search_index_data import OMetaIndexSampleData
from metadata.ingestion.models.topology import (
@@ -192,7 +195,10 @@ def prepare(self):
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def mark_search_indexes_as_deleted(self) -> Iterable[Either[DeleteEntity]]:
"""Method to mark the search index as deleted"""
diff --git a/ingestion/src/metadata/ingestion/source/storage/gcs/connection.py b/ingestion/src/metadata/ingestion/source/storage/gcs/connection.py
index a11d156c8383..5d1f78ade555 100644
--- a/ingestion/src/metadata/ingestion/source/storage/gcs/connection.py
+++ b/ingestion/src/metadata/ingestion/source/storage/gcs/connection.py
@@ -22,6 +22,9 @@
from metadata.generated.schema.entity.services.connections.storage.gcsConnection import (
GcsConnection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.generated.schema.security.credentials.gcpValues import (
GcpCredentialsValues,
SingleProjectId,
@@ -32,6 +35,7 @@
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.storage.gcs.client import MultiProjectClient
+from metadata.utils.constants import THREE_MIN
from metadata.utils.credentials import set_google_credentials
from metadata.utils.logger import ingestion_logger
@@ -136,7 +140,8 @@ def test_connection(
client: GcsObjectStoreClient,
service_connection: GcsConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -151,9 +156,10 @@ def test_connection(
"GetMetrics": tester.get_metrics,
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/storage/s3/connection.py b/ingestion/src/metadata/ingestion/source/storage/s3/connection.py
index 2df0b9a8ff44..d2ba001b6ea9 100644
--- a/ingestion/src/metadata/ingestion/source/storage/s3/connection.py
+++ b/ingestion/src/metadata/ingestion/source/storage/s3/connection.py
@@ -27,8 +27,12 @@
from metadata.generated.schema.entity.services.connections.storage.s3Connection import (
S3Connection,
)
+from metadata.generated.schema.entity.services.connections.testConnectionResult import (
+ TestConnectionResult,
+)
from metadata.ingestion.connections.test_connections import test_connection_steps
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.constants import THREE_MIN
@dataclass
@@ -53,7 +57,8 @@ def test_connection(
client: S3ObjectStoreClient,
service_connection: S3Connection,
automation_workflow: Optional[AutomationWorkflow] = None,
-) -> None:
+ timeout_seconds: Optional[int] = THREE_MIN,
+) -> TestConnectionResult:
"""
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
@@ -75,9 +80,10 @@ def test_buckets(connection: S3Connection, client: S3ObjectStoreClient):
),
}
- test_connection_steps(
+ return test_connection_steps(
metadata=metadata,
test_fn=test_fn,
service_type=service_connection.type.value,
automation_workflow=automation_workflow,
+ timeout_seconds=timeout_seconds,
)
diff --git a/ingestion/src/metadata/ingestion/source/storage/storage_service.py b/ingestion/src/metadata/ingestion/source/storage/storage_service.py
index 5afb3b19b1d7..9a57e5231253 100644
--- a/ingestion/src/metadata/ingestion/source/storage/storage_service.py
+++ b/ingestion/src/metadata/ingestion/source/storage/storage_service.py
@@ -40,6 +40,9 @@
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import Source
from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.models.delete_entity import DeleteEntity
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.topology import (
@@ -236,7 +239,10 @@ def register_record(self, container_request: CreateContainerRequest) -> None:
def test_connection(self) -> None:
test_connection_fn = get_test_connection_fn(self.service_connection)
- test_connection_fn(self.metadata, self.connection_obj, self.service_connection)
+ result = test_connection_fn(
+ self.metadata, self.connection_obj, self.service_connection
+ )
+ raise_test_connection_exception(result)
def mark_containers_as_deleted(self) -> Iterable[Either[DeleteEntity]]:
"""Method to mark the containers as deleted"""
diff --git a/ingestion/src/metadata/parsers/json_schema_parser.py b/ingestion/src/metadata/parsers/json_schema_parser.py
index 412d3be013b1..716fd237984e 100644
--- a/ingestion/src/metadata/parsers/json_schema_parser.py
+++ b/ingestion/src/metadata/parsers/json_schema_parser.py
@@ -16,7 +16,7 @@
import json
import traceback
from enum import Enum
-from typing import List, Optional, Type
+from typing import List, Optional, Tuple, Type
from pydantic import BaseModel
@@ -66,6 +66,49 @@ def parse_json_schema(
return None
+def get_child_models(key, value, field_models, cls: Type[BaseModel] = FieldModel):
+ """
+ Method to parse the child objects in the json schema
+ """
+ try:
+ cls_obj = cls(
+ name=key,
+ displayName=value.get("title"),
+ dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
+ description=value.get("description"),
+ )
+ children = None
+ if value.get("type") == JsonSchemaDataTypes.RECORD.value:
+ children = get_json_schema_fields(value.get("properties"), cls=cls)
+ if value.get("type") == JsonSchemaDataTypes.ARRAY.value:
+ datatype_display, children = get_json_schema_array_fields(
+ value.get("items"), cls=cls
+ )
+ cls_obj.dataTypeDisplay = f"ARRAY<{datatype_display}>"
+ cls_obj.children = children
+ field_models.append(cls_obj)
+ except Exception as exc: # pylint: disable=broad-except
+ logger.debug(traceback.format_exc())
+ logger.warning(f"Unable to parse the json schema into models: {exc}")
+
+
+def get_json_schema_array_fields(
+ array_items, cls: Type[BaseModel] = FieldModel
+) -> Tuple[str, Optional[List[FieldModel]]]:
+ """
+ Recursively convert the parsed array schema into required models
+ """
+ field_models = []
+ if array_items.get("type") == JsonSchemaDataTypes.RECORD.value:
+ for key, value in array_items.get("properties", {}).items():
+ get_child_models(key, value, field_models, cls)
+
+ return (
+ JsonSchemaDataTypes(array_items.get("type", "unknown")).name,
+ field_models or None,
+ )
+
+
def get_json_schema_fields(
properties, cls: Type[BaseModel] = FieldModel
) -> Optional[List[FieldModel]]:
@@ -74,20 +117,6 @@ def get_json_schema_fields(
"""
field_models = []
for key, value in properties.items():
- try:
- field_models.append(
- cls(
- name=key,
- displayName=value.get("title"),
- dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
- description=value.get("description"),
- children=get_json_schema_fields(value.get("properties"), cls=cls)
- if value.get("type") == "object"
- else None,
- )
- )
- except Exception as exc: # pylint: disable=broad-except
- logger.debug(traceback.format_exc())
- logger.warning(f"Unable to parse the json schema into models: {exc}")
+ get_child_models(key, value, field_models, cls)
return field_models
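
To illustrate the refactor, here is how the recursion is expected to handle an array of records; a sketch relying on the enum mapping implied above (RECORD for "object", ARRAY for "array"):

    # Illustrative input: an array whose items are records.
    schema_properties = {
        "phoneNumbers": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "type": {"type": "string"},
                    "number": {"type": "string"},
                },
            },
        }
    }
    fields = get_json_schema_fields(schema_properties)
    # Expected shape (assuming the enum names used above):
    # fields[0].dataType == "ARRAY"
    # fields[0].dataTypeDisplay == "ARRAY<RECORD>"
    # [child.name for child in fields[0].children] == ["type", "number"]
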
diff --git a/ingestion/src/metadata/profiler/metrics/system/queries/redshift.py b/ingestion/src/metadata/profiler/metrics/system/queries/redshift.py
deleted file mode 100644
index 37712e4f1b76..000000000000
--- a/ingestion/src/metadata/profiler/metrics/system/queries/redshift.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright 2021 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Redshift System Metric Queries and queries operations
-"""
-
-from typing import List
-
-from sqlalchemy import text
-from sqlalchemy.orm import Session
-
-from metadata.utils.profiler_utils import QueryResult
-from metadata.utils.time_utils import datetime_to_timestamp
-
-STL_QUERY = """
- with data as (
- select
- {alias}.*
- from
- pg_catalog.stl_insert si
- {join_type} join pg_catalog.stl_delete sd on si.query = sd.query
- where
- {condition}
- )
- SELECT
- SUM(data."rows") AS "rows",
- sti."database",
- sti."schema",
- sti."table",
- DATE_TRUNC('second', data.starttime) AS starttime
- FROM
- data
- INNER JOIN pg_catalog.svv_table_info sti ON data.tbl = sti.table_id
- where
- sti."database" = '{database}' AND
- sti."schema" = '{schema}' AND
- "rows" != 0 AND
- DATE(data.starttime) >= CURRENT_DATE - 1
- GROUP BY 2,3,4,5
- ORDER BY 5 DESC
-"""
-
-
-def get_query_results(
- session: Session,
- query,
- operation,
-) -> List[QueryResult]:
- """get query results either from cache or from the database
-
- Args:
- cache (Optional[List[QueryResult]]): cache results
- session (Session): session
- query (_type_): query
- operation (_type_): operation
-
- Returns:
- List[QueryResult]:
- """
- cursor = session.execute(text(query))
- results = [
- QueryResult(
- database_name=row.database,
- schema_name=row.schema,
- table_name=row.table,
- query_text=None,
- query_type=operation,
- start_time=row.starttime,
- rows=row.rows,
- )
- for row in cursor
- ]
-
- return results
-
-
-def get_metric_result(ddls: List[QueryResult], table_name: str) -> List:
- """Given query results, retur the metric result
-
- Args:
- ddls (List[QueryResult]): list of query results
- table_name (str): table name
-
- Returns:
- List:
- """
- return [
- {
- "timestamp": datetime_to_timestamp(ddl.start_time, milliseconds=True),
- "operation": ddl.query_type,
- "rowsAffected": ddl.rows,
- }
- for ddl in ddls
- if ddl.table_name == table_name
- ]
diff --git a/ingestion/src/metadata/profiler/metrics/system/system.py b/ingestion/src/metadata/profiler/metrics/system/system.py
index a981f2803de7..47524c430a2d 100644
--- a/ingestion/src/metadata/profiler/metrics/system/system.py
+++ b/ingestion/src/metadata/profiler/metrics/system/system.py
@@ -26,7 +26,7 @@
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
BigQueryConnection,
)
-from metadata.ingestion.source.database.snowflake.profiler.system_metrics import (
+from metadata.ingestion.source.database.snowflake.profiler.system import (
build_snowflake_query_results,
)
from metadata.profiler.metrics.core import SystemMetric
@@ -39,11 +39,6 @@
JOBS,
BigQueryQueryResult,
)
-from metadata.profiler.metrics.system.queries.redshift import (
- STL_QUERY,
- get_metric_result,
- get_query_results,
-)
from metadata.profiler.orm.registry import Dialects
from metadata.utils.dispatch import valuedispatch
from metadata.utils.helpers import deep_size_of_dict
@@ -87,7 +82,6 @@ def get_system_metrics_for_dialect(
} else returns None
"""
logger.debug(f"System metrics not support for {dialect}. Skipping processing.")
- return None
@get_system_metrics_for_dialect.register(Dialects.BigQuery)
@@ -190,105 +184,6 @@ def _(
return TypeAdapter(List[SystemProfile]).validate_python(metric_results)
-@get_system_metrics_for_dialect.register(Dialects.Redshift)
-def _(
- dialect: str,
- session: Session,
- table: DeclarativeMeta,
- *args,
- **kwargs,
-) -> List[SystemProfile]:
- """List all the DML operations for reshifts tables
-
- Args:
- dialect (str): redshift
- session (Session): session object
- table (DeclarativeMeta): orm table
-
- Returns:
- List[Dict]:
- """
- logger.debug(f"Fetching system metrics for {dialect}")
- database = session.get_bind().url.database
- schema = table.__table_args__["schema"] # type: ignore
-
- metric_results: List[Dict] = []
-
- # get inserts ddl queries
- inserts = get_value_from_cache(
- SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.inserts"
- )
- if not inserts:
- insert_query = STL_QUERY.format(
- alias="si",
- join_type="LEFT",
- condition="sd.query is null",
- database=database,
- schema=schema,
- )
- inserts = get_query_results(
- session,
- insert_query,
- DatabaseDMLOperations.INSERT.value,
- )
- set_cache(
- SYSTEM_QUERY_RESULT_CACHE,
- f"{Dialects.Redshift}.{database}.{schema}.inserts",
- inserts,
- )
- metric_results.extend(get_metric_result(inserts, table.__tablename__)) # type: ignore
-
- # get deletes ddl queries
- deletes = get_value_from_cache(
- SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.deletes"
- )
- if not deletes:
- delete_query = STL_QUERY.format(
- alias="sd",
- join_type="RIGHT",
- condition="si.query is null",
- database=database,
- schema=schema,
- )
- deletes = get_query_results(
- session,
- delete_query,
- DatabaseDMLOperations.DELETE.value,
- )
- set_cache(
- SYSTEM_QUERY_RESULT_CACHE,
- f"{Dialects.Redshift}.{database}.{schema}.deletes",
- deletes,
- )
- metric_results.extend(get_metric_result(deletes, table.__tablename__)) # type: ignore
-
- # get updates ddl queries
- updates = get_value_from_cache(
- SYSTEM_QUERY_RESULT_CACHE, f"{Dialects.Redshift}.{database}.{schema}.updates"
- )
- if not updates:
- update_query = STL_QUERY.format(
- alias="si",
- join_type="INNER",
- condition="sd.query is not null",
- database=database,
- schema=schema,
- )
- updates = get_query_results(
- session,
- update_query,
- DatabaseDMLOperations.UPDATE.value,
- )
- set_cache(
- SYSTEM_QUERY_RESULT_CACHE,
- f"{Dialects.Redshift}.{database}.{schema}.updates",
- updates,
- )
- metric_results.extend(get_metric_result(updates, table.__tablename__)) # type: ignore
-
- return TypeAdapter(List[SystemProfile]).validate_python(metric_results)
-
-
@get_system_metrics_for_dialect.register(Dialects.Snowflake)
def _(
dialect: str,
diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
index 5566600c61d6..9284b7264243 100644
--- a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
+++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
@@ -412,7 +412,7 @@ def compute(self):
)
res = self.runner._session.execute(query).first()
if not res:
- return None
+ return super().compute()
if res.rowCount is None or (
res.rowCount == 0 and self._entity.tableType == TableType.View
):
diff --git a/ingestion/src/metadata/utils/class_helper.py b/ingestion/src/metadata/utils/class_helper.py
index 281afb854db6..ce2da2454037 100644
--- a/ingestion/src/metadata/utils/class_helper.py
+++ b/ingestion/src/metadata/utils/class_helper.py
@@ -68,7 +68,7 @@
from metadata.generated.schema.metadataIngestion.workflow import SourceConfig
SERVICE_TYPE_REF = {
- ServiceType.API.value: "apiService",
+ ServiceType.Api.value: "apiService",
ServiceType.Database.value: "databaseService",
ServiceType.Dashboard.value: "dashboardService",
ServiceType.Pipeline.value: "pipelineService",
diff --git a/ingestion/src/metadata/utils/collections.py b/ingestion/src/metadata/utils/collections.py
new file mode 100644
index 000000000000..16895ed0366d
--- /dev/null
+++ b/ingestion/src/metadata/utils/collections.py
@@ -0,0 +1,27 @@
+"""
+Utility classes for collections
+"""
+
+
+class CaseInsensitiveString(str):
+ """
+ A case-insensitive string. Useful for case-insensitive comparisons, as in SQL.
+ """
+
+ def __eq__(self, other):
+ return self.casefold() == other.casefold()
+
+ def __hash__(self):
+ return hash(self.casefold())
+
+
+class CaseInsensitiveList(list):
+ """A case-insensitive list that treats all its string elements as case-insensitive.
+ Non-string elements are treated with default behavior."""
+
+ def __contains__(self, item):
+ return (
+ any(CaseInsensitiveString(x) == item for x in self)
+ if isinstance(item, str)
+ else any(x == item for x in self)
+ )
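
A short usage sketch of the two helpers:

    # String membership ignores case; non-string items compare normally.
    cols = CaseInsensitiveList(["Customer_ID", "First_Name", 42])
    assert "customer_id" in cols
    assert "FIRST_NAME" in cols
    assert 42 in cols
    assert "missing" not in cols
    assert CaseInsensitiveString("SQL") == "sql"
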
diff --git a/ingestion/src/metadata/utils/constants.py b/ingestion/src/metadata/utils/constants.py
index cbb1b7b5f3e9..2f1c921eb4d1 100644
--- a/ingestion/src/metadata/utils/constants.py
+++ b/ingestion/src/metadata/utils/constants.py
@@ -40,6 +40,7 @@
DOT = "_DOT_"
TEN_MIN = 10 * 60
+THREE_MIN = 3 * 60
UTF_8 = "utf-8"
CHUNKSIZE = 200000
DEFAULT_DATABASE = "default"
diff --git a/ingestion/src/metadata/utils/test_suite.py b/ingestion/src/metadata/utils/test_suite.py
deleted file mode 100644
index fd4e90e1b88b..000000000000
--- a/ingestion/src/metadata/utils/test_suite.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright 2021 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Helper module for test suite functions
-"""
-
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Callable, List, Optional
-
-from metadata.generated.schema.tests.basic import (
- TestCaseResult,
- TestCaseStatus,
- TestResultValue,
-)
-from metadata.generated.schema.tests.testCase import TestCaseParameterValue
-
-
-def get_test_case_param_value(
- test_case_param_vals: list[TestCaseParameterValue],
- name: str,
- type_,
- default=None,
- pre_processor: Optional[Callable] = None,
-):
- """Give a column and a type return the value with the appropriate type casting for the
- test case definition.
-
- Args:
- test_case: the test case
- type_ (Union[float, int, str]): type for the value
- name (str): column name
- default (_type_, optional): Default value to return if column is not found
- pre_processor: pre processor function/type to use against the value before casting to type_
- """
- value = next(
- (param.value for param in test_case_param_vals if param.name == name), None
- )
-
- if not value:
- return default
-
- if not pre_processor:
- return type_(value)
-
- pre_processed_value = pre_processor(value)
- return type_(pre_processed_value)
-
-
-def build_test_case_result(
- execution_datetime: datetime,
- status: TestCaseStatus,
- result: str,
- test_result_value: List[TestResultValue],
- sample_data: Optional[str] = None,
-) -> TestCaseResult:
- """create a test case result object
-
- Args:
- execution_datetime (datetime): execution datetime of the test
- status (TestCaseStatus): failed, succeed, aborted
- result (str): message to display
- testResultValue (List[TestResultValue]): values for the test result
-
- Returns:
- TestCaseResult:
- """
- return TestCaseResult(
- timestamp=execution_datetime,
- testCaseStatus=status,
- result=result,
- testResultValue=test_result_value,
- sampleData=sample_data,
- )
diff --git a/ingestion/src/metadata/utils/time_utils.py b/ingestion/src/metadata/utils/time_utils.py
index b2c4196dab38..0bd54f9a3ca9 100644
--- a/ingestion/src/metadata/utils/time_utils.py
+++ b/ingestion/src/metadata/utils/time_utils.py
@@ -17,13 +17,19 @@
from math import floor
from typing import Union
+from metadata.generated.schema.type.basic import Timestamp
from metadata.utils.deprecation import deprecated
from metadata.utils.helpers import datetime_to_ts
+from metadata.utils.logger import utils_logger
+
+logger = utils_logger()
def datetime_to_timestamp(datetime_value: datetime, milliseconds=False) -> int:
- """Convert a datetime object to timestamp integer. Datetime can be timezone aware or naive. Result
- will always be in UTC.
+ """Convert a datetime object to timestamp integer. If datetime is timezone aware, it will be converted to UTC.
+ If it is naive it will be assumed to be in UTC.
+
+ # TODO: not sure the milliseconds flag is useful. Maybe this should return a 'Timestamp' object instead.
Args:
datetime_value (_type_): datetime object
@@ -37,12 +43,29 @@ def datetime_to_timestamp(datetime_value: datetime, milliseconds=False) -> int:
f"Object of type {type(datetime_value).__name__} has not method `timestamp()`"
)
+ datetime_value = (
+ datetime_value.replace(tzinfo=timezone.utc)
+ if datetime_value.tzinfo is None
+ else datetime_value.astimezone(timezone.utc)
+ )
tmsap = datetime_value.timestamp()
if milliseconds:
return int(tmsap * 1000)
return int(tmsap)
+def timestamp_to_datetime(ts: Timestamp) -> datetime:
+ """Convert a timestamp to datetime object in UTC.
+
+ Args:
+ ts (Timestamp): timestamp
+
+ Returns:
+ datetime: datetime object
+ """
+ return datetime.fromtimestamp(ts.root / 1000, tz=timezone.utc)
+
+
def get_beginning_of_day_timestamp_mill(
days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0
) -> int:
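
A round-trip sketch of the two helpers above (the epoch values match the unit tests added later in this patch):

    from datetime import datetime, timezone

    # Naive datetimes are assumed UTC; aware ones are converted to UTC.
    dt = datetime(2021, 12, 1, tzinfo=timezone.utc)
    ts = datetime_to_timestamp(dt, milliseconds=True)   # 1638316800000
    assert timestamp_to_datetime(Timestamp(root=ts)) == dt
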
diff --git a/ingestion/src/metadata/workflow/application.py b/ingestion/src/metadata/workflow/application.py
index 15bf98e132a0..b22d4b1bd789 100644
--- a/ingestion/src/metadata/workflow/application.py
+++ b/ingestion/src/metadata/workflow/application.py
@@ -14,10 +14,6 @@
from abc import ABC, abstractmethod
from typing import List, Optional
-from metadata.config.common import WorkflowExecutionError
-from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
- OpenMetadataConnection,
-)
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
@@ -25,13 +21,11 @@
from metadata.generated.schema.metadataIngestion.application import (
OpenMetadataApplicationConfig,
)
-from metadata.generated.schema.metadataIngestion.workflow import LogLevels
-from metadata.ingestion.api.step import Step, Summary
+from metadata.ingestion.api.step import Step
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils.importer import import_from_module
from metadata.utils.logger import ingestion_logger
from metadata.workflow.base import BaseWorkflow
-from metadata.workflow.workflow_status_mixin import SUCCESS_THRESHOLD_VALUE
logger = ingestion_logger()
@@ -84,29 +78,24 @@ class ApplicationWorkflow(BaseWorkflow, ABC):
config: OpenMetadataApplicationConfig
runner: Optional[AppRunner]
- def __init__(self, config_dict: dict):
+ def __init__(self, config: OpenMetadataApplicationConfig):
self.runner = None # Will be passed in post-init
- # TODO: Create a parse_gracefully method
- self.config = OpenMetadataApplicationConfig.model_validate(config_dict)
+ self.config = config
# Applications are associated to the OpenMetadata Service
self.service_type: ServiceType = ServiceType.Metadata
- metadata_config: OpenMetadataConnection = (
- self.config.workflowConfig.openMetadataServerConfig
- )
- log_level: LogLevels = self.config.workflowConfig.loggerLevel
-
super().__init__(
config=self.config,
- log_level=log_level,
- metadata_config=metadata_config,
+ workflow_config=config.workflowConfig,
service_type=self.service_type,
)
@classmethod
def create(cls, config_dict: dict):
- return cls(config_dict)
+ # TODO: Create a parse_gracefully method
+ config = OpenMetadataApplicationConfig.model_validate(config_dict)
+ return cls(config)
def post_init(self) -> None:
"""
@@ -134,26 +123,8 @@ def execute_internal(self) -> None:
"""Workflow-specific logic to execute safely"""
self.runner.run()
- def calculate_success(self) -> float:
- return self.runner.get_status().calculate_success()
-
def get_failures(self) -> List[StackTraceError]:
return self.workflow_steps()[0].get_status().failures
def workflow_steps(self) -> List[Step]:
return [self.runner]
-
- def raise_from_status_internal(self, raise_warnings=False):
- """Check failed status in the runner"""
- if (
- self.runner.get_status().failures
- and self.calculate_success() < SUCCESS_THRESHOLD_VALUE
- ):
- raise WorkflowExecutionError(
- f"{self.runner.name} reported errors: {Summary.from_step(self.runner)}"
- )
-
- if raise_warnings and self.runner.get_status().warnings:
- raise WorkflowExecutionError(
- f"{self.runner.name} reported warning: {Summary.from_step(self.runner)}"
- )
diff --git a/ingestion/src/metadata/workflow/base.py b/ingestion/src/metadata/workflow/base.py
index 87bca9565a22..e777d6731ec1 100644
--- a/ingestion/src/metadata/workflow/base.py
+++ b/ingestion/src/metadata/workflow/base.py
@@ -16,8 +16,10 @@
import uuid
from abc import ABC, abstractmethod
from datetime import datetime
+from statistics import mean
from typing import Any, Dict, List, Optional, TypeVar, Union
+from metadata.config.common import WorkflowExecutionError
from metadata.generated.schema.api.services.ingestionPipelines.createIngestionPipeline import (
CreateIngestionPipelineRequest,
)
@@ -32,10 +34,13 @@
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
-from metadata.generated.schema.metadataIngestion.workflow import LogLevels
+from metadata.generated.schema.metadataIngestion.workflow import (
+ LogLevels,
+ WorkflowConfig,
+)
from metadata.generated.schema.tests.testSuite import ServiceType
from metadata.generated.schema.type.entityReference import EntityReference
-from metadata.ingestion.api.step import Step
+from metadata.ingestion.api.step import Step, Summary
from metadata.ingestion.ometa.client_utils import create_ometa_client
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.timer.repeated_timer import RepeatedTimer
@@ -49,10 +54,7 @@
from metadata.utils.helpers import datetime_to_ts
from metadata.utils.logger import ingestion_logger, set_loggers_level
from metadata.workflow.workflow_output_handler import WorkflowOutputHandler
-from metadata.workflow.workflow_status_mixin import (
- SUCCESS_THRESHOLD_VALUE,
- WorkflowStatusMixin,
-)
+from metadata.workflow.workflow_status_mixin import WorkflowStatusMixin
logger = ingestion_logger()
@@ -82,8 +84,7 @@ class BaseWorkflow(ABC, WorkflowStatusMixin):
def __init__(
self,
config: Union[Any, Dict],
- log_level: LogLevels,
- metadata_config: OpenMetadataConnection,
+ workflow_config: WorkflowConfig,
service_type: ServiceType,
output_handler: WorkflowOutputHandler = WorkflowOutputHandler(),
):
@@ -92,19 +93,22 @@ def __init__(
"""
self.output_handler = output_handler
self.config = config
+ self.workflow_config = workflow_config
self.service_type = service_type
self._timer: Optional[RepeatedTimer] = None
self._ingestion_pipeline: Optional[IngestionPipeline] = None
self._start_ts = datetime_to_ts(datetime.now())
+
self._execution_time_tracker = ExecutionTimeTracker(
- log_level == LogLevels.DEBUG
+ self.workflow_config.loggerLevel == LogLevels.DEBUG
)
- set_loggers_level(log_level.value)
+ set_loggers_level(self.workflow_config.loggerLevel.value)
# We create the ometa client at the workflow level and pass it to the steps
- self.metadata_config = metadata_config
- self.metadata = create_ometa_client(metadata_config)
+ self.metadata = create_ometa_client(
+ self.workflow_config.openMetadataServerConfig
+ )
self.set_ingestion_pipeline_status(state=PipelineState.running)
self.post_init()
@@ -157,9 +161,22 @@ def post_init(self) -> None:
def execute_internal(self) -> None:
"""Workflow-specific logic to execute safely"""
- @abstractmethod
- def calculate_success(self) -> float:
- """Get the success % of the internal execution"""
+ def calculate_success(self) -> Optional[float]:
+ """
+ Get the success % of the internal execution.
+ Since we'll use this to get a single success % from multiple steps, we'll take
+ the average success % across all the steps. This way, we can have a proper
+ workflow status.
+ E.g., if we have no errors on the source but a bunch of them on the sink,
+ we still want the flow to be marked as a failure or partial success.
+ """
+ if not self.workflow_steps():
+ logger.warning("No steps to calculate success")
+ return None
+
+ return mean(
+ [step.get_status().calculate_success() for step in self.workflow_steps()]
+ )
@abstractmethod
def get_failures(self) -> List[StackTraceError]:
@@ -169,9 +186,22 @@ def get_failures(self) -> List[StackTraceError]:
def workflow_steps(self) -> List[Step]:
"""Steps to report status from"""
- @abstractmethod
def raise_from_status_internal(self, raise_warnings=False) -> None:
"""Based on the internal workflow status, raise a WorkflowExecutionError"""
+ for step in self.workflow_steps():
+ if (
+ step.get_status().failures
+ and step.get_status().calculate_success()
+ < self.workflow_config.successThreshold
+ ):
+ raise WorkflowExecutionError(
+ f"{step.name} reported errors: {Summary.from_step(step)}"
+ )
+
+ if raise_warnings and step.status.warnings:
+ raise WorkflowExecutionError(
+ f"{step.name} reported warning: {Summary.from_step(step)}"
+ )
def execute(self) -> None:
"""
@@ -186,7 +216,7 @@ def execute(self) -> None:
try:
self.execute_internal()
- if SUCCESS_THRESHOLD_VALUE <= self.calculate_success() < 100:
+ if self.workflow_config.successThreshold <= self.calculate_success() < 100:
pipeline_state = PipelineState.partialSuccess
# Any unhandled exception breaking the workflow should update the status
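
In short, success reporting now lives once in BaseWorkflow: each step computes its own success %, the workflow-level figure is their mean, and failures are raised per step against workflowConfig.successThreshold. A sketch of the resulting semantics:

    from statistics import mean

    # Illustrative step percentages: a clean source and a failing sink.
    step_success = [100.0, 72.0]
    workflow_success = mean(step_success)  # 86.0
    threshold = 90  # workflowConfig.successThreshold (90 was the old hardcoded value)
    # raise_from_status_internal raises for any step with recorded failures
    # whose own success % is below the threshold; execute() marks the run
    # partialSuccess when threshold <= workflow_success < 100.
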
diff --git a/ingestion/src/metadata/workflow/ingestion.py b/ingestion/src/metadata/workflow/ingestion.py
index cfa78c1259ed..1e28f5013178 100644
--- a/ingestion/src/metadata/workflow/ingestion.py
+++ b/ingestion/src/metadata/workflow/ingestion.py
@@ -24,9 +24,6 @@
from typing import List, Tuple, Type, cast
from metadata.config.common import WorkflowExecutionError
-from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
- OpenMetadataConnection,
-)
from metadata.generated.schema.entity.services.connections.serviceConnection import (
ServiceConnection,
)
@@ -38,7 +35,7 @@
OpenMetadataWorkflowConfig,
)
from metadata.ingestion.api.parser import parse_workflow_config_gracefully
-from metadata.ingestion.api.step import Step, Summary
+from metadata.ingestion.api.step import Step
from metadata.ingestion.api.steps import BulkSink, Processor, Sink, Source, Stage
from metadata.ingestion.models.custom_types import ServiceWithConnectionType
from metadata.profiler.api.models import ProfilerProcessorConfig
@@ -55,14 +52,15 @@
)
from metadata.utils.logger import ingestion_logger
from metadata.workflow.base import BaseWorkflow, InvalidWorkflowJSONException
-from metadata.workflow.workflow_status_mixin import SUCCESS_THRESHOLD_VALUE
logger = ingestion_logger()
class IngestionWorkflow(BaseWorkflow, ABC):
"""
- Base Ingestion Workflow implementation
+ Base Ingestion Workflow implementation. This is used by every
+ workflow except the application one, which inherits directly from
+ BaseWorkflow.
"""
config: OpenMetadataWorkflowConfig
@@ -79,14 +77,9 @@ def __init__(self, config: OpenMetadataWorkflowConfig):
self.config.source.type
)
- metadata_config: OpenMetadataConnection = (
- self.config.workflowConfig.openMetadataServerConfig
- )
-
super().__init__(
config=config,
- log_level=config.workflowConfig.loggerLevel,
- metadata_config=metadata_config,
+ workflow_config=config.workflowConfig,
service_type=self.service_type,
)
@@ -137,37 +130,12 @@ def execute_internal(self):
if bulk_sink:
bulk_sink.run()
- def calculate_success(self) -> float:
- return self.source.get_status().calculate_success()
-
def get_failures(self) -> List[StackTraceError]:
return self.source.get_status().failures
def workflow_steps(self) -> List[Step]:
return [self.source] + list(self.steps)
- def raise_from_status_internal(self, raise_warnings=False):
- """
- Check the status of all steps
- """
- if (
- self.source.get_status().failures
- and self.calculate_success() < SUCCESS_THRESHOLD_VALUE
- ):
- raise WorkflowExecutionError(
- f"{self.source.name} reported errors: {Summary.from_step(self.source)}"
- )
-
- for step in self.steps:
- if step.status.failures:
- raise WorkflowExecutionError(
- f"{step.name} reported errors: {Summary.from_step(step)}"
- )
- if raise_warnings and step.status.warnings:
- raise WorkflowExecutionError(
- f"{step.name} reported warnings: {Summary.from_step(step)}"
- )
-
def _retrieve_service_connection_if_needed(self, service_type: ServiceType) -> None:
"""
We override the current `serviceConnection` source config object if source workflow service already exists
diff --git a/ingestion/src/metadata/workflow/profiler.py b/ingestion/src/metadata/workflow/profiler.py
index 4354b8c96bc7..5f8d692d2f20 100644
--- a/ingestion/src/metadata/workflow/profiler.py
+++ b/ingestion/src/metadata/workflow/profiler.py
@@ -20,6 +20,9 @@
OpenMetadataWorkflowConfig,
)
from metadata.ingestion.api.steps import Processor, Sink
+from metadata.ingestion.connections.test_connections import (
+ raise_test_connection_exception,
+)
from metadata.ingestion.source.connections import get_test_connection_fn
from metadata.pii.processor import PIIProcessor
from metadata.profiler.processor.processor import ProfilerProcessor
@@ -75,12 +78,13 @@ def set_steps(self):
else:
self.steps = (profiler_processor, sink)
- def test_connection(self):
+ def test_connection(self) -> None:
service_config = self.config.source.serviceConnection.root.config
conn = get_ssl_connection(service_config)
test_connection_fn = get_test_connection_fn(service_config)
- test_connection_fn(self.metadata, conn, service_config)
+ result = test_connection_fn(self.metadata, conn, service_config)
+ raise_test_connection_exception(result)
def _get_sink(self) -> Sink:
sink_type = self.config.sink.type
diff --git a/ingestion/src/metadata/workflow/workflow_output_handler.py b/ingestion/src/metadata/workflow/workflow_output_handler.py
index d1a2070e2a7e..06df1f0dd54b 100644
--- a/ingestion/src/metadata/workflow/workflow_output_handler.py
+++ b/ingestion/src/metadata/workflow/workflow_output_handler.py
@@ -14,6 +14,7 @@
"""
import time
+from statistics import mean
from typing import Any, Dict, List, Optional, Type, Union
from pydantic import BaseModel
@@ -114,16 +115,15 @@ def print_summary(self, steps: List[Step], debug: bool = False):
self._print_summary(steps)
- def _print_summary(self, steps: List[Step]):
+ def _print_summary(self, steps: List[Step]) -> None:
failures: List[Failure] = []
- total_records: int = 0
- total_errors: int = 0
+ if not steps:
+ log_ansi_encoded_string(message="No steps to process.")
+ return
for step in steps:
step_summary = Summary.from_step(step)
- total_records += step_summary.records or 0
- total_errors += step_summary.errors or 0
failures.append(
Failure(name=step.name, failures=step.get_status().failures)
)
@@ -141,15 +141,18 @@ def _print_summary(self, steps: List[Step]):
log_ansi_encoded_string(message=f"Filtered: {step_summary.filtered}")
log_ansi_encoded_string(message=f"Errors: {step_summary.errors}")
+ log_ansi_encoded_string(
+ message=f"Success %: {step.get_status().calculate_success()}"
+ )
self._print_failures_if_apply(failures)
- total_success = max(total_records, 1)
+ # If nothing is processed, we'll have a success of 100%
+ success_pct = mean([step.get_status().calculate_success() for step in steps])
log_ansi_encoded_string(
color=ANSI.BRIGHT_CYAN,
bold=True,
- message="Success %: "
- + f"{round(total_success * 100 / (total_success + total_errors), 2)}",
+ message="Workflow Success %: " + f"{round(success_pct, 2)}",
)
def _print_debug_summary(self, steps: List[Step]):
diff --git a/ingestion/src/metadata/workflow/workflow_status_mixin.py b/ingestion/src/metadata/workflow/workflow_status_mixin.py
index e648ed00d439..fe8d99715c21 100644
--- a/ingestion/src/metadata/workflow/workflow_status_mixin.py
+++ b/ingestion/src/metadata/workflow/workflow_status_mixin.py
@@ -37,8 +37,6 @@
logger = ometa_logger()
-SUCCESS_THRESHOLD_VALUE = 90
-
class WorkflowResultStatus(Enum):
SUCCESS = 0
diff --git a/ingestion/tests/cli_e2e/test_cli_mysql.py b/ingestion/tests/cli_e2e/test_cli_mysql.py
index 04c8f9041cac..41b4ac1c5297 100644
--- a/ingestion/tests/cli_e2e/test_cli_mysql.py
+++ b/ingestion/tests/cli_e2e/test_cli_mysql.py
@@ -117,7 +117,7 @@ def expected_filtered_schema_excludes() -> int:
@staticmethod
def expected_filtered_table_includes() -> int:
- return 82
+ return 83
@staticmethod
def expected_filtered_table_excludes() -> int:
@@ -125,4 +125,4 @@ def expected_filtered_table_excludes() -> int:
@staticmethod
def expected_filtered_mix() -> int:
- return 82
+ return 83
diff --git a/ingestion/tests/integration/data_quality/conftest.py b/ingestion/tests/integration/data_quality/conftest.py
index 5fac9ade6519..a5ff6df9b264 100644
--- a/ingestion/tests/integration/data_quality/conftest.py
+++ b/ingestion/tests/integration/data_quality/conftest.py
@@ -113,11 +113,17 @@ def ingest_postgres_metadata(
"serviceConnection": postgres_service.connection.model_copy(
update={
"config": postgres_service.connection.config.model_copy(
- update={"ingestAllDatabases": True}
+ update={
+ "ingestAllDatabases": True,
+ }
)
}
),
- "sourceConfig": {"config": {}},
+ "sourceConfig": {
+ "config": {
+ "schemaFilterPattern": {"excludes": ["information_schema"]},
+ }
+ },
},
"sink": sink_config,
"workflowConfig": workflow_config,
diff --git a/ingestion/tests/integration/data_quality/test_data_diff.py b/ingestion/tests/integration/data_quality/test_table_diff.py
similarity index 90%
rename from ingestion/tests/integration/data_quality/test_data_diff.py
rename to ingestion/tests/integration/data_quality/test_table_diff.py
index 6df481341e85..62d69ca6bebf 100644
--- a/ingestion/tests/integration/data_quality/test_data_diff.py
+++ b/ingestion/tests/integration/data_quality/test_table_diff.py
@@ -234,6 +234,45 @@ def __init__(self, *args, **kwargs):
testCaseStatus=TestCaseStatus.Failed,
),
),
+ (
+ TestCaseDefinition(
+ name="postgres_different_case_columns_fail",
+ testDefinitionName="tableDiff",
+ computePassedFailedRowCount=True,
+ parameterValues=[
+ TestCaseParameterValue(
+ name="caseSensitiveColumns", value="true"
+ )
+ ],
+ ),
+ "POSTGRES_SERVICE.dvdrental.public.customer_different_case_columns",
+ TestCaseResult(
+ timestamp=int(datetime.now().timestamp() * 1000),
+ testCaseStatus=TestCaseStatus.Failed,
+ testResultValue=[
+ TestResultValue(name="removedColumns", value="1"),
+ TestResultValue(name="addedColumns", value="0"),
+ TestResultValue(name="changedColumns", value="0"),
+ ],
+ ),
+ ),
+ (
+ TestCaseDefinition(
+ name="postgres_different_case_columns_success",
+ testDefinitionName="tableDiff",
+ computePassedFailedRowCount=True,
+ parameterValues=[
+ TestCaseParameterValue(
+ name="caseSensitiveColumns", value="false"
+ )
+ ],
+ ),
+ "POSTGRES_SERVICE.dvdrental.public.customer_different_case_columns",
+ TestCaseResult(
+ timestamp=int(datetime.now().timestamp() * 1000),
+ testCaseStatus=TestCaseStatus.Success,
+ ),
+ ),
(
TestCaseDefinition(
name="table_from_another_db",
@@ -302,7 +341,7 @@ def test_happy_paths(
},
"processor": {
"type": "orm-test-runner",
- "config": {"testCases": [parameters.test_case_defintion.dict()]},
+ "config": {"testCases": [parameters.test_case_defintion.model_dump()]},
},
"sink": sink_config,
"workflowConfig": workflow_config,
@@ -442,6 +481,16 @@ def test_error_paths(
def add_changed_tables(connection: Connection):
connection.execute("CREATE TABLE customer_200 AS SELECT * FROM customer LIMIT 200;")
+ connection.execute(
+ "CREATE TABLE customer_different_case_columns AS SELECT * FROM customer;"
+ )
+ connection.execute(
+ 'ALTER TABLE customer_different_case_columns RENAME COLUMN first_name TO "First_Name";'
+ )
+ # TODO: this appears to be unsupported by data diff. Cross data type comparison is flaky.
+ # connection.execute(
+ # "ALTER TABLE customer_different_case_columns ALTER COLUMN store_id TYPE decimal"
+ # )
connection.execute("CREATE TABLE changed_customer AS SELECT * FROM customer;")
connection.execute(
"UPDATE changed_customer SET first_name = 'John' WHERE MOD(customer_id, 2) = 0;"
diff --git a/ingestion/tests/integration/integration_base.py b/ingestion/tests/integration/integration_base.py
index d54c9e19fff7..6cb82487792b 100644
--- a/ingestion/tests/integration/integration_base.py
+++ b/ingestion/tests/integration/integration_base.py
@@ -99,9 +99,24 @@
Markdown,
TestCaseEntityName,
)
+from metadata.generated.schema.type.tagLabel import (
+ LabelType,
+ State,
+ TagFQN,
+ TagLabel,
+ TagSource,
+)
from metadata.ingestion.ometa.ometa_api import C, T
from metadata.utils.dispatch import class_register
+TIER1_TAG: TagLabel = TagLabel(
+ tagFQN=TagFQN("Tier.Tier1"),
+ name="Tier1",
+ source=TagSource.Classification,
+ labelType=LabelType.Automated,
+ state=State.Suggested,
+)
+
COLUMNS = [
Column(name="id", dataType=DataType.BIGINT),
Column(name="another", dataType=DataType.BIGINT),
diff --git a/ingestion/tests/integration/ometa/test_ometa_es_api.py b/ingestion/tests/integration/ometa/test_ometa_es_api.py
index a66d57c0a9a2..23eee87c10a2 100644
--- a/ingestion/tests/integration/ometa/test_ometa_es_api.py
+++ b/ingestion/tests/integration/ometa/test_ometa_es_api.py
@@ -14,6 +14,7 @@
import logging
import time
import uuid
+from copy import deepcopy
from unittest import TestCase
from unittest.mock import patch
@@ -52,7 +53,7 @@
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils import fqn
-from ..integration_base import get_create_entity
+from ..integration_base import TIER1_TAG, get_create_entity
class OMetaESTest(TestCase):
@@ -363,3 +364,30 @@ def side_effect(path: str, data=None):
)
)
assert len(assets) == 10
+
+ def test_paginate_with_filters(self):
+ """We can paginate only tier 1 tables"""
+ # prepare some tables with tier 1 tags
+ for idx, name in enumerate([f"filtered_{i}" for i in range(10)]):
+ table = self.metadata.create_or_update(
+ data=get_create_entity(
+ entity=Table,
+ name=EntityName(name),
+ reference=self.create_schema_entity.fullyQualifiedName,
+ )
+ )
+ if idx % 2 == 0:
+ dest = deepcopy(table)
+ dest.tags = [TIER1_TAG]
+ self.metadata.patch(entity=Table, source=table, destination=dest)
+
+ query_filter = (
+ '{"query":{"bool":{"must":[{"bool":{"must":['
+ '{"term":{"tier.tagFQN":"Tier.Tier1"}},'
+ f'{{"term":{{"service.displayName.keyword":"{self.service_entity.name.root}"}}}}'
+ "]}}]}}}"
+ )
+ assets = list(
+ self.metadata.paginate_es(entity=Table, query_filter=query_filter, size=2)
+ )
+ assert len(assets) == 5
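
The query_filter string above is plain Elasticsearch query DSL; an equivalent construction that may read more easily (a sketch, with service_name standing in for self.service_entity.name.root):

    import json

    query_filter = json.dumps(
        {"query": {"bool": {"must": [{"bool": {"must": [
            {"term": {"tier.tagFQN": "Tier.Tier1"}},
            {"term": {"service.displayName.keyword": service_name}},
        ]}}]}}}
    )
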
diff --git a/ingestion/tests/integration/postgres/test_data_quality.py b/ingestion/tests/integration/postgres/test_data_quality.py
index 0e27e7d03cf1..0612dff82312 100644
--- a/ingestion/tests/integration/postgres/test_data_quality.py
+++ b/ingestion/tests/integration/postgres/test_data_quality.py
@@ -1,3 +1,6 @@
+import glob
+import json
+import os.path
import sys
from dataclasses import dataclass
from typing import List
@@ -6,6 +9,7 @@
from _openmetadata_testutils.pydantic.test_utils import assert_equal_pydantic_objects
from metadata.data_quality.api.models import TestCaseDefinition
+from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
TestSuiteConfigType,
@@ -19,12 +23,17 @@
SourceConfig,
WorkflowConfig,
)
-from metadata.generated.schema.tests.basic import TestCaseStatus
+from metadata.generated.schema.tests.basic import (
+ TestCaseResult,
+ TestCaseStatus,
+ TestResultValue,
+)
from metadata.generated.schema.tests.testCase import TestCase
from metadata.generated.schema.tests.testSuite import TestSuite
from metadata.generated.schema.type.basic import ComponentConfig
from metadata.ingestion.api.status import TruncatedStackTraceError
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils import entity_link
from metadata.workflow.data_quality import TestSuiteWorkflow
from metadata.workflow.metadata import MetadataWorkflow
@@ -32,7 +41,7 @@
pytest.skip("requires python 3.9+", allow_module_level=True)
-@pytest.fixture()
+@pytest.fixture(scope="module")
def run_data_quality_workflow(
run_workflow,
ingestion_config,
@@ -66,6 +75,7 @@ def run_data_quality_workflow(
"parameterValues": [
{"name": "allowedValues", "value": "['Tom', 'Jerry']"}
],
+ "computePassedFailedRowCount": True,
},
{
"name": "first_name_includes_tom_and_jerry",
@@ -73,7 +83,7 @@ def run_data_quality_workflow(
"columnName": "first_name",
"parameterValues": [
{"name": "allowedValues", "value": "['Tom', 'Jerry']"},
- {"name": "matchEnum", "value": ""},
+ {"name": "matchEnum", "value": "false"},
],
},
{
@@ -82,7 +92,7 @@ def run_data_quality_workflow(
"columnName": "first_name",
"parameterValues": [
{"name": "allowedValues", "value": "['Tom', 'Jerry']"},
- {"name": "matchEnum", "value": "True"},
+ {"name": "matchEnum", "value": "true"},
],
},
{
@@ -91,6 +101,104 @@ def run_data_quality_workflow(
"columnName": "customer_id",
"parameterValues": [],
},
+ {
+ "name": "column_values_not_match_regex",
+ "testDefinitionName": "columnValuesToNotMatchRegex",
+ "columnName": "email",
+ "parameterValues": [
+ {"name": "forbiddenRegex", "value": ".*@example\\.com$"}
+ ],
+ },
+ {
+ "name": "table_column_count_between",
+ "testDefinitionName": "tableColumnCountToBeBetween",
+ "parameterValues": [
+ {"name": "minColValue", "value": "8"},
+ {"name": "maxColValue", "value": "12"},
+ ],
+ },
+ {
+ "name": "table_column_count_equal",
+ "testDefinitionName": "tableColumnCountToEqual",
+ "parameterValues": [{"name": "columnCount", "value": "11"}],
+ },
+ {
+ "name": "table_column_name_exists",
+ "testDefinitionName": "tableColumnNameToExist",
+ "parameterValues": [
+ {"name": "columnName", "value": "customer_id"}
+ ],
+ },
+ {
+ "name": "table_column_names_match_set",
+ "testDefinitionName": "tableColumnToMatchSet",
+ "parameterValues": [
+ {
+ "name": "columnNames",
+ "value": "customer_id, store_id, first_name, last_name, email, address_id, activebool, create_date, last_update, active, json_field",
+ },
+ {"name": "ordered", "value": "false"},
+ ],
+ },
+ {
+ "name": "custom_sql_query_count",
+ "testDefinitionName": "tableCustomSQLQuery",
+ "parameterValues": [
+ {
+ "name": "sqlExpression",
+ "value": "SELECT CASE WHEN COUNT(*) > 0 THEN 0 ELSE 1 END FROM customer WHERE active = 1",
+ },
+ {"name": "strategy", "value": "COUNT"},
+ {"name": "threshold", "value": "0"},
+ ],
+ },
+ {
+ "name": "custom_sql_query_rows",
+ "testDefinitionName": "tableCustomSQLQuery",
+ "parameterValues": [
+ {
+ "name": "sqlExpression",
+ "value": "SELECT * FROM customer WHERE active = 1",
+ },
+ {"name": "strategy", "value": "ROWS"},
+ {"name": "threshold", "value": "10"},
+ ],
+ },
+ {
+ "name": "table_row_count_between",
+ "testDefinitionName": "tableRowCountToBeBetween",
+ "parameterValues": [
+ {"name": "minValue", "value": "100"},
+ {"name": "maxValue", "value": "1000"},
+ ],
+ },
+ {
+ "name": "table_row_count_equal",
+ "testDefinitionName": "tableRowCountToEqual",
+ "parameterValues": [{"name": "value", "value": "599"}],
+ },
+ {
+ "name": "table_row_inserted_count_between_fail",
+ "testDefinitionName": "tableRowInsertedCountToBeBetween",
+ "parameterValues": [
+ {"name": "min", "value": "10"},
+ {"name": "max", "value": "50"},
+ {"name": "columnName", "value": "create_date"},
+ {"name": "rangeType", "value": "DAY"},
+ {"name": "rangeInterval", "value": "1"},
+ ],
+ },
+ {
+ "name": "table_row_inserted_count_between_success",
+ "testDefinitionName": "tableRowInsertedCountToBeBetween",
+ "parameterValues": [
+ {"name": "min", "value": "590"},
+ {"name": "max", "value": "600"},
+ {"name": "columnName", "value": "last_update"},
+ {"name": "rangeType", "value": "YEAR"},
+ {"name": "rangeInterval", "value": "12"},
+ ],
+ },
],
}
),
@@ -109,14 +217,150 @@ def run_data_quality_workflow(
metadata.delete(TestSuite, test_suite.id, recursive=True, hard_delete=True)
+def test_all_definition_exists(metadata, run_data_quality_workflow, db_service):
+ test_definitions_glob = (
+ os.path.dirname(__file__)
+ + "/../../../.."
+ + "/openmetadata-service/src/main/resources/json/data/tests/**.json"
+ )
+ test_definitions: List[str] = []
+ for test_definition_file in glob.glob(test_definitions_glob, recursive=False):
+ with open(test_definition_file) as test_definition_json:
+ test_definitions.append(json.load(test_definition_json)["name"])
+ assert len(test_definitions) > 0
+ table: Table = metadata.get_by_name(
+ Table,
+ f"{db_service.fullyQualifiedName.root}.dvdrental.public.customer",
+ nullable=False,
+ )
+ tcs: List[TestCase] = metadata.list_entities(
+ TestCase,
+ fields=["*"],
+ params={
+ "entityLink": entity_link.get_entity_link(
+ Table, table.fullyQualifiedName.root
+ )
+ },
+ ).entities
+ tcs_dict = {tc.testDefinition.fullyQualifiedName: tc for tc in tcs}
+ excluded = {
+ # TODO implement these too
+ "columnValueLengthsToBeBetween",
+ "columnValueMaxToBeBetween",
+ "columnValueMinToBeBetween",
+ "columnValuesToBeUnique",
+ "tableDataToBeFresh",
+ "columnValuesToMatchRegex",
+ "columnValuesToNotMatchRegex",
+ "columnValueStdDevToBeBetween",
+ "columnValuesToBeNotNull",
+ "columnValueMedianToBeBetween",
+ "columnValuesSumToBeBetween",
+ "columnValuesToBeInSet",
+ "columnValuesMissingCount",
+ "columnValuesToBeNotInSet",
+ "columnValueMeanToBeBetween",
+ "columnValuesToBeBetween",
+ "tableDiff",
+ }
+ missing = set()
+ for test_definition in test_definitions:
+ if test_definition in tcs_dict:
+ assert (
+ test_definition not in excluded
+ ), f"Remove test from excluded list: {test_definition}"
+ else:
+ if test_definition in excluded:
+ continue
+ missing.add(test_definition)
+ assert not missing, f"Missing test cases: {missing}"
+
+
@pytest.mark.parametrize(
"test_case_name,expected_status",
[
- ("first_name_includes_tom_and_jerry_wo_enum", TestCaseStatus.Success),
- ("first_name_includes_tom_and_jerry", TestCaseStatus.Success),
- ("first_name_is_tom_or_jerry", TestCaseStatus.Failed),
- ("id_no_bounds", TestCaseStatus.Success),
+ (
+ "first_name_includes_tom_and_jerry_wo_enum",
+ TestCaseResult(
+ timestamp=0,
+ testCaseStatus=TestCaseStatus.Success,
+ passedRows=2,
+ failedRows=597,
+ ),
+ ),
+ (
+ "first_name_includes_tom_and_jerry",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "first_name_is_tom_or_jerry",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Failed),
+ ),
+ (
+ "id_no_bounds",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "column_values_not_match_regex",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "table_column_count_between",
+ TestCaseResult(
+ timestamp=0,
+ testCaseStatus=TestCaseStatus.Success,
+ testResultValue=[TestResultValue(name="columnCount", value="11")],
+ ),
+ ),
+ (
+ "table_column_count_equal",
+ TestCaseResult(
+ timestamp=0,
+ testCaseStatus=TestCaseStatus.Success,
+ testResultValue=[TestResultValue(name="columnCount", value="11")],
+ ),
+ ),
+ (
+ "table_column_name_exists",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "table_column_names_match_set",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "custom_sql_query_count",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "custom_sql_query_rows",
+ TestCaseResult(
+ timestamp=0,
+ testCaseStatus=TestCaseStatus.Failed,
+ testResultValues=[{"name": "resultRowCount", "value": "599"}],
+ ),
+ ),
+ (
+ "table_row_count_between",
+ TestCaseResult(
+ timestamp=0,
+ testCaseStatus=TestCaseStatus.Success,
+ testResultValue=[TestResultValue(name="rowCount", value="599")],
+ ),
+ ),
+ (
+ "table_row_count_equal",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
+ (
+ "table_row_inserted_count_between_fail",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Failed),
+ ),
+ (
+ "table_row_inserted_count_between_success",
+ TestCaseResult(timestamp=0, testCaseStatus=TestCaseStatus.Success),
+ ),
],
+ ids=lambda *x: x[0],
)
def test_data_quality(
run_data_quality_workflow, metadata: OpenMetadata, test_case_name, expected_status
@@ -128,7 +372,12 @@ def test_data_quality(
(t for t in test_cases if t.name.root == test_case_name), None
)
assert test_case is not None
- assert test_case.testCaseResult.testCaseStatus == expected_status
+ assert_equal_pydantic_objects(
+ expected_status.model_copy(
+ update={"timestamp": test_case.testCaseResult.timestamp}
+ ),
+ test_case.testCaseResult,
+ )
@pytest.fixture()
diff --git a/ingestion/tests/unit/data_quality/validations/test_base_handler.py b/ingestion/tests/unit/data_quality/validations/test_base_handler.py
new file mode 100644
index 000000000000..fab476e1ddd3
--- /dev/null
+++ b/ingestion/tests/unit/data_quality/validations/test_base_handler.py
@@ -0,0 +1,34 @@
+from ast import literal_eval
+
+import pytest
+
+from metadata.data_quality.validations.base_test_handler import BaseTestValidator
+from metadata.generated.schema.tests.testCase import TestCaseParameterValue
+
+
+@pytest.mark.parametrize(
+ "param_values, name, type_, default, expected",
+ [
+ ([TestCaseParameterValue(name="str", value="test")], "str", str, None, "test"),
+ (
+ [TestCaseParameterValue(name="param", value="[1, 2, 3]")],
+ "param",
+ literal_eval,
+ None,
+ [1, 2, 3],
+ ),
+ ([TestCaseParameterValue(name="param", value="123")], "param", int, None, 123),
+ (
+ [TestCaseParameterValue(name="param", value=None)],
+ "param",
+ str,
+ "default",
+ "default",
+ ),
+ ],
+)
+def test_get_test_case_param_value(param_values, name, type_, default, expected):
+ result = BaseTestValidator.get_test_case_param_value(
+ param_values, name, type_, default
+ )
+ assert result == expected
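
These parametrized cases line up with the helper deleted from metadata/utils/test_suite.py earlier in this patch, which now appears to live on BaseTestValidator. A sketch mirroring the removed implementation:

    from typing import Callable, Optional

    # Find the named parameter, then cast (optionally pre-processing first).
    def get_test_case_param_value(
        test_case_param_vals, name: str, type_, default=None,
        pre_processor: Optional[Callable] = None,
    ):
        value = next(
            (param.value for param in test_case_param_vals if param.name == name), None
        )
        if not value:
            return default
        return type_(pre_processor(value)) if pre_processor else type_(value)
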
diff --git a/ingestion/tests/unit/data_quality/validations/test_utils.py b/ingestion/tests/unit/data_quality/validations/test_utils.py
new file mode 100644
index 000000000000..4b4ce41a4248
--- /dev/null
+++ b/ingestion/tests/unit/data_quality/validations/test_utils.py
@@ -0,0 +1,22 @@
+import pytest
+
+from metadata.data_quality.validations.utils import get_bool_test_case_param
+from metadata.generated.schema.tests.testCase import TestCaseParameterValue
+
+
+@pytest.mark.parametrize(
+ "test_case_param_vals, name, expected",
+ [
+ ([TestCaseParameterValue(name="param1", value="true")], "param1", True),
+ ([TestCaseParameterValue(name="param1", value="false")], "param1", False),
+ ([TestCaseParameterValue(name="param1", value="True")], "param1", True),
+ ([TestCaseParameterValue(name="param1", value="False")], "param1", False),
+ ([TestCaseParameterValue(name="param1", value="TRUE")], "param1", True),
+ ([TestCaseParameterValue(name="param1", value="FALSE")], "param1", False),
+ ([TestCaseParameterValue(name="param1", value="invalid")], "param1", False),
+ ([], "param1", False),
+ ([TestCaseParameterValue(name="param2", value="true")], "param1", False),
+ ],
+)
+def test_get_bool_test_case_param(test_case_param_vals, name, expected):
+ assert get_bool_test_case_param(test_case_param_vals, name) == expected
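
A hedged sketch of get_bool_test_case_param consistent with this table (assumption: anything other than a case-insensitive "true", including a missing parameter, is False):

    # Consistent with the cases above; not the module's actual source.
    def get_bool_test_case_param(test_case_param_vals, name: str) -> bool:
        value = next(
            (param.value for param in test_case_param_vals if param.name == name),
            None,
        )
        return str(value).strip().lower() == "true"
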
diff --git a/ingestion/tests/unit/metadata/ingestion/source/database/snowflake/profiler/test_system_metrics.py b/ingestion/tests/unit/metadata/ingestion/source/database/snowflake/profiler/test_system_metrics.py
index c2fcbdc090f9..48809d434cfc 100644
--- a/ingestion/tests/unit/metadata/ingestion/source/database/snowflake/profiler/test_system_metrics.py
+++ b/ingestion/tests/unit/metadata/ingestion/source/database/snowflake/profiler/test_system_metrics.py
@@ -2,7 +2,7 @@
import pytest
-from metadata.ingestion.source.database.snowflake.profiler.system_metrics import (
+from metadata.ingestion.source.database.snowflake.profiler.system import (
PUBLIC_SCHEMA,
SnowflakeTableResovler,
)
diff --git a/ingestion/tests/unit/metadata/utils/test_time_utils.py b/ingestion/tests/unit/metadata/utils/test_time_utils.py
index 13ef5c9d9c0a..8ff2099a4359 100644
--- a/ingestion/tests/unit/metadata/utils/test_time_utils.py
+++ b/ingestion/tests/unit/metadata/utils/test_time_utils.py
@@ -1,8 +1,9 @@
-from datetime import timedelta
+from datetime import datetime, timedelta, timezone
import pytest
-from metadata.utils.time_utils import timedelta_to_string
+from metadata.generated.schema.type.basic import Timestamp
+from metadata.utils.time_utils import (
+ datetime_to_timestamp,
+ timedelta_to_string,
+ timestamp_to_datetime,
+)
@pytest.mark.parametrize(
@@ -23,3 +24,54 @@
)
def test_timedelta_to_string(parameter, expected):
assert timedelta_to_string(parameter).startswith(expected)
+
+
+@pytest.mark.parametrize(
+ "timestamp, expected_datetime",
+ [
+ (
+ Timestamp(root=1638316800000),
+ datetime(2021, 12, 1, 0, 0, tzinfo=timezone.utc),
+ ),
+ (
+ Timestamp(root=1609459200000),
+ datetime(2021, 1, 1, 0, 0, tzinfo=timezone.utc),
+ ),
+ (Timestamp(root=0), datetime(1970, 1, 1, 0, 0, tzinfo=timezone.utc)),
+ ],
+)
+def test_timestamp_to_datetime(timestamp, expected_datetime):
+ assert timestamp_to_datetime(timestamp) == expected_datetime
+
+
+@pytest.mark.parametrize(
+ "datetime_value, milliseconds, expected_timestamp",
+ [
+ # Naive datetime (assumed to be in UTC)
+ (datetime(2021, 12, 1, 0, 0, 0), False, 1638316800),
+ (datetime(2021, 12, 1, 0, 0, 0), True, 1638316800000),
+ # Timezone-aware datetime (UTC)
+ (datetime(2021, 12, 1, 0, 0, 0, tzinfo=timezone.utc), False, 1638316800),
+ (datetime(2021, 12, 1, 0, 0, 0, tzinfo=timezone.utc), True, 1638316800000),
+ # Timezone-aware datetime (non-UTC)
+ (
+ datetime(2021, 12, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=1))),
+ False,
+ 1638313200,
+ ),
+ (
+ datetime(2021, 12, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=1))),
+ True,
+ 1638313200000,
+ ),
+ ],
+)
+def test_datetime_to_timestamp(datetime_value, milliseconds, expected_timestamp):
+ assert datetime_to_timestamp(datetime_value, milliseconds) == expected_timestamp
diff --git a/ingestion/tests/unit/profiler/test_utils.py b/ingestion/tests/unit/profiler/test_utils.py
index 523ad24c6452..7743673cf250 100644
--- a/ingestion/tests/unit/profiler/test_utils.py
+++ b/ingestion/tests/unit/profiler/test_utils.py
@@ -22,7 +22,7 @@
from sqlalchemy.sql.sqltypes import Integer, String
from metadata.ingestion.source.database.snowflake.models import SnowflakeQueryLogEntry
-from metadata.ingestion.source.database.snowflake.profiler.system_metrics import (
+from metadata.ingestion.source.database.snowflake.profiler.system import (
SnowflakeTableResovler,
get_snowflake_system_queries,
)
diff --git a/ingestion/tests/unit/profiler/test_workflow.py b/ingestion/tests/unit/profiler/test_workflow.py
index 8cc358c6f9d8..aeeb3df971d1 100644
--- a/ingestion/tests/unit/profiler/test_workflow.py
+++ b/ingestion/tests/unit/profiler/test_workflow.py
@@ -26,9 +26,6 @@
Table,
TableProfilerConfig,
)
-from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
- OpenMetadataConnection,
-)
from metadata.generated.schema.entity.services.databaseService import (
DatabaseService,
DatabaseServiceType,
@@ -36,6 +33,7 @@
from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import (
DatabaseServiceProfilerPipeline,
)
+from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.profiler.api.models import ProfilerProcessorConfig
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
@@ -122,7 +120,7 @@ def test_init_workflow(mocked_method, mocked_orm): # pylint: disable=unused-arg
mocked_method.assert_called()
assert isinstance(workflow.source.source_config, DatabaseServiceProfilerPipeline)
- assert isinstance(workflow.metadata_config, OpenMetadataConnection)
+ assert isinstance(workflow.workflow_config, WorkflowConfig)
profiler_processor_step = workflow.steps[0]
assert isinstance(profiler_processor_step.profiler_config, ProfilerProcessorConfig)
diff --git a/ingestion/tests/unit/resources/datasets/manifest_v8.json b/ingestion/tests/unit/resources/datasets/manifest_v8.json
index e6a640e9c1e3..12d49be7fad7 100644
--- a/ingestion/tests/unit/resources/datasets/manifest_v8.json
+++ b/ingestion/tests/unit/resources/datasets/manifest_v8.json
@@ -77,7 +77,8 @@
},
"tags": [
"model_tag_one",
- "model_tag_two"
+ "model_tag_two",
+ "22.8.5.1"
],
"refs": [
[
diff --git a/ingestion/tests/unit/test_dbt.py b/ingestion/tests/unit/test_dbt.py
index 937428d01a9b..11b18438a1ad 100644
--- a/ingestion/tests/unit/test_dbt.py
+++ b/ingestion/tests/unit/test_dbt.py
@@ -135,6 +135,14 @@
state="Suggested",
href=None,
),
+ TagLabel(
+ tagFQN='dbtTags."22.8.5.1"',
+ description=None,
+ source="Classification",
+ labelType="Automated",
+ state="Suggested",
+ href=None,
+ ),
],
columns=[
Column(
@@ -426,6 +434,12 @@ def test_dbt_manifest_v8(self, get_tag_label, es_search_from_fqn, get_dbt_owner)
state=State.Suggested.value,
source=TagSource.Classification.value,
),
+ TagLabel(
+ tagFQN='dbtTags."22.8.5.1"',
+ labelType=LabelType.Automated.value,
+ state=State.Suggested.value,
+ source=TagSource.Classification.value,
+ ),
]
self.execute_test(
MOCK_SAMPLE_MANIFEST_V8,
diff --git a/ingestion/tests/unit/test_json_schema_parser.py b/ingestion/tests/unit/test_json_schema_parser.py
index fd1bb1d9ca89..7c5ce9acc401 100644
--- a/ingestion/tests/unit/test_json_schema_parser.py
+++ b/ingestion/tests/unit/test_json_schema_parser.py
@@ -91,8 +91,81 @@ class JsonSchemaParserTests(TestCase):
}
}"""
+ sample_array_json_schema = """
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "properties": {
+ "firstName": {
+ "type": "string"
+ },
+ "lastName": {
+ "type": "string"
+ },
+ "age": {
+ "type": "integer"
+ },
+ "address": {
+ "type": "object",
+ "properties": {
+ "streetAddress": {
+ "type": "string"
+ },
+ "city": {
+ "type": "string"
+ },
+ "state": {
+ "type": "string"
+ },
+ "postalCode": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "streetAddress",
+ "city",
+ "state",
+ "postalCode"
+ ]
+ },
+ "phoneNumbers": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "number": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "type",
+ "number"
+ ]
+ }
+ },
+ "hobbies": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "firstName",
+ "lastName",
+ "age",
+ "address",
+ "phoneNumbers"
+ ]
+ }
+ """
+
parsed_schema = parse_json_schema(sample_json_schema)
parsed_postgres_schema = parse_json_schema(sample_postgres_json_schema, Column)
+ parsed_array_schema = parse_json_schema(sample_array_json_schema)
def test_schema_name(self):
self.assertEqual(self.parsed_schema[0].name.root, "Person")
@@ -143,3 +216,37 @@ def test_parse_postgres_json_fields(self):
)
self.assertEqual(len(self.parsed_postgres_schema[0].children), 3)
self.assertEqual(len(self.parsed_postgres_schema[0].children[1].children), 4)
+
+ def test_parse_array_json_fields(self):
+ self.assertEqual(self.parsed_array_schema[0].name.root, "default")
+ self.assertEqual(len(self.parsed_array_schema[0].children), 6)
+
+ # Validate the complex array datatype
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].name.root, "phoneNumbers"
+ )
+ self.assertEqual(self.parsed_array_schema[0].children[4].dataType.name, "ARRAY")
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].dataTypeDisplay, "ARRAY"
+ )
+ self.assertEqual(len(self.parsed_array_schema[0].children[4].children), 2)
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].children[0].name.root, "type"
+ )
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].children[0].dataType.name, "STRING"
+ )
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].children[1].name.root, "number"
+ )
+ self.assertEqual(
+ self.parsed_array_schema[0].children[4].children[1].dataType.name, "STRING"
+ )
+
+ # Validate the primitive array datatype
+ self.assertEqual(self.parsed_array_schema[0].children[5].name.root, "hobbies")
+ self.assertEqual(self.parsed_array_schema[0].children[5].dataType.name, "ARRAY")
+ self.assertEqual(
+ self.parsed_array_schema[0].children[5].dataTypeDisplay, "ARRAY"
+ )
+ self.assertIsNone(self.parsed_array_schema[0].children[5].children)
diff --git a/ingestion/tests/unit/test_workflow_parse.py b/ingestion/tests/unit/test_workflow_parse.py
index 14d9463ed70b..f14d4c200450 100644
--- a/ingestion/tests/unit/test_workflow_parse.py
+++ b/ingestion/tests/unit/test_workflow_parse.py
@@ -19,6 +19,9 @@
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
+from metadata.generated.schema.entity.services.connections.api.restConnection import (
+ RestConnection,
+)
from metadata.generated.schema.entity.services.connections.dashboard.tableauConnection import (
TableauConnection,
)
@@ -115,6 +118,10 @@ def test_get_connection_class(self):
connection = get_connection_class(source_type, get_service_type(source_type))
self.assertEqual(connection, KafkaConnection)
+ source_type = "Rest"
+ connection = get_connection_class(source_type, get_service_type(source_type))
+ self.assertEqual(connection, RestConnection)
+
def test_get_source_config_class(self):
"""
Check that we can correctly build the connection module ingredients
diff --git a/ingestion/tests/unit/topology/api/test_rest.py b/ingestion/tests/unit/topology/api/test_rest.py
index 8e6d8e69f17f..995b1de3a3a3 100644
--- a/ingestion/tests/unit/topology/api/test_rest.py
+++ b/ingestion/tests/unit/topology/api/test_rest.py
@@ -21,9 +21,9 @@
CreateAPICollectionRequest,
)
from metadata.generated.schema.entity.services.apiService import (
+ ApiConnection,
ApiService,
- ApiServiceConnection,
- APIServiceType,
+ ApiServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
@@ -43,7 +43,7 @@
"serviceName": "openapi_rest",
"serviceConnection": {
"config": {
- "type": "REST",
+ "type": "Rest",
"openAPISchemaURL": "https://petstore3.swagger.io/api/v3/openapi.json",
}
},
@@ -91,8 +91,8 @@
id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb",
name="openapi_rest",
fullyQualifiedName=FullyQualifiedEntityName("openapi_rest"),
- connection=ApiServiceConnection(),
- serviceType=APIServiceType.REST,
+ connection=ApiConnection(),
+ serviceType=ApiServiceType.Rest,
)
EXPECTED_COLLECTION_REQUEST = [
Either(
diff --git a/ingestion/tests/unit/topology/pipeline/test_gluepipeline.py b/ingestion/tests/unit/topology/pipeline/test_gluepipeline.py
new file mode 100644
index 000000000000..428d6e900255
--- /dev/null
+++ b/ingestion/tests/unit/topology/pipeline/test_gluepipeline.py
@@ -0,0 +1,341 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test Glue Pipeline using the topology
+"""
+import json
+from unittest import TestCase
+from unittest.mock import patch
+
+from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
+from metadata.generated.schema.entity.data.pipeline import Pipeline, Task
+from metadata.generated.schema.entity.services.pipelineService import (
+ PipelineConnection,
+ PipelineService,
+ PipelineServiceType,
+)
+from metadata.generated.schema.metadataIngestion.workflow import (
+ OpenMetadataWorkflowConfig,
+)
+from metadata.generated.schema.type.basic import (
+ EntityName,
+ FullyQualifiedEntityName,
+ Markdown,
+ SourceUrl,
+)
+from metadata.generated.schema.type.entityReference import EntityReference
+from metadata.ingestion.source.pipeline.gluepipeline.metadata import GluepipelineSource
+
+mock_glue_config = {
+ "source": {
+ "type": "gluepipeline",
+ "serviceName": "local_gluepipeline",
+ "serviceConnection": {
+ "config": {
+ "type": "GluePipeline",
+ "awsConfig": {
+ "awsAccessKeyId": "aws_access_key_id",
+ "awsSecretAccessKey": "aws_secret_access_key",
+ "awsRegion": "us-east-2",
+ "endPointURL": "https://endpoint.com/",
+ },
+ },
+ },
+ "sourceConfig": {"config": {"type": "PipelineMetadata"}},
+ },
+ "sink": {"type": "metadata-rest", "config": {}},
+ "workflowConfig": {
+ "openMetadataServerConfig": {
+ "hostPort": "http://localhost:8585/api",
+ "authProvider": "openmetadata",
+ "securityConfig": {
+ "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
+ },
+ }
+ },
+}
+
+EXPECTED_JOB_DETAILS = json.loads(
+ """
+{
+ "Name": "redshift workflow",
+ "Description": "redshift workflow description",
+ "DefaultRunProperties": {},
+ "CreatedOn": "2024-09-20 15:46:36.668000",
+ "LastModifiedOn": "2024-09-20 15:46:36.668000",
+ "LastRun": {
+ "Name": "redshift workflow",
+ "WorkflowRunId": "wr_6db99d3ea932db0739f03ba5ae56e4b635b7878261f75af062e1223a7272c50e",
+ "WorkflowRunProperties": {},
+ "StartedOn": "2024-09-30 17:07:24.032000",
+ "CompletedOn": "2024-09-30 17:08:24.032000",
+ "Status": "COMPLETED",
+ "Statistics": {
+ "TotalActions": 1,
+ "TimeoutActions": 0,
+ "FailedActions": 1,
+ "StoppedActions": 0,
+ "SucceededActions": 0,
+ "RunningActions": 0,
+ "ErroredActions": 0,
+ "WaitingActions": 0
+ },
+ "Graph": {
+ "Nodes": [
+ {
+ "Type": "TRIGGER",
+ "Name": "redshift_event",
+ "UniqueId": "wnode_98c85bc1e19d969e35e0687b2ec586822271463c72dd556f90cfe6421a2517ee",
+ "TriggerDetails": {
+ "Trigger": {
+ "Name": "redshift_event",
+ "WorkflowName": "redshift workflow",
+ "Type": "ON_DEMAND",
+ "State": "CREATED",
+ "Actions": [
+ {
+ "JobName": "Redshift DBT Job"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "Type": "JOB",
+ "Name": "Redshift DBT Job",
+ "UniqueId": "wnode_0cbf9f52c41002015ebc46fe70a9b0ea64ff7dba891cf141d6dcbf5580fe7123",
+ "JobDetails": {
+ "JobRuns": [
+ {
+ "Id": "jr_108804857dd29cb1857c92d3e8bf0b48f7685c246e56125b713eb6ea7ebfe4e2",
+ "Attempt": 0,
+ "TriggerName": "redshift_event",
+ "JobName": "Redshift DBT Job",
+ "JobMode": "VISUAL",
+ "JobRunQueuingEnabled": false,
+ "StartedOn": "2024-09-30 17:07:59.185000",
+ "LastModifiedOn": "2024-09-30 17:08:03.003000",
+ "CompletedOn": "2024-09-30 17:08:03.003000",
+ "JobRunState": "FAILED",
+ "ErrorMessage": "Error Message",
+ "PredecessorRuns": [],
+ "AllocatedCapacity": 10,
+ "ExecutionTime": 0,
+ "Timeout": 2880,
+ "MaxCapacity": 10.0,
+ "WorkerType": "G.1X",
+ "NumberOfWorkers": 10,
+ "LogGroupName": "/aws-glue/jobs",
+ "GlueVersion": "4.0",
+ "ExecutionClass": "STANDARD"
+ }
+ ]
+ }
+ }
+ ],
+ "Edges": [
+ {
+ "SourceId": "wnode_98c85bc1e19d969e35e0687b2ec586822271463c72dd556f90cfe6421a2517ee",
+ "DestinationId": "wnode_0cbf9f52c41002015ebc46fe70a9b0ea64ff7dba891cf141d6dcbf5580fe7123"
+ }
+ ]
+ }
+ },
+ "Graph": {
+ "Nodes": [
+ {
+ "Type": "TRIGGER",
+ "Name": "redshift_event",
+ "UniqueId": "wnode_98c85bc1e19d969e35e0687b2ec586822271463c72dd556f90cfe6421a2517ee",
+ "TriggerDetails": {
+ "Trigger": {
+ "Name": "redshift_event",
+ "WorkflowName": "redshift workflow",
+ "Type": "ON_DEMAND",
+ "State": "CREATED",
+ "Actions": [
+ {
+ "JobName": "Redshift DBT Job"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "Type": "JOB",
+ "Name": "Redshift DBT Job",
+ "UniqueId": "wnode_0cbf9f52c41002015ebc46fe70a9b0ea64ff7dba891cf141d6dcbf5580fe7123",
+ "JobDetails": {}
+ }
+ ],
+ "Edges": [
+ {
+ "SourceId": "wnode_98c85bc1e19d969e35e0687b2ec586822271463c72dd556f90cfe6421a2517ee",
+ "DestinationId": "wnode_0cbf9f52c41002015ebc46fe70a9b0ea64ff7dba891cf141d6dcbf5580fe7123"
+ }
+ ]
+ }
+}
+"""
+)
+
+EXPECTED_CREATED_PIPELINES = CreatePipelineRequest(
+ name=EntityName(root="redshift workflow"),
+ displayName="redshift workflow",
+ description=None,
+ dataProducts=None,
+ sourceUrl=SourceUrl(
+ root="https://us-east-2.console.aws.amazon.com/glue/home?region=us-east-2#/v2/etl-configuration/workflows/view/redshift workflow"
+ ),
+ concurrency=None,
+ pipelineLocation=None,
+ startDate=None,
+ tasks=[
+ Task(
+ name="redshift_event",
+ displayName="redshift_event",
+ fullyQualifiedName=None,
+ description=None,
+ sourceUrl=None,
+ downstreamTasks=["Redshift DBT Job"],
+ taskType="TRIGGER",
+ taskSQL=None,
+ startDate=None,
+ endDate=None,
+ tags=None,
+ owners=None,
+ ),
+ Task(
+ name="Redshift DBT Job",
+ displayName="Redshift DBT Job",
+ fullyQualifiedName=None,
+ description=None,
+ sourceUrl=None,
+ downstreamTasks=[],
+ taskType="JOB",
+ taskSQL=None,
+ startDate=None,
+ endDate=None,
+ tags=None,
+ owners=None,
+ ),
+ ],
+ tags=None,
+ owners=None,
+ service=FullyQualifiedEntityName(root="gluepipeline_test"),
+ extension=None,
+ scheduleInterval=None,
+ domain=None,
+ lifeCycle=None,
+ sourceHash=None,
+)
+
+MOCK_PIPELINE_SERVICE = PipelineService(
+ id="85811038-099a-11ed-861d-0242ac120002",
+ name="gluepipeline_test",
+ fullyQualifiedName=FullyQualifiedEntityName("gluepipeline_test"),
+ connection=PipelineConnection(),
+ serviceType=PipelineServiceType.GluePipeline,
+)
+
+MOCK_PIPELINE = Pipeline(
+ id="2aaa012e-099a-11ed-861d-0242ac120002",
+ name=EntityName(root="redshift workflow"),
+ fullyQualifiedName="gluepipeline_test.redshift workflow",
+ displayName="OpenMetadata DBTCloud Workflow",
+ description=Markdown(root="Example Job Description"),
+ dataProducts=None,
+ sourceUrl=SourceUrl(
+ root="https://abc12.us1.dbt.com/deploy/70403103922125/projects/70403103926818/jobs/70403103936332"
+ ),
+ concurrency=None,
+ pipelineLocation=None,
+ startDate=None,
+ tasks=[
+ Task(
+ name="70403110257794",
+ displayName=None,
+ fullyQualifiedName=None,
+ description=None,
+ sourceUrl=SourceUrl(
+ root="https://abc12.us1.dbt.com/deploy/70403103922125/projects/70403103926818/runs/70403110257794/"
+ ),
+ downstreamTasks=None,
+ taskType=None,
+ taskSQL=None,
+ startDate="2024-05-27 10:42:20.621788+00:00",
+ endDate="2024-05-28 10:42:52.622408+00:00",
+ tags=None,
+ owners=None,
+ ),
+ Task(
+ name="70403111615088",
+ displayName=None,
+ fullyQualifiedName=None,
+ description=None,
+ sourceUrl=SourceUrl(
+ root="https://abc12.us1.dbt.com/deploy/70403103922125/projects/70403103926818/runs/70403111615088/"
+ ),
+ downstreamTasks=None,
+ taskType=None,
+ taskSQL=None,
+ startDate="None",
+ endDate="None",
+ tags=None,
+ owners=None,
+ ),
+ ],
+ tags=None,
+ owners=None,
+ service=EntityReference(
+ id="85811038-099a-11ed-861d-0242ac120002", type="pipelineService"
+ ),
+ extension=None,
+ scheduleInterval="6 */12 * * 0,1,2,3,4,5,6",
+ domain=None,
+ lifeCycle=None,
+ sourceHash=None,
+)
+
+EXPECTED_PIPELINE_NAME = "redshift workflow"
+
+
+class GluePipelineUnitTest(TestCase):
+ """
+ Glue Pipeline unit tests
+ """
+
+ @patch(
+ "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.test_connection"
+ )
+ def __init__(self, methodName, test_connection) -> None:
+ super().__init__(methodName)
+ test_connection.return_value = False
+
+ config = OpenMetadataWorkflowConfig.model_validate(mock_glue_config)
+ self.gluepipeline = GluepipelineSource.create(
+ mock_glue_config["source"],
+ config.workflowConfig.openMetadataServerConfig,
+ )
+ self.gluepipeline.context.get().__dict__["pipeline"] = MOCK_PIPELINE.name.root
+ self.gluepipeline.context.get().__dict__[
+ "pipeline_service"
+ ] = MOCK_PIPELINE_SERVICE.name.root
+
+ def test_pipeline_name(self):
+ assert (
+ self.gluepipeline.get_pipeline_name(EXPECTED_JOB_DETAILS)
+ == EXPECTED_PIPELINE_NAME
+ )
+
+ def test_pipelines(self):
+ pipeline = list(self.gluepipeline.yield_pipeline(EXPECTED_JOB_DETAILS))[0].right
+ assert pipeline == EXPECTED_CREATED_PIPELINES
diff --git a/ingestion/tests/unit/workflow/test_application_workflow.py b/ingestion/tests/unit/workflow/test_application_workflow.py
new file mode 100644
index 000000000000..df725f4d493f
--- /dev/null
+++ b/ingestion/tests/unit/workflow/test_application_workflow.py
@@ -0,0 +1,51 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Validate the initialization of the App Workflow
+"""
+import yaml
+
+from metadata.workflow.application import ApplicationWorkflow, AppRunner
+
+
+class TestApp(AppRunner):
+ """Test App class"""
+
+ def close(self) -> None:
+ """I am a test"""
+
+ def run(self) -> None:
+ """I am a test"""
+
+
+def test_init_app() -> None:
+ """We can properly instantiate the app"""
+
+ config = f"""
+ sourcePythonClass: "{__name__}.TestApp"
+ appConfig:
+ type: Automator
+ resources:
+ type: [table]
+ queryFilter: "..."
+ actions:
+ - type: LineagePropagationAction
+ overwriteMetadata: false
+ workflowConfig:
+ openMetadataServerConfig:
+ hostPort: "http://localhost:8585/api"
+ authProvider: "openmetadata"
+ securityConfig:
+ jwtToken: "..."
+ """
+
+ workflow = ApplicationWorkflow.create(yaml.safe_load(config))
+ assert isinstance(workflow, ApplicationWorkflow)
diff --git a/openmetadata-docs/content/partials/v1.5/connectors/storage/connectors-list.md b/openmetadata-docs/content/partials/v1.5/connectors/storage/connectors-list.md
index 8b7ad617ad67..3a796ba54172 100644
--- a/openmetadata-docs/content/partials/v1.5/connectors/storage/connectors-list.md
+++ b/openmetadata-docs/content/partials/v1.5/connectors/storage/connectors-list.md
@@ -2,6 +2,6 @@
{% connectorInfoCard name="S3 Storage" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="ADLS" stage="PROD" href="/connectors/storage/adls" platform="Collate" / %}
-{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="Collate" / %}
+{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="OpenMetadata" / %}
{% /connectorsListContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/content/partials/v1.6/connectors/storage/connectors-list.md b/openmetadata-docs/content/partials/v1.6/connectors/storage/connectors-list.md
index 8b7ad617ad67..3a796ba54172 100644
--- a/openmetadata-docs/content/partials/v1.6/connectors/storage/connectors-list.md
+++ b/openmetadata-docs/content/partials/v1.6/connectors/storage/connectors-list.md
@@ -2,6 +2,6 @@
{% connectorInfoCard name="S3 Storage" stage="PROD" href="/connectors/storage/s3" platform="OpenMetadata" / %}
{% connectorInfoCard name="ADLS" stage="PROD" href="/connectors/storage/adls" platform="Collate" / %}
-{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="Collate" / %}
+{% connectorInfoCard name="GCS" stage="PROD" href="/connectors/storage/gcs" platform="OpenMetadata" / %}
{% /connectorsListContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md b/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md
index 09a6f8e63ed0..24bc4ebc30df 100644
--- a/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md
+++ b/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md
@@ -113,75 +113,14 @@ We believe this update will bring greater consistency and clarity to our version
# Backward Incompatible Changes
-## 1.5.0
+## 1.6.0
-### Multi Owners
-OpenMetadata allows a single user or a team to be tagged as owners for any data assets. In Release 1.5.0, we allow users to tag multiple individual owners or a single team. This will allow organizations to add ownership to multiple individuals without necessarily needing to create a team around them like previously.
+### Ingestion Workflow Status
-This is a backward incompatible change, if you are using APIs, please make sure the owner field is now changed to “owners”
+We are updating how we compute the success percentage. Previously, partial success only took into account the results
+of the Source (e.g., the tables we were able to properly retrieve from Snowflake, Redshift, etc.). This meant we had
+an error threshold where, if at least 90% of the tables were successfully ingested, we would still consider the
+workflow successful. However, any errors when sending the information to OpenMetadata were considered a failure.
-### Import/Export Format
-To support the multi-owner format, we have now changed how we export and import the CSV file in glossary, services, database, schema, table, etc. The new format will be
-user:userName;team:TeamName
-
-If you are importing an older file, please make sure to make this change.
-
-### Pydantic V2
-The core of OpenMetadata are the JSON Schemas that define the metadata standard. These schemas are automatically translated into Java, Typescript, and Python code with Pydantic classes.
-
-In this release, we have [migrated](https://docs.pydantic.dev/latest/migration/) the codebase from Pydantic V1 to Pydantic V2.
-
-### Deployment Related Changes (OSS only)
-
-`./bootstrap/bootstrap_storage.sh` **removed**
-
-OpenMetadata community has built rolling upgrades to database schema and the data to make upgrades easier. This tool is now called as ./bootstrap/openmetadata-ops.sh and has been part of our releases since 1.3. The `bootstrap_storage.sh` doesn’t support new native schemas in OpenMetadata. Hence, we have deleted this tool from this release.
-
-While upgrading, please refer to our Upgrade Notes in the documentation. Always follow the best practices provided there.
-
-### Database Connection Pooling
-
-OpenMetadata uses Jdbi to handle database-related operations such as read/write/delete. In this release, we introduced additional configs to help with connection pooling, allowing the efficient use of a database with low resources.
-
-Please update the defaults if your cluster is running at a large scale to scale up the connections efficiently.
-
-For the new configuration, please refer to the [doc](https://docs.open-metadata.org/latest/deployment/database-connection-pooling) here
-
-### Data Insights
-
-The Data Insights application is meant to give you a quick glance at your data's state and allow you to take action based on the information you receive. To continue pursuing this objective, the application was completely refactored to allow customizability.
-
-Part of this refactor was making Data Insights an internal application, no longer relying on an external pipeline. This means triggering Data Insights from the Python SDK will no longer be possible.
-
-With this change you will need to run a backfill on the Data Insights for the last couple of days since the Data Assets data changed.
-
-### UI Changes
-
-#### New Explore Page
-
-Explore page displays hierarchically organized data assets by grouping them into `services > database > schema > tables/stored procedures`. This helps users organically find the data asset they are looking for based on a known database or schema they were using. This is a new feature and changes the way the Explore page was built in previous releases.
-
-#### Connector Schema Changes
-
-In the latest release, several updates and enhancements have been made to the JSON schema across various connectors. These changes aim to improve security, configurability, and expand integration capabilities. Here's a detailed breakdown of the updates:
-
-- **KafkaConnect**: Added `schemaRegistryTopicSuffixName` to enhance topic configuration flexibility for schema registries.
-- **GCS Datalake**: Introduced `bucketNames` field, allowing users to specify targeted storage buckets within the Google Cloud Storage environment.
-- **OpenLineage**: Added `saslConfig` to enhance security by enabling SASL (Simple Authentication and Security Layer) configuration.
-- **Salesforce**: Added sslConfig to strengthen the security layer for Salesforce connections by supporting SSL.
-- **DeltaLake**: Updated schema by moving metastoreConnection to a newly created `metastoreConfig.json` file. Additionally, introduced `configSource` to better define source configurations, with new support for `metastoreConfig.json` and `storageConfig.json`.
-- **Iceberg RestCatalog**: Removed clientId and `clientSecret` as mandatory fields, making the schema more flexible for different authentication methods.
-- **DBT Cloud Pipelines**: Added as a new connector to support cloud-native data transformation workflows using DBT.
-- **Looker**: Expanded support to include connections using GitLab integration, offering more flexible and secure version control.
-- **Tableau**: Enhanced support by adding capabilities for connecting with `TableauPublishedDatasource` and `TableauEmbeddedDatasource`, providing more granular control over data visualization and reporting.
-
-### Include DDL
-During the Database Metadata ingestion, we can optionally pick up the DDL for both tables and views. During the metadata ingestion, we use the view DDLs to generate the View Lineage.
-
-To reduce the processing time for out-of-the-box workflows, we are disabling the include DDL by default, whereas before, it was enabled, which potentially led to long-running workflows.
-
-### Secrets Manager
-Starting with the release 1.5.0, the JWT Token for the bots will be sent to the Secrets Manager if you configured one. It won't appear anymore in your dag_generated_configs in Airflow.
-
-### Python SDK
-The `metadata insight` command has been removed. Since Data Insights application was moved to be an internal system application instead of relying on external pipelines the SDK command to run the pipeline was removed.
+Now, we're changing this behavior to consider the success rate of all the steps involved in the workflow. The UI will
+then show more `Partial Success` statuses rather than `Failed`, properly reflecting the real state of the workflow.
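+
+As a simplified sketch of the new computation (hypothetical names, not the actual implementation), the
+success percentage is now derived from the record and failure counts of every step in the workflow:
+
+```python
+from dataclasses import dataclass
+
+
+@dataclass
+class StepSummary:
+    """Illustrative stand-in for a workflow step's counters."""
+
+    records: int  # entities processed successfully
+    failures: int  # entities that raised errors
+
+
+def success_percentage(steps: list[StepSummary]) -> float:
+    """Aggregate successes and failures across ALL steps, not just the source."""
+    records = sum(step.records for step in steps)
+    failures = sum(step.failures for step in steps)
+    total = records + failures
+    return 100.0 if total == 0 else round(100 * records / total, 2)
+```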
diff --git a/openmetadata-docs/content/v1.4.x/connectors/pipeline/glue-pipeline/yaml.md b/openmetadata-docs/content/v1.4.x/connectors/pipeline/glue-pipeline/yaml.md
index e6a57bcfcf62..86b773919e10 100644
--- a/openmetadata-docs/content/v1.4.x/connectors/pipeline/glue-pipeline/yaml.md
+++ b/openmetadata-docs/content/v1.4.x/connectors/pipeline/glue-pipeline/yaml.md
@@ -112,11 +112,11 @@ This is a sample config for Glue:
```yaml {% isCodeBlock=true %}
source:
- type: glue
+ type: gluepipeline
serviceName: local_glue
serviceConnection:
config:
- type: Glue
+ type: GluePipeline
awsConfig:
```
```yaml {% srNumber=1 %}
diff --git a/openmetadata-docs/content/v1.4.x/deployment/bare-metal/index.md b/openmetadata-docs/content/v1.4.x/deployment/bare-metal/index.md
index f9bb71f11abe..05447310fd90 100644
--- a/openmetadata-docs/content/v1.4.x/deployment/bare-metal/index.md
+++ b/openmetadata-docs/content/v1.4.x/deployment/bare-metal/index.md
@@ -53,7 +53,7 @@ You can refer a sample script [here](https://github.com/open-metadata/OpenMetada
## Elasticsearch (version 8.X)
-OpenMetadata supports ElasticSearch version up to 8.10.2. To install or upgrade Elasticsearch to a supported version please see the instructions for your operating system at
+OpenMetadata supports ElasticSearch versions up to 8.11.4. To install or upgrade Elasticsearch to a supported version, please see the instructions for your operating system at
[Installing ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html).
Please follow the instructions here to [install ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/7.13/setup.html).
@@ -169,7 +169,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version 12 or higher
For Production Systems, we recommend Amazon RDS to be in Multiple Availability Zones. For Amazon OpenSearch (or ElasticSearch) Service, we recommend Multiple Availability Zones with minimum 3 Master Nodes.
diff --git a/openmetadata-docs/content/v1.4.x/deployment/docker/index.md b/openmetadata-docs/content/v1.4.x/deployment/docker/index.md
index 338a79dfb120..8b8f21bf7789 100644
--- a/openmetadata-docs/content/v1.4.x/deployment/docker/index.md
+++ b/openmetadata-docs/content/v1.4.x/deployment/docker/index.md
@@ -243,7 +243,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version 12 or higher
Note:-
diff --git a/openmetadata-docs/content/v1.4.x/deployment/kubernetes/aks.md b/openmetadata-docs/content/v1.4.x/deployment/kubernetes/aks.md
index 7c606ecfd6c1..3e82e230511d 100644
--- a/openmetadata-docs/content/v1.4.x/deployment/kubernetes/aks.md
+++ b/openmetadata-docs/content/v1.4.x/deployment/kubernetes/aks.md
@@ -15,7 +15,7 @@ We support
- Azure SQL (MySQL) engine version 8 or higher
- Azure SQL (PostgreSQL) engine version 12 or higher
-- Elastic Cloud (ElasticSearch version 8.10.2)
+- Elastic Cloud (ElasticSearch version 8.11.4)
Once you have the Azure SQL and Elastic Cloud on Azure configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.4.x/deployment/kubernetes/on-prem.md b/openmetadata-docs/content/v1.4.x/deployment/kubernetes/on-prem.md
index ab84a05697f2..8751547ca565 100644
--- a/openmetadata-docs/content/v1.4.x/deployment/kubernetes/on-prem.md
+++ b/openmetadata-docs/content/v1.4.x/deployment/kubernetes/on-prem.md
@@ -22,7 +22,7 @@ We support
- MySQL engine version 8 or higher
- PostgreSQL engine version 12 or higher
-- ElasticSearch version 8.X (upto 8.10.2) or OpenSearch Version 2.X (upto 2.7)
+- ElasticSearch version 8.X (up to 8.11.4) or OpenSearch version 2.X (up to 2.7)
Once you have the External Database and Search Engine configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.4.x/deployment/upgrade/versions/110-to-120.md b/openmetadata-docs/content/v1.4.x/deployment/upgrade/versions/110-to-120.md
index c4d742d006b5..2124ccc57a2a 100644
--- a/openmetadata-docs/content/v1.4.x/deployment/upgrade/versions/110-to-120.md
+++ b/openmetadata-docs/content/v1.4.x/deployment/upgrade/versions/110-to-120.md
@@ -160,7 +160,7 @@ For Kubernetes Deployment, `openmetadata.config.database.dbParams` is available
### Version Upgrades
- The OpenMetadata Server is now based on **JDK 17**
-- OpenMetadata now **requires** **Elasticsearch** version **8.10.2** or **Opensearch** version **2.7**
+- OpenMetadata now **requires** **Elasticsearch** version **8.11.4** or **Opensearch** version **2.7**
There is no direct migration to bump the indexes to the new supported versions. You might see errors like:
diff --git a/openmetadata-docs/content/v1.4.x/menu.md b/openmetadata-docs/content/v1.4.x/menu.md
index db5ad502fb4c..a91c580bbd82 100644
--- a/openmetadata-docs/content/v1.4.x/menu.md
+++ b/openmetadata-docs/content/v1.4.x/menu.md
@@ -5,8 +5,6 @@ site_menu:
- category: Quickstart
url: /quick-start
- color: violet-70
- icon: quickstart
- category: Quickstart / Try OpenMetadata in Docker
url: /quick-start/local-docker-deployment
- category: Quickstart / Try OpenMetadata in Kubernetes
@@ -16,8 +14,6 @@ site_menu:
- category: Deployment
url: /deployment
- color: violet-70
- icon: deployment
- category: Deployment / Bare Metal Deployment
url: /deployment/bare-metal
- category: Deployment / Bare Metal Deployment / Enable Security
@@ -208,8 +204,6 @@ site_menu:
- category: Connectors
url: /connectors
- color: violet-70
- icon: openmetadata
- category: Connectors / Database
url: /connectors/database
@@ -653,8 +647,6 @@ site_menu:
- category: How-to Guides
url: /how-to-guides
- color: violet-70
- icon: openmetadata
- category: How-to Guides / Admin Guide
url: /how-to-guides/admin-guide
@@ -875,8 +867,6 @@ site_menu:
- category: Releases
url: /releases
- color: violet-70
- icon: overview
- category: Releases / Latest Release
url: /releases/latest-release
- category: Releases / Supported Releases
@@ -959,8 +949,6 @@ site_menu:
- category: Main Concepts
url: /main-concepts
- color: violet-70
- icon: main-concepts
- category: Main Concepts / High Level Design
url: /main-concepts/high-level-design
- category: Main Concepts / Backend DB
@@ -1888,8 +1876,6 @@ site_menu:
- category: Developers
url: /developers
- color: violet-70
- icon: developers
- category: Developers / Architecture
url: /developers/architecture
- category: Developers / Architecture / Understand Code Layout
@@ -1935,8 +1921,6 @@ site_menu:
- category: SDK
url: /sdk
- color: violet-70
- icon: sdk
- category: SDK / Python SDK
url: /sdk/python
- category: SDK / Python SDK / Entities
diff --git a/openmetadata-docs/content/v1.5.x/Metapilot/how-to-use-metapilot.md b/openmetadata-docs/content/v1.5.x/Metapilot/how-to-use-metapilot.md
new file mode 100644
index 000000000000..3bf1b2a56350
--- /dev/null
+++ b/openmetadata-docs/content/v1.5.x/Metapilot/how-to-use-metapilot.md
@@ -0,0 +1,110 @@
+---
+title: How to Use MetaPilot
+slug: /metapilot/how-to-use-metapilot
+collate: true
+---
+
+# How to Use MetaPilot
+
+## 1. Setting Up MetaPilot
+- Navigate to **Settings > Applications** in the Collate platform.
+
+{% image
+src="/images/v1.5/metapilot/setting-up-metapilot-1.png"
+alt="setting up metapilot"
+caption="Navigate to Settings > Applications"
+/%}
+
+- Install MetaPilot by following the on-screen instructions.
+
+{% image
+src="/images/v1.5/metapilot/setting-up-metapilot-2.png"
+alt="Install MetaPilot"
+caption="Install MetaPilot"
+/%}
+
+- Select the databases for which you want MetaPilot to automatically generate descriptions. You can configure multiple databases and set a default database for the MetaPilot chatbot to work with.
+
+{% image
+src="/images/v1.5/metapilot/setting-up-metapilot-3.png"
+alt="automatically generate descriptions"
+caption="automatically generate descriptions"
+/%}
+
+- **Scheduling**: Schedule MetaPilot to run regularly, automatically generating metadata at predefined intervals (e.g., weekly).
+
+{% image
+src="/images/v1.5/metapilot/setting-up-metapilot-4.png"
+alt="Schedule MetaPilot"
+caption="Schedule MetaPilot"
+/%}
+
+## 2. Using the MetaPilot Chatbot
+- The MetaPilot chatbot icon appears on every page after installation.
+
+{% image
+src="/images/v1.5/metapilot/using-metapilot-chatbot-1.png"
+alt="chatbot icon"
+caption="chatbot icon"
+/%}
+
+- Interact with the chatbot by typing natural language questions. For example:
+ - “Show me sales data for Q1.”
+ - “What is the average revenue per customer?”
+
+- MetaPilot will generate the corresponding SQL query and provide a detailed explanation of the query logic.
+
+{% image
+src="/images/v1.5/metapilot/using-metapilot-chatbot-2.png"
+alt="natural language questions"
+caption="natural language questions"
+/%}
+
+- Users can refine queries by providing further instructions, and the chatbot will adjust the SQL query accordingly.
+
+## 3. Optimizing and Fixing SQL Queries
+
+- The **Metadata Usage** workflows ingest the queries run against your tables. You can see each query's execution details in the **Queries** tab.
+
+{% image
+src="/images/v1.5/metapilot/fixing-sql-queries-1.png"
+alt="metadata usage workflows"
+caption="metadata usage workflows"
+/%}
+
+- If a query runs inefficiently, ask the chatbot to optimize it by typing: “Optimize this query.”
+
+{% image
+src="/images/v1.5/metapilot/fixing-sql-queries-3.png"
+alt="Optimize this query"
+caption="Optimize this query"
+/%}
+
+- MetaPilot will return a more efficient version of the SQL query, which you can then copy and execute in your database.
+
+- If the query contains errors or isn’t functioning correctly, ask MetaPilot: “Can you fix this query?”
+
+- MetaPilot will correct the query and provide a working version.
+
+{% image
+src="/images/v1.5/metapilot/fixing-sql-queries-4.png"
+alt="Can you fix this query"
+caption="Can you fix this query"
+/%}
+
+## 4. Reviewing Generated Metadata
+- Once MetaPilot generates descriptions for tables and columns, navigate to the **database view** to review the metadata.
+- You can accept or reject each suggestion individually or choose to accept all suggestions in bulk.
+
+{% image
+src="/images/v1.5/metapilot/reviewing-generated-metadata.png"
+alt="reviewing Generated Metadata"
+caption="reviewing Generated Metadata"
+/%}
+
+- MetaPilot allows you to document entire datasets in a matter of minutes, significantly reducing the manual effort required to maintain metadata.
+
+## Best Practices
+- **Regular Scheduling**: Schedule MetaPilot to run at regular intervals to ensure your metadata is always up-to-date, especially when dealing with frequently changing datasets.
+- **Leverage the Chatbot for Query Writing**: Encourage both technical and non-technical users to use the MetaPilot chatbot for SQL query generation. It simplifies complex query writing and ensures accurate results.
+- **Optimize Queries Regularly**: Monitor the performance of your queries and use MetaPilot to optimize them, especially when working with large datasets or queries that require complex joins and filters.
diff --git a/openmetadata-docs/content/v1.5.x/Metapilot/index.md b/openmetadata-docs/content/v1.5.x/Metapilot/index.md
new file mode 100644
index 000000000000..43f031cf4e0a
--- /dev/null
+++ b/openmetadata-docs/content/v1.5.x/Metapilot/index.md
@@ -0,0 +1,85 @@
+---
+title: MetaPilot
+slug: /metapilot
+collate: true
+---
+
+# MetaPilot Technical Documentation
+
+{% youtube videoId="6glMYLzxNqk" start="0:00" end="04:20" width="560px" height="315px" /%}
+
+MetaPilot is an AI-powered tool within Collate that simplifies and enhances metadata management. By integrating generative AI, MetaPilot assists users in automating the documentation of data assets, writing and optimizing SQL queries, and interacting with data through natural language. This first-of-its-kind data copilot improves productivity by automating tedious tasks and providing intelligent insights into your data environment.
+
+## Key Features
+
+- **Automated Data Documentation**: Automatically generates metadata descriptions for tables and columns, saving time and effort for data owners and stewards.
+- **Natural Language SQL Query Generation**: Allows users to interact with MetaPilot through a chatbot to generate SQL queries by simply asking questions in plain English.
+- **SQL Query Optimization and Fixing**: Capable of optimizing and troubleshooting SQL queries to improve their performance and efficiency.
+
+## Why MetaPilot is Useful
+
+### Metadata Management Challenges
+
+Managing metadata across multiple data assets can be overwhelming due to the influx of new data and changing team dynamics. MetaPilot addresses these challenges by:
+
+- Automating metadata description generation.
+- Simplifying the creation and optimization of SQL queries.
+- Reducing manual effort and enhancing data quality.
+
+### Time-Saving Features
+
+Documenting thousands of tables manually is tedious and time-consuming. MetaPilot automates metadata generation, allowing data teams to focus on high-value tasks and ensuring that data assets are consistently documented and understood across the organization.
+
+## Use Cases
+
+### 1. Automatic Data Asset Documentation
+
+{% image
+src="/images/v1.5/metapilot/reviewing-generated-metadata.png"
+alt="Automatic Data Asset Documentation"
+caption="Auto Generate data Asset Documentation"
+/%}
+
+- **Problem**: Manually creating metadata descriptions for large datasets is labor-intensive and error-prone.
+- **Solution**: MetaPilot’s generative AI automates the process, analyzing database structures and suggesting accurate descriptions for tables and columns.
+- **How It Works**: After configuring MetaPilot, it scans the database schema and generates metadata descriptions automatically. Users can review these descriptions and approve or reject them in bulk or individually.
+- **Benefit**: Streamlines the documentation process, ensuring consistent and up-to-date metadata across all datasets.
+
+### 2. Natural Language SQL Query Generation
+
+{% image
+src="/images/v1.5/metapilot/using-metapilot-chatbot-2.png"
+alt="Natural Language SQL Query Generation"
+caption="Natural Language SQL Query Generation"
+/%}
+
+- **Problem**: Non-technical users often struggle with writing SQL queries to extract insights from databases.
+- **Solution**: MetaPilot’s chatbot allows users to ask questions in natural language, generating SQL queries and providing explanations to help extract the required data quickly and easily.
+- **How It Works**: Users can click on the MetaPilot chatbot widget and ask questions like "Show me sales data from last quarter." MetaPilot generates the corresponding SQL query and explains its logic.
+- **Benefit**: Democratizes data access by enabling users of all technical levels to interact with data without deep SQL knowledge.
+
+### 3. SQL Query Optimization and Troubleshooting
+
+{% image
+src="/images/v1.5/metapilot/fixing-sql-queries-3.png"
+alt="SQL Query Optimization and Troubleshooting"
+caption="SQL Query Optimization"
+/%}
+
+- **Problem**: SQL queries can become complex and inefficient, leading to performance issues and increased costs.
+- **Solution**: MetaPilot optimizes inefficient queries to improve performance, saving both time and resources.
+- **How It Works**: If a query runs too long, users can request MetaPilot to optimize it. MetaPilot provides a more efficient SQL query version that can be implemented immediately.
+- **Benefit**: Enhances query performance, reduces costs associated with inefficient queries, and speeds up data processing.
+
+### 4. Fixing SQL Queries
+
+{% image
+src="/images/v1.5/metapilot/fixing-sql-queries-4.png"
+alt="Fixing SQL Queries"
+caption="Fixing SQL Queries"
+/%}
+
+- **Problem**: Complex queries can cause issues even for SQL experts.
+- **Solution**: MetaPilot can fix problematic SQL queries, ensuring they run correctly and efficiently.
+- **How It Works**: Users can ask MetaPilot, "Can you fix this query for me?" It analyzes the query, detects issues, and returns a corrected version ready for use.
+- **Benefit**: Simplifies writing and maintaining queries, allowing data teams to focus on analysis rather than troubleshooting.
diff --git a/openmetadata-docs/content/v1.5.x/collate-menu.md b/openmetadata-docs/content/v1.5.x/collate-menu.md
index 21c35dfa2f7c..08d034076c29 100644
--- a/openmetadata-docs/content/v1.5.x/collate-menu.md
+++ b/openmetadata-docs/content/v1.5.x/collate-menu.md
@@ -5,8 +5,6 @@ site_menu:
- category: Getting Started
url: /getting-started
- color: violet-70
- icon: openmetadata
- category: Getting Started / Day 1
url: /getting-started/day-1
@@ -27,8 +25,6 @@ site_menu:
- category: Connectors
url: /connectors
- color: violet-70
- icon: openmetadata
- category: Connectors / Database
url: /connectors/database
@@ -498,8 +494,6 @@ site_menu:
- category: How-to Guides
url: /how-to-guides
- color: violet-70
- icon: openmetadata
- category: How-to Guides / Admin Guide
url: /how-to-guides/admin-guide
@@ -682,6 +676,10 @@ site_menu:
- category: How-to Guides / Data Governance
url: /how-to-guides/data-governance
+ - category: How-to Guides / Data Governance / Automation
+ url: /how-to-guides/data-governance/automation
+ - category: How-to Guides / Data Governance / Automation / How to Set Up Automations in Collate
+ url: /how-to-guides/data-governance/automation/set-up-automation
- category: How-to Guides / Data Governance / Glossary
url: /how-to-guides/data-governance/glossary
- category: How-to Guides / Data Governance / Glossary / What is a Glossary Term
@@ -725,8 +723,6 @@ site_menu:
- category: Enable Security
url: /security
- color: violet-70
- icon: deployment
- category: Enable Security / Basic Authentication
url: /security/basic-auth
@@ -759,8 +755,6 @@ site_menu:
- category: Releases
url: /releases
- color: violet-70
- icon: overview
- category: Releases / Latest Release
url: /releases/latest-release
- category: Releases / Supported Releases
@@ -855,8 +849,6 @@ site_menu:
- category: Main Concepts
url: /main-concepts
- color: violet-70
- icon: main-concepts
- category: Main Concepts / High Level Design
url: /main-concepts/high-level-design
- category: Main Concepts / Backend DB
@@ -1790,8 +1782,6 @@ site_menu:
- category: SDK
url: /sdk
- color: violet-70
- icon: sdk
- category: SDK / Python SDK
url: /sdk/python
- category: SDK / Python SDK / Entities
@@ -1870,4 +1860,8 @@ site_menu:
url: /sdk/java
- category: SDK / Go
url: /sdk/go
+ - category: MetaPilot
+ url: /metapilot
+ - category: MetaPilot / How to Use MetaPilot
+ url: /metapilot/how-to-use-metapilot
---
diff --git a/openmetadata-docs/content/v1.5.x/connectors/ingestion/lineage/spark-lineage.md b/openmetadata-docs/content/v1.5.x/connectors/ingestion/lineage/spark-lineage.md
index 3e48cfc5eaa0..43a7ebf8cc44 100644
--- a/openmetadata-docs/content/v1.5.x/connectors/ingestion/lineage/spark-lineage.md
+++ b/openmetadata-docs/content/v1.5.x/connectors/ingestion/lineage/spark-lineage.md
@@ -343,3 +343,40 @@ spark.openmetadata.transport.timeout 30
```
After all these steps are completed you can start/restart your compute instance and you are ready to extract the lineage from spark to OpenMetadata.
+
+
+## Using Spark Agent with Glue
+
+Follow the steps below to use the OpenMetadata Spark Agent with Glue.
+
+### 1. Specify the OpenMetadata Spark Agent JAR URL
+
+1. Upload the OpenMetadata Spark Agent JAR to S3.
+2. Navigate to the Glue job. In the Job details tab, go to Advanced properties → Libraries → Dependent Jars path.
+3. Add the S3 URL of the OpenMetadata Spark Agent JAR to the Dependent Jars path.
+
+{% image
+ src="/images/v1.5/connectors/spark/glue-job-jar.png"
+ alt="Glue Job Configure Jar"
+ caption="Glue Job Configure Jar"
+ /%}
+
+
+### 2. Add Spark configuration in Job Parameters
+
+In the same Job details tab, add a new property under Job parameters:
+
+1. Add the `--conf` property with the following value. Make sure to customize this configuration as described in the documentation above.
+
+```
+spark.extraListeners=org.openmetadata.spark.agent.OpenMetadataSparkListener --conf spark.openmetadata.transport.hostPort=https://your-org.host:port --conf spark.openmetadata.transport.type=openmetadata --conf spark.openmetadata.transport.jwtToken= --conf spark.openmetadata.transport.pipelineServiceName=glue_spark_pipeline_service --conf spark.openmetadata.transport.pipelineName=glue_pipeline_name --conf spark.openmetadata.transport.timeout=30
+```
+
+2. Add the `--user-jars-first` parameter and set its value to `true`.
+
+{% image
+ src="/images/v1.5/connectors/spark/glue-job-params.png"
+ alt="Glue Job Configure Params"
+ caption="Glue Job Configure Params"
+ /%}
+
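+For readability, here is the same value broken out into its individual settings (illustrative; in Glue it
+is supplied as the single `--conf` job parameter value shown above):
+
+```
+spark.extraListeners=org.openmetadata.spark.agent.OpenMetadataSparkListener
+--conf spark.openmetadata.transport.hostPort=https://your-org.host:port
+--conf spark.openmetadata.transport.type=openmetadata
+--conf spark.openmetadata.transport.jwtToken=
+--conf spark.openmetadata.transport.pipelineServiceName=glue_spark_pipeline_service
+--conf spark.openmetadata.transport.pipelineName=glue_pipeline_name
+--conf spark.openmetadata.transport.timeout=30
+```
+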
diff --git a/openmetadata-docs/content/v1.5.x/connectors/metadata/alation/index.md b/openmetadata-docs/content/v1.5.x/connectors/metadata/alation/index.md
index e732fe1d71ab..a7ff96c95ded 100644
--- a/openmetadata-docs/content/v1.5.x/connectors/metadata/alation/index.md
+++ b/openmetadata-docs/content/v1.5.x/connectors/metadata/alation/index.md
@@ -122,9 +122,9 @@ Choose either postgres or mysql connection depending on the db:
**ingestDatasources**: Specifies if databases, schemas and tables should be included while ingesting. By default is set to `true`.
-**ingestDomains**: Specifies if hidden domains and subdomains should be included while ingesting. By default is set to `true`.
+**ingestDomains**: Specifies whether domains and subdomains should be included while ingesting. By default, it is set to `true`.
-**ingestDashboards**: Specifies if hidden BI sources and dashboards should be included while ingesting. By default is set to `true`.
+**ingestDashboards**: Specifies whether BI sources and dashboards should be included while ingesting. By default, it is set to `true`.
**alationTagClassificationName**: Specify the classification name under which the tags from alation will be created in OpenMetadata. By default it is set to `alationTags`.
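+
+For example, a connection config that keeps all of the above enabled might look like this (illustrative
+snippet; refer to the connector's YAML guide for the complete structure):
+
+```yaml
+serviceConnection:
+  config:
+    type: Alation
+    ingestDatasources: true
+    ingestDomains: true
+    ingestDashboards: true
+    alationTagClassificationName: alationTags
+```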
diff --git a/openmetadata-docs/content/v1.5.x/connectors/pipeline/glue-pipeline/yaml.md b/openmetadata-docs/content/v1.5.x/connectors/pipeline/glue-pipeline/yaml.md
index 5898468a0f98..4468dc424867 100644
--- a/openmetadata-docs/content/v1.5.x/connectors/pipeline/glue-pipeline/yaml.md
+++ b/openmetadata-docs/content/v1.5.x/connectors/pipeline/glue-pipeline/yaml.md
@@ -112,11 +112,11 @@ This is a sample config for Glue:
```yaml {% isCodeBlock=true %}
source:
- type: glue
+ type: gluepipeline
serviceName: local_glue
serviceConnection:
config:
- type: Glue
+ type: GluePipeline
awsConfig:
```
```yaml {% srNumber=1 %}
diff --git a/openmetadata-docs/content/v1.5.x/deployment/backup-restore-metadata.md b/openmetadata-docs/content/v1.5.x/deployment/backup-restore-metadata.md
index 5e704c7e56de..306423cf28f8 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/backup-restore-metadata.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/backup-restore-metadata.md
@@ -70,15 +70,16 @@ Ingest some data...
```shell
BACKUP_FILE="backup_$(date +%Y%m%d%H%M).sql"
-DOCKER_COMPOSE_FILE="docker/development/docker-compose.yml"
+export COMPOSE_FILE="docker/development/docker-compose.yml"
# backup
-docker compose -f $DOCKER_COMPOSE_FILE exec ingestion mysqldump --no-tablespaces -u openmetadata_user -popenmetadata_password -h mysql -P 3306 openmetadata_db > $BACKUP_FILE
+docker compose exec ingestion mysqldump --no-tablespaces -u openmetadata_user -popenmetadata_password -h mysql -P 3306 openmetadata_db > $BACKUP_FILE
# create the restore database
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "create database restore;"
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "grant all privileges on restore.* to 'openmetadata_user'@'%';"
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "flush privileges;"
+docker compose exec mysql mysql -u root -ppassword -e "create database restore;"
+docker compose exec mysql mysql -u root -ppassword -e "grant all privileges on restore.* to 'openmetadata_user'@'%';"
+docker compose exec mysql mysql -u root -ppassword -e "GRANT SUPER, SYSTEM_VARIABLES_ADMIN, SESSION_VARIABLES_ADMIN ON *.* TO 'openmetadata_user'@'%';"
+docker compose exec mysql mysql -u root -ppassword -e "flush privileges;"
# restore from the backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -T ingestion mysql -u openmetadata_user -popenmetadata_password -h mysql -P 3306 restore < $BACKUP_FILE
+docker compose exec -T ingestion mysql -u openmetadata_user -popenmetadata_password -h mysql -P 3306 restore < $BACKUP_FILE
```
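+
+Optionally, as a quick sanity check (hypothetical step using the same credentials as above), confirm that
+the restored tables exist:
+
+```shell
+docker compose exec mysql mysql -u openmetadata_user -popenmetadata_password -e "SHOW TABLES FROM restore;"
+```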
### 3. Restart the docker deployment with the restored database
@@ -102,14 +103,14 @@ Ingest some data...
```shell
BACKUP_FILE="backup_$(date +%Y%m%d%H%M).sql"
-DOCKER_COMPOSE_FILE="docker/development/docker-compose-postgres.yml"
+export COMPOSE_FILE="docker/development/docker-compose-postgres.yml"
# backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password ingestion pg_dump -U openmetadata_user -h postgresql -d openmetadata_db > $BACKUP_FILE
+docker compose exec -e PGPASSWORD=openmetadata_password ingestion pg_dump -U openmetadata_user -h postgresql -d openmetadata_db > $BACKUP_FILE
# create the restore database
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "create database restore;"
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "ALTER DATABASE restore OWNER TO openmetadata_user;"
+docker compose exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "create database restore;"
+docker compose exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "ALTER DATABASE restore OWNER TO openmetadata_user;"
# restore from the backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password -T ingestion psql -U openmetadata_user -h postgresql -d restore < $BACKUP_FILE
+docker compose exec -e PGPASSWORD=openmetadata_password -T ingestion psql -U openmetadata_user -h postgresql -d restore < $BACKUP_FILE
```
### 3. Restart the docker deployment with the restored database
diff --git a/openmetadata-docs/content/v1.5.x/deployment/bare-metal/index.md b/openmetadata-docs/content/v1.5.x/deployment/bare-metal/index.md
index c993747ef63c..180b1a29c9d5 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/bare-metal/index.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/bare-metal/index.md
@@ -58,7 +58,7 @@ If you are facing an error `ERROR: could not compute MD5 hash: disabled for FIPS
## Elasticsearch (version 8.X)
-OpenMetadata supports ElasticSearch version up to 8.10.2. To install or upgrade Elasticsearch to a supported version please see the instructions for your operating system at
+OpenMetadata supports ElasticSearch versions up to 8.11.4. To install or upgrade Elasticsearch to a supported version, please see the instructions for your operating system at
[Installing ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html).
Please follow the instructions here to [install ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/7.13/setup.html).
@@ -174,7 +174,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version 12 or higher
For Production Systems, we recommend Amazon RDS to be in Multiple Availability Zones. For Amazon OpenSearch (or ElasticSearch) Service, we recommend Multiple Availability Zones with minimum 3 Master Nodes.
diff --git a/openmetadata-docs/content/v1.5.x/deployment/docker/index.md b/openmetadata-docs/content/v1.5.x/deployment/docker/index.md
index b5a8227e3645..0e3689962547 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/docker/index.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/docker/index.md
@@ -244,7 +244,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version 12 or higher
Note:-
diff --git a/openmetadata-docs/content/v1.5.x/deployment/kubernetes/aks.md b/openmetadata-docs/content/v1.5.x/deployment/kubernetes/aks.md
index 5c4abeb5b242..39d3402c610b 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/kubernetes/aks.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/kubernetes/aks.md
@@ -16,7 +16,7 @@ We support
- Azure SQL (MySQL) engine version 8 or higher
- Azure SQL (PostgreSQL) engine version 12 or higher
-- Elastic Cloud (ElasticSearch version 8.10.2)
+- Elastic Cloud (ElasticSearch version 8.11.4)
Once you have the Azure SQL and Elastic Cloud on Azure configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.5.x/deployment/kubernetes/on-prem.md b/openmetadata-docs/content/v1.5.x/deployment/kubernetes/on-prem.md
index e231b1440d6d..ed0217709f4d 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/kubernetes/on-prem.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/kubernetes/on-prem.md
@@ -23,7 +23,7 @@ We support
- MySQL engine version 8 or higher
- PostgreSQL engine version 12 or higher
-- ElasticSearch version 8.X (upto 8.10.2) or OpenSearch Version 2.X (upto 2.7)
+- ElasticSearch version 8.X (up to 8.11.4) or OpenSearch version 2.X (up to 2.7)
Once you have the External Database and Search Engine configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.5.x/deployment/upgrade/versions/110-to-120.md b/openmetadata-docs/content/v1.5.x/deployment/upgrade/versions/110-to-120.md
index b661db7eb18e..c8a61dba23a4 100644
--- a/openmetadata-docs/content/v1.5.x/deployment/upgrade/versions/110-to-120.md
+++ b/openmetadata-docs/content/v1.5.x/deployment/upgrade/versions/110-to-120.md
@@ -161,7 +161,7 @@ For Kubernetes Deployment, `openmetadata.config.database.dbParams` is available
### Version Upgrades
- The OpenMetadata Server is now based on **JDK 17**
-- OpenMetadata now **requires** **Elasticsearch** version **8.10.2** or **Opensearch** version **2.7**
+- OpenMetadata now **requires** **Elasticsearch** version **8.11.4** or **Opensearch** version **2.7**
There is no direct migration to bump the indexes to the new supported versions. You might see errors like:
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/index.md b/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/index.md
new file mode 100644
index 000000000000..c48e1bee9516
--- /dev/null
+++ b/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/index.md
@@ -0,0 +1,78 @@
+---
+title: Collate Automations Documentation
+slug: /how-to-guides/data-governance/automation
+collate: true
+---
+
+# Collate Automations
+
+{% youtube videoId="ug08aLUyTyE" start="0:00" end="14:52" width="560px" height="315px" /%}
+
+## Overview
+
+Collate's **Automation** feature is a powerful tool designed to simplify and streamline metadata management tasks. By automating repetitive actions such as assigning owners, domains, or tagging data, Collate helps maintain consistency in metadata across an organization's datasets. These automations reduce manual effort and ensure that metadata is always up-to-date, accurate, and governed according to predefined policies.
+
+## Why Automations are Useful
+
+Managing metadata manually can be challenging, particularly in dynamic environments where data constantly evolves. Collate's Automation feature addresses several key pain points:
+
+- **Maintaining Consistency**: Automation helps ensure that metadata such as ownership, tags, and descriptions are applied consistently across all data assets.
+- **Saving Time**: Automations allow data teams to focus on higher-value tasks by eliminating the need for manual updates and maintenance.
+- **Enforcing Governance Policies**: Automations help ensure that data follows organizational policies at all times by automatically applying governance rules (e.g., assigning data owners or domains).
+- **Data Quality and Accountability**: Data quality suffers without clear ownership. Automating ownership assignments helps ensure that data quality issues are addressed efficiently.
+
+## Key Use Cases for Collate Automations
+
+### 1. Bulk Ownership and Domain Assignment
+
+{% image
+src="/images/v1.5/how-to-guides/governance/bulk-ownership-and.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Many data assets lack proper ownership and domain assignment, leading to governance and accountability issues. Manually assigning owners can be error-prone and time-consuming.
+- **Solution**: Automations can bulk-assign ownership and domains to datasets, ensuring all data assets are correctly categorized and owned. This process can be applied to tables, schemas, or other assets within Collate.
+- **Benefit**: This use case ensures data assets have a designated owner and are organized under the appropriate domain, making data more discoverable and accountable.
+
+### 2. Bulk Tagging and Glossary Term Assignment
+
+{% image
+src="/images/v1.5/how-to-guides/governance/bulk-tagging-glossary.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Manually applying the same tags or glossary terms to multiple datasets can be inefficient and inconsistent.
+- **Solution**: Automations allow users to bulk-apply tags (e.g., PII) or glossary terms (e.g., Customer ID) to specific datasets, ensuring uniformity across the platform.
+- **Benefit**: This automation reduces the risk of missing important tags (such as PII) and ensures that key metadata elements are applied consistently across datasets.
+
+### 3. Metadata Propagation via Lineage
+
+{% image
+src="/images/v1.5/how-to-guides/governance/metadata-propogation.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: When metadata such as tags, descriptions, or glossary terms are updated in one part of the data lineage, they may not be propagated across related datasets, leading to inconsistencies.
+- **Solution**: Use automations to propagate metadata across related datasets, ensuring that all relevant data inherits the correct metadata properties from the source dataset.
+- **Benefit**: Metadata consistency is ensured across the entire data lineage, reducing the need for manual updates and maintaining a single source of truth.
+
+### 4. Automatic PII Detection and Tagging
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automatic-detection.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Manually identifying and tagging Personally Identifiable Information (PII) across large datasets is labor-intensive and prone to errors.
+- **Solution**: Automations can automatically detect PII data (e.g., emails, usernames) and apply relevant tags to ensure that sensitive data is flagged appropriately for compliance.
+- **Benefit**: Ensures compliance with data protection regulations by consistently tagging sensitive data, reducing the risk of non-compliance.
+
+## Best Practices
+
+- **Validate Assets Before Applying Actions**: Always use the **Explore** page to verify the assets that will be affected by the automation. This ensures that only the intended datasets are updated.
+- **Use Automation Logs**: Regularly check the **Recent Runs** logs to monitor automation activity and ensure that they are running as expected.
+- **Propagate Metadata Thoughtfully**: When propagating metadata via lineage, make sure that the source metadata is correct before applying it across multiple datasets.
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/set-up-automation.md b/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/set-up-automation.md
new file mode 100644
index 000000000000..0b57f0a55b05
--- /dev/null
+++ b/openmetadata-docs/content/v1.5.x/how-to-guides/data-governance/automation/set-up-automation.md
@@ -0,0 +1,67 @@
+---
+title: How to Set Up Automations in Collate
+slug: /how-to-guides/data-governance/automation/set-up-automation
+collate: true
+---
+
+# How to Set Up Automations in Collate
+
+### Step 1: Access the Automations Section
+In the OpenMetadata UI, navigate to **Govern > Automations**.
+This will take you to the Automations page where you can view and manage your existing automations.
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-1.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+### Step 2: Add a New Automation
+In the Automations page, click the **Add Automation** button located at the top right of the page.
+A pop-up window will appear to begin the process of adding a new automation.
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-2.png"
+alt="Add Automation"
+caption="Add Automation"
+/%}
+
+### Step 3: Fill in Automation Details
+In the pop-up window, provide the necessary information to set up the automation:
+- **Automation Name**: Give a meaningful name to the automation for easy identification.
+- **Description**: Add a brief description explaining what this automation will do (e.g., "Daily metadata ingestion for database XYZ").
+- **Logic/Conditions**: Define any conditions or specific criteria needed for this automation to work (e.g., specific tables or columns to be included).
+ Ensure that the logic is set up as per your specific requirements to make the automation useful for your workflows.
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-4.png"
+alt="Automation details"
+caption="Automation details"
+/%}
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-5.png"
+alt="Automation logics"
+caption="Automation logics"
+/%}
+
+### Step 4: Configure Automation Interval
+Once you've filled in the required details, click **Next**.
+On the next page, you’ll be prompted to select the interval for the automation. This defines how frequently the automation should run (e.g., daily, weekly, or custom intervals).
+Review your settings and click **Automate** once you are satisfied with the configuration.
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-6.png"
+alt="Automation Interval"
+caption="Automation Interval"
+/%}
+
+### Step 5: Manage Your Automation
+After completing the setup, your automation will appear in the Automations list.
+To manage the automation, click the three dots next to the automation entry. From here, you can **edit**, **re-deploy**, or **delete** the automation.
+
+{% image
+src="/images/v1.5/how-to-guides/governance/automation-7.png"
+alt="Manage Your Automation"
+caption="Manage Your Automation"
+/%}
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suits.md b/openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suites.md
similarity index 96%
rename from openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suits.md
rename to openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suites.md
index bcaa39b7b443..78d7a89f9c0d 100644
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suits.md
+++ b/openmetadata-docs/content/v1.5.x/how-to-guides/data-quality-observability/quality/adding-test-suites.md
@@ -1,6 +1,6 @@
---
-title: Adding test suits through the UI
-slug: /how-to-guides/data-quality-observability/quality/adding-test-suits
+title: Adding test suites through the UI
+slug: /how-to-guides/data-quality-observability/quality/adding-test-suites
---
# Adding Test Suites Through the UI
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/index.md b/openmetadata-docs/content/v1.5.x/how-to-guides/index.md
index 84af90a4eb6a..fcc33a8cc675 100644
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/index.md
+++ b/openmetadata-docs/content/v1.5.x/how-to-guides/index.md
@@ -5,19 +5,6 @@ slug: /how-to-guides
# How-to Guides
-## Getting Started
-
-Set up and explore OpenMetadata's core features, from basic configuration to advanced functionalities, for a seamless onboarding experience.
-
-{% tilesContainer %}
-{% tile
- title="Getting Started"
- description="Unlock metadata insights for informed business decisions."
- link="/how-to-guides/getting-started"
- icon="discovery"
-/%}
-{% /tilesContainer %}
-
The How-to Guides will give you a walk through on accomplishing the basic to the most advanced things in OpenMetadata. These step-by-step guides will help get an overview of the features and also help explore the various functionalities.
## Features in OpenMetadata
diff --git a/openmetadata-docs/content/v1.5.x/menu.md b/openmetadata-docs/content/v1.5.x/menu.md
index 14eac2bf02f8..d42b506dbba7 100644
--- a/openmetadata-docs/content/v1.5.x/menu.md
+++ b/openmetadata-docs/content/v1.5.x/menu.md
@@ -5,19 +5,21 @@ site_menu:
- category: Quickstart
url: /quick-start
- color: violet-70
- icon: quickstart
- category: Quickstart / Try OpenMetadata in Docker
url: /quick-start/local-docker-deployment
- category: Quickstart / Try OpenMetadata in Kubernetes
url: /quick-start/local-kubernetes-deployment
- category: Quickstart / Try the OpenMetadata Sandbox
url: /quick-start/sandbox
+ - category: Quickstart / Getting Started
+ url: /quick-start/getting-started
+ - category: Quickstart / Day 1
+ url: /quick-start/getting-started/day-1
+ - category: Quickstart / Day 1 / Database Service Setup
+ url: /quick-start/getting-started/day-1/database-service-setup
- category: Deployment
url: /deployment
- color: violet-70
- icon: deployment
- category: Deployment / Bare Metal Deployment
url: /deployment/bare-metal
- category: Deployment / Bare Metal Deployment / Enable Security
@@ -215,8 +217,6 @@ site_menu:
- category: Connectors
url: /connectors
- color: violet-70
- icon: openmetadata
- category: Connectors / Database
url: /connectors/database
@@ -386,12 +386,6 @@ site_menu:
url: /connectors/database/sqlite
- category: Connectors / Database / SQLite / Run Externally
url: /connectors/database/sqlite/yaml
- - category: Connectors / Database / Synapse
- url: /connectors/database/synapse
- - category: Connectors / Database / Synapse / Run Externally
- url: /connectors/database/synapse/yaml
- - category: Connectors / Database / Synapse / Troubleshooting
- url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
@@ -577,10 +571,6 @@ site_menu:
url: /connectors/storage/gcs
- category: Connectors / Storage / GCS / Run Externally
url: /connectors/storage/gcs/yaml
- - category: Connectors / Storage / ADLS
- url: /connectors/storage/adls
- - category: Connectors / Storage / ADLS / Run Externally
- url: /connectors/storage/adls/yaml
- category: Connectors / Search
url: /connectors/search
@@ -599,10 +589,6 @@ site_menu:
url: /connectors/metadata/atlas
- category: Connectors / Metadata / Atlas / Run Externally
url: /connectors/metadata/atlas/yaml
- - category: Connectors / Metadata / Alation
- url: /connectors/metadata/alation
- - category: Connectors / Metadata / Alation / Run Externally
- url: /connectors/metadata/alation/yaml
- category: Connectors / Metadata / Alation Sink
url: /connectors/metadata/alationsink
- category: Connectors / Metadata / Alation Sink / Run Externally
@@ -694,23 +680,7 @@ site_menu:
- category: How-to Guides
url: /how-to-guides
- color: violet-70
- icon: openmetadata
- - category: How-to Guides / Data Quality Observability / Visualize
- url: /how-to-guides/data-quality-observability/visualize
- - category: How-to Guides / Data Quality Observability / Test Cases From YAML Config
- url: /how-to-guides/data-quality-observability/quality/test-cases-from-yaml-config
- - category: How-to Guides / Data Quality Observability / Adding Test Suits
- url: /how-to-guides/data-quality-observability/quality/adding-test-suits
- - category: How-to Guides / Data Quality Observability / Adding Test Cases
- url: /how-to-guides/data-quality-observability/quality/adding-test-cases
- - category: How-to Guides / Getting Started
- url: /how-to-guides/getting-started
- - category: How-to Guides / Day 1
- url: /how-to-guides/getting-started/day-1
- - category: How-to Guides / Day 1 / Database Service Setup
- url: /how-to-guides/getting-started/day-1/database-service-setup
- category: How-to Guides / Admin Guide
url: /how-to-guides/admin-guide
- category: How-to Guides / Admin Guide / How to Ingest Metadata
@@ -822,6 +792,14 @@ site_menu:
url: /how-to-guides/data-quality-observability/quality/test
- category: How-to Guides / Data Quality and Observability / Data Quality / Configure Data Quality
url: /how-to-guides/data-quality-observability/quality/configure
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Adding Test Cases
+ url: /how-to-guides/data-quality-observability/quality/adding-test-cases
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Adding Test Suites
+ url: /how-to-guides/data-quality-observability/quality/adding-test-suites
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Test Cases From YAML Config
+ url: /how-to-guides/data-quality-observability/quality/test-cases-from-yaml-config
+ - category: How-to Guides / Data Quality and Observability / Data Quality / How to Visualize Test Results
+ url: /how-to-guides/data-quality-observability/quality/visualize
- category: How-to Guides / Data Quality and Observability / Data Quality / Tests - YAML Config
url: /how-to-guides/data-quality-observability/quality/tests-yaml
- category: How-to Guides / Data Quality and Observability / Data Quality / Custom Tests
@@ -931,8 +909,6 @@ site_menu:
- category: Releases
url: /releases
- color: violet-70
- icon: overview
- category: Releases / Latest Release
url: /releases/latest-release
- category: Releases / Supported Releases
@@ -1027,8 +1003,6 @@ site_menu:
- category: Main Concepts
url: /main-concepts
- color: violet-70
- icon: main-concepts
- category: Main Concepts / High Level Design
url: /main-concepts/high-level-design
- category: Main Concepts / Backend DB
@@ -1962,8 +1936,6 @@ site_menu:
- category: Developers
url: /developers
- color: violet-70
- icon: developers
- category: Developers / Architecture
url: /developers/architecture
- category: Developers / Architecture / Understand Code Layout
@@ -2011,8 +1983,6 @@ site_menu:
- category: SDK
url: /sdk
- color: violet-70
- icon: sdk
- category: SDK / Python SDK
url: /sdk/python
- category: SDK / Python SDK / Entities
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/database-service-setup.md b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/database-service-setup.md
similarity index 93%
rename from openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/database-service-setup.md
rename to openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/database-service-setup.md
index a67d0dc90c28..c2c3340fb30c 100644
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/database-service-setup.md
+++ b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/database-service-setup.md
@@ -1,6 +1,6 @@
---
title: Database service setup
-slug: /how-to-guides/getting-started/day-1/database-service-setup
+slug: /quick-start/getting-started/day-1/database-service-setup
---
## Setting Up a Database Service for Metadata Extraction
@@ -41,4 +41,4 @@ Click Test Connection to verify the setup. This will check if OpenMetadata can r
{% image
src="/images/v1.5/getting-started/test-connection.png"
alt="Verifying the Test Connection"
- caption="Verifying the Test Connection" /%}
+ caption="Verifying the Test Connection" /%}
\ No newline at end of file
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/index.md b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/index.md
similarity index 98%
rename from openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/index.md
rename to openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/index.md
index 6019df091257..5a4af7e59086 100644
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/day-1/index.md
+++ b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/day-1/index.md
@@ -1,6 +1,6 @@
---
title: Day 1
-slug: /how-to-guides/getting-started/day-1
+slug: /quick-start/getting-started/day-1
---
# Getting Started: Day 1
@@ -39,7 +39,7 @@ There's two options on how to set up a data connector:
{% tile
title="Run the connector in OpenMetadata"
description="Guide to start ingesting metadata seamlessly from your data sources."
- link="/how-to-guides/getting-started/day-1/database-service-setup"
+ link="/quick-start/getting-started/day-1/database-service-setup"
icon="discovery"
/%}
{% /tilesContainer %}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/index.md b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/index.md
similarity index 97%
rename from openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/index.md
rename to openmetadata-docs/content/v1.5.x/quick-start/getting-started/index.md
index d1044aa836fd..778794a85fd3 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/index.md
+++ b/openmetadata-docs/content/v1.5.x/quick-start/getting-started/index.md
@@ -1,6 +1,6 @@
---
title: Getting Started with OpenMetadata for Data cataloging
-slug: /how-to-guides/getting-started
+slug: /quick-start/getting-started
---
# Getting Started
@@ -65,7 +65,7 @@ In this section, you will find a series of guides to help you get started with O
{% tile
title="Day 1: Connect your Data Sources and invite users"
description="Discover the right data assets to make timely business decisions."
- link="/how-to-guides/getting-started/day-1"
+ link="/quick-start/getting-started/day-1"
icon="discovery"
/%}
{% /tilesContainer %}
diff --git a/openmetadata-docs/content/v1.5.x/quick-start/index.md b/openmetadata-docs/content/v1.5.x/quick-start/index.md
index 9bd90f0eff7a..95b2873e39cc 100644
--- a/openmetadata-docs/content/v1.5.x/quick-start/index.md
+++ b/openmetadata-docs/content/v1.5.x/quick-start/index.md
@@ -44,3 +44,14 @@ Get OpenMetadata up and running with kubernetes in under 5 minutes!
{% /inlineCallout %}
{% /inlineCalloutContainer %}
+
+Set up and explore OpenMetadata's core features, from basic configuration to advanced functionalities, for a seamless onboarding experience.
+
+{% tilesContainer %}
+{% tile
+ title="Getting Started"
+ description="Unlock metadata insights for informed business decisions."
+ link="/how-to-guides/getting-started"
+ icon="discovery"
+/%}
+{% /tilesContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/how-to-use-metapilot.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/how-to-use-metapilot.md
new file mode 100644
index 000000000000..e78793cca165
--- /dev/null
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/how-to-use-metapilot.md
@@ -0,0 +1,110 @@
+---
+title: How to Use MetaPilot
+slug: /metapilot/how-to-use-metapilot
+collate: true
+---
+
+# How to Use MetaPilot
+
+## 1. Setting Up MetaPilot
+- Navigate to **Settings > Applications** in the Collate platform.
+
+{% image
+src="/images/v1.6/metapilot/setting-up-metapilot-1.png"
+alt="setting up metapilot"
+caption="Navigate to Settings > Applications"
+/%}
+
+- Install MetaPilot by following the on-screen instructions.
+
+{% image
+src="/images/v1.6/metapilot/setting-up-metapilot-2.png"
+alt="Install MetaPilot"
+caption="Install MetaPilot"
+/%}
+
+- Select the databases for which you want MetaPilot to automatically generate descriptions. You can configure multiple databases and set a default database for the MetaPilot chatbot to work with.
+
+{% image
+src="/images/v1.6/metapilot/setting-up-metapilot-3.png"
+alt="automatically generate descriptions"
+caption="automatically generate descriptions"
+/%}
+
+- **Scheduling**: Schedule MetaPilot to run regularly, automatically generating metadata at predefined intervals (e.g., weekly).
+
+{% image
+src="/images/v1.6/metapilot/setting-up-metapilot-4.png"
+alt="Schedule MetaPilot"
+caption="Schedule MetaPilot"
+/%}
+
+## 2. Using the MetaPilot Chatbot
+- The MetaPilot chatbot icon appears on every page after installation.
+
+{% image
+src="/images/v1.6/metapilot/using-metapilot-chatbot-1.png"
+alt="chatbot icon"
+caption="chatbot icon"
+/%}
+
+- Interact with the chatbot by typing natural language questions. For example:
+ - “Show me sales data for Q1.”
+ - “What is the average revenue per customer?”
+
+- MetaPilot will generate the corresponding SQL query and provide a detailed explanation of the query logic.
+
+{% image
+src="/images/v1.6/metapilot/using-metapilot-chatbot-2.png"
+alt="natural language questions"
+caption="natural language questions"
+/%}
+
+- Users can refine queries by providing further instructions, and the chatbot will adjust the SQL query accordingly.
+
+## 3. Optimizing and Fixing SQL Queries
+
+- The **Metadata Usage** workflows will ingest the queries being run against the tables. You can see each query's execution time in the **Queries** tab.
+
+{% image
+src="/images/v1.6/metapilot/fixing-sql-queries-1.png"
+alt="metadata usage workflows"
+caption="metadata usage workflows"
+/%}
+
+- If a query runs inefficiently, ask the chatbot to optimize it by typing: “Optimize this query.”
+
+{% image
+src="/images/v1.6/metapilot/fixing-sql-queries-3.png"
+alt="Optimize this query"
+caption="Optimize this query"
+/%}
+
+- MetaPilot will return a more efficient version of the SQL query, which you can then copy and execute in your database.
+
+- If the query contains errors or isn’t functioning correctly, ask MetaPilot: “Can you fix this query?”
+
+- MetaPilot will correct the query and provide a working version.
+
+{% image
+src="/images/v1.6/metapilot/fixing-sql-queries-4.png"
+alt="Can you fix this query"
+caption="Can you fix this query"
+/%}
+
+## 4. Reviewing Generated Metadata
+- Once MetaPilot generates descriptions for tables and columns, navigate to the **database view** to review the metadata.
+- You can accept or reject each suggestion individually or choose to accept all suggestions in bulk.
+
+{% image
+src="/images/v1.6/metapilot/reviewing-generated-metadata.png"
+alt="reviewing Generated Metadata"
+caption="reviewing Generated Metadata"
+/%}
+
+- MetaPilot allows you to document entire datasets in a matter of minutes, significantly reducing the manual effort required to maintain metadata.
+
+## Best Practices
+- **Regular Scheduling**: Schedule MetaPilot to run at regular intervals to ensure your metadata is always up-to-date, especially when dealing with frequently changing datasets.
+- **Leverage the Chatbot for Query Writing**: Encourage both technical and non-technical users to use the MetaPilot chatbot for SQL query generation. It simplifies complex query writing and ensures accurate results.
+- **Optimize Queries Regularly**: Monitor the performance of your queries and use MetaPilot to optimize them, especially when working with large datasets or queries that require complex joins and filters.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/index.md
new file mode 100644
index 000000000000..310dc0dbfb09
--- /dev/null
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/Metapilot/index.md
@@ -0,0 +1,85 @@
+---
+title: MetaPilot
+slug: /metapilot
+collate: true
+---
+
+# MetaPilot Technical Documentation
+
+{% youtube videoId="6glMYLzxNqk" start="0:00" end="04:20" width="560px" height="315px" /%}
+
+MetaPilot is an AI-powered tool within Collate that simplifies and enhances metadata management. By integrating generative AI, MetaPilot assists users in automating the documentation of data assets, writing and optimizing SQL queries, and interacting with data through natural language. This first-of-its-kind data copilot improves productivity by automating tedious tasks and providing intelligent insights into your data environment.
+
+## Key Features
+
+- **Automated Data Documentation**: Automatically generates metadata descriptions for tables and columns, saving time and effort for data owners and stewards.
+- **Natural Language SQL Query Generation**: Allows users to interact with MetaPilot through a chatbot to generate SQL queries by simply asking questions in plain English.
+- **SQL Query Optimization and Fixing**: Capable of optimizing and troubleshooting SQL queries to improve their performance and efficiency.
+
+## Why MetaPilot is Useful
+
+### Metadata Management Challenges
+
+Managing metadata across multiple data assets can be overwhelming due to the influx of new data and changing team dynamics. MetaPilot addresses these challenges by:
+
+- Automating metadata description generation.
+- Simplifying the creation and optimization of SQL queries.
+- Reducing manual effort and enhancing data quality.
+
+### Time-Saving Features
+
+Documenting thousands of tables manually is tedious and time-consuming. MetaPilot automates metadata generation, allowing data teams to focus on high-value tasks and ensuring that data assets are consistently documented and understood across the organization.
+
+## Use Cases
+
+### 1. Automatic Data Asset Documentation
+
+{% image
+src="/images/v1.6/metapilot/reviewing-generated-metadata.png"
+alt="Automatic Data Asset Documentation"
+caption="Auto Generate data Asset Documentation"
+/%}
+
+- **Problem**: Manually creating metadata descriptions for large datasets is labor-intensive and error-prone.
+- **Solution**: MetaPilot’s generative AI automates the process, analyzing database structures and suggesting accurate descriptions for tables and columns.
+- **How It Works**: After configuring MetaPilot, it scans the database schema and generates metadata descriptions automatically. Users can review these descriptions and approve or reject them in bulk or individually.
+- **Benefit**: Streamlines the documentation process, ensuring consistent and up-to-date metadata across all datasets.
+
+### 2. Natural Language SQL Query Generation
+
+{% image
+src="/images/v1.6/metapilot/using-metapilot-chatbot-2.png"
+alt="Natural Language SQL Query Generation"
+caption="Natural Language SQL Query Generation"
+/%}
+
+- **Problem**: Non-technical users often struggle with writing SQL queries to extract insights from databases.
+- **Solution**: MetaPilot’s chatbot allows users to ask questions in natural language, generating SQL queries and providing explanations to help extract the required data quickly and easily.
+- **How It Works**: Users can click on the MetaPilot chatbot widget and ask questions like "Show me sales data from last quarter." MetaPilot generates the corresponding SQL query and explains its logic.
+- **Benefit**: Democratizes data access by enabling users of all technical levels to interact with data without deep SQL knowledge.
+
+### 3. SQL Query Optimization and Troubleshooting
+
+{% image
+src="/images/v1.6/metapilot/fixing-sql-queries-3.png"
+alt="SQL Query Optimization and Troubleshooting"
+caption="SQL Query Optimization"
+/%}
+
+- **Problem**: SQL queries can become complex and inefficient, leading to performance issues and increased costs.
+- **Solution**: MetaPilot optimizes inefficient queries to improve performance, saving both time and resources.
+- **How It Works**: If a query runs too long, users can request MetaPilot to optimize it. MetaPilot provides a more efficient SQL query version that can be implemented immediately.
+- **Benefit**: Enhances query performance, reduces costs associated with inefficient queries, and speeds up data processing.
+
+### 4. Fixing SQL Queries
+
+{% image
+src="/images/v1.6/metapilot/fixing-sql-queries-4.png"
+alt="Fixing SQL Queries"
+caption="Fixing SQL Queries"
+/%}
+
+- **Problem**: Complex queries can cause issues even for SQL experts.
+- **Solution**: MetaPilot can fix problematic SQL queries, ensuring they run correctly and efficiently.
+- **How It Works**: Users can ask MetaPilot, "Can you fix this query for me?" It analyzes the query, detects issues, and returns a corrected version ready for use.
+- **Benefit**: Simplifies writing and maintaining queries, allowing data teams to focus on analysis rather than troubleshooting.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/collate-menu.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/collate-menu.md
index 8457fdf5be7d..86a34d7e6921 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/collate-menu.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/collate-menu.md
@@ -5,8 +5,6 @@ site_menu:
- category: Enable Security
url: /deployment
- color: violet-70
- icon: deployment
- category: Enable Security / Basic Authentication
url: /deployment/security/basic-auth
- category: Enable Security / Ldap Authentication
@@ -38,8 +36,6 @@ site_menu:
- category: Connectors
url: /connectors
- color: violet-70
- icon: openmetadata
- category: Connectors / Database
url: /connectors/database
@@ -518,8 +514,6 @@ site_menu:
- category: How-to Guides
url: /how-to-guides
- color: violet-70
- icon: openmetadata
- category: How-to Guides / Admin Guide
url: /how-to-guides/admin-guide
@@ -702,6 +696,10 @@ site_menu:
- category: How-to Guides / Data Governance
url: /how-to-guides/data-governance
+ - category: How-to Guides / Data Governance / Automation
+ url: /how-to-guides/data-governance/automation
+ - category: How-to Guides / Data Governance / Automation / How to Set Up Automations in Collate
+ url: /how-to-guides/data-governance/automation/set-up-automation
- category: How-to Guides / Data Governance / Glossary
url: /how-to-guides/data-governance/glossary
- category: How-to Guides / Data Governance / Glossary / What is a Glossary Term
@@ -745,8 +743,6 @@ site_menu:
- category: Getting Started
url: /getting-started
- color: violet-70
- icon: openmetadata
- category: Getting Started / Day 1
url: /getting-started/day-1
@@ -765,8 +761,6 @@ site_menu:
- category: Releases
url: /releases
- color: violet-70
- icon: overview
- category: Releases / Latest Release
url: /releases/latest-release
- category: Releases / Supported Releases
@@ -861,8 +855,6 @@ site_menu:
- category: Main Concepts
url: /main-concepts
- color: violet-70
- icon: main-concepts
- category: Main Concepts / High Level Design
url: /main-concepts/high-level-design
- category: Main Concepts / Backend DB
@@ -1796,8 +1788,6 @@ site_menu:
- category: SDK
url: /sdk
- color: violet-70
- icon: sdk
- category: SDK / Python SDK
url: /sdk/python
- category: SDK / Python SDK / Entities
@@ -1876,4 +1866,10 @@ site_menu:
url: /sdk/java
- category: SDK / Go
url: /sdk/go
+ - category: MetaPilot
+ url: /metapilot
+ - category: MetaPilot / How to Use MetaPilot
+ url: /metapilot/how-to-use-metapilot
---
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/ingestion/lineage/spark-lineage.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/ingestion/lineage/spark-lineage.md
index 3e48cfc5eaa0..3bc39c4340e9 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/ingestion/lineage/spark-lineage.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/ingestion/lineage/spark-lineage.md
@@ -343,3 +343,39 @@ spark.openmetadata.transport.timeout 30
```
After all these steps are completed you can start/restart your compute instance and you are ready to extract the lineage from spark to OpenMetadata.
+
+
+## Using Spark Agent with Glue
+
+Follow the steps below to use the OpenMetadata Spark Agent with Glue.
+
+### 1. Specify the OpenMetadata Spark Agent JAR URL
+
+1. Upload the OpenMetadata Spark Agent JAR to S3.
+2. Navigate to the Glue job. In the Job details tab, go to Advanced properties → Libraries → Dependent Jars path.
+3. Add the S3 URL of the OpenMetadata Spark Agent JAR to the Dependent Jars path.
+
+{% image
+ src="/images/v1.6/connectors/spark/glue-job-jar.png"
+ alt="Glue Job Configure Jar"
+ caption="Glue Job Configure Jar"
+ /%}
+
+
+### 2. Add Spark configuration in Job Parameters
+
+In the same Job details tab, add a new property under Job parameters:
+
+1. Add the `--conf` property with the following value. Make sure to customize this configuration as described in the documentation above.
+
+```
+spark.extraListeners=org.openmetadata.spark.agent.OpenMetadataSparkListener --conf spark.openmetadata.transport.hostPort=https://your-org.host:port --conf spark.openmetadata.transport.type=openmetadata --conf spark.openmetadata.transport.jwtToken= --conf spark.openmetadata.transport.pipelineServiceName=glue_spark_pipeline_service --conf spark.openmetadata.transport.pipelineName=glue_pipeline_name --conf spark.openmetadata.transport.timeout=30
+```
+
+2. Add the `--user-jars-first` parameter and set its value to `true`.
+
+{% image
+ src="/images/v1.6/connectors/spark/glue-job-params.png"
+ alt="Glue Job Configure Params"
+ caption="Glue Job Configure Params"
+ /%}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/metadata/alation/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/metadata/alation/index.md
index e732fe1d71ab..a7ff96c95ded 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/metadata/alation/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/metadata/alation/index.md
@@ -122,9 +122,9 @@ Choose either postgres or mysql connection depending on the db:
**ingestDatasources**: Specifies if databases, schemas and tables should be included while ingesting. By default is set to `true`.
-**ingestDomains**: Specifies if hidden domains and subdomains should be included while ingesting. By default is set to `true`.
+**ingestDomains**: Specifies if domains and subdomains should be included while ingesting. By default it is set to `true`.
-**ingestDashboards**: Specifies if hidden BI sources and dashboards should be included while ingesting. By default is set to `true`.
+**ingestDashboards**: Specifies if BI sources and dashboards should be included while ingesting. By default it is set to `true`.
**alationTagClassificationName**: Specify the classification name under which the tags from alation will be created in OpenMetadata. By default it is set to `alationTags`.
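+
+A minimal sketch of how these options might appear in the connection config (field placement assumed from the surrounding examples; values are illustrative):
+
+```yaml
+serviceConnection:
+  config:
+    type: Alation
+    hostPort: https://alation.example.com
+    ingestDatasources: true
+    ingestDomains: true
+    ingestDashboards: true
+    alationTagClassificationName: alationTags
+```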
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/pipeline/glue-pipeline/yaml.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/pipeline/glue-pipeline/yaml.md
index 5898468a0f98..4468dc424867 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/pipeline/glue-pipeline/yaml.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/connectors/pipeline/glue-pipeline/yaml.md
@@ -112,11 +112,11 @@ This is a sample config for Glue:
```yaml {% isCodeBlock=true %}
source:
- type: glue
+ type: gluepipeline
serviceName: local_glue
serviceConnection:
config:
- type: Glue
+ type: GluePipeline
awsConfig:
```
```yaml {% srNumber=1 %}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/backup-restore-metadata.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/backup-restore-metadata.md
index 5e704c7e56de..306423cf28f8 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/backup-restore-metadata.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/backup-restore-metadata.md
@@ -70,15 +70,16 @@ Ingest some data...
```shell
BACKUP_FILE="backup_$(date +%Y%m%d%H%M).sql"
-DOCKER_COMPOSE_FILE="docker/development/docker-compose.yml"
+export COMPOSE_FILE="docker/development/docker-compose.yml"
# backup
-docker compose -f $DOCKER_COMPOSE_FILE exec ingestion mysqldump --no-tablespaces -u openmetadata_user -popenmetadata_password -h mysql -P 3306 openmetadata_db > $BACKUP_FILE
+docker compose exec ingestion mysqldump --no-tablespaces -u openmetadata_user -popenmetadata_password -h mysql -P 3306 openmetadata_db > $BACKUP_FILE
# create the restore database
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "create database restore;"
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "grant all privileges on restore.* to 'openmetadata_user'@'%';"
-docker compose -f $DOCKER_COMPOSE_FILE exec mysql mysql -u root -ppassword -e "flush privileges;"
+docker compose exec mysql mysql -u root -ppassword -e "create database restore;"
+docker compose exec mysql mysql -u root -ppassword -e "grant all privileges on restore.* to 'openmetadata_user'@'%';"
+docker compose exec mysql mysql -u root -ppassword -e "GRANT SUPER, SYSTEM_VARIABLES_ADMIN, SESSION_VARIABLES_ADMIN ON *.* TO 'openmetadata_user'@'%';"
+docker compose exec mysql mysql -u root -ppassword -e "flush privileges;"
# restore from the backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -T ingestion mysql -u openmetadata_user -popenmetadata_password -h mysql -P 3306 restore < $BACKUP_FILE
+docker compose exec -T ingestion mysql -u openmetadata_user -popenmetadata_password -h mysql -P 3306 restore < $BACKUP_FILE
```
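+
+To verify the restore (a minimal sketch, assuming the same compose services and credentials used above), compare table counts between the original and restored databases:
+
+```shell
+# Both schemas should report the same number of tables after a successful restore
+docker compose exec mysql mysql -u root -ppassword -e "SELECT table_schema, COUNT(*) AS tables FROM information_schema.tables WHERE table_schema IN ('openmetadata_db', 'restore') GROUP BY table_schema;"
+```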
### 3. Restart the docker deployment with the restored database
@@ -102,14 +103,14 @@ Ingest some data...
```shell
BACKUP_FILE="backup_$(date +%Y%m%d%H%M).sql"
-DOCKER_COMPOSE_FILE="docker/development/docker-compose-postgres.yml"
+export COMPOSE_FILE="docker/development/docker-compose-postgres.yml"
# backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password ingestion pg_dump -U openmetadata_user -h postgresql -d openmetadata_db > $BACKUP_FILE
+docker compose exec -e PGPASSWORD=openmetadata_password ingestion pg_dump -U openmetadata_user -h postgresql -d openmetadata_db > $BACKUP_FILE
# create the restore database
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "create database restore;"
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "ALTER DATABASE restore OWNER TO openmetadata_user;"
+docker compose exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "create database restore;"
+docker compose exec -e PGPASSWORD=openmetadata_password postgresql psql -U postgres -c "ALTER DATABASE restore OWNER TO openmetadata_user;"
# restore from the backup
-docker compose -f $DOCKER_COMPOSE_FILE exec -e PGPASSWORD=openmetadata_password -T ingestion psql -U openmetadata_user -h postgresql -d restore < $BACKUP_FILE
+docker compose exec -e PGPASSWORD=openmetadata_password -T ingestion psql -U openmetadata_user -h postgresql -d restore < $BACKUP_FILE
```
### 3. Restart the docker deployment with the restored database
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/bare-metal/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/bare-metal/index.md
index 3df742a45630..0299492340f6 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/bare-metal/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/bare-metal/index.md
@@ -54,7 +54,7 @@ You can refer a sample script [here](https://github.com/open-metadata/OpenMetada
## Elasticsearch (version 8.X)
-OpenMetadata supports ElasticSearch version up to 8.10.2. To install or upgrade Elasticsearch to a supported version please see the instructions for your operating system at
+OpenMetadata supports ElasticSearch versions up to 8.11.4. To install or upgrade Elasticsearch to a supported version, please see the instructions for your operating system at
[Installing ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html).
Please follow the instructions here to [install ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/7.13/setup.html).
@@ -170,7 +170,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version between 12 or higher
For Production Systems, we recommend Amazon RDS to be in Multiple Availability Zones. For Amazon OpenSearch (or ElasticSearch) Service, we recommend Multiple Availability Zones with minimum 3 Master Nodes.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/docker/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/docker/index.md
index b5a8227e3645..0e3689962547 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/docker/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/docker/index.md
@@ -244,7 +244,7 @@ If you are running OpenMetadata in AWS, it is recommended to use [Amazon RDS](ht
We support
- Amazon RDS (MySQL) engine version 8 or higher
-- Amazon OpenSearch (ElasticSearch) engine version up to 8.10.2 or Amazon OpenSearch engine version up to 2.7
+- Amazon OpenSearch (ElasticSearch) engine version up to 8.11.4 or Amazon OpenSearch engine version up to 2.7
- Amazon RDS (PostgreSQL) engine version 12 or higher
Note:-
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/aks.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/aks.md
index 5c4abeb5b242..39d3402c610b 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/aks.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/aks.md
@@ -16,7 +16,7 @@ We support
- Azure SQL (MySQL) engine version 8 or higher
- Azure SQL (PostgreSQL) engine version 12 or higher
-- Elastic Cloud (ElasticSearch version 8.10.2)
+- Elastic Cloud (ElasticSearch version 8.11.4)
Once you have the Azure SQL and Elastic Cloud on Azure configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/on-prem.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/on-prem.md
index e231b1440d6d..ed0217709f4d 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/on-prem.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/kubernetes/on-prem.md
@@ -23,7 +23,7 @@ We support
- MySQL engine version 8 or higher
- PostgreSQL engine version 12 or higher
-- ElasticSearch version 8.X (upto 8.10.2) or OpenSearch Version 2.X (upto 2.7)
+- ElasticSearch version 8.X (up to 8.11.4) or OpenSearch version 2.X (up to 2.7)
Once you have the External Database and Search Engine configured, you can update the environment variables below for OpenMetadata kubernetes deployments to connect with Database and ElasticSearch.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/upgrade/versions/110-to-120.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/upgrade/versions/110-to-120.md
index b661db7eb18e..c8a61dba23a4 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/upgrade/versions/110-to-120.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/deployment/upgrade/versions/110-to-120.md
@@ -161,7 +161,7 @@ For Kubernetes Deployment, `openmetadata.config.database.dbParams` is available
### Version Upgrades
- The OpenMetadata Server is now based on **JDK 17**
-- OpenMetadata now **requires** **Elasticsearch** version **8.10.2** or **Opensearch** version **2.7**
+- OpenMetadata now **requires** **Elasticsearch** version **8.11.4** or **Opensearch** version **2.7**
There is no direct migration to bump the indexes to the new supported versions. You might see errors like:
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/index.md
new file mode 100644
index 000000000000..5a1cb1cd7ce7
--- /dev/null
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/index.md
@@ -0,0 +1,78 @@
+---
+title: Collate Automations Documentation
+slug: /how-to-guides/data-governance/automation
+collate: true
+---
+
+# Collate Automations
+
+{% youtube videoId="ug08aLUyTyE" start="0:00" end="14:52" width="560px" height="315px" /%}
+
+## Overview
+
+Collate's **Automation** feature is a powerful tool designed to simplify and streamline metadata management tasks. By automating repetitive actions such as assigning owners, domains, or tagging data, Collate helps maintain consistency in metadata across an organization's datasets. These automations reduce manual effort and ensure that metadata is always up-to-date, accurate, and governed according to predefined policies.
+
+## Why Automations are Useful
+
+Managing metadata manually can be challenging, particularly in dynamic environments where data constantly evolves. Collate's Automation feature addresses several key pain points:
+
+- **Maintaining Consistency**: Automation helps ensure that metadata such as ownership, tags, and descriptions are applied consistently across all data assets.
+- **Saving Time**: Automations allow data teams to focus on higher-value tasks by eliminating the need for manual updates and maintenance.
+- **Enforcing Governance Policies**: Automations help ensure that data follows organizational policies at all times by automatically applying governance rules (e.g., assigning data owners or domains).
+- **Data Quality and Accountability**: Data quality suffers without clear ownership. Automating ownership assignments helps ensure that data quality issues are addressed efficiently.
+
+## Key Use Cases for Collate Automations
+
+### 1. Bulk Ownership and Domain Assignment
+
+{% image
+src="/images/v1.6/how-to-guides/governance/bulk-ownership-and.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Many data assets lack proper ownership and domain assignment, leading to governance and accountability issues. Manually assigning owners can be error-prone and time-consuming.
+- **Solution**: Automations can bulk-assign ownership and domains to datasets, ensuring all data assets are correctly categorized and owned. This process can be applied to tables, schemas, or other assets within Collate.
+- **Benefit**: This use case ensures data assets have a designated owner and are organized under the appropriate domain, making data more discoverable and accountable.
+
+### 2. Bulk Tagging and Glossary Term Assignment
+
+{% image
+src="/images/v1.6/how-to-guides/governance/bulk-tagging-glossary.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Manually applying the same tags or glossary terms to multiple datasets can be inefficient and inconsistent.
+- **Solution**: Automations allow users to bulk-apply tags (e.g., PII) or glossary terms (e.g., Customer ID) to specific datasets, ensuring uniformity across the platform.
+- **Benefit**: This automation reduces the risk of missing important tags (such as PII) and ensures that key metadata elements are applied consistently across datasets.
+
+### 3. Metadata Propagation via Lineage
+
+{% image
+src="/images/v1.6/how-to-guides/governance/metadata-propogation.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: When metadata such as tags, descriptions, or glossary terms are updated in one part of the data lineage, they may not be propagated across related datasets, leading to inconsistencies.
+- **Solution**: Use automations to propagate metadata across related datasets, ensuring that all relevant data inherits the correct metadata properties from the source dataset.
+- **Benefit**: Metadata consistency is ensured across the entire data lineage, reducing the need for manual updates and maintaining a single source of truth.
+
+### 4. Automatic PII Detection and Tagging
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automatic-detection.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+- **Problem**: Manually identifying and tagging Personally Identifiable Information (PII) across large datasets is labor-intensive and prone to errors.
+- **Solution**: Automations can automatically detect PII data (e.g., emails, usernames) and apply relevant tags to ensure that sensitive data is flagged appropriately for compliance.
+- **Benefit**: Ensures compliance with data protection regulations by consistently tagging sensitive data, reducing the risk of non-compliance.
+
+## Best Practices
+
+- **Validate Assets Before Applying Actions**: Always use the **Explore** page to verify the assets that will be affected by the automation. This ensures that only the intended datasets are updated.
+- **Use Automation Logs**: Regularly check the **Recent Runs** logs to monitor automation activity and ensure that they are running as expected.
+- **Propagate Metadata Thoughtfully**: When propagating metadata via lineage, make sure that the source metadata is correct before applying it across multiple datasets.
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/set-up-automation.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/set-up-automation.md
new file mode 100644
index 000000000000..a4afc9e61f46
--- /dev/null
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-governance/automation/set-up-automation.md
@@ -0,0 +1,67 @@
+---
+title: How to Set Up Automations in Collate
+slug: /how-to-guides/data-governance/automation/set-up-automation
+collate: true
+---
+
+# How to Set Up Automations in Collate
+
+### Step 1: Access the Automations Section
+In the OpenMetadata UI, navigate to **Govern > Automations**.
+This will take you to the Automations page where you can view and manage your existing automations.
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-1.png"
+alt="Getting started with Automation"
+caption="Getting started with Automation"
+/%}
+
+### Step 2: Add a New Automation
+In the Automations page, click the **Add Automation** button located at the top right of the page.
+A pop-up window will appear to begin the process of adding a new automation.
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-2.png"
+alt="Add Automation"
+caption="Add Automation"
+/%}
+
+### Step 3: Fill in Automation Details
+In the pop-up window, provide the necessary information to set up the automation:
+- **Automation Name**: Give a meaningful name to the automation for easy identification.
+- **Description**: Add a brief description explaining what this automation will do (e.g., "Daily metadata ingestion for database XYZ").
+- **Logic/Conditions**: Define any conditions or specific criteria needed for this automation to work (e.g., specific tables or columns to be included).
+ Ensure that the logic is set up as per your specific requirements to make the automation useful for your workflows.
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-4.png"
+alt="Automation details"
+caption="Automation details"
+/%}
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-5.png"
+alt="Automation logics"
+caption="Automation logics"
+/%}
+
+### Step 4: Configure Automation Interval
+Once you've filled in the required details, click **Next**.
+On the next page, you’ll be prompted to select the interval for the automation. This defines how frequently the automation should run (e.g., daily, weekly, or custom intervals).
+Review your settings and click **Automate** once you are satisfied with the configuration.
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-6.png"
+alt="Automation Interval"
+caption="Automation Interval"
+/%}
+
+### Step 5: Manage Your Automation
+After completing the setup, your automation will appear in the Automations list.
+To manage the automation, click the three dots next to the automation entry. From here, you can **edit**, **re-deploy**, or **delete** the automation.
+
+{% image
+src="/images/v1.6/how-to-guides/governance/automation-7.png"
+alt="Manage Your Automation"
+caption="Manage Your Automation"
+/%}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suits.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suites.md
similarity index 96%
rename from openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suits.md
rename to openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suites.md
index 11c01feb655b..e699578d3e6a 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suits.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/adding-test-suites.md
@@ -1,6 +1,6 @@
---
-title: Adding test suits through the UI
-slug: /how-to-guides/data-quality-observability/quality/adding-test-suits
+title: Adding test suites through the UI
+slug: /how-to-guides/data-quality-observability/quality/adding-test-suites
---
# Adding Test Suites Through the UI
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/tests-yaml.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/tests-yaml.md
index a6fc743fa37a..b79f7b38743e 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/tests-yaml.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/tests-yaml.md
@@ -487,6 +487,7 @@ Consistency
* `table2`: The table against which the comparison will be done. Must be the fully qualified name as defined in OpenMetadata
* `threshold`: The threshold of different rows above which the test should fail -- defaults to 0
* `where`: Any `where` clause to pass
+* `caseSensitiveColumns`: Whether the column comparison should be case sensitive. Defaults to `false`.
**Behavior**
@@ -513,6 +514,8 @@ parameterValues:
value: 10
- name: where
value: id != 999
+ - name: caseSensitiveColumns
+ value: false
```
**JSON Config**
@@ -543,6 +546,10 @@ parameterValues:
{
"name": "where",
"value": "id != 999"
+ },
+ {
+ "name": "caseSensitiveColumns",
+ "value": false
}
]
}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/visualize.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/visualize.md
index 6237df4647da..8e0582a736ef 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/visualize.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/visualize.md
@@ -1,6 +1,6 @@
---
title: How to Visualize Test Results
-slug: /how-to-guides/data-quality-observability/visualize
+slug: /how-to-guides/data-quality-observability/quality/visualize
---
# How to Visualize Test Results
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/index.md
index 84af90a4eb6a..fcc33a8cc675 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/index.md
@@ -5,19 +5,6 @@ slug: /how-to-guides
# How-to Guides
-## Getting Started
-
-Set up and explore OpenMetadata's core features, from basic configuration to advanced functionalities, for a seamless onboarding experience.
-
-{% tilesContainer %}
-{% tile
- title="Getting Started"
- description="Unlock metadata insights for informed business decisions."
- link="/how-to-guides/getting-started"
- icon="discovery"
-/%}
-{% /tilesContainer %}
-
The How-to Guides walk you through everything from the basics to the most advanced features of OpenMetadata. These step-by-step guides give an overview of the features and help you explore the various functionalities.
## Features in OpenMetadata
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
index 4073dae3b862..00d1abf6ced4 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
@@ -5,19 +5,21 @@ site_menu:
- category: Quickstart
url: /quick-start
- color: violet-70
- icon: quickstart
- category: Quickstart / Try OpenMetadata in Docker
url: /quick-start/local-docker-deployment
- category: Quickstart / Try OpenMetadata in Kubernetes
url: /quick-start/local-kubernetes-deployment
- category: Quickstart / Try the OpenMetadata Sandbox
url: /quick-start/sandbox
+ - category: Quickstart / Getting Started
+ url: /quick-start/getting-started
+ - category: Quickstart / Day 1
+ url: /quick-start/getting-started/day-1
+ - category: Quickstart / Day 1 / Database Service Setup
+ url: /quick-start/getting-started/day-1/database-service-setup
- category: Deployment
url: /deployment
- color: violet-70
- icon: deployment
- category: Deployment / Bare Metal Deployment
url: /deployment/bare-metal
- category: Deployment / Bare Metal Deployment / Enable Security
@@ -215,8 +217,6 @@ site_menu:
- category: Connectors
url: /connectors
- color: violet-70
- icon: openmetadata
- category: Connectors / API
url: /connectors/api
@@ -386,12 +386,6 @@ site_menu:
url: /connectors/database/sqlite
- category: Connectors / Database / SQLite / Run Externally
url: /connectors/database/sqlite/yaml
- - category: Connectors / Database / Synapse
- url: /connectors/database/synapse
- - category: Connectors / Database / Synapse / Run Externally
- url: /connectors/database/synapse/yaml
- - category: Connectors / Database / Synapse / Troubleshooting
- url: /connectors/database/synapse/troubleshooting
- category: Connectors / Database / S3 Datalake
url: /connectors/database/s3-datalake
- category: Connectors / Database / S3 Datalake / Run Externally
@@ -527,11 +521,6 @@ site_menu:
url: /connectors/pipeline/kafkaconnect
- category: Connectors / Pipeline / KafkaConnect / Run Externally
url: /connectors/pipeline/kafkaconnect/yaml
- - category: Connectors / Pipeline / Matillion
- url: /connectors/pipeline/matillion
- isCollateOnly: true
- - category: Connectors / Pipeline / Matillion / Run Externally
- url: /connectors/pipeline/matillion/yaml
- category: Connectors / Pipeline / Databricks Pipeline
url: /connectors/pipeline/databricks-pipeline
- category: Connectors / Pipeline / Databricks Pipeline / Run Externally
@@ -575,10 +564,6 @@ site_menu:
url: /connectors/ml-model/sagemaker
- category: Connectors / ML Model / Sagemaker / Run Externally
url: /connectors/ml-model/sagemaker/yaml
- - category: Connectors / ML Model / VertexAI
- url: /connectors/ml-model/vertexai
- - category: Connectors / ML Model / VertexAI / Run Externally
- url: /connectors/ml-model/vertexai/yaml
- category: Connectors / Storage
url: /connectors/storage
@@ -590,10 +575,6 @@ site_menu:
url: /connectors/storage/gcs
- category: Connectors / Storage / GCS / Run Externally
url: /connectors/storage/gcs/yaml
- - category: Connectors / Storage / ADLS
- url: /connectors/storage/adls
- - category: Connectors / Storage / ADLS / Run Externally
- url: /connectors/storage/adls/yaml
- category: Connectors / Search
url: /connectors/search
@@ -612,10 +593,6 @@ site_menu:
url: /connectors/metadata/atlas
- category: Connectors / Metadata / Atlas / Run Externally
url: /connectors/metadata/atlas/yaml
- - category: Connectors / Metadata / Alation
- url: /connectors/metadata/alation
- - category: Connectors / Metadata / Alation / Run Externally
- url: /connectors/metadata/alation/yaml
- category: Connectors / Metadata / Alation Sink
url: /connectors/metadata/alationsink
- category: Connectors / Metadata / Alation Sink / Run Externally
@@ -707,23 +684,7 @@ site_menu:
- category: How-to Guides
url: /how-to-guides
- color: violet-70
- icon: openmetadata
- - category: How-to Guides / Data Quality Observability / Visualize
- url: /how-to-guides/data-quality-observability/visualize
- - category: How-to Guides / Data Quality Observability / Test Cases From YAML Config
- url: /how-to-guides/data-quality-observability/quality/test-cases-from-yaml-config
- - category: How-to Guides / Data Quality Observability / Adding Test Suits
- url: /how-to-guides/data-quality-observability/quality/adding-test-suits
- - category: How-to Guides / Data Quality Observability / Adding Test Cases
- url: /how-to-guides/data-quality-observability/quality/adding-test-cases
- - category: How-to Guides / Getting Started
- url: /how-to-guides/getting-started
- - category: How-to Guides / Day 1
- url: /how-to-guides/getting-started/day-1
- - category: How-to Guides / Day 1 / Database Service Setup
- url: /how-to-guides/getting-started/day-1/database-service-setup
- category: How-to Guides / Admin Guide
url: /how-to-guides/admin-guide
- category: How-to Guides / Admin Guide / How to Ingest Metadata
@@ -835,6 +796,14 @@ site_menu:
url: /how-to-guides/data-quality-observability/quality/test
- category: How-to Guides / Data Quality and Observability / Data Quality / Configure Data Quality
url: /how-to-guides/data-quality-observability/quality/configure
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Adding Test Cases
+ url: /how-to-guides/data-quality-observability/quality/adding-test-cases
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Adding Test Suites
+ url: /how-to-guides/data-quality-observability/quality/adding-test-suites
+ - category: How-to Guides / Data Quality and Observability / Data Quality / Test Cases From YAML Config
+ url: /how-to-guides/data-quality-observability/quality/test-cases-from-yaml-config
+ - category: How-to Guides / Data Quality and Observability / Data Quality / How to Visualize Test Results
+ url: /how-to-guides/data-quality-observability/quality/visualize
- category: How-to Guides / Data Quality and Observability / Data Quality / Tests - YAML Config
url: /how-to-guides/data-quality-observability/quality/tests-yaml
- category: How-to Guides / Data Quality and Observability / Data Quality / Custom Tests
@@ -944,8 +913,6 @@ site_menu:
- category: Releases
url: /releases
- color: violet-70
- icon: overview
- category: Releases / Latest Release
url: /releases/latest-release
- category: Releases / Supported Releases
@@ -1042,8 +1009,6 @@ site_menu:
- category: Main Concepts
url: /main-concepts
- color: violet-70
- icon: main-concepts
- category: Main Concepts / High Level Design
url: /main-concepts/high-level-design
- category: Main Concepts / Backend DB
@@ -1979,8 +1944,6 @@ site_menu:
- category: Developers
url: /developers
- color: violet-70
- icon: developers
- category: Developers / Architecture
url: /developers/architecture
- category: Developers / Architecture / Understand Code Layout
@@ -2028,8 +1991,6 @@ site_menu:
- category: SDK
url: /sdk
- color: violet-70
- icon: sdk
- category: SDK / Python SDK
url: /sdk/python
- category: SDK / Python SDK / Entities
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/database-service-setup.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/database-service-setup.md
similarity index 96%
rename from openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/database-service-setup.md
rename to openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/database-service-setup.md
index c8310a1dfff4..90f4c691bec8 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/database-service-setup.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/database-service-setup.md
@@ -1,6 +1,6 @@
---
title: Database service setup
-slug: /how-to-guides/getting-started/day-1/database-service-setup
+slug: /quick-start/getting-started/day-1/database-service-setup
---
## Setting Up a Database Service for Metadata Extraction
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/index.md
similarity index 98%
rename from openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/index.md
rename to openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/index.md
index fd0d47d403d7..6bc58494e729 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/getting-started/day-1/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/day-1/index.md
@@ -1,6 +1,6 @@
---
title: Day 1
-slug: /how-to-guides/getting-started/day-1
+slug: /quick-start/getting-started/day-1
---
# Getting Started: Day 1
@@ -39,7 +39,7 @@ There's two options on how to set up a data connector:
{% tile
title="Run the connector in OpenMetadata"
description="Guide to start ingesting metadata seamlessly from your data sources."
- link="/how-to-guides/getting-started/day-1/database-service-setup"
+ link="/quick-start/getting-started/day-1/database-service-setup"
icon="discovery"
/%}
{% /tilesContainer %}
diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/index.md
similarity index 97%
rename from openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/index.md
rename to openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/index.md
index d1044aa836fd..778794a85fd3 100644
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/getting-started/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/getting-started/index.md
@@ -1,6 +1,6 @@
---
title: Getting Started with OpenMetadata for Data cataloging
-slug: /how-to-guides/getting-started
+slug: /quick-start/getting-started
---
# Getting Started
@@ -65,7 +65,7 @@ In this section, you will find a series of guides to help you get started with O
{% tile
title="Day 1: Connect your Data Sources and invite users"
description="Discover the right data assets to make timely business decisions."
- link="/how-to-guides/getting-started/day-1"
+ link="/quick-start/getting-started/day-1"
icon="discovery"
/%}
{% /tilesContainer %}
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/index.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/index.md
index 9bd90f0eff7a..95b2873e39cc 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/index.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/quick-start/index.md
@@ -44,3 +44,14 @@ Get OpenMetadata up and running with kubernetes in under 5 minutes!
{% /inlineCallout %}
{% /inlineCalloutContainer %}
+
+Set up and explore OpenMetadata's core features, from basic configuration to advanced functionalities, for a seamless onboarding experience.
+
+{% tilesContainer %}
+{% tile
+ title="Getting Started"
+ description="Unlock metadata insights for informed business decisions."
+ link="/how-to-guides/getting-started"
+ icon="discovery"
+/%}
+{% /tilesContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/images/v1.5/connectors/spark/glue-job-jar.png b/openmetadata-docs/images/v1.5/connectors/spark/glue-job-jar.png
new file mode 100644
index 000000000000..5ce7b558770c
Binary files /dev/null and b/openmetadata-docs/images/v1.5/connectors/spark/glue-job-jar.png differ
diff --git a/openmetadata-docs/images/v1.5/connectors/spark/glue-job-params.png b/openmetadata-docs/images/v1.5/connectors/spark/glue-job-params.png
new file mode 100644
index 000000000000..3cef8e39272a
Binary files /dev/null and b/openmetadata-docs/images/v1.5/connectors/spark/glue-job-params.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-case.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-case.png
index 3ef4e0827141..0ab13a503208 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-case.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-case.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-defintion.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-defintion.png
index 2294788cef78..4d8049764df4 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-defintion.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/add-test-defintion.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/profiler-tab-view.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/profiler-tab-view.png
index 8f0a4c149069..29381cbac6aa 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/profiler-tab-view.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/profiler-tab-view.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/table-results-entity.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/table-results-entity.png
index 265fa6708d19..126e7ceb9685 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/table-results-entity.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/table-results-entity.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-case-page.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-case-page.png
index b1d31b87b8fa..85748bf2fbf3 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-case-page.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-case-page.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-page.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-page.png
index b002453a0f0d..7a252d3ba13d 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-page.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-page.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-results.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-results.png
index c3eff2340546..cbff22aa35b2 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-results.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/data-quality/test-suite-results.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-column.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-column.png
index e962225b0169..5f3d3aa87fdd 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-column.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-column.png differ
diff --git a/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-table.png b/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-table.png
index 84a6595d99c5..9745f0d7d4d0 100644
Binary files a/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-table.png and b/openmetadata-docs/images/v1.5/features/ingestion/workflows/profiler/profiler-summary-table.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automatic-detection.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automatic-detection.png
new file mode 100644
index 000000000000..c5b8077414f8
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automatic-detection.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-1.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-1.png
new file mode 100644
index 000000000000..f6cefd8c8d78
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-1.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-2.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-2.png
new file mode 100644
index 000000000000..cef6b5428fbd
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-2.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-3.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-3.png
new file mode 100644
index 000000000000..b9b195c7ca09
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-3.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-4.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-4.png
new file mode 100644
index 000000000000..4c49ede5a0b7
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-4.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-5.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-5.png
new file mode 100644
index 000000000000..7f18a8a25134
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-5.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-6.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-6.png
new file mode 100644
index 000000000000..155a4f05fb74
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-6.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-7.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-7.png
new file mode 100644
index 000000000000..23b11e4b2b7a
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/automation-7.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-ownership-and.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-ownership-and.png
new file mode 100644
index 000000000000..b15cef41a7c4
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-ownership-and.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-tagging-glossary.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-tagging-glossary.png
new file mode 100644
index 000000000000..8497a169eb76
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/bulk-tagging-glossary.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/governance/metadata-propogation.png b/openmetadata-docs/images/v1.5/how-to-guides/governance/metadata-propogation.png
new file mode 100644
index 000000000000..92562cb3cfdd
Binary files /dev/null and b/openmetadata-docs/images/v1.5/how-to-guides/governance/metadata-propogation.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/quality/dq1.png b/openmetadata-docs/images/v1.5/how-to-guides/quality/dq1.png
index 230b7d4f5734..cfb44ce83470 100644
Binary files a/openmetadata-docs/images/v1.5/how-to-guides/quality/dq1.png and b/openmetadata-docs/images/v1.5/how-to-guides/quality/dq1.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/quality/quality1.png b/openmetadata-docs/images/v1.5/how-to-guides/quality/quality1.png
index a6516128cc3d..e8941dabdd91 100644
Binary files a/openmetadata-docs/images/v1.5/how-to-guides/quality/quality1.png and b/openmetadata-docs/images/v1.5/how-to-guides/quality/quality1.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/quality/test2.png b/openmetadata-docs/images/v1.5/how-to-guides/quality/test2.png
index d8999b1b24c9..a065fa655c4d 100644
Binary files a/openmetadata-docs/images/v1.5/how-to-guides/quality/test2.png and b/openmetadata-docs/images/v1.5/how-to-guides/quality/test2.png differ
diff --git a/openmetadata-docs/images/v1.5/how-to-guides/quality/test4.png b/openmetadata-docs/images/v1.5/how-to-guides/quality/test4.png
index efd405875f00..ab86982c3f5f 100644
Binary files a/openmetadata-docs/images/v1.5/how-to-guides/quality/test4.png and b/openmetadata-docs/images/v1.5/how-to-guides/quality/test4.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-1.png b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-1.png
new file mode 100644
index 000000000000..911a405173c4
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-1.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-3.png b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-3.png
new file mode 100644
index 000000000000..4dd4e3e6c20f
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-3.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-4.png b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-4.png
new file mode 100644
index 000000000000..38ec4e25fbb9
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/fixing-sql-queries-4.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/reviewing-generated-metadata.png b/openmetadata-docs/images/v1.5/metapilot/reviewing-generated-metadata.png
new file mode 100644
index 000000000000..fcf8da1451e3
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/reviewing-generated-metadata.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-1.png b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-1.png
new file mode 100644
index 000000000000..08d80960e158
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-1.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-2.png b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-2.png
new file mode 100644
index 000000000000..2ca38a1626f9
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-2.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-3.png b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-3.png
new file mode 100644
index 000000000000..44c07bd7ce68
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-3.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-4.png b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-4.png
new file mode 100644
index 000000000000..1f87fa2f48bb
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/setting-up-metapilot-4.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-1.png b/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-1.png
new file mode 100644
index 000000000000..94d9a064af10
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-1.png differ
diff --git a/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-2.png b/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-2.png
new file mode 100644
index 000000000000..b2114ad13347
Binary files /dev/null and b/openmetadata-docs/images/v1.5/metapilot/using-metapilot-chatbot-2.png differ
diff --git a/openmetadata-docs/images/v1.6/connectors/spark/glue-job-jar.png b/openmetadata-docs/images/v1.6/connectors/spark/glue-job-jar.png
new file mode 100644
index 000000000000..5ce7b558770c
Binary files /dev/null and b/openmetadata-docs/images/v1.6/connectors/spark/glue-job-jar.png differ
diff --git a/openmetadata-docs/images/v1.6/connectors/spark/glue-job-params.png b/openmetadata-docs/images/v1.6/connectors/spark/glue-job-params.png
new file mode 100644
index 000000000000..3cef8e39272a
Binary files /dev/null and b/openmetadata-docs/images/v1.6/connectors/spark/glue-job-params.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-case.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-case.png
index 3ef4e0827141..0ab13a503208 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-case.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-case.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-defintion.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-defintion.png
index 2294788cef78..4d8049764df4 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-defintion.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/add-test-defintion.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/profiler-tab-view.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/profiler-tab-view.png
index 8f0a4c149069..29381cbac6aa 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/profiler-tab-view.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/profiler-tab-view.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/table-results-entity.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/table-results-entity.png
index 265fa6708d19..126e7ceb9685 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/table-results-entity.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/table-results-entity.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-case-page.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-case-page.png
index b1d31b87b8fa..85748bf2fbf3 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-case-page.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-case-page.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-page.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-page.png
index b002453a0f0d..7a252d3ba13d 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-page.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-page.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-results.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-results.png
index c3eff2340546..cbff22aa35b2 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-results.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/data-quality/test-suite-results.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-column.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-column.png
index e962225b0169..5f3d3aa87fdd 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-column.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-column.png differ
diff --git a/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-table.png b/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-table.png
index 84a6595d99c5..9745f0d7d4d0 100644
Binary files a/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-table.png and b/openmetadata-docs/images/v1.6/features/ingestion/workflows/profiler/profiler-summary-table.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automatic-detection.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automatic-detection.png
new file mode 100644
index 000000000000..c5b8077414f8
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automatic-detection.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-1.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-1.png
new file mode 100644
index 000000000000..f6cefd8c8d78
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-1.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-2.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-2.png
new file mode 100644
index 000000000000..cef6b5428fbd
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-2.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-3.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-3.png
new file mode 100644
index 000000000000..b9b195c7ca09
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-3.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-4.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-4.png
new file mode 100644
index 000000000000..4c49ede5a0b7
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-4.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-5.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-5.png
new file mode 100644
index 000000000000..7f18a8a25134
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-5.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-6.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-6.png
new file mode 100644
index 000000000000..155a4f05fb74
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-6.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-7.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-7.png
new file mode 100644
index 000000000000..23b11e4b2b7a
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/automation-7.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-ownership-and.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-ownership-and.png
new file mode 100644
index 000000000000..b15cef41a7c4
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-ownership-and.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-tagging-glossary.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-tagging-glossary.png
new file mode 100644
index 000000000000..8497a169eb76
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/bulk-tagging-glossary.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/governance/metadata-propogation.png b/openmetadata-docs/images/v1.6/how-to-guides/governance/metadata-propogation.png
new file mode 100644
index 000000000000..92562cb3cfdd
Binary files /dev/null and b/openmetadata-docs/images/v1.6/how-to-guides/governance/metadata-propogation.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/quality/dq1.png b/openmetadata-docs/images/v1.6/how-to-guides/quality/dq1.png
index 230b7d4f5734..cfb44ce83470 100644
Binary files a/openmetadata-docs/images/v1.6/how-to-guides/quality/dq1.png and b/openmetadata-docs/images/v1.6/how-to-guides/quality/dq1.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/quality/quality1.png b/openmetadata-docs/images/v1.6/how-to-guides/quality/quality1.png
index a6516128cc3d..e8941dabdd91 100644
Binary files a/openmetadata-docs/images/v1.6/how-to-guides/quality/quality1.png and b/openmetadata-docs/images/v1.6/how-to-guides/quality/quality1.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/quality/test2.png b/openmetadata-docs/images/v1.6/how-to-guides/quality/test2.png
index d8999b1b24c9..a065fa655c4d 100644
Binary files a/openmetadata-docs/images/v1.6/how-to-guides/quality/test2.png and b/openmetadata-docs/images/v1.6/how-to-guides/quality/test2.png differ
diff --git a/openmetadata-docs/images/v1.6/how-to-guides/quality/test4.png b/openmetadata-docs/images/v1.6/how-to-guides/quality/test4.png
index efd405875f00..ab86982c3f5f 100644
Binary files a/openmetadata-docs/images/v1.6/how-to-guides/quality/test4.png and b/openmetadata-docs/images/v1.6/how-to-guides/quality/test4.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-1.png b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-1.png
new file mode 100644
index 000000000000..911a405173c4
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-1.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-3.png b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-3.png
new file mode 100644
index 000000000000..4dd4e3e6c20f
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-3.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-4.png b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-4.png
new file mode 100644
index 000000000000..38ec4e25fbb9
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/fixing-sql-queries-4.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/reviewing-generated-metadata.png b/openmetadata-docs/images/v1.6/metapilot/reviewing-generated-metadata.png
new file mode 100644
index 000000000000..fcf8da1451e3
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/reviewing-generated-metadata.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-1.png b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-1.png
new file mode 100644
index 000000000000..08d80960e158
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-1.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-2.png b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-2.png
new file mode 100644
index 000000000000..2ca38a1626f9
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-2.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-3.png b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-3.png
new file mode 100644
index 000000000000..44c07bd7ce68
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-3.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-4.png b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-4.png
new file mode 100644
index 000000000000..1f87fa2f48bb
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/setting-up-metapilot-4.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-1.png b/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-1.png
new file mode 100644
index 000000000000..94d9a064af10
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-1.png differ
diff --git a/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-2.png b/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-2.png
new file mode 100644
index 000000000000..b2114ad13347
Binary files /dev/null and b/openmetadata-docs/images/v1.6/metapilot/using-metapilot-chatbot-2.png differ
diff --git a/openmetadata-service/pom.xml b/openmetadata-service/pom.xml
index 3a907a8ae6ea..7aea1ae9a4d8 100644
--- a/openmetadata-service/pom.xml
+++ b/openmetadata-service/pom.xml
@@ -294,6 +294,10 @@
<groupId>io.dropwizard.modules</groupId>
<artifactId>dropwizard-web</artifactId>
+      <dependency>
+        <groupId>com.github.erosb</groupId>
+        <artifactId>everit-json-schema</artifactId>
+      </dependency>
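For orientation, `everit-json-schema` (the `com.github.erosb` fork) is a JSON Schema validator. This diff only declares the dependency, so the following is a hedged sketch of the library's typical API, not of how the service actually wires it in:

```java
import org.everit.json.schema.Schema;
import org.everit.json.schema.ValidationException;
import org.everit.json.schema.loader.SchemaLoader;
import org.json.JSONObject;

public class SchemaValidationDemo {
  public static void main(String[] args) {
    // A tiny schema requiring a string "name" property.
    JSONObject rawSchema =
        new JSONObject(
            "{\"type\": \"object\", \"required\": [\"name\"],"
                + " \"properties\": {\"name\": {\"type\": \"string\"}}}");
    Schema schema = SchemaLoader.load(rawSchema);

    schema.validate(new JSONObject("{\"name\": \"tableDiff\"}")); // passes silently

    try {
      schema.validate(new JSONObject("{}")); // missing required key
    } catch (ValidationException e) {
      System.out.println(e.getMessage()); // #: required key [name] not found
    }
  }
}
```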
@@ -710,7 +714,7 @@
org.testcontainers.containers.PostgreSQLContainer
postgres:15
- docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ docker.elastic.co/elasticsearch/elasticsearch:8.11.4
opensearchproject/opensearch:2.7.0
false
@@ -750,7 +754,7 @@
org.testcontainers.containers.MySQLContainer
mysql:8.3.0
- docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+ docker.elastic.co/elasticsearch/elasticsearch:8.11.4
opensearchproject/opensearch:2.7.0
false
diff --git a/openmetadata-service/src/main/java/org/openmetadata/csv/CsvUtil.java b/openmetadata-service/src/main/java/org/openmetadata/csv/CsvUtil.java
index 1d6ed073a1a9..d09e4b7ccc72 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/csv/CsvUtil.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/csv/CsvUtil.java
@@ -112,7 +112,7 @@ public static List fieldToInternalArray(String field) {
}
/**
- * Parses a field containing key-value pairs separated by semicolons, correctly handling quotes.
+ * Parses a field containing key-value pairs separated by FIELD_SEPARATOR, correctly handling quotes.
* Each key-value pair may also be enclosed in quotes, especially if it contains delimiter like (SEPARATOR , FIELD_SEPARATOR).
* Input Example:
* "key1:value1;key2:value2;\"key3:value;with;semicolon\""
@@ -124,7 +124,8 @@ public static List fieldToExtensionStrings(String field) throws IOExcept
return List.of();
}
- // Replace semicolons within quoted strings with a placeholder
+ // Case when semicolon is part of the fieldValue - Replace semicolons within quoted strings with
+ // a placeholder
String preprocessedField =
Pattern.compile("\"([^\"]*)\"") // Matches content inside double quotes
.matcher(field)
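The preprocessing above is a small regex trick: protect separators that sit inside quoted segments with a placeholder, let the parser split on the real separator, then restore. A minimal standalone sketch of just that step (class and variable names are illustrative):

```java
import java.util.regex.Pattern;

public class SemicolonPlaceholderDemo {
  public static void main(String[] args) {
    String field = "key1:value1;key2:value2;\"key3:value;with;semicolon\"";

    // Replace semicolons inside double quotes with a placeholder so a later
    // split on ';' only hits real pair separators.
    String preprocessed =
        Pattern.compile("\"([^\"]*)\"")
            .matcher(field)
            .replaceAll(mr -> "\"" + mr.group(1).replace(";", "__SEMICOLON__") + "\"");

    System.out.println(preprocessed);
    // key1:value1;key2:value2;"key3:value__SEMICOLON__with__SEMICOLON__semicolon"
  }
}
```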
@@ -146,9 +147,7 @@ public static List fieldToExtensionStrings(String field) throws IOExcept
.flatMap(CSVRecord::stream)
.map(
value ->
- value
- .replace("__SEMICOLON__", ";")
- .replace("\\n", "\n")) // Restore original semicolons and newlines
+ value.replace("__SEMICOLON__", ";")) // Restore original semicolons
.map(
value ->
value.startsWith("\"") && value.endsWith("\"") // Remove outer quotes if present
@@ -158,6 +157,48 @@ public static List fieldToExtensionStrings(String field) throws IOExcept
}
}
+ /**
+ * Parses a field containing column values separated by SEPARATOR, correctly handling quotes.
+ * Each value may be enclosed in quotes, especially if it contains a delimiter like SEPARATOR.
+ * Input Example:
+ * "value1,value2,\"value,with,comma\""
+ * Output: [value1, value2, value,with,comma]
+ *
+ */
+ public static List<String> fieldToColumns(String field) throws IOException {
+ if (field == null || field.isBlank()) {
+ return Collections.emptyList();
+ }
+
+ // Case when comma is part of the columnValue - Replace commas within quoted strings with a
+ // placeholder
+ String preprocessedField =
+ Pattern.compile("\"([^\"]*)\"")
+ .matcher(field)
+ .replaceAll(mr -> "\"" + mr.group(1).replace(",", "__COMMA__") + "\"");
+
+ preprocessedField = preprocessedField.replace("\n", "\\n").replace("\"", "\\\"");
+
+ CSVFormat format = CSVFormat.DEFAULT.withDelimiter(',').withQuote('"').withEscape('\\');
+
+ List<String> columns;
+ try (CSVParser parser = CSVParser.parse(new StringReader(preprocessedField), format)) {
+ columns =
+ parser.getRecords().stream()
+ .flatMap(CSVRecord::stream)
+ .map(value -> value.replace("__COMMA__", ","))
+ .map(
+ value ->
+ value.startsWith("\"")
+ && value.endsWith("\"") // Remove outer quotes if present
+ ? value.substring(1, value.length() - 1)
+ : value)
+ .collect(Collectors.toList());
+ }
+
+ return columns;
+ }
+
public static String quote(String field) {
return String.format("\"%s\"", field);
}
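A short usage sketch of the new `fieldToColumns` helper, matching its Javadoc above (the demo class is hypothetical; the method and its behavior come from this diff):

```java
import java.io.IOException;
import java.util.List;
import org.openmetadata.csv.CsvUtil;

public class FieldToColumnsDemo {
  public static void main(String[] args) throws IOException {
    // Quoted values may embed the ',' separator; outer quotes are stripped.
    List<String> columns = CsvUtil.fieldToColumns("value1,value2,\"value,with,comma\"");
    System.out.println(columns); // [value1, value2, value,with,comma]

    // Null or blank input yields an empty list.
    System.out.println(CsvUtil.fieldToColumns("   ")); // []
  }
}
```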
@@ -270,6 +311,13 @@ private static String quoteCsvField(String str) {
return str;
}
+ private static String quoteCsvFieldForSeparator(String str) {
+ if (str.contains(SEPARATOR)) {
+ return quote(str);
+ }
+ return str;
+ }
+
public static List<String> addExtension(List<String> csvRecord, Object extension) {
if (extension == null) {
csvRecord.add(null);
@@ -310,6 +358,8 @@ private static String formatMapValue(Map valueMap) {
return formatEntityReference(valueMap);
} else if (isTimeInterval(valueMap)) {
return formatTimeInterval(valueMap);
+ } else if (isTableType(valueMap)) {
+ return formatTableRows(valueMap);
}
return valueMap.toString();
@@ -320,11 +370,7 @@ private static String formatListValue(List> list) {
return "";
}
- if (list.get(0) instanceof Map && isEnumWithDescriptions((Map<String, Object>) list.get(0))) {
- return list.stream()
- .map(item -> ((Map<String, Object>) item).get("key").toString())
- .collect(Collectors.joining(INTERNAL_ARRAY_SEPARATOR));
- } else if (list.get(0) instanceof Map) {
+ if (list.get(0) instanceof Map) {
return list.stream()
.map(item -> formatMapValue((Map<String, Object>) item))
.collect(Collectors.joining(INTERNAL_ARRAY_SEPARATOR));
@@ -343,8 +389,8 @@ private static boolean isTimeInterval(Map valueMap) {
return valueMap.containsKey("start") && valueMap.containsKey("end");
}
- private static boolean isEnumWithDescriptions(Map<String, Object> valueMap) {
- return valueMap.containsKey("key") && valueMap.containsKey("description");
+ private static boolean isTableType(Map<String, Object> valueMap) {
+ return valueMap.containsKey("rows") && valueMap.containsKey("columns");
}
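With `isEnumWithDescriptions` gone, `formatMapValue` now recognizes table-typed custom property values by the presence of both `rows` and `columns` keys. A small sketch of that predicate (the predicate is verbatim from this diff; the row shape shown is an assumed example):

```java
import java.util.List;
import java.util.Map;

public class TableTypeCheckDemo {
  // Mirrors the new isTableType predicate: a table-type custom property value
  // carries both its column names and its row data.
  static boolean isTableType(Map<String, Object> valueMap) {
    return valueMap.containsKey("rows") && valueMap.containsKey("columns");
  }

  public static void main(String[] args) {
    Map<String, Object> tableValue =
        Map.of(
            "columns", List.of("region", "owner"),
            "rows", List.of(Map.of("region", "emea", "owner", "alice")));
    System.out.println(isTableType(tableValue)); // true
    System.out.println(isTableType(Map.of("start", 1L, "end", 2L))); // false
  }
}
```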
private static String formatEntityReference(Map valueMap) {
@@ -354,4 +400,19 @@ private static String formatEntityReference(Map valueMap) {
private static String formatTimeInterval(Map valueMap) {
return valueMap.get("start") + ENTITY_TYPE_SEPARATOR + valueMap.get("end");
}
+
+ private static String formatTableRows(Map<String, Object> valueMap) {
+ List<String> columns = (List<String>) valueMap.get("columns");
+ List