From abd87f82d3df31b2346f0e4ca2f780ea69a97596 Mon Sep 17 00:00:00 2001
From: Marcel Coetzee
Date: Tue, 28 May 2024 20:05:06 +0200
Subject: [PATCH] Refactor endpoint handling and update GCS bucket configuration

Signed-off-by: Marcel Coetzee
---
 .../impl/clickhouse/clickhouse.py          | 19 +++++++++++++++----
 dlt/destinations/impl/clickhouse/utils.py  |  3 +--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/dlt/destinations/impl/clickhouse/clickhouse.py b/dlt/destinations/impl/clickhouse/clickhouse.py
index 193888bdb4..cf1f1bc857 100644
--- a/dlt/destinations/impl/clickhouse/clickhouse.py
+++ b/dlt/destinations/impl/clickhouse/clickhouse.py
@@ -1,6 +1,7 @@
 import os
 import re
 from copy import deepcopy
+from textwrap import dedent
 from typing import ClassVar, Optional, Dict, List, Sequence, cast, Tuple
 from urllib.parse import urlparse

@@ -201,13 +202,23 @@ def __init__(
         compression = "none" if config.get("data_writer.disable_compression") else "gz"

         if bucket_scheme in ("s3", "gs", "gcs"):
-            # get auth and bucket url
-            bucket_http_url = convert_storage_to_http_scheme(bucket_url)
-            access_key_id: str = None
-            secret_access_key: str = None
             if isinstance(staging_credentials, AwsCredentialsWithoutDefaults):
+                bucket_http_url = convert_storage_to_http_scheme(
+                    bucket_url, endpoint=staging_credentials.endpoint_url
+                )
                 access_key_id = staging_credentials.aws_access_key_id
                 secret_access_key = staging_credentials.aws_secret_access_key
+            else:
+                raise LoadJobTerminalException(
+                    file_path,
+                    dedent(
+                        """
+                        Google Cloud Storage buckets must be configured using the S3-compatible access pattern.
+                        Please provide the necessary S3 credentials (access key ID and secret access key) to access the GCS bucket through the S3 API.
+                        Refer to https://dlthub.com/docs/dlt-ecosystem/destinations/filesystem#using-s3-compatible-storage.
+                        """,
+                    ).strip(),
+                )

             auth = "NOSIGN"
             if access_key_id and secret_access_key:
diff --git a/dlt/destinations/impl/clickhouse/utils.py b/dlt/destinations/impl/clickhouse/utils.py
index b0b06909f9..0e2fa3db00 100644
--- a/dlt/destinations/impl/clickhouse/utils.py
+++ b/dlt/destinations/impl/clickhouse/utils.py
@@ -25,11 +25,10 @@ def convert_storage_to_http_scheme(
     protocol = "https" if use_https else "http"

     if endpoint:
-        domain = endpoint
+        domain = endpoint.replace("https://", "").replace("http://", "")
     elif region and parsed_url.scheme == "s3":
         domain = f"s3-{region}.amazonaws.com"
     else:
-        # TODO: Incorporate dlt.config endpoint.
         storage_domains = {
             "s3": "s3.amazonaws.com",
             "gs": "storage.googleapis.com",
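
Note (illustration only, not part of the patch): a minimal sketch of the endpoint
normalization that utils.py now performs. normalize_endpoint is a hypothetical
stand-alone helper written for this note; in the patch the same two .replace()
calls run inline in convert_storage_to_http_scheme, where use_https selects the
protocol that gets prefixed.

def normalize_endpoint(endpoint: str, use_https: bool = False) -> str:
    protocol = "https" if use_https else "http"
    # An S3-compatible endpoint_url (e.g. the one carried by
    # AwsCredentialsWithoutDefaults) may already include a scheme; strip it so
    # the chosen protocol is applied exactly once, rather than producing
    # "http://https://storage.googleapis.com".
    domain = endpoint.replace("https://", "").replace("http://", "")
    return f"{protocol}://{domain}"

assert normalize_endpoint("https://storage.googleapis.com") == "http://storage.googleapis.com"
assert normalize_endpoint("storage.googleapis.com") == "http://storage.googleapis.com"

Both assertions pass: whether or not the configured endpoint carries a scheme,
the resulting HTTP URL ends up with a single, explicit protocol.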