Skip to content

Commit

Permalink
Refactor endpoint handling and update GCS bucket configuration
Browse files (browse the repository at this point in the history)
Signed-off-by: Marcel Coetzee <[email protected]>
  • Loading branch information
Pipboyguy committed May 28, 2024
1 parent 7a43618 commit abd87f8
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
19 changes: 15 additions & 4 deletions dlt/destinations/impl/clickhouse/clickhouse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import re
from copy import deepcopy
from textwrap import dedent
from typing import ClassVar, Optional, Dict, List, Sequence, cast, Tuple
from urllib.parse import urlparse

Expand Down Expand Up @@ -201,13 +202,23 @@ def __init__(
compression = "none" if config.get("data_writer.disable_compression") else "gz"

if bucket_scheme in ("s3", "gs", "gcs"):
# get auth and bucket url
bucket_http_url = convert_storage_to_http_scheme(bucket_url)
access_key_id: str = None
secret_access_key: str = None
if isinstance(staging_credentials, AwsCredentialsWithoutDefaults):
bucket_http_url = convert_storage_to_http_scheme(
bucket_url, endpoint=staging_credentials.endpoint_url
)
access_key_id = staging_credentials.aws_access_key_id
secret_access_key = staging_credentials.aws_secret_access_key
else:
raise LoadJobTerminalException(
file_path,
dedent(
"""
Google Cloud Storage buckets must be configured using the S3 compatible access pattern.
Please provide the necessary S3 credentials (access key ID and secret access key), to access the GCS bucket through the S3 API.
Refer to https://dlthub.com/docs/dlt-ecosystem/destinations/filesystem#using-s3-compatible-storage.
""",
).strip(),
)

auth = "NOSIGN"
if access_key_id and secret_access_key:
Expand Down
3 changes: 1 addition & 2 deletions dlt/destinations/impl/clickhouse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ def convert_storage_to_http_scheme(
protocol = "https" if use_https else "http"

if endpoint:
domain = endpoint
domain = endpoint.replace("https://", "").replace("http://", "")
elif region and parsed_url.scheme == "s3":
domain = f"s3-{region}.amazonaws.com"
else:
# TODO: Incorporate dlt.config endpoint.
storage_domains = {
"s3": "s3.amazonaws.com",
"gs": "storage.googleapis.com",
Expand Down

0 comments on commit abd87f8

Please sign in to comment.