Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-45137: Drop support for direct Butler #159

Merged
merged 1 commit into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog.d/20240705_171637_rra_DM_45137.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### Backwards-incompatible changes

- Drop support for direct Butler and require client/server Butler.
31 changes: 2 additions & 29 deletions src/datalinker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

from enum import Enum
from pathlib import Path
from typing import Annotated

Expand All @@ -12,18 +11,10 @@

__all__ = [
"Config",
"StorageBackend",
"config",
]


class StorageBackend(Enum):
"""Possible choices for a storage backend."""

GCS = "GCS"
S3 = "S3"


class Config(BaseSettings):
"""Configuration for datalinker."""

Expand Down Expand Up @@ -57,19 +48,6 @@ class Config(BaseSettings):
),
]

storage_backend: Annotated[
StorageBackend,
Field(
title="Storage backend",
description="Which storage backend to use for uploaded files",
),
] = StorageBackend.GCS

s3_endpoint_url: Annotated[
HttpUrl,
Field(title="Storage API URL", validation_alias="S3_ENDPOINT_URL"),
] = HttpUrl("https://storage.googleapis.com")

# TODO(DM-42660): butler_repositories can be removed once there is a
# release of daf_butler available that handles DAF_BUTLER_REPOSITORIES
# itself.
Expand All @@ -87,10 +65,7 @@ class Config(BaseSettings):
),
] = None

name: Annotated[
str,
Field(title="Application name"),
] = "datalinker"
name: Annotated[str, Field(title="Application name")] = "datalinker"

path_prefix: Annotated[
str,
Expand All @@ -113,9 +88,7 @@ class Config(BaseSettings):

profile: Annotated[
Profile,
Field(
title="Application logging profile",
),
Field(title="Application logging profile"),
] = Profile.production

log_level: Annotated[
Expand Down
89 changes: 13 additions & 76 deletions src/datalinker/handlers/external.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
"""Handlers for the app's external root, ``/datalinker/``."""

from datetime import timedelta
from email.message import Message
from importlib.metadata import metadata
from pathlib import Path
from typing import Annotated, Literal, cast
from urllib.parse import urlencode, urlparse
from uuid import UUID

from boto3 import client
from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response
from fastapi.responses import RedirectResponse
from fastapi.templating import Jinja2Templates
from google.cloud import storage
from lsst.daf.butler import LabeledButlerFactory
from safir.dependencies.gafaelfawr import auth_delegated_token_dependency
from safir.dependencies.logger import logger_dependency
from safir.metadata import Metadata, get_project_url
from safir.slack.webhook import SlackRouteErrorHandler
from structlog.stdlib import BoundLogger

from ..config import StorageBackend, config
from ..config import config
from ..constants import (
ADQL_COMPOUND_TABLE_REGEX,
ADQL_FOREIGN_COLUMN_REGEX,
Expand Down Expand Up @@ -256,87 +253,27 @@ def links(
)
image_uri = butler.getURI(ref)
logger.debug("Got image URI from Butler", image_uri=image_uri)

expires_in = timedelta(hours=1)

if image_uri.scheme in ("https", "http"):
# Butler server returns signed URLs directly, so no additional signing
# is required.
image_url = str(image_uri)
elif config.storage_backend == StorageBackend.GCS:
# If we are using a direct connection to the Butler database, the URIs
# will be S3 or GCS URIs that need to be signed.
image_url = _upload_to_gcs(str(image_uri), expires_in)
elif config.storage_backend == StorageBackend.S3:
image_url = _upload_to_s3(str(image_uri), expires_in)
if image_uri.scheme not in ("https", "http"):
logger.error("Image URL from Butler not signed", image_uri=image_uri)
raise HTTPException(
status_code=500,
detail=[
{
"msg": "Image URL from Butler server was not signed",
"type": "invalid_butler_response",
}
],
)

return _TEMPLATES.TemplateResponse(
request,
"links.xml",
{
"cutout": ref.datasetType.name != "raw",
"id": id,
"image_url": image_url,
"image_url": str(image_uri),
"image_size": image_uri.size(),
"cutout_sync_url": str(config.cutout_sync_url),
},
media_type="application/x-votable+xml",
)


def _upload_to_gcs(image_uri: str, expiry: timedelta) -> str:
    """Generate a signed GCS download URL for an existing object.

    Despite the historical name, nothing is uploaded here: the object
    identified by ``image_uri`` is assumed to already exist in Google Cloud
    Storage, and this function only mints a time-limited signed URL for
    retrieving it.

    Parameters
    ----------
    image_uri
        The URI of the file (bucket name in the URI authority, object key
        in the path).
    expiry
        Time that the URL will be valid.

    Returns
    -------
    str
        The signed URL.
    """
    # Bucket comes from the URI netloc; the object key is the URI path with
    # its leading "/" stripped.
    image_uri_parts = urlparse(image_uri)
    storage_client = storage.Client()
    bucket = storage_client.bucket(image_uri_parts.netloc)
    blob = bucket.blob(image_uri_parts.path[1:])
    return blob.generate_signed_url(
        version="v4",
        # Valid for the caller-supplied expiry (one hour at the call site
        # visible in this file).
        expiration=expiry,
        # Allow only GET requests using this URL.
        method="GET",
    )


def _upload_to_s3(image_uri: str, expiry: timedelta) -> str:
    """Generate a presigned S3 download URL for an existing object.

    Despite the historical name, nothing is uploaded here: the object
    identified by ``image_uri`` is assumed to already exist in the S3
    bucket, and this function only mints a time-limited presigned URL for
    retrieving it.

    Parameters
    ----------
    image_uri
        The URI of the file (bucket name in the URI authority, object key
        in the path).
    expiry
        Time that the URL will be valid.

    Returns
    -------
    str
        The signed URL.
    """
    image_uri_parts = urlparse(image_uri)
    bucket = image_uri_parts.netloc
    # Strip the leading "/" so the key matches the stored object name.
    key = image_uri_parts.path[1:]

    s3_client = client(
        "s3", endpoint_url=str(config.s3_endpoint_url), region_name="us-east-1"
    )

    return s3_client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": key},
        # boto3 expects ExpiresIn as an integer number of seconds;
        # timedelta.total_seconds() returns a float, which can produce a
        # malformed X-Amz-Expires value in the signed URL.
        ExpiresIn=int(expiry.total_seconds()),
    )
42 changes: 1 addition & 41 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,16 @@
from __future__ import annotations

from collections.abc import AsyncIterator, Iterator
from datetime import timedelta
from pathlib import Path

import boto3
import pytest
import pytest_asyncio
from asgi_lifespan import LifespanManager
from fastapi import FastAPI
from httpx import ASGITransport, AsyncClient
from moto import mock_aws
from pydantic import HttpUrl
from safir.testing.gcs import MockStorageClient, patch_google_storage

from datalinker import main
from datalinker.config import StorageBackend, config
from datalinker.config import config

from .support.butler import MockButler, patch_butler

Expand Down Expand Up @@ -55,38 +50,3 @@ async def client(app: FastAPI) -> AsyncIterator[AsyncClient]:
def mock_butler() -> Iterator[MockButler]:
"""Mock Butler for testing."""
yield from patch_butler()


@pytest.fixture
def s3(monkeypatch: pytest.MonkeyPatch) -> Iterator[boto3.client]:
    """Provide a mocked S3 client wired to the application settings."""
    # Dummy AWS credentials so boto3/moto never touch real credentials.
    fake_credentials = {
        "AWS_ACCESS_KEY_ID": "testing",
        "AWS_DEFAULT_REGION": "us-east-1",
        "AWS_SECRET_ACCESS_KEY": "testing",
        "AWS_SECURITY_TOKEN": "testing",
        "AWS_SESSION_TOKEN": "testing",
    }
    for name, value in fake_credentials.items():
        monkeypatch.setenv(name, value)
    # Point the application configuration at the (mocked) S3 backend.
    monkeypatch.setattr(config, "storage_backend", StorageBackend.S3)
    monkeypatch.setattr(
        config, "s3_endpoint_url", HttpUrl("https://s3.amazonaws.com/bucket")
    )
    with mock_aws():
        yield boto3.client(
            "s3",
            endpoint_url=str(config.s3_endpoint_url),
            region_name="us-east-1",
        )


@pytest.fixture
def mock_google_storage(
    monkeypatch: pytest.MonkeyPatch,
) -> Iterator[MockStorageClient]:
    """Replace the Google Cloud Storage API with a mock for tests."""
    # Point the application configuration at the GCS backend before the
    # mock storage client is installed.
    endpoint = HttpUrl("https://storage.googleapis.com")
    monkeypatch.setattr(config, "storage_backend", StorageBackend.GCS)
    monkeypatch.setattr(config, "s3_endpoint_url", endpoint)
    yield from patch_google_storage(
        expected_expiration=timedelta(hours=1),
        bucket_name="some-bucket",
    )
Loading