From c326b504c52c8a770125cd6b71e890c5a9703087 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Fri, 5 Jul 2024 17:17:19 -0700 Subject: [PATCH] Drop support for direct Butler Require client/server Butler and remove all of the code for doing our own file upload and URL signing. This massively simplifies the test suite and removes a flaky test with a timing bug. Also drop the configuration settings used only by direct Butler. --- changelog.d/20240705_171637_rra_DM_45137.md | 3 + src/datalinker/config.py | 31 +---- src/datalinker/handlers/external.py | 89 +++------------ tests/conftest.py | 42 +------ tests/handlers/external_test.py | 118 ++++---------------- 5 files changed, 39 insertions(+), 244 deletions(-) create mode 100644 changelog.d/20240705_171637_rra_DM_45137.md diff --git a/changelog.d/20240705_171637_rra_DM_45137.md b/changelog.d/20240705_171637_rra_DM_45137.md new file mode 100644 index 0000000..5eb7b36 --- /dev/null +++ b/changelog.d/20240705_171637_rra_DM_45137.md @@ -0,0 +1,3 @@ +### Backwards-incompatible changes + +- Drop support for direct Butler and require client/server Butler. diff --git a/src/datalinker/config.py b/src/datalinker/config.py index 686ce75..8b7a0c5 100644 --- a/src/datalinker/config.py +++ b/src/datalinker/config.py @@ -2,7 +2,6 @@ from __future__ import annotations -from enum import Enum from pathlib import Path from typing import Annotated @@ -12,18 +11,10 @@ __all__ = [ "Config", - "StorageBackend", "config", ] -class StorageBackend(Enum): - """Possible choices for a storage backend.""" - - GCS = "GCS" - S3 = "S3" - - class Config(BaseSettings): """Configuration for datalinker.""" @@ -57,19 +48,6 @@ class Config(BaseSettings): ), ] - storage_backend: Annotated[ - StorageBackend, - Field( - title="Storage backend", - description="Which storage backend to use for uploaded files", - ), - ] = StorageBackend.GCS - - s3_endpoint_url: Annotated[ - HttpUrl, - Field(title="Storage API URL", validation_alias="S3_ENDPOINT_URL"), - ] = HttpUrl("https://storage.googleapis.com") - # TODO(DM-42660): butler_repositories can be removed once there is a # release of daf_butler available that handles DAF_BUTLER_REPOSITORIES # itself. @@ -87,10 +65,7 @@ class Config(BaseSettings): ), ] = None - name: Annotated[ - str, - Field(title="Application name"), - ] = "datalinker" + name: Annotated[str, Field(title="Application name")] = "datalinker" path_prefix: Annotated[ str, @@ -113,9 +88,7 @@ class Config(BaseSettings): profile: Annotated[ Profile, - Field( - title="Application logging profile", - ), + Field(title="Application logging profile"), ] = Profile.production log_level: Annotated[ diff --git a/src/datalinker/handlers/external.py b/src/datalinker/handlers/external.py index abc1092..7bdc615 100644 --- a/src/datalinker/handlers/external.py +++ b/src/datalinker/handlers/external.py @@ -1,6 +1,5 @@ """Handlers for the app's external root, ``/datalinker/``.""" -from datetime import timedelta from email.message import Message from importlib.metadata import metadata from pathlib import Path @@ -8,11 +7,9 @@ from urllib.parse import urlencode, urlparse from uuid import UUID -from boto3 import client from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response from fastapi.responses import RedirectResponse from fastapi.templating import Jinja2Templates -from google.cloud import storage from lsst.daf.butler import LabeledButlerFactory from safir.dependencies.gafaelfawr import auth_delegated_token_dependency from safir.dependencies.logger import logger_dependency @@ -20,7 +17,7 @@ from safir.slack.webhook import SlackRouteErrorHandler from structlog.stdlib import BoundLogger -from ..config import StorageBackend, config +from ..config import config from ..constants import ( ADQL_COMPOUND_TABLE_REGEX, ADQL_FOREIGN_COLUMN_REGEX, @@ -256,19 +253,17 @@ def links( ) image_uri = butler.getURI(ref) logger.debug("Got image URI from Butler", image_uri=image_uri) - - expires_in = timedelta(hours=1) - - if image_uri.scheme in ("https", "http"): - # Butler server returns signed URLs directly, so no additional signing - # is required. - image_url = str(image_uri) - elif config.storage_backend == StorageBackend.GCS: - # If we are using a direct connection to the Butler database, the URIs - # will be S3 or GCS URIs that need to be signed. - image_url = _upload_to_gcs(str(image_uri), expires_in) - elif config.storage_backend == StorageBackend.S3: - image_url = _upload_to_s3(str(image_uri), expires_in) + if image_uri.scheme not in ("https", "http"): + logger.error("Image URL from Butler not signed", image_uri=image_uri) + raise HTTPException( + status_code=500, + detail=[ + { + "msg": "Image URL from Butler server was not signed", + "type": "invalid_butler_response", + } + ], + ) return _TEMPLATES.TemplateResponse( request, @@ -276,67 +271,9 @@ def links( { "cutout": ref.datasetType.name != "raw", "id": id, - "image_url": image_url, + "image_url": str(image_uri), "image_size": image_uri.size(), "cutout_sync_url": str(config.cutout_sync_url), }, media_type="application/x-votable+xml", ) - - -def _upload_to_gcs(image_uri: str, expiry: timedelta) -> str: - """Use GCS to upload a file and get a signed URL. - - Parameters - ---------- - image_uri - The URI of the file - expiry - Time that the URL will be valid - - Returns - ------- - str - The signed URL - """ - image_uri_parts = urlparse(image_uri) - storage_client = storage.Client() - bucket = storage_client.bucket(image_uri_parts.netloc) - blob = bucket.blob(image_uri_parts.path[1:]) - return blob.generate_signed_url( - version="v4", - # This URL is valid for one hour. - expiration=expiry, - # Allow only GET requests using this URL. - method="GET", - ) - - -def _upload_to_s3(image_uri: str, expiry: timedelta) -> str: - """Use S3 to upload a file and get a signed URL. - - Parameters - ---------- - image_uri - The URI of the file - expiry - Time that the URL will be valid - - Returns - ------- - str - The signed URL - """ - image_uri_parts = urlparse(image_uri) - bucket = image_uri_parts.netloc - key = image_uri_parts.path[1:] - - s3_client = client( - "s3", endpoint_url=str(config.s3_endpoint_url), region_name="us-east-1" - ) - - return s3_client.generate_presigned_url( - "get_object", - Params={"Bucket": bucket, "Key": key}, - ExpiresIn=expiry.total_seconds(), - ) diff --git a/tests/conftest.py b/tests/conftest.py index 5a660fb..79f14aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,21 +3,16 @@ from __future__ import annotations from collections.abc import AsyncIterator, Iterator -from datetime import timedelta from pathlib import Path -import boto3 import pytest import pytest_asyncio from asgi_lifespan import LifespanManager from fastapi import FastAPI from httpx import ASGITransport, AsyncClient -from moto import mock_aws -from pydantic import HttpUrl -from safir.testing.gcs import MockStorageClient, patch_google_storage from datalinker import main -from datalinker.config import StorageBackend, config +from datalinker.config import config from .support.butler import MockButler, patch_butler @@ -55,38 +50,3 @@ async def client(app: FastAPI) -> AsyncIterator[AsyncClient]: def mock_butler() -> Iterator[MockButler]: """Mock Butler for testing.""" yield from patch_butler() - - -@pytest.fixture -def s3(monkeypatch: pytest.MonkeyPatch) -> Iterator[boto3.client]: - """Mock out S3 for testing.""" - monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing") - monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") - monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") - monkeypatch.setenv("AWS_SECURITY_TOKEN", "testing") - monkeypatch.setenv("AWS_SESSION_TOKEN", "testing") - monkeypatch.setattr(config, "storage_backend", StorageBackend.S3) - monkeypatch.setattr( - config, "s3_endpoint_url", HttpUrl("https://s3.amazonaws.com/bucket") - ) - with mock_aws(): - yield boto3.client( - "s3", - endpoint_url=str(config.s3_endpoint_url), - region_name="us-east-1", - ) - - -@pytest.fixture -def mock_google_storage( - monkeypatch: pytest.MonkeyPatch, -) -> Iterator[MockStorageClient]: - """Mock out the Google Cloud Storage API.""" - monkeypatch.setattr(config, "storage_backend", StorageBackend.GCS) - monkeypatch.setattr( - config, "s3_endpoint_url", HttpUrl("https://storage.googleapis.com") - ) - yield from patch_google_storage( - expected_expiration=timedelta(hours=1), - bucket_name="some-bucket", - ) diff --git a/tests/handlers/external_test.py b/tests/handlers/external_test.py index 82f516c..27890b7 100644 --- a/tests/handlers/external_test.py +++ b/tests/handlers/external_test.py @@ -2,12 +2,10 @@ from __future__ import annotations -from datetime import timedelta from unittest.mock import patch from urllib.parse import parse_qs, urlparse from uuid import uuid4 -import boto3 import pytest from httpx import AsyncClient from jinja2 import Environment, PackageLoader, select_autoescape @@ -191,65 +189,28 @@ async def test_timeseries_detail(client: AsyncClient) -> None: @pytest.mark.asyncio -async def test_links_gcs( - client: AsyncClient, mock_butler: MockButler, mock_google_storage: None -) -> None: - label = "label-gcs" - url = f"https://example.com/{mock_butler.uuid!s}" - - await _test_links(client, mock_butler, label, url) - - -@pytest.mark.asyncio -async def test_links_s3( - client: AsyncClient, mock_butler: MockButler, s3: boto3.client -) -> None: - label = "label-s3" - - expires = timedelta(hours=1) - url = s3.generate_presigned_url( - "get_object", - Params={"Bucket": "some-bucket", "Key": str(mock_butler.uuid)}, - ExpiresIn=expires.total_seconds(), - ) - - await _test_links(client, mock_butler, label, url) - - -@pytest.mark.asyncio -async def test_links_https( - client: AsyncClient, mock_butler: MockButler, s3: boto3.client -) -> None: - label = "label-http" - - # The URL is already signed, so it should be passed through unchanged - url = ( +async def test_links(client: AsyncClient, mock_butler: MockButler) -> None: + mock_butler.mock_uri = ( f"https://presigned-url.example.com/{mock_butler.uuid!s}" "?X-Amz-Signature=abcdef" ) - mock_butler.mock_uri = url - await _test_links(client, mock_butler, label, url) - -async def _test_links( - client: AsyncClient, mock_butler: MockButler, label: str, url: str -) -> None: - # Note: use iD to test the IVOA requirement of - # case insensitive parameters. + # Use iD to test the IVOA requirement of case insensitive parameters. r = await client.get( "/api/datalink/links", - params={"iD": f"butler://{label}/{mock_butler.uuid!s}"}, + params={"iD": f"butler://label-http/{mock_butler.uuid!s}"}, ) assert r.status_code == 200 + # The URL is already signed, so it should be passed through unchanged env = Environment( loader=PackageLoader("datalinker"), autoescape=select_autoescape() ) template = env.get_template("links.xml") expected = template.render( cutout=True, - id=f"butler://{label}/{mock_butler.uuid!s}", - image_url=url, + id=f"butler://label-http/{mock_butler.uuid!s}", + image_url=mock_butler.mock_uri, image_size=1234, cutout_sync_url=config.cutout_sync_url, ) @@ -260,7 +221,7 @@ async def _test_links( r = await client.get( "/api/datalink/links", params={ - "id": f"butler://{label}/{mock_butler.uuid!s}", + "id": f"butler://label-http/{mock_butler.uuid!s}", "responseformat": response_format, }, ) @@ -269,53 +230,28 @@ async def _test_links( @pytest.mark.asyncio -async def test_links_raw_gcs( - client: AsyncClient, mock_butler: MockButler, mock_google_storage: None -) -> None: - await _test_links_raw( - client, - mock_butler, - "label-gcs-raw", - f"https://example.com/{mock_butler.uuid!s}", - ) - - -@pytest.mark.asyncio -async def test_links_raw_s3( - client: AsyncClient, mock_butler: MockButler, s3: boto3.client -) -> None: - label = "label-s3-raw" - - expires = timedelta(hours=1) - url = s3.generate_presigned_url( - "get_object", - Params={"Bucket": "some-bucket", "Key": str(mock_butler.uuid)}, - ExpiresIn=expires.total_seconds(), +async def test_links_raw(client: AsyncClient, mock_butler: MockButler) -> None: + mock_butler.is_raw = True + mock_butler.mock_uri = ( + f"https://presigned-url.example.com/{mock_butler.uuid!s}" + "?X-Amz-Signature=abcdef" ) - await _test_links_raw(client, mock_butler, label, url) - - -async def _test_links_raw( - client: AsyncClient, mock_butler: MockButler, label: str, url: str -) -> None: - mock_butler.is_raw = True - # Note: use iD to test the IVOA requirement of - # case insensitive parameters. r = await client.get( "/api/datalink/links", - params={"iD": f"butler://{label}/{mock_butler.uuid!s}"}, + params={"id": f"butler://label-raw/{mock_butler.uuid!s}"}, ) assert r.status_code == 200 + # The URL is already signed, so it should be passed through unchanged env = Environment( loader=PackageLoader("datalinker"), autoescape=select_autoescape() ) template = env.get_template("links.xml") expected = template.render( cutout=False, - id=f"butler://{label}/{mock_butler.uuid!s}", - image_url=url, + id=f"butler://label-raw/{mock_butler.uuid!s}", + image_url=mock_butler.mock_uri, image_size=1234, cutout_sync_url=config.cutout_sync_url, ) @@ -324,20 +260,7 @@ async def _test_links_raw( @pytest.mark.asyncio -async def test_links_errors_gcs( - client: AsyncClient, mock_butler: MockButler, mock_google_storage: None -) -> None: - await _test_links_errors(client, mock_butler) - - -@pytest.mark.asyncio -async def test_links_errors_s3( - client: AsyncClient, mock_butler: MockButler, s3: None -) -> None: - await _test_links_errors(client, mock_butler) - - -async def _test_links_errors( +async def test_links_errors( client: AsyncClient, mock_butler: MockButler ) -> None: uuid = uuid4() @@ -369,9 +292,8 @@ async def _test_links_errors( async def test_links_bad_repo(client: AsyncClient) -> None: uuid = uuid4() - # Rather than using the regular mock Butler, mock it out to raise - # KeyError from the constructor. This simulates an invalid - # label. + # Rather than using the regular mock Butler, mock it out to raise KeyError + # from the constructor. This simulates an invalid label. with patch.object(LabeledButlerFactory, "create_butler") as mock_butler: mock_butler.side_effect = KeyError r = await client.get(