From 614a83f41c72c9a5198c0b258bfbe57b34dced4f Mon Sep 17 00:00:00 2001
From: Russ Allbery <rra@lsst.org>
Date: Tue, 28 Jun 2022 11:21:00 -0700
Subject: [PATCH 1/4] Use per-thread Google storage clients

We're seeing problems where crawlspace locks up and stops answering
any requests after a small amount of traffic.  The backtrace is a
timeout in the handler.  This may be due to reusing Google Cloud
Storage clients across threads, resulting in cross-thread confusion
about responses to requests.  Try using per-thread instances via
context variables to see if that helps.
---
 src/crawlspace/dependencies/gcs.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/crawlspace/dependencies/gcs.py b/src/crawlspace/dependencies/gcs.py
index 721c43c..b873ba4 100644
--- a/src/crawlspace/dependencies/gcs.py
+++ b/src/crawlspace/dependencies/gcs.py
@@ -2,12 +2,14 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from contextvars import ContextVar
 
 from google.cloud import storage
 
 from ..config import config
 
+_GCS_CLIENT: ContextVar[storage.Client] = ContextVar("_GCS_CLIENT")
+
 __all__ = [
     "GCSClientDependency",
     "gcs_client_dependency",
@@ -17,14 +19,13 @@
 class GCSClientDependency:
     """Provides a `google.cloud.storage.Client` as a dependency."""
 
-    def __init__(self) -> None:
-        self.gcs: Optional[storage.Client] = None
-
     async def __call__(self) -> storage.client.Client:
         """Return the cached `google.cloud.storage.Client`."""
-        if not self.gcs:
-            self.gcs = storage.Client(project=config.gcs_project)
-        return self.gcs
+        client = _GCS_CLIENT.get(None)
+        if not client:
+            client = storage.Client(project=config.gcs_project)
+            _GCS_CLIENT.set(client)
+        return client
 
 
 gcs_client_dependency = GCSClientDependency()

From 5be198d99aa996257c07cc7f7d3bf31d085cb981 Mon Sep 17 00:00:00 2001
From: Russ Allbery <rra@lsst.org>
Date: Tue, 28 Jun 2022 11:25:10 -0700
Subject: [PATCH 2/4] Add debug logging for requests

Add debug logging to make it easier to debug problems in the future.
---
 src/crawlspace/handlers/external.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/crawlspace/handlers/external.py b/src/crawlspace/handlers/external.py
index 6db9776..0785efb 100644
--- a/src/crawlspace/handlers/external.py
+++ b/src/crawlspace/handlers/external.py
@@ -40,6 +40,7 @@ def get_file(
     etags: List[str] = Depends(etag_validation_dependency),
     logger: BoundLogger = Depends(logger_dependency),
 ) -> Response:
+    logger.debug("File request", path=path)
     if path == "":
         path = "index.html"
 
@@ -47,6 +48,7 @@ def get_file(
     try:
         crawlspace_file = file_service.get_file(path)
     except GCSFileNotFoundError:
+        logger.debug("File not found", path=path)
         raise HTTPException(status_code=404, detail="File not found")
     except Exception as e:
         logger.exception(f"Failed to retrieve {path}", error=str(e))
@@ -55,6 +57,7 @@ def get_file(
         )
 
     if crawlspace_file.blob.etag in etags:
+        logger.debug("File unchanged", path=path)
         return Response(
             status_code=304,
             content="",
@@ -62,6 +65,7 @@ def get_file(
             media_type=crawlspace_file.media_type,
         )
     else:
+        logger.debug("Returning file", path=path)
         return StreamingResponse(
             crawlspace_file.stream(),
             media_type=crawlspace_file.media_type,
@@ -82,6 +86,7 @@ def head_file(
     gcs: storage.Client = Depends(gcs_client_dependency),
     logger: BoundLogger = Depends(logger_dependency),
 ) -> Response:
+    logger.debug("Head request", path=path)
     if path == "":
         path = "index.html"
 
@@ -89,6 +94,7 @@ def head_file(
     try:
         crawlspace_file = file_service.get_file(path)
     except GCSFileNotFoundError:
+        logger.debug("File not found for head request", path=path)
         raise HTTPException(status_code=404, detail="File not found")
     except Exception as e:
         logger.exception(f"Failed to retrieve {path}", error=str(e))
@@ -96,6 +102,7 @@ def head_file(
             status_code=500, detail="Failed to retrieve file from GCS"
         )
 
+    logger.debug("Returning file metadata", path=path)
     return Response(
         status_code=200,
         content="",

From 2ad80a5cecaba19b96bef0d6b2299ac102120282 Mon Sep 17 00:00:00 2001
From: Russ Allbery <rra@lsst.org>
Date: Tue, 28 Jun 2022 11:36:13 -0700
Subject: [PATCH 3/4] Recreate GCS client on each request

See if the problem is still some sort of contention between clients
or problems with expiring credentials.
---
 src/crawlspace/dependencies/gcs.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/crawlspace/dependencies/gcs.py b/src/crawlspace/dependencies/gcs.py
index b873ba4..8c18ae6 100644
--- a/src/crawlspace/dependencies/gcs.py
+++ b/src/crawlspace/dependencies/gcs.py
@@ -21,11 +21,7 @@ class GCSClientDependency:
 
     async def __call__(self) -> storage.client.Client:
         """Return the cached `google.cloud.storage.Client`."""
-        client = _GCS_CLIENT.get(None)
-        if not client:
-            client = storage.Client(project=config.gcs_project)
-            _GCS_CLIENT.set(client)
-        return client
+        return storage.Client(project=config.gcs_project)
 
 
 gcs_client_dependency = GCSClientDependency()

From 911817197e87de55fc461e3fac1200df6a40e6e4 Mon Sep 17 00:00:00 2001
From: Russ Allbery <rra@lsst.org>
Date: Tue, 28 Jun 2022 11:48:19 -0700
Subject: [PATCH 4/4] Don't stream files from GCS

Download the full file and return it directly, in case streaming
is causing the deadlock issues.
---
 src/crawlspace/handlers/external.py | 8 +++++---
 src/crawlspace/services/file.py     | 4 ++++
 tests/support/gcs.py                | 3 +++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/crawlspace/handlers/external.py b/src/crawlspace/handlers/external.py
index 0785efb..6222db7 100644
--- a/src/crawlspace/handlers/external.py
+++ b/src/crawlspace/handlers/external.py
@@ -3,7 +3,7 @@
 from typing import List
 
 from fastapi import APIRouter, Depends, HTTPException, Path, Request, Response
-from fastapi.responses import RedirectResponse, StreamingResponse
+from fastapi.responses import RedirectResponse
 from google.cloud import storage
 from safir.dependencies.logger import logger_dependency
 from structlog.stdlib import BoundLogger
@@ -66,8 +66,10 @@ def get_file(
         )
     else:
         logger.debug("Returning file", path=path)
-        return StreamingResponse(
-            crawlspace_file.stream(),
+        data = crawlspace_file.download_as_bytes()
+        return Response(
+            status_code=200,
+            content=data,
             media_type=crawlspace_file.media_type,
             headers=crawlspace_file.headers,
         )
diff --git a/src/crawlspace/services/file.py b/src/crawlspace/services/file.py
index a1edfc5..ba29a4b 100644
--- a/src/crawlspace/services/file.py
+++ b/src/crawlspace/services/file.py
@@ -52,6 +52,10 @@ def from_blob(cls, path: str, blob: storage.Blob) -> CrawlspaceFile:
             media_type = guessed_type if guessed_type else "text/plain"
         return cls(blob=blob, headers=headers, media_type=media_type)
 
+    def download_as_bytes(self) -> bytes:
+        """Download the content from GCS."""
+        return self.blob.download_as_bytes()
+
     def stream(self) -> Iterator[bytes]:
         """Stream the content from GCS."""
         with self.blob.open("rb") as content:
diff --git a/tests/support/gcs.py b/tests/support/gcs.py
index b20533c..0d4a297 100644
--- a/tests/support/gcs.py
+++ b/tests/support/gcs.py
@@ -30,6 +30,9 @@ def __init__(self, name: str) -> None:
             self.updated = datetime.fromtimestamp(mtime, tz=timezone.utc)
             self.etag = str(self._path.stat().st_ino)
 
+    def download_as_bytes(self) -> bytes:
+        return self._path.read_bytes()
+
     def exists(self) -> bool:
         return self._exists