Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-43852: Get xsrf token from each redirect #77

Merged
merged 4 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,22 @@

<!-- scriv-insert-here -->

<a id='changelog-0.12.0'></a>

## 0.12.0 (2024-05-15)

### New features

- Create Gafaelfawr service tokens instead of user tokens for authenticated calls to JupyterHub and JupyterLab. Gafaelfawr is standardizing on the new service token type for all service-to-service authentication.

- Reduce the frequency of keep-alive tasks for the Noteburst workers from once every 5 minutes to once every 15 minutes. This is intended to reduce noise in the logging output.

### Bug fixes

- Correctly extract cookies from the middle of the redirect chain caused by initial authentication to a Nublado lab. This fixes failures seen with labs containing JupyterHub 4.1.3.

<a id='changelog-0.11.0'></a>

## 0.11.0 (2024-04-24)

### Bug fixes
Expand Down
3 changes: 0 additions & 3 deletions changelog.d/20240501_080604_rra_DM_44136.md

This file was deleted.

64 changes: 53 additions & 11 deletions src/noteburst/jupyterclient/jupyterlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import httpx
import websockets
from httpx import Cookies, Timeout
from httpx import Cookies, Response, Timeout
from pydantic import BaseModel, Field
from structlog import BoundLogger
from websockets.client import WebSocketClientProtocol
Expand Down Expand Up @@ -463,6 +463,23 @@ def url_for_websocket(self, path: str) -> str:
http_url = self.url_for(path)
return urlparse(http_url)._replace(scheme="wss").geturl()

def _extract_xsrf(self, response: Response) -> str | None:
    """Return the ``_xsrf`` cookie value carried by a single response.

    A fresh, empty cookie jar is populated from ``response`` and then
    queried, so cookies already held by the HTTP client are not consulted.

    Parameters
    ----------
    response
        Response from a Jupyter server.

    Returns
    -------
    str or None
        Value of the ``_xsrf`` cookie, or `None` if the jar built from
        this response does not contain one.
    """
    jar = Cookies()
    jar.extract_cookies(response)
    token = jar.get("_xsrf")
    return token

async def log_into_hub(self) -> None:
"""Log into JupyterHub or raise a JupyterError."""
self.logger.debug("Logging into JupyterHub")
Expand All @@ -472,25 +489,42 @@ async def log_into_hub(self) -> None:
# to set cookies.
if r.status_code >= 400:
raise JupyterError.from_response(self.user.username, r)
cookies = Cookies()
cookies.extract_cookies(r)
xsrf = cookies.get("_xsrf")
xsrf = self._extract_xsrf(r)
if xsrf:
self._hub_xsrf = xsrf

async def log_into_lab(self) -> None:
"""Log into JupyterLab or raise a JupyterError."""
self.logger.debug("Logging into JupyterLab")
url = self.url_for(f"user/{self.user.username}/lab")
# Setting ``Sec-Fetch-Mode`` is not currently required, but it
# suppresses an annoying error message in the lab logs.
headers = {"Sec-Fetch-Mode": "navigate"}
r = await self.http_client.get(
self.url_for(f"user/{self.user.username}/lab")
url, headers=headers, follow_redirects=False
)
if r.status_code != 200:
raise JupyterError.from_response(self.user.username, r)
cookies = Cookies()
cookies.extract_cookies(r)
xsrf = cookies.get("_xsrf")
if xsrf:
while r.is_redirect:
xsrf = self._extract_xsrf(r)
if xsrf and xsrf != self._hub_xsrf:
self._lab_xsrf = xsrf
next_url = urljoin(url, r.headers["Location"])
r = await self.http_client.get(
next_url, headers=headers, follow_redirects=False
)
r.raise_for_status()
xsrf = self._extract_xsrf(r)
if xsrf and xsrf != self._hub_xsrf:
self._lab_xsrf = xsrf
if not self._lab_xsrf:
raise JupyterError(
reason="No XSRF token found for JupyterLab",
url=url,
username=self.user.username,
status=r.status_code,
method="GET",
body=r.text,
)
self.logger.debug("Logged into JupyterLab with XSRF token")

async def spawn_lab(self) -> JupyterImage:
"""Spawn a JupyterLab pod."""
Expand Down Expand Up @@ -629,6 +663,7 @@ async def open_lab_session(
Send and receive messages from JupyterLab using the ``websocket``
property on `JupyterLabSession`.
"""
self.logger.debug("Opening JupyterLab session")
session_url = self.url_for(f"user/{self.user.username}/api/sessions")
session_type = "notebook" if notebook_name else "console"
body = {
Expand All @@ -640,6 +675,8 @@ async def open_lab_session(
headers = {}
if self._lab_xsrf:
headers["X-XSRFToken"] = self._lab_xsrf
else:
self.logger.warning("No XSRF token found for JupyterLab.")
r = await self.http_client.post(
session_url, json=body, headers=headers
)
Expand Down Expand Up @@ -726,6 +763,11 @@ async def execute_notebook(
headers = {}
if self._lab_xsrf:
headers["X-XSRFToken"] = self._lab_xsrf
elif self._hub_xsrf:
self.logger.warning(
"No XSRF token found for JupyterLab, using hub token."
)
headers["X-XSRFToken"] = self._hub_xsrf
try:
# The timeout is designed to catch issues connecting to JupyterLab
# but to wait as long as possible for the notebook itself
Expand Down
1 change: 0 additions & 1 deletion src/noteburst/jupyterclient/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ async def create(
"username": username,
"name": "Noteburst",
"token_type": "service",
"token_name": f"noteburst {float(time.time())!s}",
"scopes": scopes,
"expires": int(time.time() + lifetime),
}
Expand Down
2 changes: 1 addition & 1 deletion src/noteburst/worker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ async def shutdown(ctx: dict[Any, Any]) -> None: # noqa: PLR0912
elif config.worker_keepalive == WorkerKeepAliveSetting.normal:
f = cron(
keep_alive,
minute={0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55},
minute={0, 15, 30, 45},
unique=False,
)
cron_jobs.append(f)
Expand Down
2 changes: 0 additions & 2 deletions tests/support/gafaelfawr.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def handler(request: httpx.Request) -> httpx.Response:
assert request_json == {
"username": ANY,
"token_type": "service",
"token_name": ANY,
"scopes": ["exec:notebook"],
"expires": ANY,
"name": "Noteburst",
Expand All @@ -69,7 +68,6 @@ def handler(request: httpx.Request) -> httpx.Response:
assert request_json["uid"] == uid
if gid:
assert request_json["gid"] == gid
assert request_json["token_name"].startswith("noteburst ")
assert request_json["expires"] > time.time()
response = {"token": make_gafaelfawr_token(request_json["username"])}
return httpx.Response(200, json=response, request=request)
Expand Down