Skip to content

Commit

Permalink
Add support for product_slug_pattern in ingest/ltd
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathansick committed Jul 20, 2023
1 parent 39bec72 commit f0a36bb
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
7 changes: 7 additions & 0 deletions src/ook/handlers/external/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,11 @@ async def post_ingest_ltd(
edition_slug=ingest_request.edition_slug,
)
)
if ingest_request.product_slug_pattern is not None:
task_group.create_task(
classifier.queue_ingest_for_ltd_product_slug_pattern(
product_slug_pattern=ingest_request.product_slug_pattern,
edition_slug=ingest_request.edition_slug,
)
)
return Response(status_code=202)
19 changes: 19 additions & 0 deletions src/ook/services/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import asyncio
import re
from datetime import UTC, datetime

Expand Down Expand Up @@ -103,6 +104,24 @@ async def queue_ingest_for_ltd_product_slug(
value=kafka_value,
)

async def queue_ingest_for_ltd_product_slug_pattern(
self, *, product_slug_pattern: str, edition_slug: str
) -> None:
"""Queue an ingest for a LSST the Docs project slug pattern."""
pattern = re.compile(product_slug_pattern)
project_urls = await self._ltd_service.get_project_urls()
async with asyncio.TaskGroup() as task_group:
for project_url in project_urls:
# The slug is the last component of the API URL
project_slug = project_url.split("/")[-1]
if pattern.match(project_slug) is not None:
task_group.create_task(
self.queue_ingest_for_ltd_product_slug(
product_slug=project_slug,
edition_slug=edition_slug,
)
)

async def classify_ltd_site(
self, *, product_slug: str, published_url: str
) -> DocumentSourceType:
Expand Down
14 changes: 10 additions & 4 deletions src/ook/services/ltdmetadataservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,20 @@ class LtdMetadataService:
def __init__(
    self, *, logger: BoundLogger, http_client: AsyncClient
) -> None:
    """Store the logger and HTTP client and set the LTD API base URL."""
    self._http_client = http_client
    self._logger = logger
    # Root URL that this service's request URLs are built from.
    self._base = "https://keeper.lsst.codes"

def get_product_api_url(self, product_slug: str) -> str:
    """Get the LTD API URL for a given product slug.

    The URL is built from ``self._base`` so that it stays consistent with
    the other request URLs this service constructs.
    """
    return f"{self._base}/products/{product_slug}"

async def get_project_urls(self) -> list[str]:
    """Get all LTD Project URLs.

    Sends a GET request to ``{base}/products/`` and returns the value of
    the ``"products"`` key of the JSON response body.

    Raises whatever ``response.raise_for_status()`` raises for an error
    status, and `KeyError` if the body has no ``"products"`` key.
    """
    # NOTE(review): the list[str] annotation reflects how the caller uses
    # the result (iterating it and splitting each item on "/"); confirm
    # against the LTD Keeper API schema.
    url = f"{self._base}/products/"
    response = await self._http_client.get(url)
    response.raise_for_status()
    return response.json()["products"]

async def get_project(self, product_slug: str) -> dict:
"""Get the LTD project metadata for a given product slug."""
Expand All @@ -32,9 +40,7 @@ async def get_edition(
self, product_slug: str, edition_slug: str = "main"
) -> dict:
"""Get the LTD edition metadata for a given product and edition."""
editions_url = (
f"https://keeper.lsst.codes/products/{product_slug}/editions/"
)
editions_url = f"{self._base}/products/{product_slug}/editions/"
response = await self._http_client.get(editions_url)
response.raise_for_status()
editions = response.json()
Expand Down

0 comments on commit f0a36bb

Please sign in to comment.