Skip to content

Commit

Permalink
Add ability for audit to queue ingest for missing documents
Browse files Browse the repository at this point in the history
AlgoliaAuditService can now optionally trigger the ingest of missing
documents via the ClassificationService. From the CLI, this is triggered
by passing the --reingest flag to the audit command.
  • Loading branch information
jonathansick committed Aug 30, 2023
1 parent c0b31c3 commit d926bd4
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
7 changes: 5 additions & 2 deletions src/ook/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ async def upload_doc_stub(dataset: Path) -> None:


@main.command()
@click.option("--reingest", is_flag=True, help="Reingest missing documents.")
@run_with_asyncio
async def audit() -> None:
async def audit(*, reingest: bool = False) -> None:
"""Audit the Algolia document index and check if any documents are missing
based on the listing of projects registered in the LTD Keeper service.
"""
Expand All @@ -109,4 +110,6 @@ async def audit() -> None:
raise click.UsageError("Algolia credentials not set in environment.")
async with Factory.create_standalone(logger=logger) as factory:
algolia_audit_service = factory.create_algolia_audit_service()
await algolia_audit_service.audit_missing_documents()
await algolia_audit_service.audit_missing_documents(
ingest_missing=reingest
)
1 change: 1 addition & 0 deletions src/ook/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,5 @@ def create_algolia_audit_service(self) -> AlgoliaAuditService:
http_client=self.http_client,
algolia_search_client=self._process_context.algolia_client,
logger=self._logger,
classification_service=self.create_classification_service(),
)
15 changes: 13 additions & 2 deletions src/ook/services/algoliaaudit.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from structlog.stdlib import BoundLogger

from ..config import config
from .classification import ClassificationService

# Python regular expression pattern that matches an LTD document slug such as
# "sqr-000".
Expand Down Expand Up @@ -57,13 +58,17 @@ def __init__(
http_client: AsyncClient,
logger: BoundLogger,
algolia_search_client: SearchClient,
classification_service: ClassificationService,
) -> None:
"""Initialize the service."""
self._http_client = http_client
self._search_client = algolia_search_client
self._classifier = classification_service
self._logger = logger

async def audit_missing_documents(self) -> list[LtdDocument]:
async def audit_missing_documents(
self, *, ingest_missing: bool = False
) -> list[LtdDocument]:
"""Audit the Algolia indices for completeness of missing documents.
A document is considered "missing" if it is registered in the LTD API,
Expand Down Expand Up @@ -97,14 +102,20 @@ async def audit_missing_documents(self) -> list[LtdDocument]:
published_url=expected_ltd_doc.published_url,
)
missing_docs.append(expected_ltd_doc)
missing_docs.sort()

self._logger.info(
"Audit complete.",
found=len(expected_ltd_docs) - len(missing_docs),
missing=len(missing_docs),
)

missing_docs.sort()
if ingest_missing and len(missing_docs) > 0:
for doc in missing_docs:
await self._classifier.queue_ingest_for_ltd_product_slug(
product_slug=doc.slug, edition_slug="main"
)

return missing_docs

async def _get_ltd_documents(self) -> list[LtdDocument]:
Expand Down

0 comments on commit d926bd4

Please sign in to comment.