From e9913e7e231bde5eb1f4cba7cfb3a5c3cb9ca440 Mon Sep 17 00:00:00 2001 From: Tomas Machalek Date: Fri, 6 Oct 2023 12:16:21 +0200 Subject: [PATCH] Fix fcs multisearch (row limit) plus some refact. --- lib/action/model/fcs.py | 7 ++++++- lib/plugin_types/corparch/corpus.py | 1 + lib/views/fcs.py | 10 +++++++--- templates/fcs/v1_complete.html | 5 +++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/action/model/fcs.py b/lib/action/model/fcs.py index 78782ac3f2..65881f56d2 100644 --- a/lib/action/model/fcs.py +++ b/lib/action/model/fcs.py @@ -40,6 +40,10 @@ @dataclass class FCSResourceInfo: + pid: str + """ + pid should be a universally accepted persistent identifier (Handle.Net, DOI etc.) + """ title: str landing_page_uri: Optional[str] = None language: Optional[str] = None @@ -100,6 +104,7 @@ async def corpora_info(self, value: str, max_items: int) -> List[FCSResourceInfo lang_code = get_lang_code(a2=cinfo.collator_locale.split('_')[0]) resources.append( FCSResourceInfo( + pid=corpus_id, # TODO we need something like an ID from Handle.Net or DOI title=corpus_id, description=cinfo.localized_desc('en'), landing_page_uri=cinfo.web, @@ -215,7 +220,7 @@ async def fcs_search( try: with plugins.runtime.AUTH as auth: anon_id = auth.anonymous_user(self.plugin_ctx)['id'] - q = ['q' + rq] + q = ('q' + rq,) # try to locate concordance in cache lock = asyncio.Lock() diff --git a/lib/plugin_types/corparch/corpus.py b/lib/plugin_types/corparch/corpus.py index e2e27d99c9..08cc2b7d41 100644 --- a/lib/plugin_types/corparch/corpus.py +++ b/lib/plugin_types/corparch/corpus.py @@ -197,6 +197,7 @@ class CorpusInfo: """ id: Optional[str] = None + pid: Optional[str] = None # TODO see https://github.com/czcorpus/kontext/issues/6039 name: Optional[str] = None description: Optional[str] = None # filled in during localization _description_cs: Optional[str] = None diff --git a/lib/views/fcs.py b/lib/views/fcs.py index f65fb0728f..1e64c68087 100644 --- a/lib/views/fcs.py +++ 
b/lib/views/fcs.py @@ -17,8 +17,8 @@ from collections import defaultdict from dataclasses import asdict, dataclass, field from typing import Any, Dict, List, Optional, Tuple +import random -import aiofiles import plugins import settings from action.argmapping.conc import QueryFormArgs @@ -78,6 +78,7 @@ async def op_explain(amodel: FCSActionModel, req: KRequest, resp_common: FCSResp lang_code = get_lang_code(a2=cinfo.collator_locale.split('_')[0]) resp['resources'].append( FCSResourceInfo( + pid=corp, # TODO we need something like an ID from Handle.Net or DOI title=corp, description=cinfo.localized_desc('en'), landing_page_uri=cinfo.web, @@ -159,9 +160,12 @@ async def op_search_retrieve(amodel: FCSActionModel, req: KRequest, resp_common: ] results: List[List[FCSSearchResult], str] = await asyncio.gather(*tasks) # merging results + merged_rows = [row for result, _ in results for row in result.rows] + if len(merged_rows) > resp_common.maximumRecords: + merged_rows = random.sample(merged_rows, k=resp_common.maximumRecords) merged_results = FCSSearchResult( - rows=[row for result, _ in results for row in result.rows], - size=sum(r.size for r, _ in results), + rows=merged_rows, + size=len(merged_rows), ) cql_query = results[0][1] diff --git a/templates/fcs/v1_complete.html b/templates/fcs/v1_complete.html index 8479b68a69..57c8af7f5f 100644 --- a/templates/fcs/v1_complete.html +++ b/templates/fcs/v1_complete.html @@ -64,7 +64,7 @@ {% for resource in resources %} - + {{ resource.title }} {% if resource.description %} {{ resource.description }} @@ -134,7 +134,8 @@ http://clarin.eu/fcs/resource {{ recordPacking }} - + {{ left|xmle }}{{ word|xmle }} {{ right|xmle }}