Skip to content

Commit

Permalink
Merge pull request #6040 from tomachalek/fcs_refacts_and_fixes
Browse files Browse the repository at this point in the history
Fcs refacts and fixes
  • Loading branch information
tomachalek authored Oct 6, 2023
2 parents 12d197d + e9913e7 commit cc98a42
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 8 deletions.
15 changes: 13 additions & 2 deletions lib/action/model/fcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@

@dataclass
class FCSResourceInfo:
pid: str
"""
pid should be a universally accepted persistent identifier (Handle.Net, DOI, etc.)
"""
title: str
landing_page_uri: Optional[str] = None
language: Optional[str] = None
Expand Down Expand Up @@ -100,6 +104,7 @@ async def corpora_info(self, value: str, max_items: int) -> List[FCSResourceInfo
lang_code = get_lang_code(a2=cinfo.collator_locale.split('_')[0])
resources.append(
FCSResourceInfo(
pid=corpus_id, # TODO we need something like an ID from Handle.Net or DOI
title=corpus_id,
description=cinfo.localized_desc('en'),
landing_page_uri=cinfo.web,
Expand Down Expand Up @@ -148,7 +153,13 @@ async def fcs_scan(self, corpname: str, scan_query: str, max_ter: int, start: in
wl = await wordlist(corp, args, max_ter)
return [(d['str'], d['freq']) for d in wl][start:][:max_ter]

async def fcs_search(self, corp: AbstractKCorpus, corpname: str, fcs_query: str, max_rec: int, start: int) -> Tuple[FCSSearchResult, str]:
async def fcs_search(
self,
corp: AbstractKCorpus,
fcs_query: str,
max_rec: int,
start: int
) -> Tuple[FCSSearchResult, str]:
"""
aux function for federated content search: operation=searchRetrieve
"""
Expand Down Expand Up @@ -209,7 +220,7 @@ async def fcs_search(self, corp: AbstractKCorpus, corpname: str, fcs_query: str,
try:
with plugins.runtime.AUTH as auth:
anon_id = auth.anonymous_user(self.plugin_ctx)['id']
q = ['q' + rq]
q = ('q' + rq,)

# try to locate concordance in cache
lock = asyncio.Lock()
Expand Down
1 change: 1 addition & 0 deletions lib/plugin_types/corparch/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ class CorpusInfo:
"""

id: Optional[str] = None
pid: Optional[str] = None # TODO see https://github.com/czcorpus/kontext/issues/6039
name: Optional[str] = None
description: Optional[str] = None # filled in during localization
_description_cs: Optional[str] = None
Expand Down
11 changes: 7 additions & 4 deletions lib/views/fcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Tuple
import random

import aiofiles
import plugins
import settings
from action.argmapping.conc import QueryFormArgs
Expand Down Expand Up @@ -78,6 +78,7 @@ async def op_explain(amodel: FCSActionModel, req: KRequest, resp_common: FCSResp
lang_code = get_lang_code(a2=cinfo.collator_locale.split('_')[0])
resp['resources'].append(
FCSResourceInfo(
pid=corp, # TODO we need something like an ID from Handle.Net or DOI
title=corp,
description=cinfo.localized_desc('en'),
landing_page_uri=cinfo.web,
Expand Down Expand Up @@ -151,7 +152,6 @@ async def op_search_retrieve(amodel: FCSActionModel, req: KRequest, resp_common:
tasks = [
amodel.fcs_search(
await amodel.cf.get_corpus(corp),
amodel.args.corpname,
query,
resp_common.maximumRecords,
resp_common.startRecord,
Expand All @@ -160,9 +160,12 @@ async def op_search_retrieve(amodel: FCSActionModel, req: KRequest, resp_common:
]
results: List[List[FCSSearchResult], str] = await asyncio.gather(*tasks)
# merging results
merged_rows = [row for result, _ in results for row in result.rows]
if len(merged_rows) > resp_common.maximumRecords:
merged_rows = random.sample(merged_rows, k=resp_common.maximumRecords)
merged_results = FCSSearchResult(
rows=[row for result, _ in results for row in result.rows],
size=sum(r.size for r, _ in results),
rows=merged_rows,
size=len(merged_rows),
)
cql_query = results[0][1]

Expand Down
5 changes: 3 additions & 2 deletions templates/fcs/v1_complete.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
</ed:SupportedDataViews>
<ed:Resources>
{% for resource in resources %}
<ed:Resource>
<ed:Resource pid="{{ resource.pid }}">
<ed:Title xml:lang="en">{{ resource.title }}</ed:Title>
{% if resource.description %}
<ed:Description xml:lang="en">{{ resource.description }}</ed:Description>
Expand Down Expand Up @@ -134,7 +134,8 @@
<sru:recordSchema>http://clarin.eu/fcs/resource</sru:recordSchema>
<sru:recordPacking>{{ recordPacking }}</sru:recordPacking>
<sru:recordData>
<fcs:Resource xmlns:fcs="http://clarin.eu/fcs/resource" pid="{{ corppid }}" ref="{{ conc_view_url_tpl }}{{ corppid }}">
<fcs:Resource xmlns:fcs="http://clarin.eu/fcs/resource"
{% if corppid %} pid="{{ corppid }}" ref="{{ conc_view_url_tpl }}{{ corppid }}" {% endif %}>
<fcs:DataView type="application/x-clarin-fcs-hits+xml">
<hits:Result xmlns:hits="http://clarin.eu/fcs/dataview/hits">
{{ left|xmle }}<hits:Hit>{{ word|xmle }}</hits:Hit> {{ right|xmle }}
Expand Down

0 comments on commit cc98a42

Please sign in to comment.