Skip to content

Commit

Permalink
Merge pull request #6319 from mzimandl/fixes
Browse files Browse the repository at this point in the history
clean up regex search
  • Loading branch information
tomachalek authored Oct 7, 2024
2 parents d759b56 + f40f8f0 commit b668bd4
Showing 1 changed file with 17 additions and 25 deletions.
42 changes: 17 additions & 25 deletions lib/plugins/ucnk_query_history/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import logging
from datetime import datetime, timezone
import re
from urllib.parse import quote
from urllib.parse import urljoin
import ujson as json
Expand All @@ -44,23 +43,16 @@ def escape_bleve_chars(s: str) -> str:
(note: the list also includes the space character)
"""

for ch in '\\+-=&|><!(){}[]^"~*?:/':
for ch in '\\+-=&|><!(){}[]^"~*?:/ ':
s = s.replace(ch, f'\\{ch}')
return s


def make_bleve_field(field: str, match: str) -> str:
    """
    Create a required bleve match clause for a single field.

    The result has the form ``+<field>:<escaped match>``. The leading ``+``
    is added here, so callers must pass the bare field name (e.g.
    ``'corpora'``, not ``'+corpora'``).

    :param field: name of the indexed field to match against; it is
        inserted verbatim (not escaped)
    :param match: raw value to search for; it is passed through
        ``escape_bleve_chars`` before being placed into the clause
    :return: the query-string fragment for this field
    """
    # The value is escaped as a whole (no regex, no splitting on spaces),
    # so the entire string is handed to bleve as one match term.
    return f'+{field}:{escape_bleve_chars(match)}'


class UcnkQueryHistory(MySqlQueryHistory):
Expand Down Expand Up @@ -124,41 +116,41 @@ def generate_query_string(
) -> str:
parts = [f'+user_id:{user_id}']
if q_supertype:
parts.append(f'+query_supertype:{q_supertype}')
parts.append(make_bleve_field('query_supertype', q_supertype))

if corpname:
parts.append(f'+corpora:{escape_bleve_chars(corpname)}')
parts.append(make_bleve_field('corpora', corpname))

if full_search_args.subcorpus:
parts.append(make_bleve_field('+subcorpus', full_search_args.subcorpus))
parts.append(make_bleve_field('subcorpus', full_search_args.subcorpus))

if full_search_args.any_property_value:
parts.append(make_bleve_field('+_all', full_search_args.any_property_value))
parts.append(make_bleve_field('_all', full_search_args.any_property_value))

else:
if q_supertype in ('conc', 'pquery'):
if full_search_args.posattr_name:
parts.append(make_bleve_field('+pos_attr_names', full_search_args.posattr_name))
parts.append(make_bleve_field('pos_attr_names', full_search_args.posattr_name))
if full_search_args.posattr_value:
parts.append(make_bleve_field('+pos_attr_values', full_search_args.posattr_value))
parts.append(make_bleve_field('pos_attr_values', full_search_args.posattr_value))
if full_search_args.structattr_name:
parts.append(make_bleve_field('+struct_attr_names', full_search_args.structattr_name))
parts.append(make_bleve_field('struct_attr_names', full_search_args.structattr_name))
if full_search_args.structattr_value:
parts.append(make_bleve_field('+struct_attr_values', full_search_args.structattr_value))
parts.append(make_bleve_field('struct_attr_values', full_search_args.structattr_value))

elif q_supertype == 'wlist':
if full_search_args.wl_pat:
parts.append(make_bleve_field('+raw_query', full_search_args.wl_pat))
parts.append(make_bleve_field('raw_query', full_search_args.wl_pat))
if full_search_args.wl_attr:
parts.append(make_bleve_field('+pos_attr_names', full_search_args.wl_attr))
parts.append(make_bleve_field('pos_attr_names', full_search_args.wl_attr))
if full_search_args.wl_pfilter:
parts.append(make_bleve_field('+pfilter_words', full_search_args.wl_pfilter))
parts.append(make_bleve_field('pfilter_words', full_search_args.wl_pfilter))
if full_search_args.wl_nfilter:
parts.append(make_bleve_field('+nfilter_words', full_search_args.wl_nfilter))
parts.append(make_bleve_field('nfilter_words', full_search_args.wl_nfilter))

elif q_supertype == 'kwords':
if full_search_args.wl_attr:
parts.append(make_bleve_field('+pos_attr_names', full_search_args.posattr_name))
parts.append(make_bleve_field('pos_attr_names', full_search_args.posattr_name))

return quote(' '.join(parts))

Expand Down

0 comments on commit b668bd4

Please sign in to comment.