Skip to content

Commit

Permalink
Correct tests and clean up the way regexes are applied (#329)
Browse files Browse the repository at this point in the history
* Fixing tests

* Revert update of status

* Fix an issue where not having a ControlListUser would lead to being unable to apply stuff

* Normalize control list
  • Loading branch information
PonteIneptique authored Aug 27, 2024
1 parent 666ff6d commit 56bac05
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 96 deletions.
24 changes: 15 additions & 9 deletions app/control_lists/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,11 @@ def information_read(control_list_id):
@control_lists_bp.route("/controls/<int:control_list_id>/ignore_terms", methods=["POST", "GET"])
@login_required
def ignore_terms_filter(control_list_id):
current_controlListUser = ControlListsUser.query.filter_by(**{"control_lists_id":control_list_id,"user_id":current_user.id}).first_or_404()
print(current_controlListUser)
current_controlListUser = ControlListsUser.retrieve(
user_id=current_user.id,
control_list_id=control_list_id
).first_or_404()

list_filter = []
if request.method == "POST":
list_filter.append(request.form.get("punct"))
Expand All @@ -389,18 +392,21 @@ def ignore_terms_filter(control_list_id):
list_filter.append(request.form.get('metadata'))
filtered_filter = []
for el in list_filter:
if el != None:
if el is not None:
filtered_filter.append(el)
filter = " ".join(filtered_filter)
current_controlListUser.filter_punct = 'punct' in filter
current_controlListUser.filter_metadata = 'metadata' in filter
current_controlListUser.filter_numeral = 'numeral' in filter
current_controlListUser.filter_ignore = 'ignore' in filter

current_controlListUser.filter_punct = 'punct' in filtered_filter
current_controlListUser.filter_metadata = 'metadata' in filtered_filter
current_controlListUser.filter_numeral = 'numeral' in filtered_filter
current_controlListUser.filter_ignore = 'ignore' in filtered_filter
db.session.add(current_controlListUser)
db.session.commit()

flash('The filters have been updated.', 'success')
current_controlListUser = ControlListsUser.query.filter_by(**{"control_lists_id":control_list_id,"user_id":current_user.id}).first_or_404()
current_controlListUser = ControlListsUser.retrieve(
user_id=current_user.id,
control_list_id=control_list_id
).first()
return render_template_with_nav_info('control_lists/ignore_filter.html', control_list_id=control_list_id,
current_control_list=current_controlListUser)

Expand Down
14 changes: 5 additions & 9 deletions app/main/views/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,7 @@ def error():
list_filter.append(request.form.get("numeral"))
list_filter.append(request.form.get("ignore"))
list_filter.append(request.form.get("metadata"))
filtered_filter = []
for el in list_filter:
if el != None:
filtered_filter.append(el)
filter = " ".join(filtered_filter)
list_filter = [flt for flt in list_filter if flt]

try:
corpus: Corpus = Corpus.create(**form_kwargs)
Expand All @@ -133,10 +129,10 @@ def error():
db.session.commit()
current_controlListUser = ControlListsUser.query.filter_by(
**{"control_lists_id": corpus.control_lists_id, "user_id": current_user.id}).first_or_404()
current_controlListUser.filter_punct = 'punct' in filter
current_controlListUser.filter_metadata = 'metadata' in filter
current_controlListUser.filter_numeral = 'numeral' in filter
current_controlListUser.filter_ignore = 'ignore' in filter
current_controlListUser.filter_punct = 'punct' in list_filter
current_controlListUser.filter_metadata = 'metadata' in list_filter
current_controlListUser.filter_numeral = 'numeral' in list_filter
current_controlListUser.filter_ignore = 'ignore' in list_filter
db.session.commit()
flash("New corpus registered", category="success")
except (sqlalchemy.exc.StatementError, sqlalchemy.exc.IntegrityError) as e:
Expand Down
18 changes: 14 additions & 4 deletions app/models/control_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from collections import Counter
# PIP Packages
import unidecode
import regex as re
import yaml
from flask_sqlalchemy.query import Query as FlaskQuery
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import backref
from sqlalchemy.orm import backref, Query
from sqlalchemy import literal, case
from werkzeug.exceptions import BadRequest
# APP Logic
Expand Down Expand Up @@ -290,6 +292,17 @@ class ControlListsUser(db.Model):
control = db.relationship("ControlLists", backref=backref("control_lists_user", cascade="all, delete-orphan"))
user = db.relationship(User, backref=backref("control_lists_user", cascade="all, delete-orphan"))

# Regular expressions describing token values that a user may choose to have
# skipped by the control-list checks; each pattern goes with the ``filter_*``
# boolean column of the same name.  All of them are raw strings so that
# backslash classes such as \w, \s and \d reach the regex engine untouched
# instead of being treated as (invalid) Python string escapes.
re_filter_metadata = r'(\[[^\]]+:[^\]]*\]$)'  # a bracketed "[name:value]" group ending the string
re_filter_ignore = r'(^\[IGNORE\])'  # a value starting with the literal "[IGNORE]"
re_filter_punct = r"(^[^\w\s]$)"  # exactly one character that is neither word nor whitespace
re_filter_numeral = r'(^\d+$)'  # one or more digits and nothing else

@classmethod
def retrieve(cls, user_id: int, control_list_id: int) -> FlaskQuery:
    """ Build the query selecting the rows that link a user to a control list

    :param user_id: ID of the user
    :param control_list_id: ID of the control list
    :return: Unexecuted query; the caller decides how to consume it
        (e.g. ``.first()`` or ``.first_or_404()``)
    """
    same_user = cls.user_id == user_id
    same_control_list = cls.control_lists_id == control_list_id
    return cls.query.filter(db.and_(same_user, same_control_list))

class AllowedLemma(db.Model):
""" An allowed lemma is a lemma that is accepted
Expand Down Expand Up @@ -352,9 +365,6 @@ def to_input_format(query):
)





class AllowedPOS(db.Model):
""" An allowed POS is a POS that is accepted
Expand Down
98 changes: 45 additions & 53 deletions app/models/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# PIP Packages
import unidecode
import sqlalchemy.exc
import re
import regex as re
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import backref
from sqlalchemy import func, literal, not_, or_, and_
Expand Down Expand Up @@ -337,20 +337,19 @@ def get_unallowed(self, user_id, corpus_id, allowed_type="lemma"):
current_corpus = Corpus.query.filter_by(**{"id":corpus_id}).first_or_404()
current_controlListUser = ControlListsUser.query.filter_by(
**{"control_lists_id":current_corpus.control_lists_id, "user_id": user_id}).first_or_404()
dict_filter = {'punct': current_controlListUser.filter_punct,
'metadata': current_controlListUser.filter_metadata,
'ignore': current_controlListUser.filter_ignore,
'numeral': current_controlListUser.filter_numeral}
if True in dict_filter.values():
regex_liste = []
if dict_filter['metadata']:
regex_liste.append(r'^(?!\[[^\]]+:[^\]]*\]$).*')
if dict_filter['ignore']:
regex_liste.append(r'^(?!^\[IGNORE\]$)')
if dict_filter['punct']:
regex_liste.append(r"((?!^[^\w\s]$).)")
if dict_filter["numeral"]:
regex_liste.append(r'(^(?!\d+$).+)')

regex_liste = []
if current_controlListUser:
if current_controlListUser.filter_metadata:
regex_liste.append(ControlListsUser.re_filter_metadata)
if current_controlListUser.filter_ignore:
regex_liste.append(ControlListsUser.re_filter_ignore)
if current_controlListUser.filter_punct:
regex_liste.append(ControlListsUser.re_filter_punct)
if current_controlListUser.filter_numeral:
regex_liste.append(ControlListsUser.re_filter_numeral)

if regex_liste:
list_darguments.append(WordToken.form.op('~')("".join(regex_liste)))

return db.session.query(WordToken).filter(
Expand Down Expand Up @@ -1097,7 +1096,7 @@ def get_like(filter_id, form, group_by, type_like="lemma", allowed_list=False):
return query

@staticmethod
def is_valid(lemma, POS, morph, corpus, user_id, filter):
def is_valid(lemma, POS, morph, corpus, user_id):
""" Check if a token is valid for a given corpus
:param lemma: Lemma value of the token to validate
Expand All @@ -1122,38 +1121,32 @@ def is_valid(lemma, POS, morph, corpus, user_id, filter):
}

allowed_column = corpus.displayed_columns_by_name
if filter:
current_controlListUser = ControlListsUser.query.filter_by(
**{"control_lists_id": corpus.control_lists_id, "user_id": user_id}).first_or_404()
dict_filter = {'punct': current_controlListUser.filter_punct,
'metadata': current_controlListUser.filter_metadata,
'ignore': current_controlListUser.filter_ignore,
'numeral': current_controlListUser.filter_numeral}
if lemma and "lemma" in allowed_column and allowed_lemma.count():
current_controlListUser = ControlListsUser.retrieve(
user_id=user_id, control_list_id=corpus.control_lists_id
).first()
regex_liste = []
if True in dict_filter.values():
if dict_filter['metadata']:
regex_liste.append(r'(\[[^\]]+:[^\]]*\]$)')
if dict_filter['ignore']:
regex_liste.append(r'(^\[IGNORE\])')
if dict_filter['punct']:
regex_liste.append(r"(^[^\w\s]$)")
if dict_filter['numeral']:
regex_liste.append(r'(^\d+$)')
regex = "|".join(regex_liste)

if lemma is not None \
and "lemma" in allowed_column \
and allowed_lemma.count() > 0 \
and corpus.get_allowed_values("lemma", label=lemma).count() == 0:
if filter:
if not re.match(regex, lemma):
if not corpus.has_custom_dictionary_value("lemma", lemma):
statuses["lemma"] = False
else:
if not corpus.has_custom_dictionary_value("lemma", lemma):
statuses["lemma"] = False
else:
if not corpus.has_custom_dictionary_value("lemma", lemma):
if current_controlListUser:
if current_controlListUser.filter_metadata:
regex_liste.append(ControlListsUser.re_filter_metadata)
if current_controlListUser.filter_ignore:
regex_liste.append(ControlListsUser.re_filter_ignore)
if current_controlListUser.filter_punct:
regex_liste.append(ControlListsUser.re_filter_punct)
if current_controlListUser.filter_numeral:
regex_liste.append(ControlListsUser.re_filter_numeral)

ignored_by_regex = False

for regex in regex_liste:
if re.match(regex, lemma) is not None:
ignored_by_regex = True

if (
not ignored_by_regex and
corpus.has_custom_dictionary_value("lemma", lemma) is False and
corpus.get_allowed_values("lemma", label=lemma).count() == 0
):
statuses["lemma"] = False

if POS is not None \
Expand All @@ -1170,7 +1163,6 @@ def is_valid(lemma, POS, morph, corpus, user_id, filter):
if not corpus.has_custom_dictionary_value("morph", morph):
statuses["morph"] = False


return statuses

@staticmethod
Expand Down Expand Up @@ -1327,7 +1319,7 @@ def to_input_format(query):
return csv_file.getvalue()

@staticmethod
def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filter=False):
def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
""" Update a given token with lemma, POS and morph value
:param user_id: ID of the user who performs the update
Expand Down Expand Up @@ -1358,9 +1350,10 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filte
error = WordToken.NothingChangedError("No value where changed")
error.msg = "No value where changed"
raise error

print(token)
# Check if values are correct regarding allowed values
validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus, user_id=user_id, filter=filter)
validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus, user_id=user_id)
print(token.POS, validity)
if False in list(validity.values()):
error_msg = "Invalid value in {}".format(
", ".join([key for key in validity.keys() if validity[key] is False])
Expand All @@ -1370,7 +1363,6 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filte
error.statuses = validity
error.invalid_columns = [key for key in validity.keys() if validity[key] is False]
raise error

# Updating
if not lemma:
lemma = token.lemma
Expand Down Expand Up @@ -1495,7 +1487,7 @@ def get_nearly_similar_to(token, mode):
WordToken.id != token.id,
*filtering
)
)
)


class TokenHistory(db.Model):
Expand Down
2 changes: 1 addition & 1 deletion app/templates/control_lists/ignore_filter.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ <h2>{{ _('Change the filters for Control List') }}</h2>
</label>
</li>
</ul>
<button type="submit">Submit</button>
<button type="submit" id="submit">Submit</button>
</form>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ nose==1.3.7
selenium>=3.141.0
Flask-Testing==0.8.1
mock==5.1.0
regex
26 changes: 14 additions & 12 deletions tests/test_selenium/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import csv
import signal
import logging
from typing import Union
import tempfile
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
Expand Down Expand Up @@ -362,8 +363,11 @@ def addControlLists(self, cl_name, *args, **kwargs):
self.addControlListsUser(cl.name, user_mail, is_owner=owner)
return cl

def addControlListsUser(self, cl_name, email, is_owner=False):
cl = ControlLists.query.filter(ControlLists.name == cl_name).first()
def addControlListsUser(self, cl_name: Union[str, int], email, is_owner=False):
if isinstance(cl_name, str):
cl = ControlLists.query.filter(ControlLists.name == cl_name).first()
else:
cl = ControlLists.query.filter(ControlLists.id == cl_name).first()
user = User.query.filter(User.email == email).first()
new_clu = ControlListsUser(control_lists_id=cl.id, user_id=user.id, is_owner=is_owner)
self.db.session.add(new_clu)
Expand Down Expand Up @@ -567,7 +571,6 @@ def edith_nth_row_value(
if go_to_edit_token_page is None:
go_to_edit_token_page = self.go_to_edit_token_page(corpus_id)
go_to_edit_token_page()
self.driver.save_screenshot("./token_correct_1.png")
if additional_action_before is not None:
additional_action_before()

Expand All @@ -578,35 +581,35 @@ def edith_nth_row_value(
)

# Take the first row
row = self.driver_find_element_by_id("token_" + id_row + "_row")
row = self.driver_find_element_by_id(f"token_{id_row}_row")
# Take the td to edit
if value_type == "POS":
td = self.element_find_element_by_class_name(row, "token_pos")
elif value_type == "morph":
td = self.element_find_element_by_class_name(row, "token_morph")
else:
td = self.element_find_element_by_class_name(row, "token_lemma")
self.driver.save_screenshot("./token_correct_2.png")
# Click, clear the td and send a new value
td.click()
td.clear()
self.driver.save_screenshot("./token_correct_25.png")
td.send_keys(value)
self.driver.save_screenshot("./token_correct_3.png")
# Save
self.element_find_element_by_css_selector(row, "a.save").click()
self.driver.save_screenshot("./token_correct_3.png")
# It's safer to wait for the AJAX call to be completed
row = self.driver_find_element_by_id("token_" + id_row + "_row")
row = self.driver_find_element_by_id(f"token_{id_row}_row")

rel_tr = f"[rel='token_{id_row}_row'] .badge-status"
WebDriverWait(self.driver, 10).until(
EC.visibility_of_element_located(
(By.CSS_SELECTOR, "[rel='token_{}_row'] .badge-status".format(id_row))
(By.CSS_SELECTOR, rel_tr)
)
)

token = self.db.session.get(WordToken, int(id_row))

self.db.session.refresh(token)
return (
self.db.session.get(WordToken, int(id_row)),
token,
self.element_find_element_by_css_selector(row, "#token_"+id_row+"_row > td a.save").text.strip(),
row
)
Expand Down Expand Up @@ -636,7 +639,6 @@ def test_edit_token(self):
self.addCorpus(with_token=True, tokens_up_to=24)
self.driver.refresh()
token, status_text, row = self.edith_nth_row_value("un", corpus_id=self.CORPUS_ID)
self.driver.save_screenshot('./token_correct_4.png')
self.assertEqual(token.lemma, "un", "Lemma should have been changed")
self.assertEqual(status_text, "Save")
self.assert_saved(row)
Expand Down
Loading

0 comments on commit 56bac05

Please sign in to comment.