diff --git a/app/control_lists/views.py b/app/control_lists/views.py index 3e5f9da..571412d 100644 --- a/app/control_lists/views.py +++ b/app/control_lists/views.py @@ -379,8 +379,11 @@ def information_read(control_list_id): @control_lists_bp.route("/controls//ignore_terms", methods=["POST", "GET"]) @login_required def ignore_terms_filter(control_list_id): - current_controlListUser = ControlListsUser.query.filter_by(**{"control_lists_id":control_list_id,"user_id":current_user.id}).first_or_404() - print(current_controlListUser) + current_controlListUser = ControlListsUser.retrieve( + user_id=current_user.id, + control_list_id=control_list_id + ).first_or_404() + list_filter = [] if request.method == "POST": list_filter.append(request.form.get("punct")) @@ -389,18 +392,21 @@ def ignore_terms_filter(control_list_id): list_filter.append(request.form.get('metadata')) filtered_filter = [] for el in list_filter: - if el != None: + if el is not None: filtered_filter.append(el) - filter = " ".join(filtered_filter) - current_controlListUser.filter_punct = 'punct' in filter - current_controlListUser.filter_metadata = 'metadata' in filter - current_controlListUser.filter_numeral = 'numeral' in filter - current_controlListUser.filter_ignore = 'ignore' in filter + current_controlListUser.filter_punct = 'punct' in filtered_filter + current_controlListUser.filter_metadata = 'metadata' in filtered_filter + current_controlListUser.filter_numeral = 'numeral' in filtered_filter + current_controlListUser.filter_ignore = 'ignore' in filtered_filter + db.session.add(current_controlListUser) db.session.commit() flash('The filters have been updated.', 'success') - current_controlListUser = ControlListsUser.query.filter_by(**{"control_lists_id":control_list_id,"user_id":current_user.id}).first_or_404() + current_controlListUser = ControlListsUser.retrieve( + user_id=current_user.id, + control_list_id=control_list_id + ).first() return render_template_with_nav_info('control_lists/ignore_filter.html', control_list_id=control_list_id, current_control_list=current_controlListUser) diff --git a/app/main/views/corpus.py b/app/main/views/corpus.py index 38db594..d64cc3e 100644 --- a/app/main/views/corpus.py +++ b/app/main/views/corpus.py @@ -119,11 +119,7 @@ def error(): list_filter.append(request.form.get("numeral")) list_filter.append(request.form.get("ignore")) list_filter.append(request.form.get("metadata")) - filtered_filter = [] - for el in list_filter: - if el != None: - filtered_filter.append(el) - filter = " ".join(filtered_filter) + list_filter = [flt for flt in list_filter if flt] try: corpus: Corpus = Corpus.create(**form_kwargs) @@ -133,10 +129,10 @@ def error(): db.session.commit() current_controlListUser = ControlListsUser.query.filter_by( **{"control_lists_id": corpus.control_lists_id, "user_id": current_user.id}).first_or_404() - current_controlListUser.filter_punct = 'punct' in filter - current_controlListUser.filter_metadata = 'metadata' in filter - current_controlListUser.filter_numeral = 'numeral' in filter - current_controlListUser.filter_ignore = 'ignore' in filter + current_controlListUser.filter_punct = 'punct' in list_filter + current_controlListUser.filter_metadata = 'metadata' in list_filter + current_controlListUser.filter_numeral = 'numeral' in list_filter + current_controlListUser.filter_ignore = 'ignore' in list_filter db.session.commit() flash("New corpus registered", category="success") except (sqlalchemy.exc.StatementError, sqlalchemy.exc.IntegrityError) as e: diff --git a/app/models/control_lists.py b/app/models/control_lists.py index 49ee321..fbaabb3 100644 --- a/app/models/control_lists.py +++ b/app/models/control_lists.py @@ -8,9 +8,11 @@ from collections import Counter # PIP Packages import unidecode +import regex as re import yaml +from flask_sqlalchemy.query import Query as FlaskQuery from sqlalchemy.ext.associationproxy import association_proxy -from sqlalchemy.orm import backref +from sqlalchemy.orm import backref, Query from sqlalchemy import literal, case from werkzeug.exceptions import BadRequest # APP Logic @@ -290,6 +292,17 @@ class ControlListsUser(db.Model): control = db.relationship("ControlLists", backref=backref("control_lists_user", cascade="all, delete-orphan")) user = db.relationship(User, backref=backref("control_lists_user", cascade="all, delete-orphan")) + re_filter_metadata = r'(\[[^\]]+:[^\]]*\]$)' + re_filter_ignore = r'(^\[IGNORE\])' + re_filter_punct = "(^[^\w\s]$)" + re_filter_numeral = r'(^\d+$)' + + @classmethod + def retrieve(cls, user_id: int, control_list_id: int) -> FlaskQuery: + return cls.query.filter(db.and_( + cls.user_id == user_id, + cls.control_lists_id == control_list_id + )) class AllowedLemma(db.Model): """ An allowed lemma is a lemma that is accepted @@ -352,9 +365,6 @@ def to_input_format(query): ) - - - class AllowedPOS(db.Model): """ An allowed POS is a POS that is accepted diff --git a/app/models/corpus.py b/app/models/corpus.py index 3317a3a..f2f47df 100644 --- a/app/models/corpus.py +++ b/app/models/corpus.py @@ -7,7 +7,7 @@ # PIP Packages import unidecode import sqlalchemy.exc -import re +import regex as re from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.orm import backref from sqlalchemy import func, literal, not_, or_, and_ @@ -337,20 +337,19 @@ def get_unallowed(self, user_id, corpus_id, allowed_type="lemma"): current_corpus = Corpus.query.filter_by(**{"id":corpus_id}).first_or_404() current_controlListUser = ControlListsUser.query.filter_by( **{"control_lists_id":current_corpus.control_lists_id, "user_id": user_id}).first_or_404() - dict_filter = {'punct': current_controlListUser.filter_punct, - 'metadata': current_controlListUser.filter_metadata, - 'ignore': current_controlListUser.filter_ignore, - 'numeral': current_controlListUser.filter_numeral} - if True in dict_filter.values(): - regex_liste = [] - if dict_filter['metadata']: - regex_liste.append(r'^(?!\[[^\]]+:[^\]]*\]$).*') - if dict_filter['ignore']: - regex_liste.append(r'^(?!^\[IGNORE\]$)') - if dict_filter['punct']: - regex_liste.append(r"((?!^[^\w\s]$).)") - if dict_filter["numeral"]: - regex_liste.append(r'(^(?!\d+$).+)') + + regex_liste = [] + if current_controlListUser: + if current_controlListUser.filter_metadata: + regex_liste.append(ControlListsUser.re_filter_metadata) + if current_controlListUser.filter_ignore: + regex_liste.append(ControlListsUser.re_filter_ignore) + if current_controlListUser.filter_punct: + regex_liste.append(ControlListsUser.re_filter_punct) + if current_controlListUser.filter_numeral: + regex_liste.append(ControlListsUser.re_filter_numeral) + + if regex_liste: list_darguments.append(WordToken.form.op('~')("".join(regex_liste))) return db.session.query(WordToken).filter( @@ -1097,7 +1096,7 @@ def get_like(filter_id, form, group_by, type_like="lemma", allowed_list=False): return query @staticmethod - def is_valid(lemma, POS, morph, corpus, user_id, filter): + def is_valid(lemma, POS, morph, corpus, user_id): """ Check if a token is valid for a given corpus :param lemma: Lemma value of the token to validate @@ -1122,38 +1121,32 @@ def is_valid(lemma, POS, morph, corpus, user_id, filter): } allowed_column = corpus.displayed_columns_by_name - if filter: - current_controlListUser = ControlListsUser.query.filter_by( - **{"control_lists_id": corpus.control_lists_id, "user_id": user_id}).first_or_404() - dict_filter = {'punct': current_controlListUser.filter_punct, - 'metadata': current_controlListUser.filter_metadata, - 'ignore': current_controlListUser.filter_ignore, - 'numeral': current_controlListUser.filter_numeral} + if lemma and "lemma" in allowed_column and allowed_lemma.count(): + current_controlListUser = ControlListsUser.retrieve( + user_id=user_id, control_list_id=corpus.control_lists_id + ).first() regex_liste = [] - if True in dict_filter.values(): - if dict_filter['metadata']: - regex_liste.append(r'(\[[^\]]+:[^\]]*\]$)') - if dict_filter['ignore']: - regex_liste.append(r'(^\[IGNORE\])') - if dict_filter['punct']: - regex_liste.append(r"(^[^\w\s]$)") - if dict_filter['numeral']: - regex_liste.append(r'(^\d+$)') - regex = "|".join(regex_liste) - - if lemma is not None \ - and "lemma" in allowed_column \ - and allowed_lemma.count() > 0 \ - and corpus.get_allowed_values("lemma", label=lemma).count() == 0: - if filter: - if not re.match(regex, lemma): - if not corpus.has_custom_dictionary_value("lemma", lemma): - statuses["lemma"] = False - else: - if not corpus.has_custom_dictionary_value("lemma", lemma): - statuses["lemma"] = False - else: - if not corpus.has_custom_dictionary_value("lemma", lemma): + if current_controlListUser: + if current_controlListUser.filter_metadata: + regex_liste.append(ControlListsUser.re_filter_metadata) + if current_controlListUser.filter_ignore: + regex_liste.append(ControlListsUser.re_filter_ignore) + if current_controlListUser.filter_punct: + regex_liste.append(ControlListsUser.re_filter_punct) + if current_controlListUser.filter_numeral: + regex_liste.append(ControlListsUser.re_filter_numeral) + + ignored_by_regex = False + + for regex in regex_liste: + if re.match(regex, lemma) is not None: + ignored_by_regex = True + + if ( + not ignored_by_regex and + corpus.has_custom_dictionary_value("lemma", lemma) is False and + corpus.get_allowed_values("lemma", label=lemma).count() == 0 + ): statuses["lemma"] = False if POS is not None \ @@ -1170,7 +1163,6 @@ def is_valid(lemma, POS, morph, corpus, user_id, filter): if not corpus.has_custom_dictionary_value("morph", morph): statuses["morph"] = False - return statuses @staticmethod @@ -1327,7 +1319,7 @@ def to_input_format(query): return csv_file.getvalue() @staticmethod - def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filter=False): + def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None): """ Update a given token with lemma, POS and morph value :param user_id: ID of the user who performs the update @@ -1358,9 +1350,10 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filte error = WordToken.NothingChangedError("No value where changed") error.msg = "No value where changed" raise error - + print(token) # Check if values are correct regarding allowed values - validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus, user_id=user_id, filter=filter) + validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus, user_id=user_id) + print(token.POS, validity) if False in list(validity.values()): error_msg = "Invalid value in {}".format( ", ".join([key for key in validity.keys() if validity[key] is False]) @@ -1370,7 +1363,6 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, filte error.statuses = validity error.invalid_columns = [key for key in validity.keys() if validity[key] is False] raise error - # Updating if not lemma: lemma = token.lemma @@ -1495,7 +1487,7 @@ def get_nearly_similar_to(token, mode): WordToken.id != token.id, *filtering ) - ) + ) class TokenHistory(db.Model): diff --git a/app/templates/control_lists/ignore_filter.html b/app/templates/control_lists/ignore_filter.html index 83d789a..cf21efa 100644 --- a/app/templates/control_lists/ignore_filter.html +++ b/app/templates/control_lists/ignore_filter.html @@ -34,7 +34,7 @@

{{ _('Change the filters for Control List') }}

- + diff --git a/requirements.txt b/requirements.txt index 3bc0c4a..c244130 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,3 +36,4 @@ nose==1.3.7 selenium>=3.141.0 Flask-Testing==0.8.1 mock==5.1.0 +regex \ No newline at end of file diff --git a/tests/test_selenium/base.py b/tests/test_selenium/base.py index c35589c..bc83252 100644 --- a/tests/test_selenium/base.py +++ b/tests/test_selenium/base.py @@ -5,6 +5,7 @@ import csv import signal import logging +from typing import Union import tempfile from selenium import webdriver from selenium.common.exceptions import NoSuchElementException @@ -362,8 +363,11 @@ def addControlLists(self, cl_name, *args, **kwargs): self.addControlListsUser(cl.name, user_mail, is_owner=owner) return cl - def addControlListsUser(self, cl_name, email, is_owner=False): - cl = ControlLists.query.filter(ControlLists.name == cl_name).first() + def addControlListsUser(self, cl_name: Union[str, int], email, is_owner=False): + if isinstance(cl_name, str): + cl = ControlLists.query.filter(ControlLists.name == cl_name).first() + else: + cl = ControlLists.query.filter(ControlLists.id == cl_name).first() user = User.query.filter(User.email == email).first() new_clu = ControlListsUser(control_lists_id=cl.id, user_id=user.id, is_owner=is_owner) self.db.session.add(new_clu) @@ -567,7 +571,6 @@ def edith_nth_row_value( if go_to_edit_token_page is None: go_to_edit_token_page = self.go_to_edit_token_page(corpus_id) go_to_edit_token_page() - self.driver.save_screenshot("./token_correct_1.png") if additional_action_before is not None: additional_action_before() @@ -578,7 +581,7 @@ def edith_nth_row_value( ) # Take the first row - row = self.driver_find_element_by_id("token_" + id_row + "_row") + row = self.driver_find_element_by_id(f"token_{id_row}_row") # Take the td to edit if value_type == "POS": td = self.element_find_element_by_class_name(row, "token_pos") @@ -586,27 +589,27 @@ def edith_nth_row_value( td = self.element_find_element_by_class_name(row, "token_morph") else: td = self.element_find_element_by_class_name(row, "token_lemma") - self.driver.save_screenshot("./token_correct_2.png") # Click, clear the td and send a new value td.click() td.clear() - self.driver.save_screenshot("./token_correct_25.png") td.send_keys(value) - self.driver.save_screenshot("./token_correct_3.png") # Save self.element_find_element_by_css_selector(row, "a.save").click() - self.driver.save_screenshot("./token_correct_3.png") # It's safer to wait for the AJAX call to be completed - row = self.driver_find_element_by_id("token_" + id_row + "_row") + row = self.driver_find_element_by_id(f"token_{id_row}_row") + rel_tr = f"[rel='token_{id_row}_row'] .badge-status" WebDriverWait(self.driver, 10).until( EC.visibility_of_element_located( - (By.CSS_SELECTOR, "[rel='token_{}_row'] .badge-status".format(id_row)) + (By.CSS_SELECTOR, rel_tr) ) ) + token = self.db.session.get(WordToken, int(id_row)) + + self.db.session.refresh(token) return ( - self.db.session.get(WordToken, int(id_row)), + token, self.element_find_element_by_css_selector(row, "#token_"+id_row+"_row > td a.save").text.strip(), row ) @@ -636,7 +639,6 @@ def test_edit_token(self): self.addCorpus(with_token=True, tokens_up_to=24) self.driver.refresh() token, status_text, row = self.edith_nth_row_value("un", corpus_id=self.CORPUS_ID) - self.driver.save_screenshot('./token_correct_4.png') self.assertEqual(token.lemma, "un", "Lemma should have been changed") self.assertEqual(status_text, "Save") self.assert_saved(row) diff --git a/tests/test_selenium/test_token_correct.py b/tests/test_selenium/test_token_correct.py index 8a8245b..0a4bed6 100644 --- a/tests/test_selenium/test_token_correct.py +++ b/tests/test_selenium/test_token_correct.py @@ -1,3 +1,4 @@ +from app.models import ControlListsUser from tests.test_selenium.base import TokenCorrectBase, TokenCorrect2CorporaBase import selenium from sqlalchemy import text @@ -100,24 +101,27 @@ def test_edit_morph_with_allowed(self): self.assert_token_has_values(token, lemma="mout", POS="ADVgen", morph="NOMB.=s|GENRE=m|CAS=n") def test_edit_token_with_filter(self): - self.add_user("foo", "foo") - self.login("%s.%s@ppa.fr" % ("foo", "foo"), self.app.config['ADMIN_PASSWORD']) - self.addCorpus(with_token=True) + corpus = self.addCorpus(with_token=True, cl=True, with_allowed_lemma=True) + clu = self.addControlListsUser(corpus.control_lists_id, self.app.config["ADMIN_EMAIL"], is_owner=True) self.driver.refresh() + + # ajouter corpus + token, status_text, row = self.edith_nth_row_value("#", id_row="1") + self.assertNotEqual(token.lemma, "#", "Lemma # is forbidden in control list") + # modifier les filtres self.driver_find_element_by_link_text("Dashboard").click() controllists_dashboard = self.driver_find_element_by_id("control_lists-dashboard") self.element_find_element_by_partial_link_text(controllists_dashboard, "Wauchier").click() - self.driver_find_element_by_link_text("Ignore values").click() + self.driver_find_element_by_partial_link_text("Ignore values").click() self.driver_find_element_by_name("punct").click() self.driver_find_element_by_id("submit").click() - self.driver.save_screenshot("./test_edit_token_filter6.png") - self.driver.implicitly_wait(15) + self.driver.implicitly_wait(5) # ajouter corpus - token, status_text, row = self.edith_nth_row_value(",", id_row="1") - self.assert_token_has_values(token, lemma=",") + token, status_text, row = self.edith_nth_row_value("]", value_type="lemma", id_row="1") self.assert_saved(row) + self.assert_token_has_values(token, lemma="]") class TestTokensCorrectFloovant(TokenCorrectBase):