Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Control list filters #323

Open
wants to merge 43 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
7034e27
Revert "Add more details about the lemmatizer at the new corpus phase…
Juliettejns Mar 21, 2024
6e202f0
ajout filtres de correction token
Juliettejns Jun 13, 2024
49016d0
Revert "ajout filtres de correction token"
Juliettejns Jun 13, 2024
4308ecc
ajout fonction filtres tokens à corriger
Juliettejns Jun 13, 2024
09768ec
Add more details about the lemmatizer at the new corpus phase. (#317)
PonteIneptique Mar 20, 2024
0acb09d
ajout filtres modifications de tokens + début affichage + templates c…
Juliettejns Jun 24, 2024
04d080c
suppression anciennes modifications
Juliettejns Jun 24, 2024
61f5957
ajout filtres token invalid + lien corpus
Juliettejns Jun 25, 2024
9d5844f
correction Ignore values des listes de menus dans Control Lists
Juliettejns Jun 26, 2024
f5517e9
correction tests corpus_init//fonction get_unallowed
Juliettejns Jun 27, 2024
b5a2355
correction erreurs tests - bug new corpus
Juliettejns Jun 28, 2024
eaf0822
suppression commentaire test
Juliettejns Jun 28, 2024
797d484
ajouts premiers jets tests
Juliettejns Jul 1, 2024
006fca8
correction test filter update
Juliettejns Jul 2, 2024
2b11ebc
correction test registration corpus filter
Juliettejns Jul 2, 2024
ee34646
ajout test edit token with filter
Juliettejns Jul 2, 2024
0fe9659
modif aggrandissement varchar models corpus + print logs
Juliettejns Jul 9, 2024
f97e432
test ajout création user pour CL filter
Juliettejns Jul 12, 2024
b8bbd51
test bug control filters - ajout users"
Juliettejns Jul 12, 2024
89e9aae
tests CL - modification find element by ID > NAME
Juliettejns Aug 27, 2024
666ff6d
find element by ID>NAME
Juliettejns Aug 27, 2024
56bac05
Correct tests and clean up the way regex are applied (#329)
PonteIneptique Aug 27, 2024
a2fb107
changement filtres CLS ControlListUser>controlList
Juliettejns Aug 28, 2024
8e0ef97
suppression ajout count
Juliettejns Aug 28, 2024
aa1fb7d
modifications corpus.id => self.id + get_unallowed attributes
Juliettejns Aug 28, 2024
dcd28a6
Adding tests back to control list for changing filter
PonteIneptique Sep 3, 2024
41039bd
ajout test base filtre
Juliettejns Sep 3, 2024
1c9e66c
Creating combinatory tests
PonteIneptique Sep 3, 2024
dbf15fa
ajout filtre test combinaison assert + modif filtre ponctuation
Juliettejns Sep 3, 2024
775f7f4
ajout filtre none
Juliettejns Sep 3, 2024
f76e9a1
Fix a condition on lemma
PonteIneptique Sep 3, 2024
f4e925c
Better message
PonteIneptique Sep 3, 2024
06332f7
modif test regex ajout condition spé Sans test
Juliettejns Sep 9, 2024
d482cf3
modif filtre metadata sur form et non lemma + correction unallowed
Juliettejns Sep 11, 2024
1f4b80a
modif test filtres avec metadata
Juliettejns Sep 11, 2024
e00e28f
suppression user_id des appels de get_unallowed
Juliettejns Sep 11, 2024
71a920d
ajout choix unallowed sqlite ou posgtresé
Juliettejns Sep 11, 2024
11caf39
ajout diff sqlite/postgres pour filtres get_unallowed
Juliettejns Sep 11, 2024
668fd90
déplacement logging
Juliettejns Sep 11, 2024
fbabbcf
deplacement logging
Juliettejns Sep 12, 2024
c6c0a15
Change the way the control list filter view is shown
PonteIneptique Sep 17, 2024
db60b6a
modif metadata validity + tests + presentation filtres CL dans inform…
Juliettejns Sep 17, 2024
360a560
Delete tests/test_selenium/download_temp/wauchier.xml
Juliettejns Sep 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import logging
from flask import Flask, g

from flask_compress import Compress
Expand All @@ -25,6 +26,11 @@
login_manager.session_protection = 'strong'
login_manager.login_view = 'account.login'

logging.basicConfig(filename='./pyrrha_corpus_creation.log', level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(name)s %(message)s')

logger = logging.getLogger(__name__)


def create_app(config_name="dev"):
""" Create the application """
Expand Down Expand Up @@ -64,6 +70,8 @@ def _set_sqlite_case_insensitive_pragma(dbapi_con, connection_record):
md = Markdown(app, safe_mode=True)
babel.init_app(app, locale_selector=get_locale)



# Register Jinja template functions
from .main import main as main_blueprint
app.register_blueprint(main_blueprint)
Expand Down
40 changes: 39 additions & 1 deletion app/control_lists/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


from app.main.views.utils import render_template_with_nav_info
from app.models import ControlLists, AllowedLemma, WordToken, User, PublicationStatus, CorpusCustomDictionary
from app.models import ControlLists, ControlListsUser, AllowedLemma, WordToken, User, PublicationStatus, CorpusCustomDictionary
from app import db, email
from ..utils import PyrrhaError
from ..utils.forms import strip_or_none
Expand Down Expand Up @@ -374,3 +374,41 @@ def information_edit(control_list_id, control_list):
def information_read(control_list_id):
control_list, is_owner = ControlLists.get_linked_or_404(control_list_id=control_list_id, user=current_user)
return render_template_with_nav_info('control_lists/information_read.html', control_list=control_list)


@control_lists_bp.route("/controls/<int:control_list_id>/ignore_terms", methods=["POST", "GET"])
@login_required
@cl_editable("control_list_id")
def ignore_terms_filter(control_list_id, control_list):
    """ Display and update the "ignore" filters of a control list

    On POST, the form fields ``punct``, ``numeral``, ``ignore`` and
    ``metadata`` are read; each ``filter_*`` flag of the control list is set
    to whether its keyword appears among the submitted values. The change is
    committed and a success message is flashed. GET and POST both end by
    rendering the same filter page.

    :param control_list_id: Id of the control list (taken from the URL)
    :param control_list: Control list object; presumably injected by the
        ``cl_editable`` decorator (TODO confirm)
    """
    if request.method == "POST":
        # Keep only the values of the fields that were actually submitted
        submitted = [
            value
            for value in (
                request.form.get(field)
                for field in ("punct", "numeral", "ignore", "metadata")
            )
            if value is not None
        ]

        control_list.filter_punct = 'punct' in submitted
        control_list.filter_metadata = 'metadata' in submitted
        control_list.filter_numeral = 'numeral' in submitted
        control_list.filter_ignore = 'ignore' in submitted
        db.session.add(control_list)
        db.session.commit()

        flash('The filters have been updated.', 'success')
        # Reload the committed state before rendering it
        db.session.refresh(control_list)

    # Same page for the initial display and after an update
    return render_template_with_nav_info(
        'control_lists/ignore_filter.html',
        control_list_id=control_list_id,
        control_list=control_list
    )
26 changes: 23 additions & 3 deletions app/main/views/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
from .utils import requires_corpus_admin_access, requires_corpus_access
from ..forms import Delete
from app.utils import PreferencesUpdateError, PersonalDictionaryError

from app import logger
import logging
AUTOCOMPLETE_LIMIT = 20


Expand All @@ -36,12 +37,12 @@ def _get_available():
lists[cl.str_public].append(cl)
return lists


@main.route('/corpus/new', methods=["POST", "GET"])
@login_required
def corpus_new():
""" Register a new corpus
"""

lemmatizers = current_app.config.get("LEMMATIZERS", [])

def normal_view():
Expand Down Expand Up @@ -93,8 +94,9 @@ def error():
tokens = read_input_tokens(request.form.get("tsv"))
try:
control_list = ControlLists.query.get_or_404(request.form.get("control_list_select"))
except Exception:
except Exception as e:
flash("This control list does not exist", category="error")
logger.error(e)
return error()
form_kwargs.update({"word_tokens_dict": tokens,
"control_list": control_list})
Expand All @@ -110,17 +112,32 @@ def error():
form_kwargs.update({"word_tokens_dict": tokens, "allowed_lemma": allowed_lemma,
"allowed_POS": allowed_POS, "allowed_morph": allowed_morph})

list_filter = []
list_filter.append(request.form.get("punct"))
list_filter.append(request.form.get("numeral"))
list_filter.append(request.form.get("ignore"))
list_filter.append(request.form.get("metadata"))
list_filter = [flt for flt in list_filter if flt]

try:
corpus: Corpus = Corpus.create(**form_kwargs)
db.session.add(CorpusUser(corpus=corpus, user=current_user, is_owner=True))
# Add a link to the control list
ControlLists.link(corpus.control_lists_id, current_user.id, is_owner=cl_owner)
db.session.commit()
current_controlList = ControlLists.query.filter_by(**{"id":corpus.control_lists_id}).first_or_404()
current_controlList.filter_punct = 'punct' in list_filter
current_controlList.filter_metadata = 'metadata' in list_filter
current_controlList.filter_numeral = 'numeral' in list_filter
current_controlList.filter_ignore = 'ignore' in list_filter
db.session.commit()
flash("New corpus registered", category="success")
except (sqlalchemy.exc.StatementError, sqlalchemy.exc.IntegrityError) as e:
print(e)
db.session.rollback()
flash("The corpus cannot be registered. Check your data", category="error")
flash(str(e.orig).lower())
logger.error(e)
if db.session.get_bind().dialect.name == "postgresql":
unique_constraint = 'duplicate key value violates unique constraint "corpus_name_key"'
else:
Expand All @@ -133,6 +150,7 @@ def error():
db.session.rollback()
flash("At least one line of your corpus is missing a token/form. Check line %s " % exc.line,
category="error")
logger.error(exc)
return error()
except NoTokensInput:
db.session.rollback()
Expand All @@ -143,8 +161,10 @@ def error():
flash(exception, category="error")
return error()
except Exception as e:
print(e)
db.session.rollback()
flash("The corpus cannot be registered. Check your data", category="error")
logger.error(e)
return error()
return redirect(url_for(".corpus_get", corpus_id=corpus.id))

Expand Down
3 changes: 3 additions & 0 deletions app/main/views/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def tokens_correct_unallowed(corpus_id, allowed_type):
:param allowed_type: Type of allowed value to check against (lemma, POS, morph)
"""
corpus = Corpus.query.filter_by(**{"id": corpus_id}).first()
user_id = current_user.id
tokens = corpus\
.get_unallowed(allowed_type)\
.paginate(
Expand All @@ -67,6 +68,7 @@ def tokens_correct_unallowed(corpus_id, allowed_type):
)



@main.route('/corpus/<int:corpus_id>/tokens/changes/similar/<int:record_id>')
@login_required
@requires_corpus_access("corpus_id")
Expand Down Expand Up @@ -131,6 +133,7 @@ def tokens_correct_single(corpus_id, token_id):
token, change_record = WordToken.update(
user_id=current_user.id,
token_id=token_id, corpus_id=corpus_id,
form = string_to_none(request.form.get("form")),
lemma=string_to_none(request.form.get("lemma")),
POS=string_to_none(request.form.get("POS")),
morph=string_to_none(request.form.get("morph"))
Expand Down
23 changes: 22 additions & 1 deletion app/models/control_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from collections import Counter
# PIP Packages
import unidecode
import regex as re
import yaml
from flask_sqlalchemy.query import Query as FlaskQuery
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import backref
from sqlalchemy.orm import backref, Query
from sqlalchemy import literal, case
from werkzeug.exceptions import BadRequest
# APP Logic
Expand Down Expand Up @@ -43,6 +45,15 @@ class ControlLists(db.Model):
bibliography = db.Column(db.Text, nullable=True)
language = db.Column(db.String(10), nullable=True)
notes = db.Column(db.Text, nullable=True)
filter_punct = db.Column(db.Boolean, unique=False, default=False)
filter_numeral = db.Column(db.Boolean, unique=False, default=False)
filter_metadata = db.Column(db.Boolean, unique=False, default=False)
filter_ignore = db.Column(db.Boolean, unique=False, default=False)

# Regular expressions named after the filter_* flags; where they are applied
# is outside this excerpt (presumably when looking for unallowed values - TODO confirm).
# Value ending with a bracketed group holding a non-empty key and a colon, e.g. "[REF:12]"
re_filter_metadata = r'(\[[^\]]+:[^\]]*\]$)'
# Value starting with the literal "[IGNORE]"
re_filter_ignore = r'(^\[IGNORE\])'
# Value made only of characters that are neither word characters nor whitespace.
# Raw string: "\w" and "\s" are invalid escape sequences in a plain literal and
# raise a warning on recent Python versions.
re_filter_punct = r"(^[^\w\s]+$)"
# Value made only of digits
re_filter_numeral = r'(^\d+$)'

# For caching purposes, we record the last time these fields were edited
#last_lemma_edit = db.Column(db.DateTime, default=datetime.datetime.utcnow)
Expand Down Expand Up @@ -240,6 +251,7 @@ def has_list(self, allowed_type):
).exists()
).scalar()


@staticmethod
def add_default_lists(path=None):
""" Loads the default lists from the config folder
Expand Down Expand Up @@ -281,10 +293,19 @@ class ControlListsUser(db.Model):
user_id = db.Column(db.Integer, db.ForeignKey(User.id), primary_key=True)
is_owner = db.Column(db.Boolean, default=False)


control = db.relationship("ControlLists", backref=backref("control_lists_user", cascade="all, delete-orphan"))
user = db.relationship(User, backref=backref("control_lists_user", cascade="all, delete-orphan"))



@classmethod
def retrieve(cls, user_id: int, control_list_id: int) -> FlaskQuery:
    """ Build the query selecting the link between a user and a control list

    :param user_id: Id of the user
    :param control_list_id: Id of the control list
    :return: Unexecuted query filtered on both ids
    """
    same_user = cls.user_id == user_id
    same_control_list = cls.control_lists_id == control_list_id
    return cls.query.filter(db.and_(same_user, same_control_list))

class AllowedLemma(db.Model):
""" An allowed lemma is a lemma that is accepted

Expand Down
Loading
Loading