diff --git a/config/sarc-dev.json b/config/sarc-dev.json index 3550f10e..cfed1e6b 100644 --- a/config/sarc-dev.json +++ b/config/sarc-dev.json @@ -7,7 +7,9 @@ "local_private_key_file": "secrets/ldap/Google_2026_01_26_66827.key", "local_certificate_file": "secrets/ldap/Google_2026_01_26_66827.crt", "ldap_service_uri": "ldaps://ldap.google.com", - "mongo_collection_name": "users" + "mongo_collection_name": "users", + "group_to_prof_json_path": "secrets/group_to_prof.json", + "exceptions_json_path": "secrets/exceptions.json" }, "account_matching": { "drac_members_csv_path": "secrets/account_matching/members-rrg-bengioy-ad-2022-11-25.csv", diff --git a/config/sarc-prod.json b/config/sarc-prod.json index 34f3ce9b..ef688ddd 100644 --- a/config/sarc-prod.json +++ b/config/sarc-prod.json @@ -7,7 +7,9 @@ "local_private_key_file": "secrets/ldap/Google_2026_01_26_66827.key", "local_certificate_file": "secrets/ldap/Google_2026_01_26_66827.crt", "ldap_service_uri": "ldaps://ldap.google.com", - "mongo_collection_name": "users" + "mongo_collection_name": "users", + "group_to_prof_json_path": "secrets/group_to_prof.json", + "exceptions_json_path": "secrets/exceptions.json" }, "account_matching": { "drac_members_csv_path": "secrets/account_matching/members-rrg-bengioy-ad-2022-11-25.csv", diff --git a/sarc/config.py b/sarc/config.py index 5ac67dc3..52185fc7 100644 --- a/sarc/config.py +++ b/sarc/config.py @@ -139,6 +139,16 @@ class LDAPConfig(BaseModel): local_certificate_file: str ldap_service_uri: str mongo_collection_name: str + group_to_prof_json_path: str = None + exceptions_json_path: str = None + + @validator("group_to_prof_json_path") + def _relative_group_to_prof(cls, value): + return relative_filepath(value) + + @validator("exceptions_json_path") + def _relative_exception(cls, value): + return relative_filepath(value) class AccountMatchingConfig(BaseModel): @@ -172,9 +182,27 @@ def _complete_cluster_fields(cls, value, values): config_var = ContextVar("config", default=None) +_config_folder = None + + +def relative_filepath(path): + """Allows files to be relative to the config""" + if path is None: + return path + + if "$SELF" in path: + return path.replace("$SELF", str(_config_folder)) + + return path + + def parse_config(config_path): + # pylint: disable=global-statement + global _config_folder config_path = Path(config_path) + _config_folder = str(config_path.parent) + if not config_path.exists(): raise ConfigurationError( f"Cannot read SARC configuration file: '{config_path}'" diff --git a/sarc/ldap/acquire.py b/sarc/ldap/acquire.py index 97b7803d..887e07a8 100644 --- a/sarc/ldap/acquire.py +++ b/sarc/ldap/acquire.py @@ -22,24 +22,21 @@ def run(): cfg = config() + user_collection = cfg.mongo.database_instance[cfg.ldap.mongo_collection_name] + + # Sync LDAP and mongodb sarc.ldap.read_mila_ldap.run( - local_private_key_file=cfg.ldap.local_private_key_file, - local_certificate_file=cfg.ldap.local_certificate_file, - ldap_service_uri=cfg.ldap.ldap_service_uri, - # write results in database - mongodb_database_instance=cfg.mongo.database_instance, - mongodb_collection=cfg.ldap.mongo_collection_name, - # output_json_file="secrets/account_matching/mila_users.json" + ldap=cfg.ldap, + mongodb_collection=user_collection, ) # It becomes really hard to test this with script when # we mock the `open` calls, so we'll instead rely on # what has already been populated in the database. - LD_users = list( - cfg.mongo.database_instance[cfg.ldap.mongo_collection_name].find({}) - ) + LD_users = list(user_collection.find({})) LD_users = [D_user["mila_ldap"] for D_user in LD_users] + # Match DRAC/CC to mila accounts DLD_data = sarc.account_matching.make_matches.load_data_from_files( { "mila_ldap": LD_users, # pass through diff --git a/sarc/ldap/read_mila_ldap.py b/sarc/ldap/read_mila_ldap.py index c632ff22..71a78345 100644 --- a/sarc/ldap/read_mila_ldap.py +++ b/sarc/ldap/read_mila_ldap.py @@ -134,74 +134,18 @@ } """ - -import argparse import json import os import ssl +from datetime import datetime # Requirements # - pip install ldap3 from ldap3 import ALL_ATTRIBUTES, SUBTREE, Connection, Server, Tls from pymongo import MongoClient, UpdateOne -parser = argparse.ArgumentParser( - description="Query LDAP and update the MongoDB database users based on values returned." -) -parser.add_argument( - "--local_private_key_file", - type=str, - help="local_private_key_file for LDAP connection", -) -parser.add_argument( - "--local_certificate_file", - type=str, - help="local_certificate_file for LDAP connection", -) -parser.add_argument( - "--ldap_service_uri", - type=str, - default="ldaps://ldap.google.com", - help="ldap service uri", -) -# We have two possible things that we can do with the data fetched. -# Dumping to a json file is possible. -parser.add_argument( - "--mongodb_connection_string", - default=None, - type=str, - help="(optional) MongoDB connection string. Contains username and password.", -) -parser.add_argument( - "--mongodb_database_name", - default="sarc", - type=str, - help="(optional) MongoDB database to modify. Better left at default.", -) -parser.add_argument( - "--mongodb_collection", - default="users", - type=str, - help="(optional) MongoDB collection to modify. Better left at default.", -) -parser.add_argument( - "--input_json_file", - default=None, - type=str, - help="(optional) Ignore the LDAP and load from this json file instead.", -) -parser.add_argument( - "--output_json_file", - default=None, - type=str, - help="(optional) Write results to json file.", -) -parser.add_argument( - "--output_raw_LDAP_json_file", - default=None, - type=str, - help="(optional) Write results of the raw LDAP query to json file.", -) +from ..config import LDAPConfig, config +from .supervisor import resolve_supervisors def query_ldap(local_private_key_file, local_certificate_file, ldap_service_uri): @@ -258,6 +202,10 @@ def process_user(user_raw: dict) -> dict: "googleUid" and "uid" match that of "mail" (except for the "@mila.quebec" suffix). """ + + supervisor = user_raw.get("supervisor") + cosupervisor = user_raw.get("co_supervisor") + user = { # include the suffix "@mila.quebec" "mila_email_username": user_raw["mail"][0], @@ -265,6 +213,8 @@ def process_user(user_raw: dict) -> dict: "mila_cluster_uid": user_raw["uidNumber"][0], "mila_cluster_gid": user_raw["gidNumber"][0], "display_name": user_raw["displayName"][0], + "supervisor": supervisor if supervisor else None, + "co_supervisor": cosupervisor if cosupervisor else None, "status": "disabled" if (user_raw["suspended"][0] in ["True", "true", True]) else "enabled", @@ -316,50 +266,107 @@ def client_side_user_updates(LD_users_DB, LD_users_LDAP): return LD_users_to_update_or_insert +def _query_and_dump( + ldap, + save_ldap=False, +): + LD_users_raw = query_ldap( + ldap.local_private_key_file, + ldap.local_certificate_file, + ldap.ldap_service_uri, + ) + + if save_ldap: + today = datetime.utcnow() + cache_path = config().cache / "ldap" / f"raw.{today.strftime('%Y-%m-%d')}.json" + + with open(cache_path, "w", encoding="utf-8") as f_out: + json.dump(LD_users_raw, f_out, indent=4) + + return LD_users_raw + + +def _save_to_mongo(collection, LD_users): + if collection is None: + return + + # read only the "mila_ldap" field from the entries, and ignore the + # "drac_roles" and "drac_members" components + LD_users_DB = [u["mila_ldap"] for u in list(collection.find())] + + L_updated_users = client_side_user_updates( + LD_users_DB=LD_users_DB, + LD_users_LDAP=LD_users, + ) + + L_updates_to_do = [ + UpdateOne( + {"mila_ldap.mila_email_username": updated_user["mila_email_username"]}, + { + # We set all the fields corresponding to the fields from `updated_user`, + # so that's a convenient way to do it. Note that this does not affect + # the fields in the database that are already present for that user. + "$set": {"mila_ldap": updated_user}, + }, + upsert=True, + ) + for updated_user in L_updated_users + ] + + if L_updates_to_do: + result = collection.bulk_write(L_updates_to_do) # <- the actual commit + print(result.bulk_api_result) + + +def load_ldap_exceptions(ldap_config: LDAPConfig): + if ldap_config.exceptions_json_path is None: + return {} + + with open(ldap_config.exceptions_json_path, "r", encoding="utf-8") as file: + return json.load(file) + + +def load_group_to_prof_mapping(ldap_config: LDAPConfig): + if ldap_config.group_to_prof_json_path is None: + return {} + + with open(ldap_config.group_to_prof_json_path, "r", encoding="utf-8") as file: + return json.load(file) + + def run( - local_private_key_file=None, - local_certificate_file=None, - ldap_service_uri=None, - # DB option 1 - mongodb_database_instance=None, - # DB option 2 - mongodb_connection_string=None, - mongodb_database_name=None, - # + ldap, mongodb_collection=None, - input_json_file=None, output_json_file=None, - output_raw_LDAP_json_file=None, - LD_users=None, # for external testing purposes + save_ldap=False, ): - """ - If `mongodb_database_instance` is not `None`, it overrides the two arguments - `mongodb_connection_string`, `mongodb_database_name`. - This is done because the SARC config gets us a client connected to a database already, - so it's better to use that functionality. - """ + """Runs periodically to synchronize mongodb with LDAP""" - if LD_users is not None: - # Used mostly for testing purposes. - # Overrides the "input_json_file" argument. - # Just make sure it's a list of dict, at least. - assert isinstance(LD_users, list) - if LD_users: - assert isinstance(LD_users[0], dict) - elif input_json_file: - with open(input_json_file, "r", encoding="utf-8") as f_in: - LD_users = json.load(f_in) - else: - # this is the usual branch taken in practice - LD_users_raw = query_ldap( - local_private_key_file, local_certificate_file, ldap_service_uri - ) - if output_raw_LDAP_json_file: - with open(output_raw_LDAP_json_file, "w", encoding="utf-8") as f_out: - json.dump(LD_users_raw, f_out, indent=4) - print(f"Wrote {output_raw_LDAP_json_file}.") + # retrive users from LDAP + LD_users_raw = _query_and_dump(ldap, save_ldap) + + # Transform users into the json we will save + group_to_prof = load_group_to_prof_mapping(ldap) + exceptions = load_ldap_exceptions(ldap) + errors = resolve_supervisors(LD_users_raw, group_to_prof, exceptions) + + LD_users = [process_user(D_user_raw) for D_user_raw in LD_users_raw] + + _save_to_mongo(mongodb_collection, LD_users) + + errors.show() + + if output_json_file: + with open(output_json_file, "w", encoding="utf-8") as f_out: + json.dump(LD_users, f_out, indent=4) + print(f"Wrote {output_json_file}.") - LD_users = [process_user(D_user_raw) for D_user_raw in LD_users_raw] + +def get_ldap_collection(cfg): + mongodb_database_instance = cfg.mongo.database_instance + mongodb_collection = cfg.ldap.mongo_collection_name + mongodb_connection_string = cfg.mongo.connection_string + mongodb_database_name = cfg.mongo.database_name # Two ways to get the MongoDB collection, and then it's possible that we don't care # about getting one, in which case we'll skip that step of the output. @@ -376,51 +383,4 @@ def run( else: users_collection = None - if users_collection is not None: - # read only the "mila_ldap" field from the entries, and ignore the - # "drac_roles" and "drac_members" components - LD_users_DB = [u["mila_ldap"] for u in list(users_collection.find())] - - L_updated_users = client_side_user_updates( - LD_users_DB=LD_users_DB, LD_users_LDAP=LD_users - ) - - L_updates_to_do = [ - UpdateOne( - {"mila_ldap.mila_email_username": updated_user["mila_email_username"]}, - { - # We set all the fields corresponding to the fields from `updated_user`, - # so that's a convenient way to do it. Note that this does not affect - # the fields in the database that are already present for that user. - "$set": {"mila_ldap": updated_user}, - }, - upsert=True, - ) - for updated_user in L_updated_users - ] - - if L_updates_to_do: - result = users_collection.bulk_write( - L_updates_to_do - ) # <- the actual commit - print(result.bulk_api_result) - - if output_json_file: - with open(output_json_file, "w", encoding="utf-8") as f_out: - json.dump(LD_users, f_out, indent=4) - print(f"Wrote {output_json_file}.") - - -if __name__ == "__main__": - args = parser.parse_args() - run( - local_private_key_file=args.local_private_key_file, - local_certificate_file=args.local_certificate_file, - ldap_service_uri=args.ldap_service_uri, - mongodb_connection_string=args.mongodb_connection_string, - mongodb_database_name=args.mongodb_database_name, - mongodb_collection=args.mongodb_collection, - input_json_file=args.input_json_file, - output_json_file=args.output_json_file, - output_raw_LDAP_json_file=args.output_raw_LDAP_json_file, - ) + return users_collection diff --git a/sarc/ldap/supervisor.py b/sarc/ldap/supervisor.py new file mode 100644 index 00000000..84671a88 --- /dev/null +++ b/sarc/ldap/supervisor.py @@ -0,0 +1,228 @@ +import re +from dataclasses import dataclass, field +from itertools import chain + +universities = { + "mcgill", + "udem", + "poly", + "ets", + "concordia", + "ulaval", + "hec", +} + + +class MultipleSupervisor(Exception): + pass + + +def extract_groups(member_of: list[str]): + supervisors = [] + groups = [] + is_student = False + is_core = False + university = None + + for e in member_of: + if m := re.match(r"^cn=(.+?)-students.*", e): + if m.group(1) in universities: + university = m.group(1) + is_student = True + continue + + supervisors.append(m.group(1)) + is_student = True + continue + + if m := re.match(r"^cn=(.+?),.*", e): + if m.group(1) in ["mila-core-profs", "mila-profs", "core-academic-member"]: + is_core = True + + is_student = False + groups.append(m.group(1)) + continue + + return supervisors, groups, university, is_student, is_core + + +@dataclass +class Result: + ldap: dict + is_prof: bool + is_core: bool + is_student: bool + supervisors: list + cn_groups: list + university: str + + +def _student_or_prof(person: dict, S_profs: set[str], exceptions: dict) -> Result: + if exceptions is None: + exceptions = {} + + # the most straightforward way to determine if a person is a prof, + # because you can't trust the cn_groups "core-profs" where + # the mila directors are also listed + university = None + is_prof = person["mail"][0] in S_profs + ( + cn_groups_of_supervisors, + cn_groups, + university, + is_student, + is_core, + ) = extract_groups(person["memberOf"]) + + if person["suspended"][0] == "true": + return None + + if person["mail"][0] in exceptions.get("not_student", []): + is_student = False + is_prof = True + + elif person["mail"][0] in exceptions.get("not_teacher", []): + is_prof = False + is_student = True + + if is_prof: + return Result( + person, + supervisors=[], + university=university, + is_core=is_core, + is_prof=is_prof, + is_student=is_student, + cn_groups=set(cn_groups), + ) + + return Result( + person, + is_prof, + is_core, + is_student, + cn_groups_of_supervisors, + cn_groups, + university, + ) + + +@dataclass +class SupervisorMatchingErrors: + no_supervisors: list = field(default_factory=list) + too_many_supervisors: list = field(default_factory=list) + unknown_supervisors: list = field(default_factory=list) + unknown_group: list = field(default_factory=list) + prof_and_student: list = field(default_factory=list) + + def errors(self): + return chain( + self.no_supervisors, + self.too_many_supervisors, + self.unknown_supervisors, + self.unknown_group, + self.prof_and_student, + ) + + def error_count(self): + return len(list(self.errors())) + + def has_errors(self): + return self.error_count() > 0 + + def show(self): + def make_list(errors): + return [person.ldap["mail"][0] for person in errors] + + def show_error(msg, array): + if len(array) > 0: + print(f"{msg} {make_list(array)}") + + show_error(" Missing supervisors:", self.no_supervisors) + show_error(" Too many supervisors:", self.too_many_supervisors) + show_error(" Prof and Student:", self.prof_and_student) + + if self.unknown_supervisors: + print(f" Unknown supervisors: {self.unknown_supervisors}") + + if self.unknown_group: + print(f" Unknown group: {self.unknown_group}") + + +def _extract_supervisors_from_groups( + person: Result, group_to_prof: dict, errors: SupervisorMatchingErrors, index: dict +) -> list: + supervisors = [] + + for group in person.supervisors: + prof = group_to_prof.get(group) + + if prof is None: + errors.unknown_group.append(group) + else: + p = index.get(prof) + + if p is None: + errors.unknown_supervisors.append(prof) + + supervisors.append(prof) + + # We need to sort them, make the core prof index 0 + def sortkey(x): + person = index.get(x) + if person: + return int(person.is_core) + return 0 + + return sorted(supervisors, key=sortkey, reverse=True) + + +def resolve_supervisors( + ldap_people: list[dict], group_to_prof: dict, exceptions: dict +) -> SupervisorMatchingErrors: + index = {} + people = [] + S_profs = set(group_to_prof.values()) + errors = SupervisorMatchingErrors() + + # Build the index for supervisor resolution + for person in ldap_people: + result = _student_or_prof( + person, + S_profs, + exceptions, + ) + + if result.is_prof and result.is_student: + errors.prof_and_student.append(result) + continue + + if result is None: + continue + + index[result.ldap["mail"][0]] = result + people.append(result) + + for person in people: + if person.is_student: + person.ldap["is_student"] = True + + supervisors = _extract_supervisors_from_groups( + person, group_to_prof, errors, index + ) + + if len(supervisors) == 0: + person.ldap["supervisor"] = [] + errors.no_supervisors.append(person) + + elif len(supervisors) == 1: + person.ldap["supervisor"] = supervisors[0] + + elif len(supervisors) == 2: + person.ldap["supervisor"] = supervisors[0] + person.ldap["co_supervisor"] = supervisors[1] + + else: + errors.too_many_supervisors.append(person) + + return errors diff --git a/scripts/launch_mongod.sh b/scripts/launch_mongod.sh new file mode 100644 index 00000000..adab9690 --- /dev/null +++ b/scripts/launch_mongod.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +PORT=${MONGO_PORT:-"8123"} +ADDRESS=${MONGO_ADDRESS:-"localhost"} +ADMIN=${MONGO_ADMIN:-"god"} +PASSWORD=${MONGO_PASS:-"god123"} +DB_PATH=${MONGO_PATH:-"/tmp/db"} + + +function start { + # + # starts a new mongodb instance running locally at a specified location + # + # Usage: + # + # start + # + mkdir -p $DB_PATH + mongod --dbpath $DB_PATH/ --wiredTigerCacheSizeGB 1 --port $PORT --bind_ip localhost --pidfilepath $DB_PATH/pid +} + +function stop { + # + # stop the mongodb instance running at the current DB path + # + # Usage: + # + # stop + # + mongod --dbpath $DB_PATH/ --shutdown +} + +function restore { + # + # restore mongodb collections from a backup for a given database + # + # Usage: + # + # restore /home/sarc/mongo_backups/sarc_mongo.sarc.2023-07-12 sarc + # + path=$1 + db=$2 + + collections=("allocations" "diskusage" "jobs" "users") + + for collection in collections; do + mongorestore --gzip --port=$PORT -d $db $path/$db/$collection.bson.gz --gzip + done +} \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 503a71e2..68ec8b6c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import zoneinfo from pathlib import Path -from unittest.mock import MagicMock +from unittest.mock import MagicMock, mock_open, patch import pytest @@ -101,3 +101,92 @@ def prom_custom_query_mock(monkeypatch): ) yield PrometheusConnect.custom_query + + +@pytest.fixture +def file_contents(): + # We also need to generate the data for the two files being read: + # cfg.account_matching.drac_roles_csv_path + # cfg.account_matching.drac_members_csv_path + # + # We will define the content we want to inject for each file. + # These are based on the fake users generated by `fake_raw_ldap_data`. + # We don't need to create weird edge cases, because we are not testing + # the details of the matching algorithm here. We are testing the pipeline. + # + # Naturally, the content of the CSV files must be consistent with the + # fake users defined by `fake_raw_ldap_data`. + # We'll add an extra use that won't match, called "Mysterious Stranger". + + cfg = config() + + # inspired by sponsored_roles_for_Yoshua_Bengio_(CCI_jvb-000).csv + account_matching_drac_roles_csv_path = """"Status","Username","Nom","Email","État du compte" +"Activated","john.smith000","John Smith the 000rd","js000@yahoo.ca","activé" +"Activated","john.smith001","John Smith the 001rd","js001@yahoo.ca","activé" +"Activated","john.smith002","John Smith the 002rd","js002@yahoo.ca","activé" +"Activated","stranger.person","Mysterious Stranger","ms@hotmail.com","activé" +""" + + # inspired by members-rrg-bengioy-ad-2022-11-25.csv + account_matching_drac_members_csv_path = """Name,Sponsor,Permission,Activation_Status,username,Email +John Smith the 000rd,BigProf,Manager,activated,john.smith000,js000@yahoo.ca +John Smith the 001rd,BigProf,Manager,activated,john.smith001,js001@yahoo.ca +John Smith the 002rd,BigProf,Manager,activated,john.smith002,js002@yahoo.ca +Mysterious Stranger,BigProf,Manager,activated,stranger.person,ms@hotmail.com +""" + + # inspired by make_matches_config.json + account_matching_make_matches_config = """{ + "L_phantom_mila_emails_to_ignore": + [ + "iamnobody@mila.quebec" + ], + "D_override_matches_mila_to_cc_account_username": + { + "john.smith001@mila.quebec": "js_the_first" + } + } + """ + + group_to_prof = """ + { + "supervisor000": "john.smith000@mila.quebec" + } + """ + exceptions_json_path = """ + { + "not_prof": [], + "not_student": [] + } + """ + + return { + cfg.account_matching.drac_roles_csv_path: account_matching_drac_roles_csv_path, + cfg.account_matching.drac_members_csv_path: account_matching_drac_members_csv_path, + cfg.account_matching.make_matches_config: account_matching_make_matches_config, + cfg.ldap.group_to_prof_json_path: group_to_prof, + cfg.ldap.exceptions_json_path: exceptions_json_path, + } + + +@pytest.fixture +def mock_file(file_contents): + # Get the original open function before it gets patched + original = __builtins__["open"] + + # Define a function that returns a new mock file object with the content + def _mock_file(filename, *vargs, **kwargs): + nonlocal original + + if filename in file_contents: + return mock_open(read_data=file_contents[filename]).return_value + if filename.startswith("/tmp"): + return original(filename, *vargs, **kwargs) + else: + # we haven't found a way to pass through the other calls + # to `open` other files, so let's just raise an error + # because those aren't going to work anyways + raise FileNotFoundError(filename) + + return _mock_file diff --git a/tests/functional/cli/acquire/test_acquire_users.py b/tests/functional/cli/acquire/test_acquire_users.py index f5d52781..f2dc623e 100644 --- a/tests/functional/cli/acquire/test_acquire_users.py +++ b/tests/functional/cli/acquire/test_acquire_users.py @@ -54,14 +54,12 @@ def fake_raw_ldap_data(nbr_users=10): @pytest.mark.usefixtures("empty_read_write_db") -def test_acquire_users(cli_main, monkeypatch): +def test_acquire_users(cli_main, monkeypatch, mock_file): """Test command line `sarc acquire users`. Copied from tests.functional.ldap.test_acquire_ldap.test_acquire_ldap and replaced direct call with CLI call. """ - - cfg = config() nbr_users = 10 def mock_query_ldap( @@ -72,44 +70,6 @@ def mock_query_ldap( monkeypatch.setattr(sarc.ldap.read_mila_ldap, "query_ldap", mock_query_ldap) - file1_content = """"Status","Username","Nom","Email","État du compte" -"Activated","john.smith000","John Smith the 000rd","js000@yahoo.ca","activé" -"Activated","john.smith001","John Smith the 001rd","js001@yahoo.ca","activé" -"Activated","john.smith002","John Smith the 002rd","js002@yahoo.ca","activé" -"Activated","stranger.person","Mysterious Stranger","ms@hotmail.com","activé" -""" - # inspired by members-rrg-bengioy-ad-2022-11-25.csv - file2_content = """Name,Sponsor,Permission,Activation_Status,username,Email -John Smith the 000rd,BigProf,Manager,activated,john.smith000,js000@yahoo.ca -John Smith the 001rd,BigProf,Manager,activated,john.smith001,js001@yahoo.ca -John Smith the 002rd,BigProf,Manager,activated,john.smith002,js002@yahoo.ca -Mysterious Stranger,BigProf,Manager,activated,stranger.person,ms@hotmail.com -""" - # inspired by make_matches_comfig.json - file3_content = """{ - "L_phantom_mila_emails_to_ignore": - [ - "iamnobody@mila.quebec" - ], - "D_override_matches_mila_to_cc_account_username": - { - "john.smith001@mila.quebec": "js_the_first" - } -} -""" - - file_contents = { - cfg.account_matching.drac_roles_csv_path: file1_content, - cfg.account_matching.drac_members_csv_path: file2_content, - cfg.account_matching.make_matches_config: file3_content, - } - - def mock_file(filename, *vargs, **kwargs): - if filename in file_contents: - return mock_open(read_data=file_contents[filename]).return_value - else: - raise FileNotFoundError(filename) - with patch("builtins.open", side_effect=mock_file): assert ( cli_main( @@ -128,6 +88,7 @@ def mock_file(filename, *vargs, **kwargs): # test some drac_roles and drac_members fields for segment in [js_user.drac_roles, js_user.drac_members]: + assert segment is not None assert "email" in segment assert segment["email"] == f"js{i:03d}@yahoo.ca" assert "username" in segment diff --git a/tests/functional/ldap/test_acquire_ldap.py b/tests/functional/ldap/test_acquire_ldap.py index 720a8cd1..c4c48e54 100644 --- a/tests/functional/ldap/test_acquire_ldap.py +++ b/tests/functional/ldap/test_acquire_ldap.py @@ -5,7 +5,6 @@ import sarc.account_matching.make_matches import sarc.ldap.acquire import sarc.ldap.read_mila_ldap # will monkeypatch "query_ldap" -from sarc.config import config from sarc.ldap.api import get_user from .test_read_mila_ldap import fake_raw_ldap_data @@ -13,15 +12,13 @@ # @pytest.mark.usefixtures("read_write_db") @pytest.mark.usefixtures("empty_read_write_db") -def test_acquire_ldap(monkeypatch): +def test_acquire_ldap(monkeypatch, mock_file): """ Override the LDAP queries. Have users with matches to do. (at least we don't need to flex `perform_matching` with edge cases) Inspect the results in the database to make sure they're all there. """ - - cfg = config() nbr_users = 10 def mock_query_ldap( @@ -32,65 +29,6 @@ def mock_query_ldap( monkeypatch.setattr(sarc.ldap.read_mila_ldap, "query_ldap", mock_query_ldap) - # We also need to generate the data for the two files being read: - # cfg.account_matching.drac_roles_csv_path - # cfg.account_matching.drac_members_csv_path - # - # We will define the content we want to inject for each file. - # These are based on the fake users generated by `fake_raw_ldap_data`. - # We don't need to create weird edge cases, because we are not testing - # the details of the matching algorithm here. We are testing the pipeline. - # - # Naturally, the content of the CSV files must be consistent with the - # fake users defined by `fake_raw_ldap_data`. - # We'll add an extra use that won't match, called "Mysterious Stranger". - - # inspired by sponsored_roles_for_Yoshua_Bengio_(CCI_jvb-000).csv - file1_content = """"Status","Username","Nom","Email","État du compte" -"Activated","john.smith000","John Smith the 000rd","js000@yahoo.ca","activé" -"Activated","john.smith001","John Smith the 001rd","js001@yahoo.ca","activé" -"Activated","john.smith002","John Smith the 002rd","js002@yahoo.ca","activé" -"Activated","stranger.person","Mysterious Stranger","ms@hotmail.com","activé" -""" - # inspired by members-rrg-bengioy-ad-2022-11-25.csv - file2_content = """Name,Sponsor,Permission,Activation_Status,username,Email -John Smith the 000rd,BigProf,Manager,activated,john.smith000,js000@yahoo.ca -John Smith the 001rd,BigProf,Manager,activated,john.smith001,js001@yahoo.ca -John Smith the 002rd,BigProf,Manager,activated,john.smith002,js002@yahoo.ca -Mysterious Stranger,BigProf,Manager,activated,stranger.person,ms@hotmail.com -""" - # inspired by make_matches_config.json - file3_content = """ -{ - "L_phantom_mila_emails_to_ignore": - [ - "john.smith005@mila.quebec", - "john.smith006@mila.quebec" - ], - "D_override_matches_mila_to_cc_account_username": - { - "john.smith008@mila.quebec": "smith000" - } -} -""" - - # Create a dictionary of file paths and their corresponding content - file_contents = { - cfg.account_matching.drac_roles_csv_path: file1_content, - cfg.account_matching.drac_members_csv_path: file2_content, - cfg.account_matching.make_matches_config: file3_content, - } - - # Define a function that returns a new mock file object with the content - def mock_file(filename, *vargs, **kwargs): - if filename in file_contents: - return mock_open(read_data=file_contents[filename]).return_value - else: - # we haven't found a way to pass through the other calls - # to `open` other files, so let's just raise an error - # because those aren't going to work anyways - raise FileNotFoundError(filename) - # Patch the built-in `open()` function for each file path with patch("builtins.open", side_effect=mock_file): sarc.ldap.acquire.run() @@ -107,8 +45,10 @@ def mock_file(filename, *vargs, **kwargs): # test some drac_roles and drac_members fields js_user_d = js_user.dict() + print(i, js_user_d) for segment in ["drac_roles", "drac_members"]: assert segment in js_user_d + assert js_user_d[segment] is not None assert "email" in js_user_d[segment] assert js_user_d[segment]["email"] == f"js{i:03d}@yahoo.ca" assert "username" in js_user_d[segment] @@ -124,6 +64,9 @@ def mock_file(filename, *vargs, **kwargs): assert js_user.drac.username == js_user.drac_members["username"] assert js_user.drac.active + if i == 1: + assert js_user_d["mila_ldap"]["supervisor"] is not None + # test the absence of the mysterious stranger js_user = get_user(drac_account_username="ms@hotmail.com") assert js_user is None diff --git a/tests/functional/ldap/test_read_mila_ldap.py b/tests/functional/ldap/test_read_mila_ldap.py index 6976c809..289b3401 100644 --- a/tests/functional/ldap/test_read_mila_ldap.py +++ b/tests/functional/ldap/test_read_mila_ldap.py @@ -9,6 +9,21 @@ from sarc.config import config +def fake_member_of(index, count): + member_of_config = { + # Core prof + 0: ["cn=mila-core-profs,ou=Groups,dc=mila,dc=quebec"], + # Student + 1: [ + "cn=mcgill-students,ou=Groups,dc=mila,dc=quebec", + "cn=supervisor000-students,ou=Groups,dc=mila,dc=quebec", + ], + # Not core prof, not student + 2: [], + } + return member_of_config.get(index, []) + + def fake_raw_ldap_data(nbr_users=10): """ Return a deterministically-generated list of fake LDAP users just as @@ -31,7 +46,7 @@ def fake_raw_ldap_data(nbr_users=10): "homeDirectory": [f"/home/john.smith{i:03d}"], "loginShell": ["/bin/bash"], "mail": [f"john.smith{i:03d}@mila.quebec"], - "memberOf": [], + "memberOf": fake_member_of(i, nbr_users), "objectClass": [ "top", "person", @@ -53,7 +68,7 @@ def fake_raw_ldap_data(nbr_users=10): ) -def test_query_to_ldap_server_and_writing_to_output_json(monkeypatch): +def test_query_to_ldap_server_and_writing_to_output_json(monkeypatch, mock_file): cfg = config() nbr_users = 10 @@ -68,13 +83,12 @@ def mock_query_ldap( with tempfile.NamedTemporaryFile() as tmp_file: tmp_file_path = tmp_file.name - sarc.ldap.read_mila_ldap.run( - local_private_key_file=cfg.ldap.local_private_key_file, - local_certificate_file=cfg.ldap.local_certificate_file, - ldap_service_uri=cfg.ldap.ldap_service_uri, - # write results to here - output_json_file=tmp_file_path, - ) + with patch("builtins.open", side_effect=mock_file): + sarc.ldap.read_mila_ldap.run( + cfg.ldap, + # write results to here + output_json_file=tmp_file_path, + ) E = json.load(tmp_file) @@ -83,7 +97,12 @@ def mock_query_ldap( # This means that we are not testing much. assert len(E) == nbr_users for e, raw_user in zip(E, fake_raw_ldap_data(nbr_users)): - assert e == sarc.ldap.read_mila_ldap.process_user(raw_user) + processed_user = sarc.ldap.read_mila_ldap.process_user(raw_user) + + # resolve_supervisors is not called here + e["supervisor"] = None + + assert e == processed_user # note that the elements being compared are of the form """ @@ -99,7 +118,7 @@ def mock_query_ldap( @pytest.mark.usefixtures("empty_read_write_db") -def test_query_to_ldap_server_and_commit_to_db(monkeypatch): +def test_query_to_ldap_server_and_commit_to_db(monkeypatch, mock_file): """ This test is going to use the database and it will make two queries to the LDAP server. The second query will have @@ -130,15 +149,12 @@ def mock_query_ldap( monkeypatch.setattr(sarc.ldap.read_mila_ldap, "query_ldap", mock_query_ldap) - sarc.ldap.read_mila_ldap.run( - local_private_key_file=cfg.ldap.local_private_key_file, - local_certificate_file=cfg.ldap.local_certificate_file, - ldap_service_uri=cfg.ldap.ldap_service_uri, - # write results to here - mongodb_connection_string=cfg.mongo.connection_string, - mongodb_database_name=cfg.mongo.database_name, - mongodb_collection=cfg.ldap.mongo_collection_name, - ) + with patch("builtins.open", side_effect=mock_file): + sarc.ldap.read_mila_ldap.run( + cfg.ldap, + # write results to here + mongodb_collection=sarc.ldap.read_mila_ldap.get_ldap_collection(cfg), + ) L_users = list(db[cfg.ldap.mongo_collection_name].find({}, {"_id": False})) return L_users diff --git a/tests/sarc-test.json b/tests/sarc-test.json index f5ef64ec..6cbcdd14 100644 --- a/tests/sarc-test.json +++ b/tests/sarc-test.json @@ -7,7 +7,9 @@ "local_private_key_file": "not_a_valid_path.key", "local_certificate_file": "not_a_valid_path.crt", "ldap_service_uri": "ldaps://ldap.google.com", - "mongo_collection_name": "users" + "mongo_collection_name": "users", + "group_to_prof_json_path": "secrets/group_to_prof.json", + "exceptions_json_path": "secrets/exceptions.json" }, "account_matching": { "drac_members_csv_path": "drac_members_not_valid_path.csv", diff --git a/tests/unittests/ldap/test_sync.py b/tests/unittests/ldap/test_sync.py new file mode 100644 index 00000000..519fba8c --- /dev/null +++ b/tests/unittests/ldap/test_sync.py @@ -0,0 +1,325 @@ +from collections import namedtuple + +import sarc.ldap.read_mila_ldap +from sarc.ldap.read_mila_ldap import resolve_supervisors, run +from sarc.ldap.supervisor import _student_or_prof, extract_groups + + +class CollectionMock: + def __init__(self) -> None: + self.documents = [] + + def find(self, *args, **kwargs): + return [] + + def bulk_write(self, write_ops): + self.documents = write_ops + return namedtuple("Result", ["bulk_api_result"])(len(write_ops)) + + +def make_person(name, suspended=False): + return { + "mail": [f"{name}@email.com"], + "memberOf": [], + "suspended": ["true" if suspended else "false"], + "posixUid": [f"{name}"], + "uidNumber": [f"{name}"], + "gidNumber": [f"{name}"], + "displayName": [f"{name}"], + "googleUid": [f"{name}"], + "uid": [f"{name}"], + "sn": [f"{name}"], + } + + +def make_core(name): + person = make_person(name, False) + person["memberOf"] = ["cn=mila-core-profs,ou=Groups,dc=mila,dc=quebec"] + return person + + +def make_student(name, supervisors, suspended=False): + person = make_person(name, suspended) + members = [] + if supervisors: + for s in supervisors: + members.append(f"cn={s}-students,ou=Groups,dc=mila,dc=quebec") + + person["memberOf"] = members + return person + + +def ldap_mock(*args, **kwargs): + return [ + make_student("good", ["mcgill", "co.supervisor", "supervisor"]), + make_person("co.supervisor"), + make_core("supervisor"), + ] + + +def ldap_mock_no_supervisor(*args, **kwargs): + return [ + make_student("good", ["mcgill"]), + make_person("co.supervisor"), + make_core("supervisor"), + ] + + +def ldap_mock_too_many_supervisor(*args, **kwargs): + return [ + make_student("good", ["mcgill", "co.supervisor", "supervisor", "metoo"]), + make_person("co.supervisor"), + make_core("supervisor"), + make_core("metoo"), + ] + + +def ldap_mock_nocore_supervisor(*args, **kwargs): + return [ + make_student("good", ["mcgill", "co.supervisor", "supervisor"]), + make_person("co.supervisor"), + make_person("supervisor"), + ] + + +def ldap_mock_missing_supervisor(*args, **kwargs): + """Supervisor is not in LDAP""" + return [ + make_student("good", ["mcgill", "co.supervisor", "supervisor"]), + make_person("co.supervisor"), + ] + + +def ldap_mock_missing_supervisor_mapping(*args, **kwargs): + """Cannot find supervisor from group; missing mapping""" + return [ + make_student("good", ["mcgill", "co.supervisor", "idontexist"]), + make_person("co.supervisor"), + make_core("supervisor"), + ] + + +def group_to_prof(*args): + return { + "supervisor": "supervisor@email.com", + "co.supervisor": "co.supervisor@email.com", + "metoo": "metoo@email.com", + } + + +def test_extract_groups_student_no_supervisor(): + supervisors, groups, university, is_student, is_core = extract_groups( + make_student("ok", ["mcgill"])["memberOf"] + ) + + assert university == "mcgill" + assert is_student is True + assert is_core is False + + # The supervisors are extracted as is and not yet sorted + assert supervisors == [] + assert groups == [] + + +def test_extract_groups_student(): + ldap_people = ldap_mock() + + supervisors, groups, university, is_student, is_core = extract_groups( + ldap_people[0]["memberOf"] + ) + + assert university == "mcgill" + assert is_student is True + assert is_core is False + + # The supervisors are extracted as is and not yet sorted + assert supervisors == ["co.supervisor", "supervisor"] + assert groups == [] + + +def test_extract_groups_not_core(): + ldap_people = ldap_mock() + + supervisors, groups, university, is_student, is_core = extract_groups( + ldap_people[1]["memberOf"] + ) + + assert university is None + assert is_student is False + assert is_core is False + + # The supervisors are extracted as is and not yet sorted + assert supervisors == [] + assert groups == [] + + +def test_extract_groups_is_core(): + ldap_people = ldap_mock() + + supervisors, groups, university, is_student, is_core = extract_groups( + ldap_people[2]["memberOf"] + ) + + assert university is None + assert is_student is False + assert is_core is True + assert supervisors == [] + assert groups == ["mila-core-profs"] + + +def test_resolve_supervisors(): + ldap_people = ldap_mock() + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + assert errors.has_errors() is False + + # The supervisors got sorted + assert ldap_people[0]["supervisor"] == "supervisor@email.com" + assert ldap_people[0]["co_supervisor"] == "co.supervisor@email.com" + + +def test_resolve_no_supervisors(): + ldap_people = ldap_mock_no_supervisor() + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + errors.show() + + assert errors.has_errors() is True + assert errors.error_count() == 1 + assert len(errors.no_supervisors) == 1 + + +def test_resolve_too_many_supervisors(): + ldap_people = ldap_mock_too_many_supervisor() + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + errors.show() + assert errors.has_errors() is True + assert errors.error_count() == 1 + assert len(errors.too_many_supervisors) == 1 + + +def test_resolve_missing_supervisors(): + ldap_people = ldap_mock_missing_supervisor() + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + errors.show() + assert errors.has_errors() is True + assert errors.error_count() == 1 + assert len(errors.unknown_supervisors) == 1 + + +def test_resolve_missing_supervisors_mapping(): + ldap_people = [ + make_student("supervisor", ["mcgill", "supervisor"]), + ] + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + errors.show() + assert errors.has_errors() is True + assert errors.error_count() == 1 + assert len(errors.prof_and_student) == 1 + + +def test_student_and_prof(): + ldap_people = ldap_mock_missing_supervisor_mapping() + + errors = resolve_supervisors(ldap_people, group_to_prof(), exceptions=None) + + errors.show() + assert errors.has_errors() is True + assert errors.error_count() == 2 + assert len(errors.unknown_group) == 1 + assert len(errors.no_core_supervisors) == 1 + + +def test_person_is_suspended(): + result = _student_or_prof( + make_person("hello", True), + dict(), + dict(), + ) + assert result is None + + +def test_not_student_and_not_prof(): + result = _student_or_prof( + make_person("hello", []), + dict(), + dict(), + ) + assert result is not None + assert (not result.is_student) and (not result.is_prof) + + +def test_student_and_prof(): + result = _student_or_prof( + make_student("supervisor", ["mcgill", "supervisor"]), + {"supervisor@email.com"}, + dict(), + ) + assert result is not None + assert result.is_student and result.is_prof + + +def test_student_or_prof_exception_student_is_prof(): + result = _student_or_prof( + make_student("good", ["mcgill"]), + dict(), + exceptions=dict(not_student=["good@email.com"], not_teacher=[]), + ) + assert result.is_prof is True + + +def test_student_or_prof_exception_prof_is_student(): + result = _student_or_prof( + make_person("good", False), + dict(), + exceptions=dict(not_student=[], not_teacher=["good@email.com"]), + ) + assert result is not None + assert result.is_prof is False + + +def ldap_exception(*args): + return { + "not_student": [], + "not_teacher": [], + } + + +def test_ldap_simple_sync(monkeypatch): + monkeypatch.setattr(sarc.ldap.read_mila_ldap, "_query_and_dump", ldap_mock) + monkeypatch.setattr( + sarc.ldap.read_mila_ldap, "load_ldap_exceptions", ldap_exception + ) + monkeypatch.setattr( + sarc.ldap.read_mila_ldap, "load_group_to_prof_mapping", group_to_prof + ) + + collection = CollectionMock() + + run( + ldap=None, + mongodb_collection=collection, + ) + + docs = collection.documents + + # find Student + for d in docs: + if d._doc["$set"]["mila_ldap"]["mila_email_username"].startswith("good"): + break + else: + assert False, "Did not find username" + + student = d._doc["$set"]["mila_ldap"] + assert student["supervisor"] == "supervisor@email.com", "Supervisor was found" + assert ( + student["co_supervisor"] == "co.supervisor@email.com" + ), "2nd supervisor was found"