Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Récupérer données et jumeler les étudiants à leurs superviseurs [SARC-128] #63

Merged
merged 46 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
f4addd8
Add files as is
Jul 10, 2023
6cf5cb2
Reformat
Jul 10, 2023
6a69786
Remove hardcoded values to files
Jul 10, 2023
fc8842b
Move hardcoded list to files
Jul 10, 2023
7f0318b
group globals
Jul 10, 2023
685b72c
-
Jul 11, 2023
57db3ad
-
Jul 11, 2023
993fd18
-
Jul 11, 2023
d732401
-
Jul 11, 2023
23b58f8
-
Jul 11, 2023
84517cb
-
Jul 11, 2023
51bd3d7
-
Jul 11, 2023
32721dd
-
Jul 12, 2023
3ec269e
Merge branch 'master' of github.com:mila-iqia/SARC into issue_SARC128
Jul 12, 2023
bc0c1e6
update tests
Jul 12, 2023
b6cf442
-
Jul 12, 2023
e30572d
-
Jul 12, 2023
7b158ce
-
Jul 12, 2023
441a85b
Update sarc/ldap/acquire.py
Delaunay Jul 17, 2023
aa6b0a1
Update sarc/ldap/read_mila_ldap.py
Delaunay Jul 17, 2023
58fcc93
Update sarc/ldap/read_mila_ldap.py
Delaunay Jul 17, 2023
09c5f65
Update sarc/ldap/supervisor.py
Delaunay Jul 17, 2023
351c9b0
Update sarc/ldap/supervisor.py
Delaunay Jul 17, 2023
be22e1a
Update tests/unittests/ldap/test_sync.py
Delaunay Jul 17, 2023
1e9b06b
Update tests/unittests/ldap/test_sync.py
Delaunay Jul 17, 2023
d5d470b
Update sarc/ldap/supervisor.py
Delaunay Jul 17, 2023
d13b6be
Merge branch 'master' of github.com:mila-iqia/SARC into issue_SARC128
Jul 17, 2023
2cb5244
Merge branch 'issue_SARC128' of github.com:mila-iqia/SARC into issue_…
Jul 17, 2023
de7c4dd
Update tests/unittests/ldap/test_sync.py
Delaunay Jul 17, 2023
53d6f5e
-
Jul 17, 2023
be3c58d
Merge branch 'issue_SARC128' of github.com:mila-iqia/SARC into issue_…
Jul 17, 2023
03bf99b
-
Jul 17, 2023
2426941
-
Jul 17, 2023
d6cc709
-
Jul 17, 2023
c8a24d9
-
Jul 18, 2023
e15b65f
-
Jul 18, 2023
ff608e6
-
Jul 18, 2023
1a5a615
-
Jul 18, 2023
b6a6337
-
Jul 18, 2023
2b9869e
-
Jul 18, 2023
9e690a3
-
Jul 18, 2023
50bf926
Merge branch 'master' of github.com:mila-iqia/SARC into issue_SARC128
Jul 18, 2023
9e0af54
fix EVERYTHING
Jul 18, 2023
361e9f4
-
Jul 18, 2023
8eaa048
-
Jul 19, 2023
72d6ab6
-
Jul 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion config/sarc-dev.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"local_private_key_file": "secrets/ldap/Google_2026_01_26_66827.key",
"local_certificate_file": "secrets/ldap/Google_2026_01_26_66827.crt",
"ldap_service_uri": "ldaps://ldap.google.com",
"mongo_collection_name": "users"
"mongo_collection_name": "users",
"group_to_prof_json_path": "secrets/group_to_prof.json",
"exceptions_json_path": "secrets/exceptions.json"
},
"account_matching": {
"drac_members_csv_path": "secrets/account_matching/members-rrg-bengioy-ad-2022-11-25.csv",
Expand Down
4 changes: 3 additions & 1 deletion config/sarc-prod.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"local_private_key_file": "secrets/ldap/Google_2026_01_26_66827.key",
"local_certificate_file": "secrets/ldap/Google_2026_01_26_66827.crt",
"ldap_service_uri": "ldaps://ldap.google.com",
"mongo_collection_name": "users"
"mongo_collection_name": "users",
"group_to_prof_json_path": "secrets/group_to_prof.json",
"exceptions_json_path": "secrets/exceptions.json"
},
"account_matching": {
"drac_members_csv_path": "secrets/account_matching/members-rrg-bengioy-ad-2022-11-25.csv",
Expand Down
28 changes: 28 additions & 0 deletions sarc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ class LDAPConfig(BaseModel):
local_certificate_file: str
ldap_service_uri: str
mongo_collection_name: str
group_to_prof_json_path: str = None
exceptions_json_path: str = None

@validator("group_to_prof_json_path")
def _relative_group_to_prof(cls, value):
return relative_filepath(value)

@validator("exceptions_json_path")
def _relative_exception(cls, value):
return relative_filepath(value)


class AccountMatchingConfig(BaseModel):
Expand Down Expand Up @@ -172,9 +182,27 @@ def _complete_cluster_fields(cls, value, values):
config_var = ContextVar("config", default=None)


_config_folder = None


def relative_filepath(path):
    """Expand the ``$SELF`` placeholder so a path can be relative to the config.

    Every occurrence of ``$SELF`` in ``path`` is replaced by the string form
    of the module-level ``_config_folder``, which ``parse_config`` sets to the
    parent folder of the configuration file it is given.

    Args:
        path: A path string, or ``None``.

    Returns:
        ``None`` when ``path`` is ``None``; ``path`` unchanged when it does
        not contain ``$SELF``; otherwise ``path`` with the placeholder
        substituted.
    """
    if path is not None and "$SELF" in path:
        # Only paths that actually carry the placeholder are rewritten.
        return path.replace("$SELF", str(_config_folder))

    return path


def parse_config(config_path):
# pylint: disable=global-statement
global _config_folder
config_path = Path(config_path)

_config_folder = str(config_path.parent)

if not config_path.exists():
raise ConfigurationError(
f"Cannot read SARC configuration file: '{config_path}'"
Expand Down
17 changes: 7 additions & 10 deletions sarc/ldap/acquire.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,21 @@
def run():
cfg = config()

user_collection = cfg.mongo.database_instance[cfg.ldap.mongo_collection_name]

# Sync LDAP and mongodb
sarc.ldap.read_mila_ldap.run(
local_private_key_file=cfg.ldap.local_private_key_file,
local_certificate_file=cfg.ldap.local_certificate_file,
ldap_service_uri=cfg.ldap.ldap_service_uri,
# write results in database
mongodb_database_instance=cfg.mongo.database_instance,
mongodb_collection=cfg.ldap.mongo_collection_name,
# output_json_file="secrets/account_matching/mila_users.json"
ldap=cfg.ldap,
mongodb_collection=user_collection,
)

# It becomes really hard to test this with script when
# we mock the `open` calls, so we'll instead rely on
# what has already been populated in the database.
LD_users = list(
cfg.mongo.database_instance[cfg.ldap.mongo_collection_name].find({})
)
LD_users = list(user_collection.find({}))
LD_users = [D_user["mila_ldap"] for D_user in LD_users]

# Match DRAC/CC to mila accounts
DLD_data = sarc.account_matching.make_matches.load_data_from_files(
{
"mila_ldap": LD_users, # pass through
Expand Down
250 changes: 105 additions & 145 deletions sarc/ldap/read_mila_ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,74 +134,18 @@
}

"""

import argparse
import json
import os
import ssl
from datetime import datetime

# Requirements
# - pip install ldap3
from ldap3 import ALL_ATTRIBUTES, SUBTREE, Connection, Server, Tls
from pymongo import MongoClient, UpdateOne

parser = argparse.ArgumentParser(
description="Query LDAP and update the MongoDB database users based on values returned."
)
parser.add_argument(
"--local_private_key_file",
type=str,
help="local_private_key_file for LDAP connection",
)
parser.add_argument(
"--local_certificate_file",
type=str,
help="local_certificate_file for LDAP connection",
)
parser.add_argument(
"--ldap_service_uri",
type=str,
default="ldaps://ldap.google.com",
help="ldap service uri",
)
# We have two possible things that we can do with the data fetched.
# Dumping to a json file is possible.
parser.add_argument(
"--mongodb_connection_string",
default=None,
type=str,
help="(optional) MongoDB connection string. Contains username and password.",
)
parser.add_argument(
"--mongodb_database_name",
default="sarc",
type=str,
help="(optional) MongoDB database to modify. Better left at default.",
)
parser.add_argument(
"--mongodb_collection",
default="users",
type=str,
help="(optional) MongoDB collection to modify. Better left at default.",
)
parser.add_argument(
"--input_json_file",
default=None,
type=str,
help="(optional) Ignore the LDAP and load from this json file instead.",
)
parser.add_argument(
"--output_json_file",
default=None,
type=str,
help="(optional) Write results to json file.",
)
parser.add_argument(
"--output_raw_LDAP_json_file",
default=None,
type=str,
help="(optional) Write results of the raw LDAP query to json file.",
)
from ..config import LDAPConfig, config
from .supervisor import resolve_supervisors


def query_ldap(local_private_key_file, local_certificate_file, ldap_service_uri):
Expand Down Expand Up @@ -258,13 +202,19 @@ def process_user(user_raw: dict) -> dict:
"googleUid" and "uid" match that of "mail" (except for
the "@mila.quebec" suffix).
"""

supervisor = user_raw.get("supervisor")
cosupervisor = user_raw.get("co_supervisor")

user = {
Delaunay marked this conversation as resolved.
Show resolved Hide resolved
# include the suffix "@mila.quebec"
"mila_email_username": user_raw["mail"][0],
"mila_cluster_username": user_raw["posixUid"][0],
"mila_cluster_uid": user_raw["uidNumber"][0],
"mila_cluster_gid": user_raw["gidNumber"][0],
"display_name": user_raw["displayName"][0],
"supervisor": supervisor if supervisor else None,
"co_supervisor": cosupervisor if cosupervisor else None,
"status": "disabled"
if (user_raw["suspended"][0] in ["True", "true", True])
else "enabled",
Expand Down Expand Up @@ -316,50 +266,107 @@ def client_side_user_updates(LD_users_DB, LD_users_LDAP):
return LD_users_to_update_or_insert


def _query_and_dump(
    ldap,
    save_ldap=False,
):
    """Query the LDAP service and optionally dump the raw result to the cache.

    Args:
        ldap: Object exposing ``local_private_key_file``,
            ``local_certificate_file`` and ``ldap_service_uri``; these are
            forwarded to ``query_ldap`` in that order.
        save_ldap: When truthy, the raw query result is written as indented
            JSON to ``<config().cache>/ldap/raw.<YYYY-MM-DD>.json``, where the
            date is the current UTC date.

    Returns:
        The value returned by ``query_ldap``, unmodified.
    """
    raw_users = query_ldap(
        ldap.local_private_key_file,
        ldap.local_certificate_file,
        ldap.ldap_service_uri,
    )

    if not save_ldap:
        return raw_users

    # One dump file per UTC day; a second run on the same day overwrites it.
    stamp = datetime.utcnow().strftime("%Y-%m-%d")
    # NOTE: the "ldap" sub-folder is not created here.
    dump_path = config().cache / "ldap" / f"raw.{stamp}.json"

    with open(dump_path, "w", encoding="utf-8") as dump_file:
        json.dump(raw_users, dump_file, indent=4)

    return raw_users


def _save_to_mongo(collection, LD_users):
Delaunay marked this conversation as resolved.
Show resolved Hide resolved
if collection is None:
return

# read only the "mila_ldap" field from the entries, and ignore the
# "drac_roles" and "drac_members" components
LD_users_DB = [u["mila_ldap"] for u in list(collection.find())]

L_updated_users = client_side_user_updates(
LD_users_DB=LD_users_DB,
LD_users_LDAP=LD_users,
)

L_updates_to_do = [
UpdateOne(
{"mila_ldap.mila_email_username": updated_user["mila_email_username"]},
{
# We set all the fields corresponding to the fields from `updated_user`,
# so that's a convenient way to do it. Note that this does not affect
# the fields in the database that are already present for that user.
"$set": {"mila_ldap": updated_user},
},
upsert=True,
)
for updated_user in L_updated_users
]

if L_updates_to_do:
result = collection.bulk_write(L_updates_to_do) # <- the actual commit
print(result.bulk_api_result)


def load_ldap_exceptions(ldap_config: LDAPConfig):
    """Load the exceptions JSON file referenced by the LDAP configuration.

    Args:
        ldap_config: Configuration object whose ``exceptions_json_path``
            attribute is either ``None`` or the path of a JSON file.

    Returns:
        An empty dict when no path is configured; otherwise the decoded
        content of the JSON file.
    """
    exceptions_path = ldap_config.exceptions_json_path

    # The file is optional: no configured path means no exceptions.
    if exceptions_path is None:
        return {}

    with open(exceptions_path, "r", encoding="utf-8") as handle:
        return json.loads(handle.read())


def load_group_to_prof_mapping(ldap_config: LDAPConfig):
    """Load the group-to-professor JSON file referenced by the LDAP configuration.

    Args:
        ldap_config: Configuration object whose ``group_to_prof_json_path``
            attribute is either ``None`` or the path of a JSON file.

    Returns:
        An empty dict when no path is configured; otherwise the decoded
        content of the JSON file.
    """
    mapping_path = ldap_config.group_to_prof_json_path

    if mapping_path is not None:
        with open(mapping_path, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())

    # The file is optional: no configured path means an empty mapping.
    return {}


def run(
local_private_key_file=None,
local_certificate_file=None,
ldap_service_uri=None,
# DB option 1
mongodb_database_instance=None,
# DB option 2
mongodb_connection_string=None,
mongodb_database_name=None,
#
ldap,
mongodb_collection=None,
input_json_file=None,
output_json_file=None,
output_raw_LDAP_json_file=None,
LD_users=None, # for external testing purposes
save_ldap=False,
):
"""
If `mongodb_database_instance` is not `None`, it overrides the two arguments
`mongodb_connection_string`, `mongodb_database_name`.
This is done because the SARC config gets us a client connected to a database already,
so it's better to use that functionality.
"""
"""Runs periodically to synchronize mongodb with LDAP"""

if LD_users is not None:
# Used mostly for testing purposes.
# Overrides the "input_json_file" argument.
# Just make sure it's a list of dict, at least.
assert isinstance(LD_users, list)
if LD_users:
assert isinstance(LD_users[0], dict)
elif input_json_file:
with open(input_json_file, "r", encoding="utf-8") as f_in:
LD_users = json.load(f_in)
else:
# this is the usual branch taken in practice
LD_users_raw = query_ldap(
local_private_key_file, local_certificate_file, ldap_service_uri
)
if output_raw_LDAP_json_file:
with open(output_raw_LDAP_json_file, "w", encoding="utf-8") as f_out:
json.dump(LD_users_raw, f_out, indent=4)
print(f"Wrote {output_raw_LDAP_json_file}.")
# retrieve users from LDAP
LD_users_raw = _query_and_dump(ldap, save_ldap)

# Transform users into the json we will save
group_to_prof = load_group_to_prof_mapping(ldap)
exceptions = load_ldap_exceptions(ldap)
errors = resolve_supervisors(LD_users_raw, group_to_prof, exceptions)

LD_users = [process_user(D_user_raw) for D_user_raw in LD_users_raw]

_save_to_mongo(mongodb_collection, LD_users)

errors.show()

if output_json_file:
with open(output_json_file, "w", encoding="utf-8") as f_out:
json.dump(LD_users, f_out, indent=4)
print(f"Wrote {output_json_file}.")

LD_users = [process_user(D_user_raw) for D_user_raw in LD_users_raw]

def get_ldap_collection(cfg):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pourrait être remplacé par quelque chose dans le genre: https://github.com/mila-iqia/SARC/blob/master/sarc/jobs/job.py#L194-L197

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into it and it is not trivial; I think it warrants its own ticket to discuss whether we want to remove some of the options that are currently provided.

  • Currently we can select a database and collection; this would go away if we hardcode the collection inside AbstractRepository
  • Currently we are doing an efficient bulk insert/update, this would also go away with a single update per user

mongodb_database_instance = cfg.mongo.database_instance
mongodb_collection = cfg.ldap.mongo_collection_name
mongodb_connection_string = cfg.mongo.connection_string
mongodb_database_name = cfg.mongo.database_name

# Two ways to get the MongoDB collection, and then it's possible that we don't care
# about getting one, in which case we'll skip that step of the output.
Expand All @@ -376,51 +383,4 @@ def run(
else:
users_collection = None

if users_collection is not None:
# read only the "mila_ldap" field from the entries, and ignore the
# "drac_roles" and "drac_members" components
LD_users_DB = [u["mila_ldap"] for u in list(users_collection.find())]

L_updated_users = client_side_user_updates(
LD_users_DB=LD_users_DB, LD_users_LDAP=LD_users
)

L_updates_to_do = [
UpdateOne(
{"mila_ldap.mila_email_username": updated_user["mila_email_username"]},
{
# We set all the fields corresponding to the fields from `updated_user`,
# so that's a convenient way to do it. Note that this does not affect
# the fields in the database that are already present for that user.
"$set": {"mila_ldap": updated_user},
},
upsert=True,
)
for updated_user in L_updated_users
]

if L_updates_to_do:
result = users_collection.bulk_write(
L_updates_to_do
) # <- the actual commit
print(result.bulk_api_result)

if output_json_file:
with open(output_json_file, "w", encoding="utf-8") as f_out:
json.dump(LD_users, f_out, indent=4)
print(f"Wrote {output_json_file}.")


if __name__ == "__main__":
args = parser.parse_args()
run(
local_private_key_file=args.local_private_key_file,
local_certificate_file=args.local_certificate_file,
ldap_service_uri=args.ldap_service_uri,
mongodb_connection_string=args.mongodb_connection_string,
mongodb_database_name=args.mongodb_database_name,
mongodb_collection=args.mongodb_collection,
input_json_file=args.input_json_file,
output_json_file=args.output_json_file,
output_raw_LDAP_json_file=args.output_raw_LDAP_json_file,
)
return users_collection
Loading