From 5dc1da954e3f1035d315aee9c0b3a650074bcc52 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 13:42:44 -0500 Subject: [PATCH 01/16] adding default empty dict --- fence/config-default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/config-default.yaml b/fence/config-default.yaml index a73ebb976..ab81c1548 100755 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -557,7 +557,7 @@ dbGaP: # # If `parse_consent_code` is true, then a user will be given access to the exact # same consent codes in the child studies - parent_to_child_studies_mapping: + parent_to_child_studies_mapping: {} # 'phs001194': ['phs000571', 'phs001843'] # A consent of "c999" can indicate access to that study's "exchange area data" # and when a user has access to one study's exchange area data, they From 67dc533fa0f5a88aa72d99e8372edc2a545b9a89 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 13:47:59 -0500 Subject: [PATCH 02/16] adding sanity happy path test --- tests/test_app_config.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 3c3af9fd0..d8e65357b 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -138,3 +138,22 @@ def test_app_config_parent_child_study_mapping(monkeypatch): ] with pytest.raises(Exception): FenceConfig._validate_parent_child_studies(invalid_dbgap_configs) + + valid_dbgap_configs = [ + { + "parent_to_child_studies_mapping": { + "phs001194": ["phs000571", "phs001843"], + "phs001193": ["phs000572", "phs001844"], + } + }, + { + "parent_to_child_studies_mapping": { + "phs001195": ["phs0015623"], + "phs001192": ["phs0001", "phs002"], + } + }, + ] + try: + FenceConfig._validate_parent_child_studies(valid_dbgap_configs) + except Exception: + pytest.fail("Study validation failed when it should have passed!") From 19384adb9db4002e955a5f6b3bc2b4469e369427 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 13:58:57 -0500 Subject: [PATCH 03/16] redesigning code to handle none case without getting too much more complicated --- fence/config.py | 46 ++++++++++++++++++++++++---------------- tests/test_app_config.py | 8 +++++++ 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/fence/config.py b/fence/config.py index d981bfd38..252b72576 100644 --- a/fence/config.py +++ b/fence/config.py @@ -6,6 +6,7 @@ from gen3config import Config from cdislogging import get_logger +from collections import Counter logger = get_logger(__name__) @@ -150,28 +151,37 @@ def post_process(self): logger.warning( f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA." ) + dbgap_configs = self._configs["dbGaP"] + if isinstance(dbgap_configs, list): + corrected_dbgap_configs = dbgap_configs + else: # assume is a dict + corrected_dbgap_configs = [dbgap_configs] + self._validate_parent_child_studies(corrected_dbgap_configs) - self._validate_parent_child_studies(self._configs["dbGaP"]) + @staticmethod + def find_duplicates(collection): + item_with_occurrences = Counter(collection) + duplicates = {item + for item, occurrences in item_with_occurrences.items() + if occurrences > 1} + return duplicates @staticmethod def _validate_parent_child_studies(dbgap_configs): - if isinstance(dbgap_configs, list): - configs = dbgap_configs - else: - configs = [dbgap_configs] - - all_parent_studies = set() - for dbgap_config in configs: - parent_studies = dbgap_config.get( - "parent_to_child_studies_mapping", {} - ).keys() - conflicts = parent_studies & all_parent_studies - if len(conflicts) > 0: - raise Exception( - f"{conflicts} are duplicate parent study ids found in parent_to_child_studies_mapping for " - f"multiple dbGaP configurations." - ) - all_parent_studies.update(parent_studies) + def get_parent_studies_safely(dbgap_config): + study_parents = dbgap_config.get('parent_to_child_studies_mapping', {}) + # study_parents could be 'None' + return list(study_parents.keys()) if isinstance(study_parents, dict) else [] + + safe_list_of_parent_studies = [safe_parent_studies + for dbgap_config in dbgap_configs + for safe_parent_studies in get_parent_studies_safely(dbgap_config)] + + duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies) + if len(duplicates) > 0: + raise Exception( + f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for " + f"multiple dbGaP configurations.") config = FenceConfig(DEFAULT_CFG_PATH) diff --git a/tests/test_app_config.py b/tests/test_app_config.py index d8e65357b..488b65888 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -122,6 +122,14 @@ def test_app_config(): def test_app_config_parent_child_study_mapping(monkeypatch): + + none_string_config = [{"parent_to_child_studies_mapping": 'None'}, + {"parent_to_child_studies_mapping": 'None'},] + try: + FenceConfig._validate_parent_child_studies(none_string_config) + except Exception: + pytest.fail("Study validation failed when given 'None' mapping!") + invalid_dbgap_configs = [ { "parent_to_child_studies_mapping": { From 90323446129c8b3507d9795cd671d8ed04200c0e Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 14:06:06 -0500 Subject: [PATCH 04/16] gave a better name --- fence/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fence/config.py b/fence/config.py index 252b72576..a3db435a9 100644 --- a/fence/config.py +++ b/fence/config.py @@ -169,9 +169,9 @@ def find_duplicates(collection): @staticmethod def _validate_parent_child_studies(dbgap_configs): def get_parent_studies_safely(dbgap_config): - study_parents = dbgap_config.get('parent_to_child_studies_mapping', {}) - # study_parents could be 'None' - return list(study_parents.keys()) if isinstance(study_parents, dict) else [] + study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) + # study mapping could be 'None' + return list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] safe_list_of_parent_studies = [safe_parent_studies for dbgap_config in dbgap_configs From fdc937cd441e505529d612676044b93c760fa267 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 14:26:38 -0500 Subject: [PATCH 05/16] adding typing --- fence/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fence/config.py b/fence/config.py index a3db435a9..80d83a77f 100644 --- a/fence/config.py +++ b/fence/config.py @@ -7,6 +7,7 @@ from cdislogging import get_logger from collections import Counter +from typing import List, Dict, Any logger = get_logger(__name__) @@ -167,7 +168,7 @@ def find_duplicates(collection): return duplicates @staticmethod - def _validate_parent_child_studies(dbgap_configs): + def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: def get_parent_studies_safely(dbgap_config): study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) # study mapping could be 'None' From ed05a1370edcaf0254976ce2cf34c5e57d1fedc8 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Thu, 18 Jul 2024 15:06:14 -0500 Subject: [PATCH 06/16] adding more state handling --- fence/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fence/config.py b/fence/config.py index 80d83a77f..fa3119a34 100644 --- a/fence/config.py +++ b/fence/config.py @@ -155,8 +155,10 @@ def post_process(self): dbgap_configs = self._configs["dbGaP"] if isinstance(dbgap_configs, list): corrected_dbgap_configs = dbgap_configs - else: # assume is a dict + elif isinstance(dbgap_configs, dict): corrected_dbgap_configs = [dbgap_configs] + else: + raise Exception("The dbgap configuration is not a recognized data structure, aborting!") self._validate_parent_child_studies(corrected_dbgap_configs) @staticmethod From 7c1ffe46e42bb2a6e8af3033f6e0947d30c1c721 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Mon, 22 Jul 2024 13:25:02 -0500 Subject: [PATCH 07/16] Trigger GitHub Actions workflow From 9ee8a785ca87e18eadcf359e9311133bfcc4b719 Mon Sep 17 00:00:00 2001 From: Kyle Burton Date: Thu, 18 Jul 2024 17:46:26 -0500 Subject: [PATCH 08/16] Testing --- fence/sync/sync_users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py index 4320e58ab..59fccc2e5 100644 --- a/fence/sync/sync_users.py +++ b/fence/sync/sync_users.py @@ -542,6 +542,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): ] # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match project_id_patterns += patterns + self.logger.info(f"Using these file paths: {file_dict.items()}") for filepath, privileges in file_dict.items(): self.logger.info("Reading file {}".format(filepath)) if os.stat(filepath).st_size == 0: From 174424ff013a4a38fb52034327007d5def1f9cfb Mon Sep 17 00:00:00 2001 From: Kyle Burton Date: Mon, 22 Jul 2024 16:58:32 -0500 Subject: [PATCH 09/16] fix(PXP-11364): Fix failing dbgap sync unit test Fix so local csvs file are processed in consistent order, regardless of os Updates user_info overwrite logic to preserve data when processing CSVs without some attributes --- fence/sync/sync_users.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py index 59fccc2e5..7b1b164ea 100644 --- a/fence/sync/sync_users.py +++ b/fence/sync/sync_users.py @@ -507,7 +507,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): """ user_projects = dict() - user_info = dict() + user_info = defaultdict(dict) # parse dbGaP sftp server information dbgap_key = dbgap_config.get("decrypt_key", None) @@ -658,9 +658,9 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): tags["pi"] = row["downloader for names"] user_info[username] = { - "email": row.get("email") or "", + "email": row.get("email") or user_info[username].get('email') or "", "display_name": display_name, - "phone_number": row.get("phone") or "", + "phone_number": row.get("phone") or user_info[username].get('phone_number') or "", "tags": tags, } @@ -1571,6 +1571,8 @@ def _sync(self, sess): local_csv_file_list = glob.glob( os.path.join(self.sync_from_local_csv_dir, "*") ) + # Sort the list so the order of of files is consistent across platforms + local_csv_file_list.sort() user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files( local_csv_file_list, From 532dcc5687f4d646fcde98a81646ca28523e0239 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Mon, 22 Jul 2024 13:25:02 -0500 Subject: [PATCH 10/16] Trigger GitHub Actions workflow From 8c4a9b72caaf4fe851324c663ca8ceed29a2fa34 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Tue, 23 Jul 2024 11:31:14 -0500 Subject: [PATCH 11/16] moving to separate unit test --- tests/test_app_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 488b65888..84e7baed9 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -121,8 +121,7 @@ def test_app_config(): patcher.stop() -def test_app_config_parent_child_study_mapping(monkeypatch): - +def test_validate_parent_child_studies_against_none_config(): none_string_config = [{"parent_to_child_studies_mapping": 'None'}, {"parent_to_child_studies_mapping": 'None'},] try: @@ -130,6 +129,8 @@ def test_app_config_parent_child_study_mapping(monkeypatch): except Exception: pytest.fail("Study validation failed when given 'None' mapping!") + +def test_app_config_parent_child_study_mapping(monkeypatch): invalid_dbgap_configs = [ { "parent_to_child_studies_mapping": { From a06ecaf11810c29d83c32fc1a23ae0cbd856e838 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Fri, 9 Aug 2024 13:13:12 -0500 Subject: [PATCH 12/16] more documentation minor refactors implementing alex's suggestions --- fence/config.py | 69 ++++++++++++++++++++++++++++++---------- tests/test_app_config.py | 11 +++++-- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/fence/config.py b/fence/config.py index fa3119a34..8d94f1892 100644 --- a/fence/config.py +++ b/fence/config.py @@ -152,17 +152,21 @@ def post_process(self): logger.warning( f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA." ) - dbgap_configs = self._configs["dbGaP"] - if isinstance(dbgap_configs, list): - corrected_dbgap_configs = dbgap_configs - elif isinstance(dbgap_configs, dict): - corrected_dbgap_configs = [dbgap_configs] - else: - raise Exception("The dbgap configuration is not a recognized data structure, aborting!") - self._validate_parent_child_studies(corrected_dbgap_configs) + self._validate_dbgap_config(self._configs["dbGaP"]) @staticmethod def find_duplicates(collection): + """ + + Args: + collection: any data type accepted by the Counter object + + Returns: + a set of items that were found more than once in the collection + + note: if collection is a set, this function will never find duplicates + (per the definition of a set) + """ item_with_occurrences = Counter(collection) duplicates = {item for item, occurrences in item_with_occurrences.items() @@ -170,15 +174,33 @@ def find_duplicates(collection): return duplicates @staticmethod - def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: - def get_parent_studies_safely(dbgap_config): - study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) - # study mapping could be 'None' - return list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] + def get_parent_studies_safely(dbgap_config): + """ + Args: + dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } } + Returns: + [k1, k2, ...] + """ + study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) + # study mapping could be 'None' + safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] + return safe_studies - safe_list_of_parent_studies = [safe_parent_studies - for dbgap_config in dbgap_configs - for safe_parent_studies in get_parent_studies_safely(dbgap_config)] + @staticmethod + def validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: + """ + Given a list of dictionaries that contain a parent -> child study mapping, + this function will check that all parents are unique and not duplicated + Args: + dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ] + + Returns: + Nothing. This function will throw if duplicates are found. + """ + safe_list_of_parent_studies = [] + for dbgap_config in dbgap_configs: + safe_parent_studies = FenceConfig.get_parent_studies_safely(dbgap_config) + safe_list_of_parent_studies.extend(safe_parent_studies) duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies) if len(duplicates) > 0: @@ -186,5 +208,20 @@ def get_parent_studies_safely(dbgap_config): f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for " f"multiple dbGaP configurations.") + @staticmethod + def enforce_is_array(dbgap_configs): + if isinstance(dbgap_configs, list): + corrected_dbgap_configs = dbgap_configs + elif isinstance(dbgap_configs, dict): + corrected_dbgap_configs = [dbgap_configs] + else: + raise Exception("The dbgap configuration is not a recognized data structure, aborting!") + return corrected_dbgap_configs + + @staticmethod + def _validate_dbgap_config(dbgap_configs): + corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs) + FenceConfig.validate_parent_child_studies(corrected_dbgap_configs) + config = FenceConfig(DEFAULT_CFG_PATH) diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 2fe2c8dbe..5fc0a7d93 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -121,10 +121,15 @@ def test_app_config(): def test_validate_parent_child_studies_against_none_config(): + """ + our dbgap config can have nothing for the parent_to_child_studies_mapping property + this test ensures the validator handles that case + """ none_string_config = [{"parent_to_child_studies_mapping": 'None'}, {"parent_to_child_studies_mapping": 'None'},] + try: - FenceConfig._validate_parent_child_studies(none_string_config) + FenceConfig.validate_parent_child_studies(none_string_config) except Exception: pytest.fail("Study validation failed when given 'None' mapping!") @@ -145,7 +150,7 @@ def test_app_config_parent_child_study_mapping(monkeypatch): }, ] with pytest.raises(Exception): - FenceConfig._validate_parent_child_studies(invalid_dbgap_configs) + FenceConfig.validate_parent_child_studies(invalid_dbgap_configs) valid_dbgap_configs = [ { @@ -162,6 +167,6 @@ def test_app_config_parent_child_study_mapping(monkeypatch): }, ] try: - FenceConfig._validate_parent_child_studies(valid_dbgap_configs) + FenceConfig.validate_parent_child_studies(valid_dbgap_configs) except Exception: pytest.fail("Study validation failed when it should have passed!") From 2b63f57b235af2a2b1fae1ae89462980b9fd5fa9 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Fri, 9 Aug 2024 18:16:35 -0500 Subject: [PATCH 13/16] more documentation changes to visibility --- fence/config.py | 9 +++++---- tests/test_app_config.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fence/config.py b/fence/config.py index 8d94f1892..ce6eebdae 100644 --- a/fence/config.py +++ b/fence/config.py @@ -176,6 +176,8 @@ def find_duplicates(collection): @staticmethod def get_parent_studies_safely(dbgap_config): """ + This will get a list of parent id's from the dbgap config's mapping property + or return and empty array if the entry doesn't exist Args: dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } } Returns: @@ -187,15 +189,14 @@ def get_parent_studies_safely(dbgap_config): return safe_studies @staticmethod - def validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: + def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: """ Given a list of dictionaries that contain a parent -> child study mapping, this function will check that all parents are unique and not duplicated Args: dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ] - Returns: - Nothing. This function will throw if duplicates are found. + Note: This function will throw if duplicates are found. """ safe_list_of_parent_studies = [] for dbgap_config in dbgap_configs: @@ -221,7 +222,7 @@ def enforce_is_array(dbgap_configs): @staticmethod def _validate_dbgap_config(dbgap_configs): corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs) - FenceConfig.validate_parent_child_studies(corrected_dbgap_configs) + FenceConfig._validate_parent_child_studies(corrected_dbgap_configs) config = FenceConfig(DEFAULT_CFG_PATH) diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 5fc0a7d93..8ee46ec84 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -129,7 +129,7 @@ def test_validate_parent_child_studies_against_none_config(): {"parent_to_child_studies_mapping": 'None'},] try: - FenceConfig.validate_parent_child_studies(none_string_config) + FenceConfig._validate_parent_child_studies(none_string_config) except Exception: pytest.fail("Study validation failed when given 'None' mapping!") @@ -150,7 +150,7 @@ def test_app_config_parent_child_study_mapping(monkeypatch): }, ] with pytest.raises(Exception): - FenceConfig.validate_parent_child_studies(invalid_dbgap_configs) + FenceConfig._validate_parent_child_studies(invalid_dbgap_configs) valid_dbgap_configs = [ { @@ -167,6 +167,6 @@ def test_app_config_parent_child_study_mapping(monkeypatch): }, ] try: - FenceConfig.validate_parent_child_studies(valid_dbgap_configs) + FenceConfig._validate_parent_child_studies(valid_dbgap_configs) except Exception: pytest.fail("Study validation failed when it should have passed!") From 8e60f67ae0cbb270c23f01b5015b1604ee32d516 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Mon, 12 Aug 2024 13:17:18 -0500 Subject: [PATCH 14/16] cleaning up references in test fence create moving create_client to fence create to help resolve circular dependencies defining generic functions in config moving send email to google monitor to help with circular dependencies adding utils test --- fence/config.py | 102 ++++++++++++------ fence/scripting/fence_create.py | 69 +++++++++++- fence/scripting/google_monitor.py | 49 ++++++++- fence/utils.py | 150 ++------------------------- tests/scripting/test_fence-create.py | 3 +- tests/test_utils.py | 36 +++++++ 6 files changed, 231 insertions(+), 178 deletions(-) create mode 100644 tests/test_utils.py diff --git a/fence/config.py b/fence/config.py index ce6eebdae..c2bf4a565 100644 --- a/fence/config.py +++ b/fence/config.py @@ -1,3 +1,4 @@ +import collections import os from yaml import safe_load as yaml_load import urllib.parse @@ -6,7 +7,6 @@ from gen3config import Config from cdislogging import get_logger -from collections import Counter from typing import List, Dict, Any logger = get_logger(__name__) @@ -155,7 +155,66 @@ def post_process(self): self._validate_dbgap_config(self._configs["dbGaP"]) @staticmethod - def find_duplicates(collection): + def _get_parent_studies_safely(dbgap_config): + """ + This will get a list of parent id's from the dbgap config's mapping property + or return and empty array if the entry doesn't exist + Args: + dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } } + Returns: + [k1, k2, ...] + """ + study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) + # study mapping could be 'None' + safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] + return safe_studies + + # TODO(fix-it): These functions should go in utils.py, but cannot be migrated until + # the variable DEFAULT_BACKOFF_SETTINGS is migrated here + @staticmethod + def _some(predicate, iterable, found_nothing_value=None): + """ Returns the first element that satisfies the predicate, else fail value + Expects predicate to be a boolean lambda and iterable to be a collection + """ + for item in iterable: + result = predicate(item) + if result: + return item + return found_nothing_value + + @staticmethod + def _coerce_to_array(unknown_data_structure, exception_message="Unrecognized data structure, aborting!"): + """ + Given a dubious data structure, coerces it into a list + depending on the data type. + Currently, handles list, dict, and None + Args: + exception_message: any string for more context + unknown_data_structure: either a list or dict, fails otherwise + + Returns: the data structure if it's a list, + or a list with the data structure in it if it's a dictionary + + note: fails if the unknown data structure is any other type + """ + # TODO: these three functions can be move out on their own when migrated to utils.py + identity = lambda v: v + wrap_in_array = lambda v: [v] + empty_array = lambda _: [] + # END TODO + data_is = lambda data_type: isinstance(unknown_data_structure, data_type) + type_to_coercion = { + list: identity, + dict: wrap_in_array, + type(None): empty_array, + } + data_type_of_parameter = FenceConfig._some(data_is, list(type_to_coercion.keys()), TypeError) + if data_type_of_parameter is TypeError: + raise ValueError(exception_message + f" | Type: {type(unknown_data_structure)}") + return type_to_coercion[data_type_of_parameter](unknown_data_structure) + + @staticmethod + def _find_duplicates(collection): """ Args: @@ -164,29 +223,17 @@ def find_duplicates(collection): Returns: a set of items that were found more than once in the collection - note: if collection is a set, this function will never find duplicates - (per the definition of a set) + note: if collection is a dictionary, counter will treat it as a + duplicates mapping! """ - item_with_occurrences = Counter(collection) + item_with_occurrences = collections.Counter(collection) duplicates = {item for item, occurrences in item_with_occurrences.items() if occurrences > 1} return duplicates - @staticmethod - def get_parent_studies_safely(dbgap_config): - """ - This will get a list of parent id's from the dbgap config's mapping property - or return and empty array if the entry doesn't exist - Args: - dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } } - Returns: - [k1, k2, ...] - """ - study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) - # study mapping could be 'None' - safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] - return safe_studies + # END TODO + @staticmethod def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: @@ -200,28 +247,19 @@ def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: """ safe_list_of_parent_studies = [] for dbgap_config in dbgap_configs: - safe_parent_studies = FenceConfig.get_parent_studies_safely(dbgap_config) + safe_parent_studies = FenceConfig._get_parent_studies_safely(dbgap_config) safe_list_of_parent_studies.extend(safe_parent_studies) - duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies) + duplicates = FenceConfig._find_duplicates(safe_list_of_parent_studies) if len(duplicates) > 0: raise Exception( f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for " f"multiple dbGaP configurations.") - @staticmethod - def enforce_is_array(dbgap_configs): - if isinstance(dbgap_configs, list): - corrected_dbgap_configs = dbgap_configs - elif isinstance(dbgap_configs, dict): - corrected_dbgap_configs = [dbgap_configs] - else: - raise Exception("The dbgap configuration is not a recognized data structure, aborting!") - return corrected_dbgap_configs - @staticmethod def _validate_dbgap_config(dbgap_configs): - corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs) + error_message = "The dbgap configuration is not a recognized data structure, aborting!" + corrected_dbgap_configs = FenceConfig._coerce_to_array(dbgap_configs, error_message) FenceConfig._validate_parent_child_studies(corrected_dbgap_configs) diff --git a/fence/scripting/fence_create.py b/fence/scripting/fence_create.py index a4b15aff8..41271d411 100644 --- a/fence/scripting/fence_create.py +++ b/fence/scripting/fence_create.py @@ -57,10 +57,9 @@ from fence.config import config from fence.sync.sync_users import UserSyncer from fence.utils import ( - create_client, get_valid_expiration, generate_client_credentials, - get_SQLAlchemyDriver, + get_SQLAlchemyDriver, logger, ) from sqlalchemy.orm.attributes import flag_modified from gen3authz.client.arborist.client import ArboristClient @@ -1824,3 +1823,69 @@ def access_token_polling_job( with driver.session as db_session: loop = asyncio.get_event_loop() loop.run_until_complete(job.update_tokens(db_session)) + + +def create_client( + DB, + username=None, + urls=[], + name="", + description="", + auto_approve=False, + is_admin=False, + grant_types=None, + confidential=True, + arborist=None, + policies=None, + allowed_scopes=None, + expires_in=None, +): + client_id, client_secret, hashed_secret = generate_client_credentials(confidential) + if arborist is not None: + arborist.create_client(client_id, policies) + driver = get_SQLAlchemyDriver(DB) + auth_method = "client_secret_basic" if confidential else "none" + + allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"] + if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])): + raise ValueError( + "Each allowed scope must be one of: {}".format( + config["CLIENT_ALLOWED_SCOPES"] + ) + ) + + if "openid" not in allowed_scopes: + allowed_scopes.append("openid") + logger.warning('Adding required "openid" scope to list of allowed scopes.') + + with driver.session as s: + user = None + if username: + user = query_for_user(session=s, username=username) + if not user: + user = User(username=username, is_admin=is_admin) + s.add(user) + + if s.query(Client).filter(Client.name == name).first(): + if arborist is not None: + arborist.delete_client(client_id) + raise Exception("client {} already exists".format(name)) + + client = Client( + client_id=client_id, + client_secret=hashed_secret, + user=user, + redirect_uris=urls, + allowed_scopes=" ".join(allowed_scopes), + description=description, + name=name, + auto_approve=auto_approve, + grant_types=grant_types, + is_confidential=confidential, + token_endpoint_auth_method=auth_method, + expires_in=expires_in, + ) + s.add(client) + s.commit() + + return client_id, client_secret diff --git a/fence/scripting/google_monitor.py b/fence/scripting/google_monitor.py index ad232519d..ae5be6b9e 100644 --- a/fence/scripting/google_monitor.py +++ b/fence/scripting/google_monitor.py @@ -7,6 +7,7 @@ """ import traceback +import requests from gen3cirrus.google_cloud.iam import GooglePolicyMember from gen3cirrus import GoogleCloudManager from gen3cirrus.google_cloud.errors import GoogleAPIError @@ -37,7 +38,7 @@ from fence import utils from fence.config import config from fence.models import User -from fence.errors import Unauthorized +from fence.errors import Unauthorized, NotFound logger = get_logger(__name__) @@ -445,6 +446,52 @@ def _get_user_email_list_from_google_project_with_owner_role(project_id): ) +def send_email(from_email, to_emails, subject, text, smtp_domain): + """ + Send email to group of emails using mail gun api. + + https://app.mailgun.com/ + + Args: + from_email(str): from email + to_emails(list): list of emails to receive the messages + text(str): the text message + smtp_domain(dict): smtp domain server + + { + "smtp_hostname": "smtp.mailgun.org", + "default_login": "postmaster@mailgun.planx-pla.net", + "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net", + "smtp_password": "password", # pragma: allowlist secret + "api_key": "api key" # pragma: allowlist secret + } + + Returns: + Http response + + Exceptions: + KeyError + + """ + if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get( + smtp_domain + ).get("smtp_password"): + raise NotFound( + "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or " + "smtp_password was not provided. " + "Cannot send email.".format(smtp_domain) + ) + + api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "") + email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages" + + return requests.post( + email_url, + auth=("api", api_key), + data={"from": from_email, "to": to_emails, "subject": subject, "text": text}, + ) + + def _send_emails_informing_service_account_removal( to_emails, invalid_service_account_reasons, invalid_project_reasons, project_id ): diff --git a/fence/utils.py b/fence/utils.py index 463fb6f75..345ac1d35 100644 --- a/fence/utils.py +++ b/fence/utils.py @@ -1,3 +1,9 @@ +""" +This file is for functions that are generalized enough to be used in varied places that need not be related. +Functions placed here are/should be low level in terms of composition, and thus references to other modules +in this project should be used sparingly. +""" + import bcrypt import collections from functools import wraps @@ -6,17 +12,14 @@ from random import SystemRandom import re import string -import requests from urllib.parse import urlencode from urllib.parse import parse_qs, urlsplit, urlunsplit import sys from cdislogging import get_logger import flask -from werkzeug.datastructures import ImmutableMultiDict -from fence.models import Client, User, query_for_user -from fence.errors import NotFound, UserError +from fence.errors import UserError from fence.config import config from authlib.oauth2.rfc6749.util import scope_to_list from authlib.oauth2.rfc6749.errors import InvalidScopeError @@ -57,97 +60,6 @@ def generate_client_credentials(confidential): return client_id, client_secret, hashed_secret -def create_client( - DB, - username=None, - urls=[], - name="", - description="", - auto_approve=False, - is_admin=False, - grant_types=None, - confidential=True, - arborist=None, - policies=None, - allowed_scopes=None, - expires_in=None, -): - client_id, client_secret, hashed_secret = generate_client_credentials(confidential) - if arborist is not None: - arborist.create_client(client_id, policies) - driver = get_SQLAlchemyDriver(DB) - auth_method = "client_secret_basic" if confidential else "none" - - allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"] - if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])): - raise ValueError( - "Each allowed scope must be one of: {}".format( - config["CLIENT_ALLOWED_SCOPES"] - ) - ) - - if "openid" not in allowed_scopes: - allowed_scopes.append("openid") - logger.warning('Adding required "openid" scope to list of allowed scopes.') - - with driver.session as s: - user = None - if username: - user = query_for_user(session=s, username=username) - if not user: - user = User(username=username, is_admin=is_admin) - s.add(user) - - if s.query(Client).filter(Client.name == name).first(): - if arborist is not None: - arborist.delete_client(client_id) - raise Exception("client {} already exists".format(name)) - - client = Client( - client_id=client_id, - client_secret=hashed_secret, - user=user, - redirect_uris=urls, - allowed_scopes=" ".join(allowed_scopes), - description=description, - name=name, - auto_approve=auto_approve, - grant_types=grant_types, - is_confidential=confidential, - token_endpoint_auth_method=auth_method, - expires_in=expires_in, - ) - s.add(client) - s.commit() - - return client_id, client_secret - - -def hash_secret(f): - @wraps(f) - def wrapper(*args, **kwargs): - has_secret = "client_secret" in flask.request.form - has_client_id = "client_id" in flask.request.form - if flask.request.form and has_secret and has_client_id: - form = flask.request.form.to_dict() - with flask.current_app.db.session as session: - client = ( - session.query(Client) - .filter(Client.client_id == form["client_id"]) - .first() - ) - if client: - form["client_secret"] = bcrypt.hashpw( - form["client_secret"].encode("utf-8"), - client.client_secret.encode("utf-8"), - ).decode("utf-8") - flask.request.form = ImmutableMultiDict(form) - - return f(*args, **kwargs) - - return wrapper - - def wrap_list_required(f): @wraps(f) def wrapper(d, *args, **kwargs): @@ -254,52 +166,6 @@ def split_url_and_query_params(url): return url, query_params -def send_email(from_email, to_emails, subject, text, smtp_domain): - """ - Send email to group of emails using mail gun api. - - https://app.mailgun.com/ - - Args: - from_email(str): from email - to_emails(list): list of emails to receive the messages - text(str): the text message - smtp_domain(dict): smtp domain server - - { - "smtp_hostname": "smtp.mailgun.org", - "default_login": "postmaster@mailgun.planx-pla.net", - "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net", - "smtp_password": "password", # pragma: allowlist secret - "api_key": "api key" # pragma: allowlist secret - } - - Returns: - Http response - - Exceptions: - KeyError - - """ - if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get( - smtp_domain - ).get("smtp_password"): - raise NotFound( - "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or " - "smtp_password was not provided. " - "Cannot send email.".format(smtp_domain) - ) - - api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "") - email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages" - - return requests.post( - email_url, - auth=("api", api_key), - data={"from": from_email, "to": to_emails, "subject": subject, "text": text}, - ) - - def get_valid_expiration_from_request( expiry_param="expires_in", max_limit=None, default=None ): @@ -428,6 +294,8 @@ def get_SQLAlchemyDriver(db_conn_url): # Default settings to control usage of backoff library. +# TODO(fix-it): this variable should be moved to config.py to remove the reliance on the config +# module. Many files reference this property, so it should be handled in its own PR. DEFAULT_BACKOFF_SETTINGS = { "on_backoff": log_backoff_retry, "on_giveup": log_backoff_giveup, diff --git a/tests/scripting/test_fence-create.py b/tests/scripting/test_fence-create.py index deae90d34..4b1e439b6 100644 --- a/tests/scripting/test_fence-create.py +++ b/tests/scripting/test_fence-create.py @@ -6,13 +6,12 @@ import pytest import gen3cirrus -from gen3cirrus.google_cloud.errors import GoogleAuthError from userdatamodel.models import Group from fence.config import config from fence.errors import UserError from fence.jwt.validate import validate_jwt -from fence.utils import create_client, get_SQLAlchemyDriver +from fence.utils import get_SQLAlchemyDriver from fence.models import ( AccessPrivilege, Project, diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 000000000..9d35c668d --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,36 @@ +import pytest + +from fence.config import FenceConfig + + +def test_find_duplicates(): + dup_case = FenceConfig._find_duplicates([1, 1, 2]) + unique_case = FenceConfig._find_duplicates([1, 2, 3]) + tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1)) + tuple_unique_case = FenceConfig._find_duplicates((1, 2, 3)) + assert (dup_case == {1} and unique_case == set() and + tuple_dup_case == {1} and tuple_unique_case == set()) + + +def test__coerce_to_array(): + identity_case = FenceConfig._coerce_to_array([1, 2, 3]) + wrap_case = FenceConfig._coerce_to_array({"foo": "bar"}) + none_case = FenceConfig._coerce_to_array(None) + assert (identity_case == [1, 2, 3] and wrap_case == [{"foo": "bar"}] and + none_case == []) + + test_message = "foo" + with pytest.raises(ValueError): + bad_case = FenceConfig._coerce_to_array("uh oh") + with pytest.raises(ValueError): + bad_case_custom = FenceConfig._coerce_to_array("uh oh", test_message) + + +def test__some(): + test_pred = lambda v: 2 > v > 0 + one_case = FenceConfig._some(test_pred, [3, 2, 1]) + two_case = FenceConfig._some(test_pred, [3, 2, 0.5, 2, 1]) + none_case = FenceConfig._some(test_pred, [0, 0, 0]) + custom_none_case = FenceConfig._some(test_pred, [0], "foo") + assert (one_case == 1 and two_case == 0.5 and + none_case is None and custom_none_case == "foo") From 06cebff618ba84055371ece194d5b7e25be876f0 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Mon, 12 Aug 2024 13:37:27 -0500 Subject: [PATCH 15/16] documentation + minor rename --- fence/config.py | 5 ++--- tests/test_utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fence/config.py b/fence/config.py index c2bf4a565..ea2be1443 100644 --- a/fence/config.py +++ b/fence/config.py @@ -190,10 +190,9 @@ def _coerce_to_array(unknown_data_structure, exception_message="Unrecognized dat Currently, handles list, dict, and None Args: exception_message: any string for more context - unknown_data_structure: either a list or dict, fails otherwise + unknown_data_structure: either a list, dict, or None; fails otherwise - Returns: the data structure if it's a list, - or a list with the data structure in it if it's a dictionary + Returns: a list of some kind depending on the provided data structure note: fails if the unknown data structure is any other type """ diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d35c668d..590133d68 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,7 @@ from fence.config import FenceConfig -def test_find_duplicates(): +def test__find_duplicates(): dup_case = FenceConfig._find_duplicates([1, 1, 2]) unique_case = FenceConfig._find_duplicates([1, 2, 3]) tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1)) From af9c5b16fc9f6504a866a10db3d015b762d5fe14 Mon Sep 17 00:00:00 2001 From: Albert Snow Date: Tue, 3 Sep 2024 13:57:16 -0500 Subject: [PATCH 16/16] fix name, add description --- tests/test_utils.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 590133d68..93cb95972 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,16 +3,22 @@ from fence.config import FenceConfig -def test__find_duplicates(): +def test_find_duplicates(): + """ + Tests that the find duplicates function handles different kinds of duplicates cases + """ dup_case = FenceConfig._find_duplicates([1, 1, 2]) unique_case = FenceConfig._find_duplicates([1, 2, 3]) - tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1)) + tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1, 2, 2)) tuple_unique_case = FenceConfig._find_duplicates((1, 2, 3)) assert (dup_case == {1} and unique_case == set() and - tuple_dup_case == {1} and tuple_unique_case == set()) + tuple_dup_case == {1, 2} and tuple_unique_case == set()) -def test__coerce_to_array(): +def test_coerce_to_array(): + """ + Tests that we get arrays back in expected cases + """ identity_case = FenceConfig._coerce_to_array([1, 2, 3]) wrap_case = FenceConfig._coerce_to_array({"foo": "bar"}) none_case = FenceConfig._coerce_to_array(None) @@ -26,8 +32,12 @@ def test__coerce_to_array(): bad_case_custom = FenceConfig._coerce_to_array("uh oh", test_message) -def test__some(): - test_pred = lambda v: 2 > v > 0 +def test_some(): + """ + Tests that we get the first passing value out of a list in expected + use cases + """ + test_pred = lambda v: 0 < v < 2 one_case = FenceConfig._some(test_pred, [3, 2, 1]) two_case = FenceConfig._some(test_pred, [3, 2, 0.5, 2, 1]) none_case = FenceConfig._some(test_pred, [0, 0, 0])