From 5dc1da954e3f1035d315aee9c0b3a650074bcc52 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 13:42:44 -0500
Subject: [PATCH 01/16] adding default empty dict

---
 fence/config-default.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fence/config-default.yaml b/fence/config-default.yaml
index a73ebb976..ab81c1548 100755
--- a/fence/config-default.yaml
+++ b/fence/config-default.yaml
@@ -557,7 +557,7 @@ dbGaP:
     #
     # If `parse_consent_code` is true, then a user will be given access to the exact
     # same consent codes in the child studies
-    parent_to_child_studies_mapping:
+    parent_to_child_studies_mapping: {}
       # 'phs001194': ['phs000571', 'phs001843']
     # A consent of "c999" can indicate access to that study's "exchange area data"
     # and when a user has access to one study's exchange area data, they

From 67dc533fa0f5a88aa72d99e8372edc2a545b9a89 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 13:47:59 -0500
Subject: [PATCH 02/16] adding sanity happy path test

---
 tests/test_app_config.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index 3c3af9fd0..d8e65357b 100755
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -138,3 +138,22 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
     ]
     with pytest.raises(Exception):
         FenceConfig._validate_parent_child_studies(invalid_dbgap_configs)
+
+    valid_dbgap_configs = [
+        {
+            "parent_to_child_studies_mapping": {
+                "phs001194": ["phs000571", "phs001843"],
+                "phs001193": ["phs000572", "phs001844"],
+            }
+        },
+        {
+            "parent_to_child_studies_mapping": {
+                "phs001195": ["phs0015623"],
+                "phs001192": ["phs0001", "phs002"],
+            }
+        },
+    ]
+    try:
+        FenceConfig._validate_parent_child_studies(valid_dbgap_configs)
+    except Exception:
+        pytest.fail("Study validation failed when it should have passed!")

From 19384adb9db4002e955a5f6b3bc2b4469e369427 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 13:58:57 -0500
Subject: [PATCH 03/16] redesigning code to handle none case without getting
 too much more complicated

---
 fence/config.py          | 46 ++++++++++++++++++++++++----------------
 tests/test_app_config.py |  8 +++++++
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/fence/config.py b/fence/config.py
index d981bfd38..252b72576 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -6,6 +6,7 @@
 from gen3config import Config
 
 from cdislogging import get_logger
+from collections import Counter
 
 logger = get_logger(__name__)
 
@@ -150,28 +151,37 @@ def post_process(self):
                 logger.warning(
                     f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA."
                 )
+        dbgap_configs = self._configs["dbGaP"]
+        if isinstance(dbgap_configs, list):
+            corrected_dbgap_configs = dbgap_configs
+        else:  # assume is a dict
+            corrected_dbgap_configs = [dbgap_configs]
+        self._validate_parent_child_studies(corrected_dbgap_configs)
 
-        self._validate_parent_child_studies(self._configs["dbGaP"])
+    @staticmethod
+    def find_duplicates(collection):
+        item_with_occurrences = Counter(collection)
+        duplicates = {item
+                      for item, occurrences in item_with_occurrences.items()
+                      if occurrences > 1}
+        return duplicates
 
     @staticmethod
     def _validate_parent_child_studies(dbgap_configs):
-        if isinstance(dbgap_configs, list):
-            configs = dbgap_configs
-        else:
-            configs = [dbgap_configs]
-
-        all_parent_studies = set()
-        for dbgap_config in configs:
-            parent_studies = dbgap_config.get(
-                "parent_to_child_studies_mapping", {}
-            ).keys()
-            conflicts = parent_studies & all_parent_studies
-            if len(conflicts) > 0:
-                raise Exception(
-                    f"{conflicts} are duplicate parent study ids found in parent_to_child_studies_mapping for "
-                    f"multiple dbGaP configurations."
-                )
-            all_parent_studies.update(parent_studies)
+        def get_parent_studies_safely(dbgap_config):
+            study_parents = dbgap_config.get('parent_to_child_studies_mapping', {})
+            # study_parents could be 'None'
+            return list(study_parents.keys()) if isinstance(study_parents, dict) else []
+
+        safe_list_of_parent_studies = [safe_parent_studies
+                                       for dbgap_config in dbgap_configs
+                                       for safe_parent_studies in get_parent_studies_safely(dbgap_config)]
+
+        duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies)
+        if len(duplicates) > 0:
+            raise Exception(
+                f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for "
+                f"multiple dbGaP configurations.")
 
 
 config = FenceConfig(DEFAULT_CFG_PATH)
diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index d8e65357b..488b65888 100755
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -122,6 +122,14 @@ def test_app_config():
 
 
 def test_app_config_parent_child_study_mapping(monkeypatch):
+
+    none_string_config = [{"parent_to_child_studies_mapping": 'None'},
+                          {"parent_to_child_studies_mapping": 'None'},]
+    try:
+        FenceConfig._validate_parent_child_studies(none_string_config)
+    except Exception:
+        pytest.fail("Study validation failed when given 'None' mapping!")
+
     invalid_dbgap_configs = [
         {
             "parent_to_child_studies_mapping": {

From 90323446129c8b3507d9795cd671d8ed04200c0e Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 14:06:06 -0500
Subject: [PATCH 04/16] gave a better name

---
 fence/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fence/config.py b/fence/config.py
index 252b72576..a3db435a9 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -169,9 +169,9 @@ def find_duplicates(collection):
     @staticmethod
     def _validate_parent_child_studies(dbgap_configs):
         def get_parent_studies_safely(dbgap_config):
-            study_parents = dbgap_config.get('parent_to_child_studies_mapping', {})
-            # study_parents could be 'None'
-            return list(study_parents.keys()) if isinstance(study_parents, dict) else []
+            study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
+            # study mapping could be 'None'
+            return list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
 
         safe_list_of_parent_studies = [safe_parent_studies
                                        for dbgap_config in dbgap_configs

From fdc937cd441e505529d612676044b93c760fa267 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 14:26:38 -0500
Subject: [PATCH 05/16] adding typing

---
 fence/config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fence/config.py b/fence/config.py
index a3db435a9..80d83a77f 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -7,6 +7,7 @@
 
 from cdislogging import get_logger
 from collections import Counter
+from typing import List, Dict, Any
 
 logger = get_logger(__name__)
 
@@ -167,7 +168,7 @@ def find_duplicates(collection):
         return duplicates
 
     @staticmethod
-    def _validate_parent_child_studies(dbgap_configs):
+    def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
         def get_parent_studies_safely(dbgap_config):
             study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
             # study mapping could be 'None'

From ed05a1370edcaf0254976ce2cf34c5e57d1fedc8 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Thu, 18 Jul 2024 15:06:14 -0500
Subject: [PATCH 06/16] adding more state handling

---
 fence/config.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fence/config.py b/fence/config.py
index 80d83a77f..fa3119a34 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -155,8 +155,10 @@ def post_process(self):
         dbgap_configs = self._configs["dbGaP"]
         if isinstance(dbgap_configs, list):
             corrected_dbgap_configs = dbgap_configs
-        else:  # assume is a dict
+        elif isinstance(dbgap_configs, dict):
             corrected_dbgap_configs = [dbgap_configs]
+        else:
+            raise Exception("The dbgap configuration is not a recognized data structure, aborting!")
         self._validate_parent_child_studies(corrected_dbgap_configs)
 
     @staticmethod

From 7c1ffe46e42bb2a6e8af3033f6e0947d30c1c721 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Mon, 22 Jul 2024 13:25:02 -0500
Subject: [PATCH 07/16] Trigger GitHub Actions workflow


From 9ee8a785ca87e18eadcf359e9311133bfcc4b719 Mon Sep 17 00:00:00 2001
From: Kyle Burton <burtonk@uchicago.edu>
Date: Thu, 18 Jul 2024 17:46:26 -0500
Subject: [PATCH 08/16] Testing

---
 fence/sync/sync_users.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py
index 4320e58ab..59fccc2e5 100644
--- a/fence/sync/sync_users.py
+++ b/fence/sync/sync_users.py
@@ -542,6 +542,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
             ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
             project_id_patterns += patterns
 
+        self.logger.info(f"Using these file paths: {file_dict.items()}")
         for filepath, privileges in file_dict.items():
             self.logger.info("Reading file {}".format(filepath))
             if os.stat(filepath).st_size == 0:

From 174424ff013a4a38fb52034327007d5def1f9cfb Mon Sep 17 00:00:00 2001
From: Kyle Burton <burtonk@uchicago.edu>
Date: Mon, 22 Jul 2024 16:58:32 -0500
Subject: [PATCH 09/16] fix(PXP-11364): Fix failing dbgap sync unit test

Fix so local csvs file are processed in consistent order, regardless of os

Updates user_info overwrite logic to preserve data when processing CSVs without
some attributes
---
 fence/sync/sync_users.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py
index 59fccc2e5..7b1b164ea 100644
--- a/fence/sync/sync_users.py
+++ b/fence/sync/sync_users.py
@@ -507,7 +507,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
 
         """
         user_projects = dict()
-        user_info = dict()
+        user_info = defaultdict(dict)
 
         # parse dbGaP sftp server information
         dbgap_key = dbgap_config.get("decrypt_key", None)
@@ -658,9 +658,9 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
                         tags["pi"] = row["downloader for names"]
 
                     user_info[username] = {
-                        "email": row.get("email") or "",
+                        "email": row.get("email") or user_info[username].get('email') or "",
                         "display_name": display_name,
-                        "phone_number": row.get("phone") or "",
+                        "phone_number": row.get("phone") or user_info[username].get('phone_number') or "",
                         "tags": tags,
                     }
 
@@ -1571,6 +1571,8 @@ def _sync(self, sess):
             local_csv_file_list = glob.glob(
                 os.path.join(self.sync_from_local_csv_dir, "*")
             )
+            # Sort the list so the order of of files is consistent across platforms
+            local_csv_file_list.sort()
 
         user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
             local_csv_file_list,

From 532dcc5687f4d646fcde98a81646ca28523e0239 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Mon, 22 Jul 2024 13:25:02 -0500
Subject: [PATCH 10/16] Trigger GitHub Actions workflow


From 8c4a9b72caaf4fe851324c663ca8ceed29a2fa34 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Tue, 23 Jul 2024 11:31:14 -0500
Subject: [PATCH 11/16] moving to separate unit test

---
 tests/test_app_config.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index 488b65888..84e7baed9 100755
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -121,8 +121,7 @@ def test_app_config():
         patcher.stop()
 
 
-def test_app_config_parent_child_study_mapping(monkeypatch):
-
+def test_validate_parent_child_studies_against_none_config():
     none_string_config = [{"parent_to_child_studies_mapping": 'None'},
                           {"parent_to_child_studies_mapping": 'None'},]
     try:
@@ -130,6 +129,8 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
     except Exception:
         pytest.fail("Study validation failed when given 'None' mapping!")
 
+
+def test_app_config_parent_child_study_mapping(monkeypatch):
     invalid_dbgap_configs = [
         {
             "parent_to_child_studies_mapping": {

From a06ecaf11810c29d83c32fc1a23ae0cbd856e838 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Fri, 9 Aug 2024 13:13:12 -0500
Subject: [PATCH 12/16] more documentation minor refactors implementing alex's
 suggestions

---
 fence/config.py          | 69 ++++++++++++++++++++++++++++++----------
 tests/test_app_config.py | 11 +++++--
 2 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/fence/config.py b/fence/config.py
index fa3119a34..8d94f1892 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -152,17 +152,21 @@ def post_process(self):
                 logger.warning(
                     f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA."
                 )
-        dbgap_configs = self._configs["dbGaP"]
-        if isinstance(dbgap_configs, list):
-            corrected_dbgap_configs = dbgap_configs
-        elif isinstance(dbgap_configs, dict):
-            corrected_dbgap_configs = [dbgap_configs]
-        else:
-            raise Exception("The dbgap configuration is not a recognized data structure, aborting!")
-        self._validate_parent_child_studies(corrected_dbgap_configs)
+        self._validate_dbgap_config(self._configs["dbGaP"])
 
     @staticmethod
     def find_duplicates(collection):
+        """
+
+        Args:
+            collection: any data type accepted by the Counter object
+
+        Returns:
+            a set of items that were found more than once in the collection
+
+        note: if collection is a set, this function will never find duplicates
+        (per the definition of a set)
+        """
         item_with_occurrences = Counter(collection)
         duplicates = {item
                       for item, occurrences in item_with_occurrences.items()
@@ -170,15 +174,33 @@ def find_duplicates(collection):
         return duplicates
 
     @staticmethod
-    def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
-        def get_parent_studies_safely(dbgap_config):
-            study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
-            # study mapping could be 'None'
-            return list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
+    def get_parent_studies_safely(dbgap_config):
+        """
+        Args:
+            dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } }
+        Returns:
+            [k1, k2, ...]
+        """
+        study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
+        # study mapping could be 'None'
+        safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
+        return safe_studies
 
-        safe_list_of_parent_studies = [safe_parent_studies
-                                       for dbgap_config in dbgap_configs
-                                       for safe_parent_studies in get_parent_studies_safely(dbgap_config)]
+    @staticmethod
+    def validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
+        """
+        Given a list of dictionaries that contain a parent -> child study mapping,
+        this function will check that all parents are unique and not duplicated
+        Args:
+            dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ]
+
+        Returns:
+            Nothing. This function will throw if duplicates are found.
+        """
+        safe_list_of_parent_studies = []
+        for dbgap_config in dbgap_configs:
+            safe_parent_studies = FenceConfig.get_parent_studies_safely(dbgap_config)
+            safe_list_of_parent_studies.extend(safe_parent_studies)
 
         duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies)
         if len(duplicates) > 0:
@@ -186,5 +208,20 @@ def get_parent_studies_safely(dbgap_config):
                 f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for "
                 f"multiple dbGaP configurations.")
 
+    @staticmethod
+    def enforce_is_array(dbgap_configs):
+        if isinstance(dbgap_configs, list):
+            corrected_dbgap_configs = dbgap_configs
+        elif isinstance(dbgap_configs, dict):
+            corrected_dbgap_configs = [dbgap_configs]
+        else:
+            raise Exception("The dbgap configuration is not a recognized data structure, aborting!")
+        return corrected_dbgap_configs
+
+    @staticmethod
+    def _validate_dbgap_config(dbgap_configs):
+        corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs)
+        FenceConfig.validate_parent_child_studies(corrected_dbgap_configs)
+
 
 config = FenceConfig(DEFAULT_CFG_PATH)
diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index 2fe2c8dbe..5fc0a7d93 100755
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -121,10 +121,15 @@ def test_app_config():
 
 
 def test_validate_parent_child_studies_against_none_config():
+    """
+    our dbgap config can have nothing for the parent_to_child_studies_mapping property
+    this test ensures the validator handles that case
+    """
     none_string_config = [{"parent_to_child_studies_mapping": 'None'},
                           {"parent_to_child_studies_mapping": 'None'},]
+
     try:
-        FenceConfig._validate_parent_child_studies(none_string_config)
+        FenceConfig.validate_parent_child_studies(none_string_config)
     except Exception:
         pytest.fail("Study validation failed when given 'None' mapping!")
 
@@ -145,7 +150,7 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
         },
     ]
     with pytest.raises(Exception):
-        FenceConfig._validate_parent_child_studies(invalid_dbgap_configs)
+        FenceConfig.validate_parent_child_studies(invalid_dbgap_configs)
 
     valid_dbgap_configs = [
         {
@@ -162,6 +167,6 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
         },
     ]
     try:
-        FenceConfig._validate_parent_child_studies(valid_dbgap_configs)
+        FenceConfig.validate_parent_child_studies(valid_dbgap_configs)
     except Exception:
         pytest.fail("Study validation failed when it should have passed!")

From 2b63f57b235af2a2b1fae1ae89462980b9fd5fa9 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Fri, 9 Aug 2024 18:16:35 -0500
Subject: [PATCH 13/16] more documentation changes to visibility

---
 fence/config.py          | 9 +++++----
 tests/test_app_config.py | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/fence/config.py b/fence/config.py
index 8d94f1892..ce6eebdae 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -176,6 +176,8 @@ def find_duplicates(collection):
     @staticmethod
     def get_parent_studies_safely(dbgap_config):
         """
+        This will get a list of parent id's from the dbgap config's mapping property
+        or return and empty array if the entry doesn't exist
         Args:
             dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } }
         Returns:
@@ -187,15 +189,14 @@ def get_parent_studies_safely(dbgap_config):
         return safe_studies
 
     @staticmethod
-    def validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
+    def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
         """
         Given a list of dictionaries that contain a parent -> child study mapping,
         this function will check that all parents are unique and not duplicated
         Args:
             dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ]
 
-        Returns:
-            Nothing. This function will throw if duplicates are found.
+        Note: This function will throw if duplicates are found.
         """
         safe_list_of_parent_studies = []
         for dbgap_config in dbgap_configs:
@@ -221,7 +222,7 @@ def enforce_is_array(dbgap_configs):
     @staticmethod
     def _validate_dbgap_config(dbgap_configs):
         corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs)
-        FenceConfig.validate_parent_child_studies(corrected_dbgap_configs)
+        FenceConfig._validate_parent_child_studies(corrected_dbgap_configs)
 
 
 config = FenceConfig(DEFAULT_CFG_PATH)
diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index 5fc0a7d93..8ee46ec84 100755
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -129,7 +129,7 @@ def test_validate_parent_child_studies_against_none_config():
                           {"parent_to_child_studies_mapping": 'None'},]
 
     try:
-        FenceConfig.validate_parent_child_studies(none_string_config)
+        FenceConfig._validate_parent_child_studies(none_string_config)
     except Exception:
         pytest.fail("Study validation failed when given 'None' mapping!")
 
@@ -150,7 +150,7 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
         },
     ]
     with pytest.raises(Exception):
-        FenceConfig.validate_parent_child_studies(invalid_dbgap_configs)
+        FenceConfig._validate_parent_child_studies(invalid_dbgap_configs)
 
     valid_dbgap_configs = [
         {
@@ -167,6 +167,6 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
         },
     ]
     try:
-        FenceConfig.validate_parent_child_studies(valid_dbgap_configs)
+        FenceConfig._validate_parent_child_studies(valid_dbgap_configs)
     except Exception:
         pytest.fail("Study validation failed when it should have passed!")

From 8e60f67ae0cbb270c23f01b5015b1604ee32d516 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Mon, 12 Aug 2024 13:17:18 -0500
Subject: [PATCH 14/16] cleaning up references in test fence create moving
 create_client to fence create to help resolve circular dependencies defining
 generic functions in config moving send email to google monitor to help with
 circular dependencies adding utils test

---
 fence/config.py                      | 102 ++++++++++++------
 fence/scripting/fence_create.py      |  69 +++++++++++-
 fence/scripting/google_monitor.py    |  49 ++++++++-
 fence/utils.py                       | 150 ++-------------------------
 tests/scripting/test_fence-create.py |   3 +-
 tests/test_utils.py                  |  36 +++++++
 6 files changed, 231 insertions(+), 178 deletions(-)
 create mode 100644 tests/test_utils.py

diff --git a/fence/config.py b/fence/config.py
index ce6eebdae..c2bf4a565 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -1,3 +1,4 @@
+import collections
 import os
 from yaml import safe_load as yaml_load
 import urllib.parse
@@ -6,7 +7,6 @@
 from gen3config import Config
 
 from cdislogging import get_logger
-from collections import Counter
 from typing import List, Dict, Any
 
 logger = get_logger(__name__)
@@ -155,7 +155,66 @@ def post_process(self):
         self._validate_dbgap_config(self._configs["dbGaP"])
 
     @staticmethod
-    def find_duplicates(collection):
+    def _get_parent_studies_safely(dbgap_config):
+        """
+        This will get a list of parent id's from the dbgap config's mapping property
+        or return and empty array if the entry doesn't exist
+        Args:
+            dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } }
+        Returns:
+            [k1, k2, ...]
+        """
+        study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
+        # study mapping could be 'None'
+        safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
+        return safe_studies
+
+    # TODO(fix-it): These functions should go in utils.py, but cannot be migrated until
+    # the variable DEFAULT_BACKOFF_SETTINGS is migrated here
+    @staticmethod
+    def _some(predicate, iterable, found_nothing_value=None):
+        """ Returns the first element that satisfies the predicate, else fail value
+            Expects predicate to be a boolean lambda and iterable to be a collection
+        """
+        for item in iterable:
+            result = predicate(item)
+            if result:
+                return item
+        return found_nothing_value
+
+    @staticmethod
+    def _coerce_to_array(unknown_data_structure, exception_message="Unrecognized data structure, aborting!"):
+        """
+        Given a dubious data structure, coerces it into a list
+        depending on the data type.
+        Currently, handles list, dict, and None
+        Args:
+            exception_message: any string for more context
+            unknown_data_structure: either a list or dict, fails otherwise
+
+        Returns: the data structure if it's a list,
+        or a list with the data structure in it if it's a dictionary
+
+        note: fails if the unknown data structure is any other type
+        """
+        # TODO: these three functions can be move out on their own when migrated to utils.py
+        identity = lambda v: v
+        wrap_in_array = lambda v: [v]
+        empty_array = lambda _: []
+        # END TODO
+        data_is = lambda data_type: isinstance(unknown_data_structure, data_type)
+        type_to_coercion = {
+            list: identity,
+            dict: wrap_in_array,
+            type(None): empty_array,
+        }
+        data_type_of_parameter = FenceConfig._some(data_is, list(type_to_coercion.keys()), TypeError)
+        if data_type_of_parameter is TypeError:
+            raise ValueError(exception_message + f" | Type: {type(unknown_data_structure)}")
+        return type_to_coercion[data_type_of_parameter](unknown_data_structure)
+
+    @staticmethod
+    def _find_duplicates(collection):
         """
 
         Args:
@@ -164,29 +223,17 @@ def find_duplicates(collection):
         Returns:
             a set of items that were found more than once in the collection
 
-        note: if collection is a set, this function will never find duplicates
-        (per the definition of a set)
+        note: if collection is a dictionary, counter will treat it as a
+        duplicates mapping!
         """
-        item_with_occurrences = Counter(collection)
+        item_with_occurrences = collections.Counter(collection)
         duplicates = {item
                       for item, occurrences in item_with_occurrences.items()
                       if occurrences > 1}
         return duplicates
 
-    @staticmethod
-    def get_parent_studies_safely(dbgap_config):
-        """
-        This will get a list of parent id's from the dbgap config's mapping property
-        or return and empty array if the entry doesn't exist
-        Args:
-            dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } }
-        Returns:
-            [k1, k2, ...]
-        """
-        study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
-        # study mapping could be 'None'
-        safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
-        return safe_studies
+    # END TODO
+
 
     @staticmethod
     def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
@@ -200,28 +247,19 @@ def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
         """
         safe_list_of_parent_studies = []
         for dbgap_config in dbgap_configs:
-            safe_parent_studies = FenceConfig.get_parent_studies_safely(dbgap_config)
+            safe_parent_studies = FenceConfig._get_parent_studies_safely(dbgap_config)
             safe_list_of_parent_studies.extend(safe_parent_studies)
 
-        duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies)
+        duplicates = FenceConfig._find_duplicates(safe_list_of_parent_studies)
         if len(duplicates) > 0:
             raise Exception(
                 f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for "
                 f"multiple dbGaP configurations.")
 
-    @staticmethod
-    def enforce_is_array(dbgap_configs):
-        if isinstance(dbgap_configs, list):
-            corrected_dbgap_configs = dbgap_configs
-        elif isinstance(dbgap_configs, dict):
-            corrected_dbgap_configs = [dbgap_configs]
-        else:
-            raise Exception("The dbgap configuration is not a recognized data structure, aborting!")
-        return corrected_dbgap_configs
-
     @staticmethod
     def _validate_dbgap_config(dbgap_configs):
-        corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs)
+        error_message = "The dbgap configuration is not a recognized data structure, aborting!"
+        corrected_dbgap_configs = FenceConfig._coerce_to_array(dbgap_configs, error_message)
         FenceConfig._validate_parent_child_studies(corrected_dbgap_configs)
 
 
diff --git a/fence/scripting/fence_create.py b/fence/scripting/fence_create.py
index a4b15aff8..41271d411 100644
--- a/fence/scripting/fence_create.py
+++ b/fence/scripting/fence_create.py
@@ -57,10 +57,9 @@
 from fence.config import config
 from fence.sync.sync_users import UserSyncer
 from fence.utils import (
-    create_client,
     get_valid_expiration,
     generate_client_credentials,
-    get_SQLAlchemyDriver,
+    get_SQLAlchemyDriver, logger,
 )
 from sqlalchemy.orm.attributes import flag_modified
 from gen3authz.client.arborist.client import ArboristClient
@@ -1824,3 +1823,69 @@ def access_token_polling_job(
     with driver.session as db_session:
         loop = asyncio.get_event_loop()
         loop.run_until_complete(job.update_tokens(db_session))
+
+
+def create_client(
+    DB,
+    username=None,
+    urls=[],
+    name="",
+    description="",
+    auto_approve=False,
+    is_admin=False,
+    grant_types=None,
+    confidential=True,
+    arborist=None,
+    policies=None,
+    allowed_scopes=None,
+    expires_in=None,
+):
+    client_id, client_secret, hashed_secret = generate_client_credentials(confidential)
+    if arborist is not None:
+        arborist.create_client(client_id, policies)
+    driver = get_SQLAlchemyDriver(DB)
+    auth_method = "client_secret_basic" if confidential else "none"
+
+    allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"]
+    if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])):
+        raise ValueError(
+            "Each allowed scope must be one of: {}".format(
+                config["CLIENT_ALLOWED_SCOPES"]
+            )
+        )
+
+    if "openid" not in allowed_scopes:
+        allowed_scopes.append("openid")
+        logger.warning('Adding required "openid" scope to list of allowed scopes.')
+
+    with driver.session as s:
+        user = None
+        if username:
+            user = query_for_user(session=s, username=username)
+            if not user:
+                user = User(username=username, is_admin=is_admin)
+                s.add(user)
+
+        if s.query(Client).filter(Client.name == name).first():
+            if arborist is not None:
+                arborist.delete_client(client_id)
+            raise Exception("client {} already exists".format(name))
+
+        client = Client(
+            client_id=client_id,
+            client_secret=hashed_secret,
+            user=user,
+            redirect_uris=urls,
+            allowed_scopes=" ".join(allowed_scopes),
+            description=description,
+            name=name,
+            auto_approve=auto_approve,
+            grant_types=grant_types,
+            is_confidential=confidential,
+            token_endpoint_auth_method=auth_method,
+            expires_in=expires_in,
+        )
+        s.add(client)
+        s.commit()
+
+    return client_id, client_secret
diff --git a/fence/scripting/google_monitor.py b/fence/scripting/google_monitor.py
index ad232519d..ae5be6b9e 100644
--- a/fence/scripting/google_monitor.py
+++ b/fence/scripting/google_monitor.py
@@ -7,6 +7,7 @@
 """
 import traceback
 
+import requests
 from gen3cirrus.google_cloud.iam import GooglePolicyMember
 from gen3cirrus import GoogleCloudManager
 from gen3cirrus.google_cloud.errors import GoogleAPIError
@@ -37,7 +38,7 @@
 from fence import utils
 from fence.config import config
 from fence.models import User
-from fence.errors import Unauthorized
+from fence.errors import Unauthorized, NotFound
 
 logger = get_logger(__name__)
 
@@ -445,6 +446,52 @@ def _get_user_email_list_from_google_project_with_owner_role(project_id):
         )
 
 
+def send_email(from_email, to_emails, subject, text, smtp_domain):
+    """
+    Send email to group of emails using mail gun api.
+
+    https://app.mailgun.com/
+
+    Args:
+        from_email(str): from email
+        to_emails(list): list of emails to receive the messages
+        text(str): the text message
+        smtp_domain(dict): smtp domain server
+
+            {
+                "smtp_hostname": "smtp.mailgun.org",
+                "default_login": "postmaster@mailgun.planx-pla.net",
+                "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net",
+                "smtp_password": "password", # pragma: allowlist secret
+                "api_key": "api key" # pragma: allowlist secret
+            }
+
+    Returns:
+        Http response
+
+    Exceptions:
+        KeyError
+
+    """
+    if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get(
+        smtp_domain
+    ).get("smtp_password"):
+        raise NotFound(
+            "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or "
+            "smtp_password was not provided. "
+            "Cannot send email.".format(smtp_domain)
+        )
+
+    api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "")
+    email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages"
+
+    return requests.post(
+        email_url,
+        auth=("api", api_key),
+        data={"from": from_email, "to": to_emails, "subject": subject, "text": text},
+    )
+
+
 def _send_emails_informing_service_account_removal(
     to_emails, invalid_service_account_reasons, invalid_project_reasons, project_id
 ):
diff --git a/fence/utils.py b/fence/utils.py
index 463fb6f75..345ac1d35 100644
--- a/fence/utils.py
+++ b/fence/utils.py
@@ -1,3 +1,9 @@
+"""
+This file is for functions that are generalized enough to be used in varied places that need not be related.
+Functions placed here are/should be low level in terms of composition, and thus references to other modules
+in this project should be used sparingly.
+"""
+
 import bcrypt
 import collections
 from functools import wraps
@@ -6,17 +12,14 @@
 from random import SystemRandom
 import re
 import string
-import requests
 from urllib.parse import urlencode
 from urllib.parse import parse_qs, urlsplit, urlunsplit
 import sys
 
 from cdislogging import get_logger
 import flask
-from werkzeug.datastructures import ImmutableMultiDict
 
-from fence.models import Client, User, query_for_user
-from fence.errors import NotFound, UserError
+from fence.errors import UserError
 from fence.config import config
 from authlib.oauth2.rfc6749.util import scope_to_list
 from authlib.oauth2.rfc6749.errors import InvalidScopeError
@@ -57,97 +60,6 @@ def generate_client_credentials(confidential):
     return client_id, client_secret, hashed_secret
 
 
-def create_client(
-    DB,
-    username=None,
-    urls=[],
-    name="",
-    description="",
-    auto_approve=False,
-    is_admin=False,
-    grant_types=None,
-    confidential=True,
-    arborist=None,
-    policies=None,
-    allowed_scopes=None,
-    expires_in=None,
-):
-    client_id, client_secret, hashed_secret = generate_client_credentials(confidential)
-    if arborist is not None:
-        arborist.create_client(client_id, policies)
-    driver = get_SQLAlchemyDriver(DB)
-    auth_method = "client_secret_basic" if confidential else "none"
-
-    allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"]
-    if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])):
-        raise ValueError(
-            "Each allowed scope must be one of: {}".format(
-                config["CLIENT_ALLOWED_SCOPES"]
-            )
-        )
-
-    if "openid" not in allowed_scopes:
-        allowed_scopes.append("openid")
-        logger.warning('Adding required "openid" scope to list of allowed scopes.')
-
-    with driver.session as s:
-        user = None
-        if username:
-            user = query_for_user(session=s, username=username)
-            if not user:
-                user = User(username=username, is_admin=is_admin)
-                s.add(user)
-
-        if s.query(Client).filter(Client.name == name).first():
-            if arborist is not None:
-                arborist.delete_client(client_id)
-            raise Exception("client {} already exists".format(name))
-
-        client = Client(
-            client_id=client_id,
-            client_secret=hashed_secret,
-            user=user,
-            redirect_uris=urls,
-            allowed_scopes=" ".join(allowed_scopes),
-            description=description,
-            name=name,
-            auto_approve=auto_approve,
-            grant_types=grant_types,
-            is_confidential=confidential,
-            token_endpoint_auth_method=auth_method,
-            expires_in=expires_in,
-        )
-        s.add(client)
-        s.commit()
-
-    return client_id, client_secret
-
-
-def hash_secret(f):
-    @wraps(f)
-    def wrapper(*args, **kwargs):
-        has_secret = "client_secret" in flask.request.form
-        has_client_id = "client_id" in flask.request.form
-        if flask.request.form and has_secret and has_client_id:
-            form = flask.request.form.to_dict()
-            with flask.current_app.db.session as session:
-                client = (
-                    session.query(Client)
-                    .filter(Client.client_id == form["client_id"])
-                    .first()
-                )
-                if client:
-                    form["client_secret"] = bcrypt.hashpw(
-                        form["client_secret"].encode("utf-8"),
-                        client.client_secret.encode("utf-8"),
-                    ).decode("utf-8")
-                flask.request.form = ImmutableMultiDict(form)
-
-        return f(*args, **kwargs)
-
-    return wrapper
-
-
 def wrap_list_required(f):
     @wraps(f)
     def wrapper(d, *args, **kwargs):
@@ -254,52 +166,6 @@ def split_url_and_query_params(url):
     return url, query_params
 
 
-def send_email(from_email, to_emails, subject, text, smtp_domain):
-    """
-    Send email to group of emails using mail gun api.
-
-    https://app.mailgun.com/
-
-    Args:
-        from_email(str): from email
-        to_emails(list): list of emails to receive the messages
-        text(str): the text message
-        smtp_domain(dict): smtp domain server
-
-            {
-                "smtp_hostname": "smtp.mailgun.org",
-                "default_login": "postmaster@mailgun.planx-pla.net",
-                "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net",
-                "smtp_password": "password", # pragma: allowlist secret
-                "api_key": "api key" # pragma: allowlist secret
-            }
-
-    Returns:
-        Http response
-
-    Exceptions:
-        KeyError
-
-    """
-    if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get(
-        smtp_domain
-    ).get("smtp_password"):
-        raise NotFound(
-            "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or "
-            "smtp_password was not provided. "
-            "Cannot send email.".format(smtp_domain)
-        )
-
-    api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "")
-    email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages"
-
-    return requests.post(
-        email_url,
-        auth=("api", api_key),
-        data={"from": from_email, "to": to_emails, "subject": subject, "text": text},
-    )
-
-
 def get_valid_expiration_from_request(
     expiry_param="expires_in", max_limit=None, default=None
 ):
@@ -428,6 +294,8 @@ def get_SQLAlchemyDriver(db_conn_url):
 
 
 # Default settings to control usage of backoff library.
+# TODO(fix-it): this variable should be moved to config.py to remove the reliance on the config
+# module. Many files reference this property, so it should be handled in its own PR.
 DEFAULT_BACKOFF_SETTINGS = {
     "on_backoff": log_backoff_retry,
     "on_giveup": log_backoff_giveup,
diff --git a/tests/scripting/test_fence-create.py b/tests/scripting/test_fence-create.py
index deae90d34..4b1e439b6 100644
--- a/tests/scripting/test_fence-create.py
+++ b/tests/scripting/test_fence-create.py
@@ -6,13 +6,12 @@
 import pytest
 
 import gen3cirrus
-from gen3cirrus.google_cloud.errors import GoogleAuthError
 from userdatamodel.models import Group
 
 from fence.config import config
 from fence.errors import UserError
 from fence.jwt.validate import validate_jwt
-from fence.utils import create_client, get_SQLAlchemyDriver
+from fence.utils import get_SQLAlchemyDriver
 from fence.models import (
     AccessPrivilege,
     Project,
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 000000000..9d35c668d
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,36 @@
+import pytest
+
+from fence.config import FenceConfig
+
+
+def test_find_duplicates():
+    dup_case = FenceConfig._find_duplicates([1, 1, 2])
+    unique_case = FenceConfig._find_duplicates([1, 2, 3])
+    tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1))
+    tuple_unique_case = FenceConfig._find_duplicates((1, 2, 3))
+    assert (dup_case == {1} and unique_case == set() and
+            tuple_dup_case == {1} and tuple_unique_case == set())
+
+
+def test__coerce_to_array():
+    identity_case = FenceConfig._coerce_to_array([1, 2, 3])
+    wrap_case = FenceConfig._coerce_to_array({"foo": "bar"})
+    none_case = FenceConfig._coerce_to_array(None)
+    assert (identity_case == [1, 2, 3] and wrap_case == [{"foo": "bar"}] and
+            none_case == [])
+
+    test_message = "foo"
+    with pytest.raises(ValueError):
+        bad_case = FenceConfig._coerce_to_array("uh oh")
+    with pytest.raises(ValueError):
+        bad_case_custom = FenceConfig._coerce_to_array("uh oh", test_message)
+
+
+def test__some():
+    test_pred = lambda v: 2 > v > 0
+    one_case = FenceConfig._some(test_pred, [3, 2, 1])
+    two_case = FenceConfig._some(test_pred, [3, 2, 0.5, 2, 1])
+    none_case = FenceConfig._some(test_pred, [0, 0, 0])
+    custom_none_case = FenceConfig._some(test_pred, [0], "foo")
+    assert (one_case == 1 and two_case == 0.5 and
+            none_case is None and custom_none_case == "foo")

From 06cebff618ba84055371ece194d5b7e25be876f0 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Mon, 12 Aug 2024 13:37:27 -0500
Subject: [PATCH 15/16] documentation + minor rename

---
 fence/config.py     | 5 ++---
 tests/test_utils.py | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fence/config.py b/fence/config.py
index c2bf4a565..ea2be1443 100644
--- a/fence/config.py
+++ b/fence/config.py
@@ -190,10 +190,9 @@ def _coerce_to_array(unknown_data_structure, exception_message="Unrecognized dat
         Currently, handles list, dict, and None
         Args:
             exception_message: any string for more context
-            unknown_data_structure: either a list or dict, fails otherwise
+            unknown_data_structure: either a list, dict, or None; fails otherwise
 
-        Returns: the data structure if it's a list,
-        or a list with the data structure in it if it's a dictionary
+        Returns: a list of some kind depending on the provided data structure
 
         note: fails if the unknown data structure is any other type
         """
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9d35c668d..590133d68 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,7 +3,7 @@
 from fence.config import FenceConfig
 
 
-def test_find_duplicates():
+def test__find_duplicates():
     dup_case = FenceConfig._find_duplicates([1, 1, 2])
     unique_case = FenceConfig._find_duplicates([1, 2, 3])
     tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1))

From af9c5b16fc9f6504a866a10db3d015b762d5fe14 Mon Sep 17 00:00:00 2001
From: Albert Snow <ajsnow2012@gmail.com>
Date: Tue, 3 Sep 2024 13:57:16 -0500
Subject: [PATCH 16/16] fix name, add description

---
 tests/test_utils.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 590133d68..93cb95972 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,16 +3,22 @@
 from fence.config import FenceConfig
 
 
-def test__find_duplicates():
+def test_find_duplicates():
+    """
+    Tests that the find duplicates function handles different kinds of duplicates cases
+    """
     dup_case = FenceConfig._find_duplicates([1, 1, 2])
     unique_case = FenceConfig._find_duplicates([1, 2, 3])
-    tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1))
+    tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1, 2, 2))
     tuple_unique_case = FenceConfig._find_duplicates((1, 2, 3))
     assert (dup_case == {1} and unique_case == set() and
-            tuple_dup_case == {1} and tuple_unique_case == set())
+            tuple_dup_case == {1, 2} and tuple_unique_case == set())
 
 
-def test__coerce_to_array():
+def test_coerce_to_array():
+    """
+    Tests that we get arrays back in expected cases
+    """
     identity_case = FenceConfig._coerce_to_array([1, 2, 3])
     wrap_case = FenceConfig._coerce_to_array({"foo": "bar"})
     none_case = FenceConfig._coerce_to_array(None)
@@ -26,8 +32,12 @@ def test__coerce_to_array():
         bad_case_custom = FenceConfig._coerce_to_array("uh oh", test_message)
 
 
-def test__some():
-    test_pred = lambda v: 2 > v > 0
+def test_some():
+    """
+    Tests that we get the first passing value out of a list in expected
+    use cases
+    """
+    test_pred = lambda v: 0 < v < 2
     one_case = FenceConfig._some(test_pred, [3, 2, 1])
     two_case = FenceConfig._some(test_pred, [3, 2, 0.5, 2, 1])
     none_case = FenceConfig._some(test_pred, [0, 0, 0])