Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT Make seg and desc mandatory grouping #10

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[flake8]
doctests = False
exclude =
**/__init__.py
**/tests/*
*build/
wonkyconn/_version.py
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"numba",
"seaborn",
"matplotlib",
"statsmodels"
]
dynamic = ["version"]

Expand All @@ -41,6 +42,7 @@ dev = [
"types-all",
"pandas-stubs",
"types-tqdm",
"pyrsistent",
]
test = ["nibabel", "nilearn", "pytest", "pytest-cov", "templateflow < 23.0.0"]
docs = ["sphinx", "sphinx_rtd_theme", "myst-parser", "sphinx-argparse"]
Expand Down Expand Up @@ -94,6 +96,8 @@ module = [
"seaborn.*",
"statsmodels.*",
"templateflow.*",
"nibabel.*",
"pyrsistent.*",
]

[[tool.mypy.overrides]]
Expand Down
7 changes: 5 additions & 2 deletions wonkyconn/features/calculate_degrees_of_freedom.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def calculate_degrees_of_freedom_loss(
Calculate the percent of degrees of freedom lost during denoising.

Parameters:
- bids_file (BIDSFile): The BIDS file for which to calculate the degrees of freedom.
- bids_file (BIDSFile): The BIDS file for which to calculate the
degrees of freedom.

Returns:
- float: The percentage of degrees of freedom lost.
Expand All @@ -50,7 +51,9 @@ def calculate_degrees_of_freedom_loss(


def _calculate_for_key(
connectivity_matrices: list[ConnectivityMatrix], count: list[int], key: str
connectivity_matrices: list[ConnectivityMatrix],
count: npt.NDArray[np.int64],
key: str,
) -> float:
values: Sequence[int | list[str] | None] = [
connectivity_matrix.metadata.get(key, None)
Expand Down
27 changes: 18 additions & 9 deletions wonkyconn/features/quality_control_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,17 @@ def calculate_qcfc(
accounted for participant age and sex

Parameters:
data_frame (pd.DataFrame): The data frame containing the covariates "age" and "gender". It needs to have one row for each connectivity matrix.
connectivity_matrices (Iterable[ConnectivityMatrix]): The connectivity matrices to calculate QCFC for.
metric_key (str, optional): The key of the metric to use for QCFC calculation. Defaults to "MeanFramewiseDisplacement".
data_frame (pd.DataFrame): The data frame containing the covariates
"age" and "gender". It needs to have one row for each
connectivity matrix.
connectivity_matrices (Iterable[ConnectivityMatrix]): The
connectivity matrices to calculate QCFC for.
metric_key (str, optional): The key of the metric to use for QCFC
calculation. Defaults to "MeanFramewiseDisplacement".

Returns:
pd.DataFrame: The QCFC values between connectivity matrices and the metric.
pd.DataFrame: The QCFC values between connectivity matrices and
the metric.

"""
metrics = np.asarray(
Expand All @@ -45,7 +50,9 @@ def calculate_qcfc(
connectivity_arrays = [
connectivity_matrix.load()
for connectivity_matrix in tqdm(
connectivity_matrices, desc="Loading connectivity matrices", leave=False
connectivity_matrices,
desc="Loading connectivity matrices",
leave=False,
)
]

Expand All @@ -64,19 +71,21 @@ def calculate_qcfc(

p_value = correlation_p_value(correlation, m)

qcfc = pd.DataFrame(dict(i=i, j=j, correlation=correlation, p_value=p_value))
qcfc = pd.DataFrame(
dict(i=i, j=j, correlation=correlation, p_value=p_value)
)
qcfc = qcfc.set_index(["i", "j"])

return qcfc


def calculate_median_absolute(x: pd.Series) -> float:
def calculate_median_absolute(x: pd.Series[float]) -> float:
"""Calculate Absolute median value"""
return x.abs().median()


def significant_level(
x: pd.Series, alpha: float = 0.05, correction: str | None = None
x: pd.Series[float], alpha: float = 0.05, correction: str | None = None
) -> npt.NDArray[np.bool_]:
"""
Apply FDR correction to a pandas.Series p-value object.
Expand All @@ -92,7 +101,7 @@ def significant_level(

method : None or str
Default as None for no multiple comparison
Mutiple comparison methods.
Multiple comparison methods.
See statsmodels.stats.multitest.multipletests

Returns
Expand Down
2 changes: 1 addition & 1 deletion wonkyconn/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def gc_logger(log_level: str = "INFO") -> logging.Logger:
handlers=[RichHandler()],
)

return logging.getLogger("giga_connectome")
return logging.getLogger("wonkyconn")


gc_log = gc_logger()
Expand Down
24 changes: 18 additions & 6 deletions wonkyconn/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def global_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
description=(
"Evaluating the residual motion in fMRI connectome and visualize reports"
"Evaluating the residual motion in fMRI connectome and visualize "
"reports per segmentation per denoising strategy."
),
)

Expand Down Expand Up @@ -41,13 +42,19 @@ def global_parser() -> argparse.ArgumentParser:
"--group-by",
type=str,
nargs="+",
default=["seg"],
help="Select which tags to group the connectivity matrices by. Default is `seg`.",
help=(
"Select which entities to group the connectivity matrices by "
"(e.g. `sub`, `ses`, `task`)."
),
required=False,
)
parser.add_argument(
"--phenotypes",
type=str,
help="Path to the phenotype file that has the columns `participant_id`, `gender` coded as `M` and `F` and `age` in years.",
help=(
"Path to the phenotype file that has the columns `participant_id`,"
" `gender` coded as `M` and `F` and `age` in years."
),
required=True,
)
parser.add_argument(
Expand All @@ -57,10 +64,15 @@ def global_parser() -> argparse.ArgumentParser:
action="append",
metavar=("SEG", "ATLAS"),
default=list(),
help="Specify the atlas file to use for a segmentation label in the data",
help=(
"Specify the atlas file to use for a segmentation label in the "
"data"
),
)

parser.add_argument("-v", "--version", action="version", version=__version__)
parser.add_argument(
"-v", "--version", action="version", version=__version__
)
parser.add_argument("--debug", action="store_true", default=False)
parser.add_argument(
"--verbosity",
Expand Down
12 changes: 4 additions & 8 deletions wonkyconn/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ def test_help(capsys):
except SystemExit:
pass
captured = capsys.readouterr()
assert (
"Evaluating the residual motion in fMRI connectome and visualize reports"
in captured.out
)
assert "fMRI connectome and visualize reports" in captured.out


def _copy_file(path: Path, new_path: Path, sub: str) -> None:
Expand All @@ -49,7 +46,9 @@ def _copy_file(path: Path, new_path: Path, sub: str) -> None:
relmat = pd.read_csv(path, sep="\t")
(n,) = set(relmat.shape)

array = scipy.spatial.distance.squareform(relmat.to_numpy() - np.eye(n))
array = scipy.spatial.distance.squareform(
relmat.to_numpy() - np.eye(n)
)
np.random.shuffle(array)

new_array = scipy.spatial.distance.squareform(array) + np.eye(n)
Expand Down Expand Up @@ -115,9 +114,6 @@ def test_smoke(tmp_path: Path):
argv = [
"--phenotypes",
str(phenotypes_path),
"--group-by",
"seg",
"desc",
*seg_to_atlas_args,
str(bids_dir),
str(output_dir),
Expand Down
43 changes: 30 additions & 13 deletions wonkyconn/visualization/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
matplotlib.rcParams["font.family"] = "DejaVu Sans"


def _make_group_label(group_by: list[str], values: pd.Series) -> str:
def _make_group_label(group_by: list[str], values: pd.Series[str]) -> str:
label: str = ""
for a, b in zip(group_by, values, strict=True):
if label:
Expand All @@ -22,17 +22,23 @@ def _make_group_label(group_by: list[str], values: pd.Series) -> str:
return label


def plot(result_frame: pd.DataFrame, group_by: list[str], output_dir: Path) -> None:
def plot(
result_frame: pd.DataFrame, group_by: list[str], output_dir: Path
) -> None:
"""
Plot all three metrics based on the given result data frame.

Args:
result_frame (pd.DataFrame): The DataFrame containing the columns "median_absolute_qcfc",
"percentage_significant_qcfc", "distance_dependence", "confound_regression_percentage",
"motion_scrubbing_percentage", and "nonsteady_states_detector_percentage", and the
columns in the `group_by` variable.
group_by (list[str]): The list of columns that the results are grouped by.
output_dir (Path): The directory to save the plot image into as "metrics.png".
result_frame (pd.DataFrame): The DataFrame containing the columns
"median_absolute_qcfc", "percentage_significant_qcfc",
"distance_dependence", "confound_regression_percentage",
"motion_scrubbing_percentage", and
"nonsteady_states_detector_percentage", and the columns in the
`group_by` variable.
group_by (list[str]): The list of columns that the results are
grouped by.
output_dir (Path): The directory to save the plot image into as
"metrics.png".

Returns:
None
Expand Down Expand Up @@ -60,7 +66,9 @@ def plot(result_frame: pd.DataFrame, group_by: list[str], output_dir: Path) -> N
color=palette[0],
ax=median_absolute_qcfc_axes,
)
median_absolute_qcfc_axes.set_title("Median absolute value of QC-FC correlations")
median_absolute_qcfc_axes.set_title(
"Median absolute value of QC-FC correlations"
)
median_absolute_qcfc_axes.set_xlabel("Median absolute value")
median_absolute_qcfc_axes.set_ylabel("Group")

Expand Down Expand Up @@ -93,7 +101,7 @@ def plot(result_frame: pd.DataFrame, group_by: list[str], output_dir: Path) -> N

def plot_degrees_of_freedom_loss(
result_frame: pd.DataFrame,
group_labels: pd.Series,
group_labels: pd.Series[str],
degrees_of_freedom_loss_axes: Axes,
legend_axes: Axes,
) -> None:
Expand All @@ -116,9 +124,18 @@ def plot_degrees_of_freedom_loss(
color=colors[2],
ax=degrees_of_freedom_loss_axes,
)
degrees_of_freedom_loss_axes.set_title("Percentage of degrees of freedom lost")
degrees_of_freedom_loss_axes.set_title(
"Percentage of degrees of freedom lost"
)
degrees_of_freedom_loss_axes.set_xlabel("Percentage %")
labels = ["Confounds regression", "Motion scrubbing", "Non-steady states detector"]
handles = [mpatches.Patch(color=c, label=label) for c, label in zip(colors, labels)]
labels = [
"Confounds regression",
"Motion scrubbing",
"Non-steady states detector",
]
handles = [
mpatches.Patch(color=c, label=label)
for c, label in zip(colors, labels)
]
legend_axes.legend(handles=handles)
legend_axes.axis("off")
Loading
Loading