Id flagging #316

Merged
merged 7 commits on Sep 26, 2023
27 changes: 25 additions & 2 deletions rabies/analysis_pkg/main_wf.py
@@ -17,8 +17,18 @@ def init_main_analysis_wf(preprocess_opts, cr_opts, analysis_opts):

split_dict, split_name_list, target_list = read_confound_workflow(conf_output, nativespace=cr_opts.nativespace_analysis)

# update split_name according to the --scan_list option
split_name_list = get_iterable_scan_list(analysis_opts.scan_list, split_name_list)
if len(split_name_list)==0:
raise ValueError(f"""
No outputs were found from the confound correction stage.
All scans may have been removed for not meeting the minimum_timepoint threshold
when applying --frame_censoring. Outputs will be named empty.nii.gz if this is
the case.
""")

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
split_name_list = filter_scan_inclusion(analysis_opts.inclusion_ids, split_name_list)
split_name_list = filter_scan_exclusion(analysis_opts.exclusion_ids, split_name_list)

# setting up iterables from the BOLD scan splits
main_split = pe.Node(niu.IdentityInterface(fields=['split_name']),
@@ -310,6 +320,8 @@ def load_sub_input_dict(maps_dict, bold_file, CR_data_dict, VE_file, STD_file, C


def read_confound_workflow(conf_output, nativespace=False):
from nipype import logging
log = logging.getLogger('nipype.workflow')

conf_workflow_file = f'{conf_output}/rabies_confound_correction_workflow.pkl'

@@ -358,15 +370,26 @@

# don't include scans that were removed during confound correction
corrected_split_name=[]
remove_list = []
import pathlib
for name in split_name:
filename = pathlib.Path(split_dict[name]['cleaned_path']).name
if 'empty' in filename:
remove_list.append(name)
del split_dict[name]
else:
corrected_split_name.append(name)
split_name = corrected_split_name

if len(remove_list)>0:
scan_list_str = ''
for name in remove_list:
scan_list_str += f'\n - {name}'
log.warning(f"""
The following scans were not included for analysis as the file was empty: {scan_list_str}
This is likely due to not meeting the minimum_timepoints threshold from --frame_censoring.
""")

return split_dict, split_name, target_list


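The scan-removal logic above keys on the literal substring 'empty' in the cleaned file's name, which --frame_censoring produces when a scan falls below the minimum_timepoint threshold. A minimal standalone sketch of that behavior, using hypothetical paths (not part of the diff):

    import pathlib

    # toy stand-in for the split_dict built by read_confound_workflow
    split_dict = {
        'sub-001_ses-1_bold': {'cleaned_path': '/out/sub-001_ses-1_cleaned.nii.gz'},
        'sub-002_ses-1_bold': {'cleaned_path': '/out/empty.nii.gz'},
    }
    remove_list = []
    for name in list(split_dict):
        if 'empty' in pathlib.Path(split_dict[name]['cleaned_path']).name:
            remove_list.append(name)
            del split_dict[name]

    print(remove_list)  # ['sub-002_ses-1_bold'] -- this scan would be flagged in the warning
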
5 changes: 5 additions & 0 deletions rabies/confound_correction_pkg/main_wf.py
@@ -23,6 +23,11 @@ def init_main_confound_correction_wf(preprocess_opts, cr_opts):
else:
split_dict, split_name, target_list = read_preproc_workflow(preproc_output, nativespace=cr_opts.nativespace_analysis)

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
split_name = filter_scan_inclusion(cr_opts.inclusion_ids, split_name)
split_name = filter_scan_exclusion(cr_opts.exclusion_ids, split_name)

# setting up iterables from the BOLD scan splits
main_split = pe.Node(niu.IdentityInterface(fields=['split_name']),
name="main_split")
42 changes: 40 additions & 2 deletions rabies/parser.py
@@ -80,6 +80,33 @@ def get_parser():
description=
"Options for parallel execution and memory management."
)
g_execution.add_argument(
'--inclusion_ids', type=str,
nargs="*", # 0 or more values expected => creates a list
default=['all'],
help=
"Define a list of BOLD scan to include, i.e. run the pipeline on a subset of the data. \n"
"To do so, provide the full path to the corresponding BOLD file in the input BIDS folder. The list \n"
"of scan can be specified manually as a list of file name '--scan_list scan1.nii.gz \n"
"scan2.nii.gz ...' or the files can be imbedded into a .txt file with one filename per row.\n"
"By default, 'all' the scans found in the input BIDS directory or from the previous \n"
"processing step. This can be provided at any processing stage.\n"
"***NOTE: do not enter this parameter right before the processing stage (preprocess, etc...), this will cause \n"
"parsing errors. Instead, provide another parameter after --inclusion_ids (e.g. --verbose or -p). \n"
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
'--exclusion_ids', type=str,
nargs="*", # 0 or more values expected => creates a list
default=['none'],
help=
"Instead of providing a list of scans to include, this argument provides a list of scans to exclude (while \n"
"keeping all other scans). This argument follows the same syntax rules as --includion_ids. --exclusion_ids \n"
"and --inclusion_ids cannot be used simultaneously. \n"
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
"-p", "--plugin", default='Linear',
choices=['Linear', 'MultiProc', 'SGE', 'SGEGraph',
@@ -129,6 +156,14 @@ def get_parser():
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
"-f", "--force", dest='force', action='store_true',
help=
"The pipeline will not stop if previous outputs are encountered. \n"
"Previous outputs will be overwritten.\n"
"(default: %(default)s)\n"
"\n"
)


####Preprocessing
@@ -958,8 +993,11 @@ def get_parser():
return parser


def read_parser(parser):
opts = parser.parse_args()
def read_parser(parser, args):
if args is None:
opts = parser.parse_args()
else:
opts = parser.parse_args(args)

if opts.rabies_stage == 'preprocess':
opts.anat_inho_cor = parse_argument(opt=opts.anat_inho_cor,
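As the ***NOTE in the help text warns, --inclusion_ids and --exclusion_ids should not directly precede the positional stage argument. Illustrative invocations (BIDS paths are hypothetical):

    # include only two scans; -p follows the list so the stage name parses correctly
    rabies --inclusion_ids /bids/sub-001_bold.nii.gz /bids/sub-002_bold.nii.gz -p MultiProc preprocess /bids /outputs

    # exclude scans listed one per row in a .txt file
    rabies --exclusion_ids /data/exclude_ids.txt -p MultiProc preprocess /bids /outputs
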
2 changes: 1 addition & 1 deletion rabies/preprocess_pkg/main_wf.py
@@ -135,7 +135,7 @@ def init_main_wf(data_dir_path, output_folder, opts, name='main_wf'):
bids.config.set_option('extension_initial_dot', True)
layout = bids.layout.BIDSLayout(data_dir_path, validate=False)
split_name, scan_info, run_iter, scan_list, bold_scan_list = prep_bids_iter(
layout, opts.bold_only)
layout, opts.bold_only, inclusion_list=opts.inclusion_ids, exclusion_list=opts.exclusion_ids)

# setting up all iterables
main_split = pe.Node(niu.IdentityInterface(fields=['split_name', 'scan_info']),
15 changes: 14 additions & 1 deletion rabies/preprocess_pkg/utils.py
@@ -8,7 +8,7 @@
)
from rabies.utils import run_command

def prep_bids_iter(layout, bold_only=False):
def prep_bids_iter(layout, bold_only=False, inclusion_list=['all'], exclusion_list=['none']):
'''
This function takes as input a BIDSLayout, and generates iteration lists
for managing the workflow's iterables depending on whether --bold_only is
@@ -38,6 +38,19 @@ def prep_bids_iter(layout, bold_only=False):
raise ValueError(
"No functional file with the suffix 'bold' were found among the BIDS directory.")

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
boldname_list=[pathlib.Path(bold.filename).name.rsplit(".nii")[0] for bold in bold_bids]
updated_split_name = filter_scan_inclusion(inclusion_list, boldname_list)
updated_split_name = filter_scan_exclusion(exclusion_list, updated_split_name)

filtered_bold_bids=[]
for name in updated_split_name:
for bold in bold_bids:
if name in bold.filename:
filtered_bold_bids.append(bold)
bold_bids = filtered_bold_bids

bold_dict = {}
for bold in bold_bids:
sub = bold.get_entities()['subject']
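The nested loop above matches scan names back to BIDS files by substring (name in bold.filename). A toy sketch of that matching rule, with hypothetical filenames:

    bold_filenames = ['sub-001_ses-1_bold.nii.gz', 'sub-001_ses-2_bold.nii.gz',
                      'sub-002_ses-1_bold.nii.gz']
    updated_split_name = ['sub-001_ses-1_bold', 'sub-001_ses-2_bold']

    filtered_bold = [f for name in updated_split_name
                     for f in bold_filenames if name in f]
    print(filtered_bold)  # both sub-001 scans kept, sub-002 dropped
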
18 changes: 13 additions & 5 deletions rabies/run_main.py
@@ -11,10 +11,10 @@
rabies_path = os.environ['HOME']+'/.local/share/rabies'


def execute_workflow():
def execute_workflow(args=None):
# generates the parser CLI and execute the workflow based on specified parameters.
parser = get_parser()
opts = read_parser(parser)
opts = read_parser(parser, args)

try: # convert the output path to absolute if not already the case
opts.output_dir = os.path.abspath(str(opts.output_dir))
@@ -40,6 +40,13 @@ def execute_workflow():
args += input
log.info(args)

# inclusion/exclusion list are incompatible parameters
if (not opts.inclusion_ids[0]=='all') and (not opts.exclusion_ids[0]=='none'):
raise ValueError(f"""
Either an inclusion list (--inclusion_ids) or exclusion list (--exclusion_ids)
can be provided, not both.
""")

if opts.rabies_stage == 'preprocess':
workflow = preprocess(opts, log)
elif opts.rabies_stage == 'confound_correction':
@@ -72,12 +79,13 @@

def prep_logging(opts, output_folder):
cli_file = f'{output_folder}/rabies_{opts.rabies_stage}.pkl'
if os.path.isfile(cli_file):
if os.path.isfile(cli_file) and not opts.force:
raise ValueError(f"""
A previous run was indicated by the presence of {cli_file}.
This can lead to inconsistencies between previous outputs and the log files.
To prevent this, you are required to manually remove {cli_file}, and we
recommend also removing previous datasinks from the {opts.rabies_stage} RABIES step.
To prevent this, we recommend removing previous datasinks from the {opts.rabies_stage}
RABIES stage. To continue with your execution, the {cli_file} file must be
removed (use --force to automatically do so).
""")

# remove old versions of the log if already existing
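With the new args parameter, execute_workflow can be driven programmatically (e.g. from a test suite) rather than only through sys.argv. A hedged sketch, with hypothetical paths:

    from rabies.run_main import execute_workflow

    # equivalent to the CLI call: rabies -f preprocess /tmp/inputs /tmp/outputs
    execute_workflow(args=['-f', 'preprocess', '/tmp/inputs', '/tmp/outputs'])
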
115 changes: 115 additions & 0 deletions rabies/utils.py
@@ -389,6 +389,71 @@ def flatten_list(l):
return l


def filter_scan_exclusion(exclusion_list, split_name):
# the function removes a list of scan IDs from split_name

# exclusion_list: the input provided by the user
# split_name: a list of all scan IDs that were found

import numpy as np
import pandas as pd
if os.path.isfile(os.path.abspath(exclusion_list[0])):
updated_split_name=[]
if not '.nii' in pathlib.Path(exclusion_list[0]).name:
# read the file as a .txt
exclusion_list = np.array(pd.read_csv(os.path.abspath(exclusion_list[0]), header=None)).flatten()
for split in split_name:
exclude = False
for scan in exclusion_list:
if split in scan:
exclude = True
if not exclude:
updated_split_name.append(split)
elif exclusion_list[0]=='none':
updated_split_name = split_name
else:
raise ValueError(f"The --exclusion_ids {exclusion_list} input had improper format. It must the full path to a .txt or .nii files.")

if len(updated_split_name)==0:
raise ValueError(f"""
No scans are left after scan exclusion!
""")

return updated_split_name


def filter_scan_inclusion(inclusion_list, split_name):
# the function updates the list of scan IDs in split_name to correspond to the inclusion list

# inclusion_list: the input provided by the user
# split_name: a list of all scan IDs that were found

import numpy as np
import pandas as pd
if os.path.isfile(os.path.abspath(inclusion_list[0])):
updated_split_name=[]
if '.nii' in pathlib.Path(inclusion_list[0]).name:
for scan in inclusion_list:
updated_split_name.append(find_split(scan, split_name))
else:
# read the file as a .txt
inclusion_list = np.array(pd.read_csv(os.path.abspath(inclusion_list[0]), header=None)).flatten()
for scan in inclusion_list:
updated_split_name.append(find_split(scan, split_name))
elif inclusion_list[0]=='all':
updated_split_name = split_name
else:
raise ValueError(f"The --inclusion_ids {inclusion_list} input had improper format. It must the full path to a .txt or .nii files, or 'all' to keep all scans.")
return updated_split_name


def find_split(scan, split_name):
for split in split_name:
if split in scan:
return split
raise ValueError(f"No previous file name is matching {scan}")


######################
#FUNCTIONS TO READ WORKFLOW GRAPH
######################
@@ -437,3 +502,53 @@ def fill_node_dict(d, key_l, e):
return d
else:
return e


######################
#DEBUGGING
######################

def generate_token_data(tmppath, number_scans):
# this function generates fake scans at low resolution for quick testing and debugging

os.makedirs(tmppath+'/inputs', exist_ok=True)

if 'XDG_DATA_HOME' in os.environ.keys():
rabies_path = os.environ['XDG_DATA_HOME']+'/rabies'
else:
rabies_path = os.environ['HOME']+'/.local/share/rabies'

template = f"{rabies_path}/DSURQE_40micron_average.nii.gz"
mask = f"{rabies_path}/DSURQE_40micron_mask.nii.gz"

spacing = (float(1), float(1), float(1)) # resample to 1mmx1mmx1mm
resampled_template = resample_image_spacing(sitk.ReadImage(template), spacing)
# generate template masks
resampled_mask = resample_image_spacing(sitk.ReadImage(mask), spacing)
array = sitk.GetArrayFromImage(resampled_mask)
array[array < 1] = 0
array[array > 1] = 1
binarized = sitk.GetImageFromArray(array, isVector=False)
binarized.CopyInformation(resampled_mask)
sitk.WriteImage(binarized, tmppath+'/inputs/token_mask.nii.gz')
array[:, :, :6] = 0
binarized = sitk.GetImageFromArray(array, isVector=False)
binarized.CopyInformation(resampled_mask)
sitk.WriteImage(binarized, tmppath+'/inputs/token_mask_half.nii.gz')

# generate fake scans from the template
array = sitk.GetArrayFromImage(resampled_template)
array_4d = np.repeat(array[np.newaxis, :, :, :], 15, axis=0)

for i in range(number_scans):
# generate anatomical scan
sitk.WriteImage(resampled_template, tmppath+f'/inputs/sub-token{i+1}_T1w.nii.gz')
# generate functional scan
array_4d_ = array_4d + np.random.normal(0, array_4d.mean()
/ 100, array_4d.shape) # add gaussian noise
sitk.WriteImage(sitk.GetImageFromArray(array_4d_, isVector=False),
tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')

# necessary to read matrix orientation properly at the analysis stage
sitk.WriteImage(copyInfo_4DImage(sitk.ReadImage(tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz'), sitk.ReadImage(tmppath
+ f'/inputs/sub-token{i+1}_T1w.nii.gz'), sitk.ReadImage(tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')), tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')
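
generate_token_data pairs naturally with the args parameter added to execute_workflow: the token dataset gives a fast end-to-end run for debugging. A hedged usage sketch:

    import tempfile
    from rabies.utils import generate_token_data

    tmppath = tempfile.mkdtemp()
    # writes sub-token1_T1w.nii.gz, sub-token1_bold.nii.gz, ... under tmppath/inputs
    generate_token_data(tmppath, number_scans=3)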