forked from LeonDLotter/MAsync
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fsn.py
265 lines (232 loc) · 12.5 KB
/
fsn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 18 12:23:24 2021
@author: Leon D. Lotter
"""
import logging
lgr = logging.getLogger(__name__)
lgr.setLevel(logging.INFO)
import os
import numpy as np
import pandas as pd
import nimare as ni
import nilearn as nl
from .ale import ale
def fsn(ds, cl_true, template=None, fsn_range=None, max_fsn_it=None,
quadrant=True, n_subj_null=None, n_coord_null=None, save_dir=None,
pref='fsn_', n_perm=1000, v_thr=0.001, cluster='mass',
seed=None, plot_gb=False):
"""
Computes the 'Fail-Safe-N' (FSN) for ALE results by determining the maximum
number of noise studies that can be added to the analysis before the
cluster at hand is no longer significant. Python implementation of the
method proposed by Acar et al. (2018). To exclude the possibility that
noise studies contribute to the 'true' cluster, all noise foci are sampled
outside of the brain quadrant where the cluster's center of mass is located.
Reference: https://doi.org/10.1371/journal.pone.0208177
Github - Noise study generation: https://github.com/NeuroStat/GenerateNull
Github - FSN: https://github.com/NeuroStat/FailSafeN
Input:
ds = 'true' NiMARE dataset from which the original ALE was computed
cl_true = list of binarized cluster volumes (nifti object or path)
template = binarized volume to determine possible random foci coordinates
from. Defaults to MNI152 grey matter probability map thresholded at 0.2
fsn_range = minimum and maximum number of noise studies to start the
FSN-finding process. Defaults to [length(true ds), length(true ds)*10]
max_fsn_it = maximum number of iterations to find exact FSN. Defaults
to 10. Final maximum number of ALE runs = 2 + max_fsn_it
quadrant = if true, samples noise coordinates only in the brain quadrant
where the current cluster's center of mass is not located
n_subj_null, n_coord_null = lists of sample sizes and numbers of foci
to use in noise studies. By default, samples from true ds
save_dir, pref, n_perm, v_thr, cluster = ALE settings, see 'ale.py'
seed = starting seed for sampling procedures
plot_gb = if True, plots glass brain for each ALE.
Output:
list of NiMARE results of final ALEs for each cluster, table with FSN
and NiMARE noise dataset with n = fsn_range[1] noise studies.
"""
# Set variables ----------------------------------------------------------
ds.coordinates['space'] = 'MNI' # no effect but avoids warning when merging datasets
# set save directory
if save_dir is None:
save_dir = os.getcwd()
# set randum number generator
if seed is None:
rng = np.random.default_rng()
else:
rng = np.random.default_rng(seed)
# set minimum and maximum number noise studies to start FSN finding process
if fsn_range is None:
m = len(ds.ids)
M = len(ds.ids) * 10
else:
m = fsn_range[0]
M = fsn_range[1]
# set maximum number of iterations to determine FSN
if max_fsn_it is None:
max_fsn_it = 10
# get sample sizes for noise studies from 'true' dataset
if n_subj_null is None:
n_subj_true = ds.get_metadata(field='sample_sizes')
n_subj_null = rng.choice(n_subj_true, M)
# get numbers of foci per study for noise studies from 'true' dataset
if n_coord_null is None:
n_coord_true = [len(ds.coordinates.x[ds.coordinates.id==iD])
for iD in ds.ids]
n_coord_null = rng.choice(n_coord_true, M)
# Template ---------------------------------------------------------------
# If no template given, fetch MNI152 GM mask and resample to cl_true[0] space
# produces warning: "pixdim[0] (qfac) should be 1 or -1; setting qfac to 1",
# the result is okay, however.
if template is None:
template_1mm = nl.datasets.fetch_icbm152_brain_gm_mask(threshold=0.2)
template = nl.image.resample_to_img(template_1mm, cl_true[0],
interpolation='nearest')
# Get possible MNI coordinates for noise studies = all voxels in template
dat = template.get_fdata() # get matrix
x, y, z = np.where(dat == 1.0) # get GM coordinates
affine = template.affine # get affine matrix for MNI transform
# get mni coordinates
mni_all = nl.image.coord_transform(x=x, y=y, z=z, affine=affine)
mni_all = np.array(mni_all).T
# FSN --------------------------------------------------------------------
# define function to run ALE and return whether a significant cluster within
# the location of the 'true' cluster was found
def fsn_ale(ds_add, cl, prefix, title):
# run ale
res, _, mask = ale(data=ds_add, work_dir=save_dir, pref=prefix,
cluster=cluster, vox_thr=v_thr, n_perm=n_perm,
save_cl_info=False,
print_glassbrain=plot_gb, glassbrain_title=title)
if mask is not None: # if 'run_ale' found any significant cluster
# intersect significant result with 'true' cluster
intersect_cl = nl.image.math_img('img1 * img2', img1=cl, img2=mask)
# check whether any above-zero voxels exist within 'true' cluster
check_cl = np.sum(intersect_cl.get_fdata()) > 0
elif mask is None: # if 'run_ale' found no significant cluster at all
check_cl = False
return(res, check_cl)
# initialize lists to store results
fsn_tab = list()
fsn_ress = list()
# loop over clusters
for cl_i, cl in enumerate(cl_true, start=1):
lgr.info(f'Cluster {cl_i}:\n') # print cluster number
cl_pref = pref+'cl'+str(cl_i)+'_' # prefix to store ALE results
# 1. Generate noise studies ------------------------------------------
# Make sure that noise coordinates do not overlap with the target
# cluster by determining a "nogo" brain quadrant
if quadrant == True:
lgr.info(f'Generating noise studies while excluding cluster brain quadrant.')
# Get center of mass of cluster
ctr = nl.plotting.find_parcellation_cut_coords(cl)[0]
# Get MNI coordinates we can sample from by removing coordinates
# belonging to the 'nogo' brain quadrant where 'ctr' is placed in
q = [ (1 if c > 0 else -1) for c in ctr ]
mni_go = mni_all[(mni_all[:,0] * q[0] < 0) |
(mni_all[:,1] * q[1] < 0) |
(mni_all[:,2] * q[2] < 0), :]
else:
lgr.info(f'Generating noise studies while including all brain quadrants.')
mni_go = mni_all
# Generate noise dataset
ds_null_dict = {} # empty dict to store noise study-wise results
for i in range(0, M):
# re-initiate RNG with different seed in every loop, just in case
rng = np.random.default_rng(seed+i)
study_id = 'NullExp_' + str(f'{i+1:03}') # study id
coords_list = rng.choice(mni_go, n_coord_null[i]) # pick random foci
coords = np.vstack(coords_list) # make coordinates array
# write into dict as required by NiMARE
ds_null_dict[study_id] = {
'contrasts': { '1': {
'coords': {
'space': 'MNI',
'x': list(coords[:, 0]),
'y': list(coords[:, 1]),
'z': list(coords[:, 2]),
},
'metadata': {
'sample_sizes': n_subj_null[i]
}}}}
# convert dict to NiMARE dataset
ds_null = ni.dataset.Dataset(ds_null_dict)
ds_null.coordinates['space'] = 'MNI' # avoids error when merging datasets
ids_null = ds_null.ids # get all noise study IDs
# 2. FSN algorithm ---------------------------------------------------
# Step 1: run ale with minimum 'm' noise studies added ---------------
lgr.info(f'FSN step 1: Add minimum {m} noise studies.\n')
# add first m noise studies to ds
ds_add = ds.merge(ds_null.slice(ids_null[:m]))
fsn_res, check_cl = fsn_ale(ds_add, cl, cl_pref,
f'FSN, {m} noise studies') # run
if check_cl == False:
lgr.info('FSN finished: Cluster not significant, '
'indicative of low robustness.\n')
m_str = '< '+str(m) # true FSN is below minimum
fsn_tab.append(['cl' + str(cl_i), m_str, m_str, m_str]) # save
elif check_cl == True:
lgr.info('Cluster significant, proceed.\n')
# Step 2: run ale with maximum 'M' noise studies -----------------
lgr.info(f'FSN step 2: Add maximum {M} noise studies.\n')
# add M noise studies to ds
ds_add = ds.merge(ds_null.slice(ids_null[:M]))
fsn_res, check_cl = fsn_ale(ds_add, cl, cl_pref,
f'FSN, {M} noise studies') # run
if check_cl == True:
lgr.info('FSN finished: Cluster still significant, '
'indicative of highly influencial studies!\n')
M_str = '> '+str(M) # true FSN is above maximum
fsn_tab.append(['cl'+str(cl_i), M_str, M_str, M_str]) # save
elif check_cl == False:
lgr.info('Cluster not significant, proceed.\n')
# Step 3 to n: run ale with N noise studies ------------------
m_ = m
M_ = M
N = int((m_ + M_) / 2) # start with average of m and M,
# cut to the lower integer
for ii in range(0, max_fsn_it):
lgr.info(f'FSN step 3.{ii}: Add {N} noise studies.\n')
# add N noise studies to ds
ds_add = ds.merge(ds_null.slice(ids_null[:N]))
fsn_res, check_cl = fsn_ale(ds_add, cl, cl_pref,
f'FSN, {N} noise studies') # run
if check_cl == False:
lgr.info('Cluster not significant.')
m_ = m_ # minimum remains the same
M_ = N # last N is our new maximum
N = int((N + m_) / 2) # new N is mean of min and old N
elif check_cl == True:
lgr.info('Cluster significant.')
m_ = N # last N is our new minimum
M_ = M_ # maximum remains the same
N = int((N + M_) / 2) # new N is mean of old N and max
lgr.info(f'New min FSN = {m_}, new max FSN = {M_}, '
f'new N = {N}.')
# loop out if FSN exactly determined:
# case 1: N==m_==M__
# case 2: m_==n & M_==n+1
# m_==n -> significant cluster,
# M_==n+1 -> no significant cluster, thus FSN==m_
if (m_ == M_) or (M_ == m_+1):
lgr.info(f'Looping out after {ii+1} iterations, '
'FSN exactly determined for the given set of '
'noise studies.\n')
M_ = m_
break
# Finished: Show and store results ---------------------------
# If enough iterations where performed, FSN, min and max are
# the same. If not enough iterations, the true FSN lies
# between min and max.
lgr.info(f'Cluster {cl_i}: FSN finished: FSN (~)= {N} '
f'(min = {m_}, max = {M_})\n')
fsn_tab.append(['cl'+str(cl_i), N, m_, M_]) # save FSN values
# store ALE results
fsn_ress.append(fsn_res) # save last ALE result
# write list to dataframe
fsn_tab = pd.DataFrame(fsn_tab, columns=['cluster', 'appr_fsn',
'min_fsn', 'max_fsn'])
# return
return(fsn_tab, fsn_ress, ds_null)