I am trying to do decoding on a time-frequency matrix (401 time points, 39 frequencies, 102 sensors), with two experimental conditions, and use bootstrapped decoding scores with shuffled labels to test for significance (code snippet below).
One instance of computing the scores (cross_val_multiscore) takes about 20s to run, and if I multiply this with 39 (frequencies) and 50 (bootstrap repetitions), that is almost 11h for a single subject.
Based on the snippets below, do you have recommendations on how to improve the code?
Thanks!
import numpy as np
import mne
from mne.decoding import SlidingEstimator, cross_val_multiscore
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
n_splits = 10
n_jobs = 40
n_boot = 50
random_state = 42
# starting from a previously loaded epochsTFR called power, cropped to the time window of interest, and picked for magnetometers
# X are data matrices of trials x sensors x frequency x time
# get data from both conditions
X1 = power[condition1].data
X2 = power[condition2].data
# equalize event counts
n_min = np.min([X1.shape[0], X2.shape[0]])
X1 = X1[0:n_min, :]
X2 = X2[0:n_min, :]
X = np.concatenate([X1, X2])
# labels
y = np.r_[np.ones(n_min), np.zeros(n_min)]
se = SlidingEstimator(make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear',
random_state=random_state)), scoring=βroc_aucβ,
n_jobs=config.n_jobs)
cv = StratifiedKFold(random_state=random_state, n_splits=n_splits, shuffle=True)
SCORES = np.zeros((n_splits, X.shape[-2], X.shape[-1]))
SCORES.fill(np.nan)
SCORES_BS = np.zeros((n_boot, n_splits, X.shape[-2], X.shape[-1]))
SCORES_BS.fill(np.nan)
# loop over frequencies and compute decoding
for i_freq, freq in enumerate(power.freqs):
X_freq = X[:, :, i_freq, :]
scores = cross_val_multiscore(se, X=X_freq, y=y, cv=cv, n_jobs=n_jobs)
SCORES[:, i_freq, :] = scores
# shuffle labels and recompute decoding scores
for i in np.arange(0, n_shuffle):
y_shuffle = y
np.random.shuffle(y_shuffle)
scores_BS = cross_val_multiscore(se, X=Xfreq, y=y_shuffle, cv=cv, n_jobs=40)
SCORES[i, :, i_freq, :] = scores_BS