I’m working through this exercise:
Fetch 50 subjects from the Physionet database and run a 5-fold cross-validation leaving each time 10 subjects out in the test set.
Easy-peasy, right?
I can’t seem to figure out how to go from a bunch of datasets to a bunch of epochs, then use train_ndx, test_ndx from skf.split() to select from those epochs and call pipe.fit() with them.
In the code below, the problem is that X_train, y_train are lists of Epochs, and (naturally) don’t have a compute_psd() method.
I would greatly appreciate any suggestions!
pipe = make_pipeline(
FunctionTransformer(eeg_power_band, validate=False),
RandomForestClassifier(n_estimators=100, random_state=42),
)
from sklearn.model_selection import StratifiedKFold
# create lists of epochs and corresponding events
X = []
y = []
skf = StratifiedKFold(n_splits=5)
for ndx in range(len(epochs)):
epochs[ndx].drop_bad()
for ep_ndx in range(len(epochs[ndx])):
X.append(epochs[ndx][ep_ndx])
y.append(epochs[ndx][ep_ndx].events[:, 2][0])
# skf.split() gives us indices into the lists to create stratified train & test epochs
for train_ndx, test_ndx in skf.split(np.zeros(len(y)), y):
X_train = []
y_train = []
for ndx in train_ndx:
X_train.append(X[ndx])
y_train.append(y[ndx])
X_test = []
y_test = []
for ndx in test_ndx:
X_test.append(X[ndx])
y_test.append(y[ndx])
# fit the training data (errors here)
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy score: {acc}")