If you have a question or issue with MNE-Python, please include the following info:
- MNE version: 1.9.0
- operating system: Kaggle Notebook
def preprocess_eeg_data(raw_data):
    """
    Preprocess EEG data with manual annotation of bad segments and robust ICA settings.

    The pipeline works on a copy of the input and runs, in order: 1 Hz
    high-pass, 45 Hz low-pass and 50/60 Hz notch FIR filters; an amplitude
    report; an interactive plot for manual annotation; a best-effort ICA
    with EOG-component removal; average re-referencing; and replacement of
    NaN/inf samples with zeros.

    Parameters
    ----------
    raw_data : mne.io.Raw
        Preloaded continuous recording. It is copied first, so the caller's
        object is left unmodified.

    Returns
    -------
    mne.io.Raw
        The filtered, (optionally) ICA-cleaned and average-referenced copy.

    Raises
    ------
    ValueError
        If the recording contains no usable (non-bad) EEG channels.
    Exception
        Any other error from the filtering / referencing steps is printed
        and re-raised unchanged. Errors inside the ICA step are printed and
        the step is skipped instead.
    """
    # EEG amplitudes above this (in volts) are physically implausible and
    # almost always mean the file was loaded in µV/mV without rescaling.
    implausible_amplitude_v = 1.0

    try:
        # Create a copy of the raw data
        raw_data = raw_data.copy()

        # Select EEG channels
        picks = mne.pick_types(raw_data.info, eeg=True, exclude='bads')
        if len(picks) == 0:
            raise ValueError("No usable EEG channels found in the recording.")

        # Apply filters
        raw_data.filter(l_freq=1.0, h_freq=None, method='fir', phase='zero-double')
        raw_data.filter(l_freq=None, h_freq=45.0, method='fir', phase='zero-double')
        raw_data.notch_filter([50, 60], method='fir', phase='zero-double')

        # Debug: Check data quality.
        # get_data() returns SI units (volts for EEG), so convert to µV
        # before printing with a µV label.
        data = raw_data.get_data(picks=picks)
        abs_volts = np.abs(data)
        max_volts = np.max(abs_volts)
        print(f"Max amplitude: {max_volts * 1e6} µV")
        print(f"Min amplitude: {np.min(abs_volts) * 1e6} µV")
        if max_volts > implausible_amplitude_v:
            print(
                "Warning: EEG amplitudes exceed 1 V; the data are probably not "
                "scaled to volts. Amplitude-based ICA rejection will likely "
                "discard every segment until the data are rescaled."
            )

        # Manually inspect and annotate bad segments
        print("Manually inspect and annotate bad segments...")
        raw_data.plot(block=True, scalings=dict(eeg=100e-6))  # Adjust scalings as needed

        # Print annotated bad segments
        if raw_data.annotations:
            print(f"Annotated bad segments: {raw_data.annotations.description}")
        else:
            print("No bad segments annotated.")

        try:
            # More conservative ICA approach
            # Calculate optimal number of components based on explained variance
            from sklearn.decomposition import PCA

            pca = PCA(n_components=0.999)  # Keep 99.9% of variance
            pca.fit(data.T)
            n_below_threshold = int(
                np.sum(pca.explained_variance_ratio_.cumsum() < 0.999)
            )
            # Never request zero components: that happens when the first
            # principal component alone already explains >= 99.9 %.
            n_components = min(5, max(1, n_below_threshold))

            # Apply ICA with conservative settings
            ica = ICA(
                n_components=n_components,  # Use calculated number of components
                random_state=97,
                method='infomax',  # More stable than fastica
                fit_params=dict(extended=True),  # Extended infomax for better stability
                max_iter=1000,
            )

            # Fit ICA with more robust parameters
            ica.fit(
                raw_data,
                picks=picks,
                decim=3,
                reject=dict(eeg=100000e-6),  # Increased rejection threshold (100,000 µV)
                reject_by_annotation=True,
                verbose=False,
            )

            # Find EOG artifacts more conservatively. Only attempt this when
            # the recording has an EOG channel; otherwise find_bads_eog raises
            # and the whole ICA step would be reported as skipped.
            eog_picks = mne.pick_types(raw_data.info, meg=False, eog=True)
            if len(eog_picks) > 0:
                eog_indices, _scores = ica.find_bads_eog(raw_data, threshold=3.5)
                if eog_indices:
                    ica.exclude.extend(eog_indices[:2])  # Limit to max 2 components
            else:
                print("No EOG channels found; skipping EOG component detection.")

            # Apply ICA
            ica.apply(raw_data)
        except Exception as e:
            # Deliberate best-effort: a failed ICA must not abort preprocessing.
            print(f"ICA processing skipped: {str(e)}")

        # Re-reference to average and apply projection
        raw_data.set_eeg_reference('average', projection=True)
        raw_data.apply_proj()  # Apply the average reference projection

        # Ensure data is clean: zero out NaN and +/-inf samples.
        # NOTE(review): this writes the private ``_data`` attribute, as the
        # original code did — confirm this is acceptable for the MNE version in use.
        data = raw_data.get_data()
        data[~np.isfinite(data)] = 0
        raw_data._data = data

        return raw_data
    except Exception as e:
        print(f"Error in preprocessing: {str(e)}")
        raise
Output I’m receiving:
Max amplitude: 65366.45013135544 µV
24.1s 115 Min amplitude: 2.6645352591003757e-15 µV
24.1s 116 Manually inspect and annotate bad segments...
24.1s 117 Using matplotlib as 2D backend.
25.1s 118 No bad segments annotated.
25.3s 119 ICA processing skipped: No clean segment found. Please consider updating your rejection thresholds.
25.3s 120 EEG channel type selected for re-referencing
25.3s 121 Adding average EEG reference projection.
25.3s 122 1 projection items deactivated
25.3s 123 Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.