Hi, I am new to data preprocessing, and I have recently applied "automatic" artifact removal with ICA. I have over 400 EEG files that need to be preprocessed, so I couldn't do all of them manually. Instead, I applied the code below. However, since I am new to data processing, I cannot be sure that I have applied ICA correctly. How can I verify that the artifacts were removed successfully? When I plotted the raw and processed data to compare them in a few of the files, I noticed changes in Fp1, Fp2, P2, P5 and FT9 (all of them are frontal channels).
# %% Imports
import mne
import numpy as np
import os
import matplotlib.pyplot as plt
# Folder that is scanned for the raw EDF recordings.
data_directory: str = "/software/dataset/chb01"
# Folder that receives the ICA-corrected FIF files.
output_directory: str = "/software/dataset/chb01/filtered"
# Channel rename table: label as it appears in the recording -> name used
# after renaming (adapt to your own data).
# NOTE(review): the keys look like bipolar derivations while the values are
# single-electrode names, so the montage positions assigned afterwards may not
# match the true spatial origin of each signal (e.g. 'FP2-F8' -> 'P5',
# 'FP1-F3' -> 'P2') -- confirm this is intended.
# NOTE(review): 'T8-P8-0' / 'T8-P8-1' presumably are the de-duplicated names of
# a channel label that occurs twice in the file -- verify against the data.
ch_mapping = {
    'FP1-F7': 'Fp1',
    'F7-T7': 'F7',
    'T7-P7': 'T7',
    'P7-O1': 'P7',
    'F3-C3': 'F3',
    'C3-P3': 'C3',
    'P3-O1': 'P3',
    'FP2-F4': 'Fp2',
    'F4-C4': 'F4',
    'C4-P4': 'C4',
    'P4-O2': 'P4',
    'FZ-CZ': 'Fz',
    'CZ-PZ': 'Cz',
    'FP2-F8': 'P5',
    'F8-T8': 'F8',
    'T8-P8-0': 'T8',
    'P8-O2': 'P8',
    'FP1-F3': 'P2',
    'P7-T7': 'P9',
    'T7-FT9': 'T9',
    'FT9-FT10': 'FT9',
    'FT10-T8': 'FT10',
    'T8-P8-1': 'TP8',
}
def _find_correlated_components(sources, reference_signals, threshold):
    """Return indices of rows in *sources* that correlate with any reference.

    A row is flagged when the absolute Pearson correlation between it and at
    least one entry of *reference_signals* is strictly greater than
    *threshold*. Each index appears at most once, in ascending order.
    """
    return [
        idx
        for idx, source in enumerate(sources)
        if any(
            abs(np.corrcoef(source, reference)[0, 1]) > threshold
            for reference in reference_signals
        )
    ]


# Automated ICA-based artifact removal for a single recording
def process_file(file_path, ch_mapping, output_directory,
                 frontal_ch_names=("Fp1", "Fp2"), corr_threshold=0.4):
    """Clean one EDF recording with ICA and save the result as a FIF file.

    Steps: load the EDF, rename channels, set the "standard_1020" montage,
    high-pass filter at 1 Hz, fit a Picard ICA, exclude every component whose
    time series correlates with one of the frontal channels above the
    threshold, apply the ICA, and save to
    ``<output_directory>/<input name>_corrected.fif``.

    Args:
        file_path: Path of the EDF file to process.
        ch_mapping: Dict mapping original channel names to new names. Keys
            that are not present in the recording are skipped (and reported).
        output_directory: Folder for the corrected file; created if missing.
        frontal_ch_names: Channel names (after renaming) used as artifact
            references. Names missing from the recording are ignored.
        corr_threshold: A component is excluded when the absolute correlation
            with any reference channel is strictly greater than this value.

    Returns:
        None. The corrected recording is written to disk.
    """
    print(f"Processing {file_path}...")

    # Load the EEG data
    eeg = mne.io.read_raw_edf(file_path, preload=True)

    # Rename channels and set montage. The mapping is restricted to channels
    # that exist in this file because rename_channels() rejects unknown keys,
    # which would otherwise abort the file when the channel set differs.
    present_mapping = {
        old: new for old, new in ch_mapping.items() if old in eeg.ch_names
    }
    skipped = [old for old in ch_mapping if old not in present_mapping]
    if skipped:
        print(f"Channels not found in this file (not renamed): {skipped}")
    eeg.rename_channels(present_mapping)
    eeg.set_montage("standard_1020", on_missing="ignore")

    # High-pass filter only (h_freq=None means no low-pass is applied)
    eeg.filter(l_freq=1, h_freq=None)

    # Fit ICA; random_state is fixed so repeated runs give the same result
    ica = mne.preprocessing.ICA(
        method="picard",
        fit_params={"extended": True, "ortho": False},
        random_state=1,
    )
    ica.fit(eeg)

    # Time series of the ICA components, one row per component
    component_activations = ica.get_sources(eeg).get_data()

    # Extract each reference channel once, instead of copying the whole
    # recording for every (component, channel) pair.
    available_refs = [ch for ch in frontal_ch_names if ch in eeg.ch_names]
    if not available_refs:
        print(
            f"None of the reference channels {list(frontal_ch_names)} are "
            "present; no components will be excluded."
        )
    reference_signals = [eeg.get_data(picks=[ch])[0] for ch in available_refs]

    # NOTE(review): plain correlation with frontal channels is a heuristic;
    # MNE's ICA.find_bads_eog(..., ch_name=...) may be worth comparing against.
    artifact_components = _find_correlated_components(
        component_activations, reference_signals, corr_threshold
    )

    # Set identified artifact components for exclusion
    ica.exclude = artifact_components
    print(f"Excluding components: {artifact_components}")

    # Apply ICA on a copy so the filtered input stays untouched
    eeg_corrected = eeg.copy()
    ica.apply(eeg_corrected)

    # Drop the measurement date before saving (works around the meas_date
    # issue noted by the original author; a specific valid date would also do)
    eeg_corrected.set_meas_date(None)

    # Save corrected EEG data; make sure the target folder exists first.
    # splitext only strips the final extension, unlike str.replace which
    # would rewrite every '.edf' occurrence in the name.
    os.makedirs(output_directory, exist_ok=True)
    stem, _ = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_directory, f"{stem}_corrected.fif")
    eeg_corrected.save(output_path, overwrite=True)
    print(f"Saved corrected file to {output_path}\n")
# Run the ICA clean-up on every EDF recording found in the input folder
for entry in os.listdir(data_directory):
    # Anything that is not an .edf file (other files, sub-folders) is skipped
    if not entry.endswith(".edf"):
        continue
    process_file(os.path.join(data_directory, entry), ch_mapping, output_directory)