Understanding application of the ICA

Hi, I am new to data preprocessing, and lately I have applied an 'automatic' artifact removal with ICA. I have over 400 EEG files that need to be preprocessed, so I couldn't do all of them manually. Instead, I have applied the code below. However, since I am new to data processing, I cannot be sure that I have applied ICA correctly. How can I ensure that the artifacts are removed successfully? When I plotted the raw and processed data to compare them in a few of the files, I noticed changes in Fp1, Fp2, P2, P5 and FT9 (all of them are frontal channels).

# %% Imports
import mne
import numpy as np
import os
import matplotlib.pyplot as plt

# Input folder containing the EDF recordings to process.
data_directory = "/software/dataset/chb01"
# Corrected files are written to a "filtered" subfolder of the input folder.
output_directory = f"{data_directory}/filtered"

# Lookup table used to rename channels: key = channel label as stored in the
# recording, value = label it is renamed to (modify as needed for your data).
# NOTE(review): the keys look like bipolar derivations (electrode pairs), so
# each renamed channel presumably still carries the difference signal rather
# than a single-electrode signal -- confirm this is intended.
# NOTE(review): 'FP2-F8' -> 'P5' and 'FP1-F3' -> 'P2' give FP*-based keys
# P* labels -- verify these target names.
ch_mapping = {
    'FP1-F7': 'Fp1',
    'F7-T7': 'F7',
    'T7-P7': 'T7',
    'P7-O1': 'P7',
    'F3-C3': 'F3',
    'C3-P3': 'C3',
    'P3-O1': 'P3',
    'FP2-F4': 'Fp2',
    'F4-C4': 'F4',
    'C4-P4': 'C4',
    'P4-O2': 'P4',
    'FZ-CZ': 'Fz',
    'CZ-PZ': 'Cz',
    'FP2-F8': 'P5',
    'F8-T8': 'F8',
    'T8-P8-0': 'T8',
    'P8-O2': 'P8',
    'FP1-F3': 'P2',
    'P7-T7': 'P9',
    'T7-FT9': 'T9',
    'FT9-FT10': 'FT9',
    'FT10-T8': 'FT10',
    'T8-P8-1': 'TP8',
}

def _correlated_components(sources, reference_data, threshold):
    """Return indices of source rows correlated with any reference row.

    Args:
        sources: 2-D array, one time series per row (ICA component activations).
        reference_data: 2-D array, one reference channel time series per row.
        threshold: a row is flagged when the absolute Pearson correlation with
            at least one reference row is strictly greater than this value.

    Returns:
        Ascending list of row indices of ``sources`` that were flagged.
    """
    return [
        idx
        for idx, source in enumerate(sources)
        if any(
            abs(np.corrcoef(source, reference)[0, 1]) > threshold
            for reference in reference_data
        )
    ]


# Define function to automate ICA and artifact removal
def process_file(file_path, ch_mapping, output_directory, *,
                 frontal_ch_names=("Fp1", "Fp2"), corr_threshold=0.4):
    """Run ICA on one EDF file, drop frontal-correlated components, and save.

    Steps: load the EDF, rename channels, set the standard 10-20 montage,
    high-pass filter at 1 Hz, fit ICA (picard), exclude every component whose
    time series correlates with one of ``frontal_ch_names``, apply the ICA and
    save the result as ``<name>_corrected.fif`` in ``output_directory``.

    Args:
        file_path: path of the EDF file to read.
        ch_mapping: dict mapping original channel names to new names. Entries
            whose original name is absent from the file are skipped.
        output_directory: folder for the corrected file; created if missing.
        frontal_ch_names: channel names (after renaming) used as references
            for the correlation screening; names absent from the file are
            ignored.
        corr_threshold: absolute correlation above which a component is
            excluded; adjust based on testing.

    Returns:
        None. The corrected recording is written to disk.
    """
    print(f"Processing {file_path}...")

    # Load the EEG data
    eeg = mne.io.read_raw_edf(file_path, preload=True)

    # Rename channels and set montage. Only pass the entries that exist in
    # this recording, so a file lacking one of the mapped channels is still
    # processed instead of failing on the rename.
    present_mapping = {
        old: new for old, new in ch_mapping.items() if old in eeg.ch_names
    }
    eeg.rename_channels(present_mapping)
    eeg.set_montage("standard_1020", on_missing="ignore")

    # High-pass filter at 1 Hz (h_freq=None means no low-pass is applied)
    eeg.filter(l_freq=1, h_freq=None)

    # Fit ICA
    ica = mne.preprocessing.ICA(
        method="picard",
        fit_params={"extended": True, "ortho": False},
        random_state=1
    )
    ica.fit(eeg)

    # Automate artifact detection by correlation with the frontal channels.
    # The reference data is fetched once up front rather than copying the
    # whole recording for every component/channel pair.
    component_activations = ica.get_sources(eeg).get_data()
    reference_names = [ch for ch in frontal_ch_names if ch in eeg.ch_names]
    if reference_names:
        reference_data = eeg.get_data(picks=reference_names)
    else:
        # No reference channel in this file: nothing can be flagged.
        reference_data = []
    artifact_components = _correlated_components(
        component_activations, reference_data, corr_threshold
    )

    # Set identified artifact components for exclusion
    ica.exclude = artifact_components
    print(f"Excluding components: {artifact_components}")

    # Apply ICA to a copy so the filtered (uncorrected) data stays untouched
    eeg_corrected = eeg.copy()
    ica.apply(eeg_corrected)

    # Fix meas_date issue by setting it to None
    eeg_corrected.set_meas_date(None)

    # Save corrected EEG data. splitext swaps only the final extension, so a
    # '.edf' occurring elsewhere in the name is left alone and the output
    # always ends in '.fif'.
    # NOTE(review): MNE may warn that '_corrected.fif' does not follow its
    # '*_raw.fif' / '*_eeg.fif' naming convention -- confirm before renaming.
    os.makedirs(output_directory, exist_ok=True)
    stem, _ = os.path.splitext(os.path.basename(file_path))
    output_path = os.path.join(output_directory, f"{stem}_corrected.fif")
    eeg_corrected.save(output_path, overwrite=True)
    print(f"Saved corrected file to {output_path}\n")

# Process each EDF file in the directory. Files are visited in sorted order so
# every run handles them in the same sequence (os.listdir order is arbitrary),
# and a failure in one recording is reported without aborting the whole batch.
failed_files = []
for filename in sorted(os.listdir(data_directory)):
    if not filename.endswith(".edf"):
        continue
    file_path = os.path.join(data_directory, filename)
    try:
        process_file(file_path, ch_mapping, output_directory)
    except Exception as err:  # batch boundary: report the file and keep going
        print(f"Failed to process {file_path}: {err!r}")
        failed_files.append(file_path)

# Summarise failures at the end so they are not lost in the per-file output.
if failed_files:
    print(f"{len(failed_files)} file(s) could not be processed:")
    for failed_path in failed_files:
        print(f"  {failed_path}")

A post was merged into an existing topic: Artifact Removal from EEG Data using ICA

Please do not create new posts on the same topic (I've moved this post to the existing discussion).

1 Like