LCMV source localisation failed (frequency shift?)

:red_question_mark: If you have a question or issue with MNE-Python, please include the following info:

  • MNE version: 1.10.1
  • operating system: e.g. Windows 11

Dear List:

I have some questions regarding the source localisation using the LCMV method. I found the source localisation results include some frequency band up-shift (please see the sanity check for the occipital channels VS V1/V2 regions).

Occipital channels (sensor level)

V1+V2 (source level)

This results in some effects observed at the sensor level becoming weaker or disappearing at the source level.

My source-localisation pipeline is roughly as follows:

  • Epochs were extracted from -2.5 to 3.5 s, with the actual time window of interest being 0.5–1.5 s. The additional pre-/post-stimulus periods were included as padding for the TFR computation
  • LCMV beamforming was used for source reconstruction.
  • Noise covariance was computed from the baseline period (-0.5 to -0.1 s), whereas the data covariance was computed from 0 to 1.5 s.
  • The spatial filters were constructed using pick_ori='max-power'.
# about 63 minutes - trial level-induced-method: LCMV - max-power
for epochs_fname in epoch_files:

    fname = os.path.basename(epochs_fname)
    print(f'\nProcessing: {fname}')

    if 'sem' in fname:
        prefix = 'Sem'
        sub = fname.split('sem')[0]
    elif 'tom' in fname:
        prefix = 'Tom'
        sub = fname.split('tom')[0]
    else:
        print('Unknown file type, skip')
        continue

    print(f'Subject ID: {sub}')

    epochs = mne.read_epochs(epochs_fname, preload=True)

    # ===============================
    epochs.pick_types(meg=False, eeg=True)
    epochs.set_eeg_reference('average', projection=True)

    # desample: 1024 → 256Hz
    epochs.resample(256)

    # time window with padding
    epochs.crop(-2.5, 3.5)

    conditions = list(epochs.event_id.keys())
    print('Conditions:', conditions)

    fwd_fname = os.path.join(data_path_1, f'{sub}-biosemi128-eeg-fwd.fif')
    if not os.path.exists(fwd_fname):
        print(f'Forward not found for subject {sub}, skip.')
        del epochs
        gc.collect()
        continue

    fwd = mne.read_forward_solution(fwd_fname)
    fwd = mne.pick_types_forward(fwd, meg=False, eeg=True)

    info = epochs.info.copy()

    # ===============================
    # making common filters
    # ===============================
    noise_cov = mne.compute_covariance(
        epochs,
        tmin=-0.5,
        tmax=-0.1
    )

    data_cov = mne.compute_covariance(
        epochs,
        tmin=0,
        tmax=1.5
    )   # which include: article + target + first end word

    filters = mne.beamformer.make_lcmv(
        info,
        fwd,
        data_cov=data_cov,
        noise_cov=noise_cov,
        pick_ori='max-power', # each vertex has its own optimal orientation, but this is the same across conditions
        reg=0.05,
        reduce_rank=True, # remove weakest singular dimension
        weight_norm='nai',  # Neural Activity Index: using the noise covariance to do the noise normalization
        rank=None  # let MNE automatically estimate the effective data rank to stabilise covariance inversion in the beamformer
    )

    # ===============================
    # condition loop within each epoch
    # ===============================
    for cond in conditions:

        print(f'------ Condition: {cond} ------')

        if cond not in epochs.event_id:
            continue

        # induced:asubstract evoked
        epochs_cond = epochs[cond].copy().subtract_evoked()

        if len(epochs_cond) < 5:
            print(f'Not enough trials for {cond}, skip')
            continue

        # for e
        stcs = mne.beamformer.apply_lcmv_epochs(
            epochs_cond,
            filters,
            return_generator=True
        )

        trial_data = []
        stc_template = None

        for stc in stcs:

            if stc_template is None:
                stc_template = stc.copy()

            x = stc.data  # (n_vertices, 3, n_times)

            # vector → scalar <- this is useless here because using the 'max-power' method won't be enter this path
            if x.ndim == 3:
                x = np.linalg.norm(x, axis=1)

            trial_data.append(x.astype(np.float32))

        if len(trial_data) == 0:
            continue

        trial_data = np.stack(trial_data, axis=0)
        # shape = (n_trials, n_vertices, n_times)

        out_fname = os.path.join(
            out_dir,
            f'{sub}_{cond}_trial_source.npz'
        )

        np.savez_compressed(
            out_fname,
            data=trial_data,
            times=stc_template.times,
            vertices=np.array(stc_template.vertices, dtype=object),
            sfreq=epochs.info['sfreq']
        )

        print(f'Saved: {out_fname}, shape={trial_data.shape}')

        del trial_data, epochs_cond
        gc.collect()

    del epochs, fwd, info, noise_cov, data_cov, filters
    gc.collect()

Then, my pipeline for the source level time frequency analysis is:

  • Morlet-based power estimation in source space (2–30 Hz, n_cycles = 5)
  • averaging power across trials
  • iterative 1/f correction
  • shared baseline z-score normalisation across conditions
  • group-level condition contrasts / cluster permutation tests
import numpy as np
import re
from pathlib import Path
from mne.time_frequency import tfr_array_morlet
import gc
import os

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

# ===== path =====
base_dir = Path(r"E:/1_study/0_PhD_Liverpool/18_data/1_AGPPC1_EEG/4_source_localisation")

data_dir = base_dir / "6_tfa_stc_longer_maxp"
out_dir  = base_dir / "8_4_source_tfr_maxp4_1f1st_015/0_sem"
out_dir.mkdir(parents=True, exist_ok=True)

conditions = [
    'Sem_HC_C_article',
    'Sem_HC_IC_article',
    'Sem_LC_C_article'
]

pattern = re.compile(r'^(\d+)_Sem_.*\.npz$', re.IGNORECASE)

freqs = np.linspace(2, 30, 29)
n_cycles = 5

baseline = (-0.5, -0.1)
real_tmin, real_tmax = -0.5, 1.5

chunk_size = 400  # one time calculating for 400 vertexs - for saving the 
#decim = 4   

all_files = sorted([f for f in data_dir.glob("*.npz") if pattern.search(f.name)])
print(f"Found {len(all_files)} files")

# ===== grouping by subject =====
subj_files = {}
for f in all_files:
    m = pattern.match(f.name)
    if m is None:
        continue
    subj_id = m.group(1)
    cond = '_'.join(f.stem.split('_')[1:5])
    if cond not in conditions:
        continue
    subj_files.setdefault(subj_id, {})
    subj_files[subj_id][cond] = f     # re-organise .npz file to sub_cond_xxx


# =========================
# main loop
# =========================
for subj_id, cond_dict in subj_files.items():

    if not all(c in cond_dict for c in conditions):
        continue

    print(f"\nProcessing subject {subj_id}")

    sum_, sumsq_, count_ = None, None, None  # initialise accumulators for incremental mean/variance computation

    # =========================
    # PASS 1:baseline stats
    # =========================
    for cond in conditions:

        npz = np.load(cond_dict[cond], allow_pickle=True)
        data = npz['data']
        times = npz['times']
        sfreq = float(npz['sfreq'])

        data_small = data
        n_trials, n_vertices, _ = data_small.shape
        n_freqs = len(freqs)

        # decimated time axis
        times_decim = times

        tmask = (times_decim >= real_tmin) & (times_decim <= real_tmax) # making the time_window that I'm interested: -0.5 ~ 1.5s
        times_crop = times_decim[tmask]
        bl_mask = (times_crop >= baseline[0]) & (times_crop <= baseline[1])

        if sum_ is None:
            sum_ = np.zeros((n_vertices, n_freqs), dtype=np.float32)
            sumsq_ = np.zeros((n_vertices, n_freqs), dtype=np.float32)
            count_ = np.zeros((n_vertices, n_freqs), dtype=np.float32)

        for start in range(0, n_vertices, chunk_size):
            end = min(start + chunk_size, n_vertices)

            print(f"  [PASS1] {cond} | chunk {start}-{end}")

            data_chunk = data_small[:, start:end, :]  # epoch with padding: -2.5 ~ 3.5s

            power = tfr_array_morlet(
                data_chunk,
                sfreq=sfreq,
                freqs=freqs,
                n_cycles=n_cycles,
                output='power',
                #decim=decim,
                n_jobs=2
            )

            power = power[..., tmask]  # crop to -0.5 ~ 1.5s

            # average trials first, to match sensor-level average=True
            power = power.mean(axis=0)  # shape: n_vertices_chunk × n_freqs × n_times, and the trial has been averaged out 

            # # 1/f correction after trial averaging
            power = remove_1f_iterative_loglog(power, freqs)

            bl = power[..., bl_mask]  # get the power data within the baseline

            n_bl = bl.shape[-1]  # get the baseline timepoints

            # accumulate baseline sum, squared sum, and sample count
            sum_[start:end] += bl.sum(axis=-1)  # adding the power at each time-point within the baseline
            sumsq_[start:end] += (bl**2).sum(axis=-1)
            count_[start:end] += n_bl

            del data_chunk, power, bl
            gc.collect()

        del data, data_small, npz
        gc.collect()

    # ===== baseline =====
    # =====================================================
    # shared baseline (within-subject, across-condition)
    # baseline mean/std are computed across all conditions
    # and baseline time points for each vertex × frequency
    # =====================================================
    bl_mean = sum_ / count_    # baseline time-point is averaged out
    bl_var = sumsq_ / count_ - bl_mean**2

    # avoid small negative values caused by floating-point errors
    bl_var = np.maximum(bl_var, 0)
    bl_std = np.sqrt(bl_var)

    # avoid division by zero
    bl_std[bl_std == 0] = np.finfo(float).eps   # within-subject, across-condition baseline

    # add singleton time dimension for broadcasting
    bl_mean = bl_mean[..., None]
    bl_std  = bl_std[..., None]    

    # =========================
    # PASS 2:1/f + baseline + mean
    # =========================
    for cond in conditions:

        npz = np.load(cond_dict[cond], allow_pickle=True)
        data = npz['data']
        times = npz['times']
        sfreq = float(npz['sfreq'])

        data_small = data
        n_trials, n_vertices, _ = data_small.shape

        times_decim = times
        tmask = (times_decim >= real_tmin) & (times_decim <= real_tmax)
        times_crop = times_decim[tmask]

        power_mean = []

        for start in range(0, n_vertices, chunk_size):
            end = min(start + chunk_size, n_vertices)

            print(f"  [PASS2] {cond} | chunk {start}-{end}")

            data_chunk = data_small[:, start:end, :]

            power = tfr_array_morlet(
                data_chunk,
                sfreq=sfreq,
                freqs=freqs,
                n_cycles=n_cycles,
                output='power',
                #decim=decim,
                n_jobs=2
            )

            power = power[..., tmask]

            # average trials first, to match sensor-level average=True
            power = power.mean(axis=0)  # shape: n_vertices_chunk × n_freqs × n_times

            # # 1/f correction after trial averaging
            power = remove_1f_iterative_loglog(power, freqs)

            power = (power - bl_mean[start:end]) / bl_std[start:end]

            power_mean.append(power)

            del data_chunk, power
            gc.collect()

        power_mean = np.concatenate(power_mean, axis=0)

        power_mean = power_mean.astype(np.float32) 

        out_fname = out_dir / f"{subj_id}_{cond}_TFR_mean.npz"
        np.savez_compressed(
            out_fname,
            power=power_mean,
            freqs=freqs,
            times=times_crop,
            sfreq=sfreq
        )

        print(f"  ✅ Saved: {out_fname}")

        del data, data_small, power_mean, npz
        gc.collect()

I was wondering if there are any issues with my pipeline which induced this kind of frequency band shifts? Or have you ever encountered any cases like this?

Apologies for this very long question and thank you very much!

Best wishes,

Yi