Platform Linux-6.12.10-76061203-generic-x86_64-with-glibc2.35
Python 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]
mne 1.10.0 (latest release)
I use mne.io.read_raw_edf
to read *_Segment_*.edf
and then call
ra.compute_psd(method="welch", n_fft=2048, n_jobs=1, ...)
on each channel separately (after annotating and filtering; code appended).
I get
Setting 390140 of 396296 (98.45%) samples to NaN, retaining 6156 (1.55%) samples.
Effective window size : 4.096 (s)
At least one good data span is shorter than n_per_seg, and will be analyzed with a shorter window than the rest of the file.
and then
UserWarning: nperseg = 2048 is greater than input length = 1, using nperseg = 1
How do I avoid this?
I suppose I can drop all “good data spans shorter than 2048” (how? is it worth it?)
Another option is to suppress this specific warning:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="nperseg = 2048 is greater than input length")
what would you recommend?
(it is not an option for me to see these warnings - I process many files with many channels).
code:
def edf2psd(edf, config, *, stats=None, verbose=None, logger=pu.stdout) -> pd.DataFrame:
    """Extract the bands from the Raw EDF data.

    Remove flatlines using annotate_amplitude() and expand them.
    Process each channel one-by-one so that a flatline on one channel
    does not affect the others.
    Limit processing to channels (``config.channels``, falling back to
    every channel in ``edf`` when ``config.channels`` is falsy).
    Use welch method, limiting frequencies to [fmin,fmax].
    Ensure frequency step is at most .25Hz.

    Parameters
    ----------
    edf : mne.io.Raw-like
        The recording to analyze.  Only read here, never modified.
    config : object
        Must provide ``channels``, ``min_good``, ``l_freq``, ``h_freq``,
        ``fmin`` and ``fmax`` (schema defined elsewhere -- not visible here).
    stats : object, optional
        When given, ``stats.dropped``, ``stats.run_time`` and
        ``stats.edf_len`` each receive ``.add(...)`` calls -- presumably
        running-statistics accumulators; their type is defined elsewhere.
    verbose : optional
        Passed through to every MNE call.
    logger : object
        Anything with a ``logging.Logger``-style ``info(fmt, *args)`` method.

    Returns
    -------
    pd.DataFrame
        One column per surviving channel, indexed by frequency
        (index name ``"Freq"``).

    Raises
    ------
    ValueError
        If a channel's PSD is partly (but not entirely) NaN, the frequency
        step exceeds 0.25 Hz, frequency grids differ between channels, or
        no channel survives at all.
    """
    start_time = pu.start_time()
    # NOTE(review): to_data_frame() applies MNE's default channel scalings
    # (EEG is converted to microvolts) before the data is wrapped back into
    # a RawArray below -- confirm the absolute PSD units are as intended.
    edf_df = edf.to_data_frame()
    sfreq = edf.info["sfreq"]
    psdd = {}          # channel name -> 1-D PSD array (filled on success)
    freqs = None       # frequency grid of the first successful channel
    bad = {}           # failure kind -> list of channel names
    data_end = edf.times[-1]
    dropped = []       # per-channel fraction of the recording dropped
    for ch in config.channels or edf.ch_names:
        print(f"----{ch} {edf_df[ch].shape}", flush=True)
        # Wrap the single channel as its own (1, n_samples) Raw object so
        # that annotations/filtering on one channel cannot affect another.
        # reshape((-1, n_rows)) on a column of n_rows values yields (1, n_rows).
        ra = mne.io.RawArray(edf_df[ch].values.reshape((-1, edf_df.shape[0])),
                             mne.create_info(ch_names=[ch], sfreq=sfreq, ch_types="eeg"),
                             verbose=verbose)
        # https://mne.tools/stable/generated/mne.preprocessing.annotate_amplitude.html
        # flat=0: presumably only exact flatlines (zero peak-to-peak) get
        # annotated; bad_percent=100: the channel lands in `bads` only when
        # it is flat for the whole recording -- confirm against the docs.
        anns, bads = mne.preprocessing.annotate_amplitude(
            ra, flat=0, bad_percent=100, verbose=verbose)
        if bads:
            # Channel is bad throughout: record, count as 100% dropped, skip.
            bad.setdefault("annotate_amplitude",[]).append(ch)
            dropped.append(1)
            print(f"////bads {ch}", flush=True)
            continue
        if anns: # non-trivial
            # expand_annotations() is defined elsewhere; returning None
            # presumably means too little good data remains (config.min_good)
            # -- verify against its definition.
            anns = expand_annotations(anns, data_end=data_end, min_good=config.min_good)
            if anns is None:
                bad.setdefault("expand_annotations",[]).append(ch)
                dropped.append(1)
                print(f"////anns {ch}", flush=True)
                continue
            ra.set_annotations(anns)
            # Fraction of the recording covered by (expanded) BAD spans.
            dropped.append(anns.duration.sum()/(edf.times[-1] - edf.times[0]))
        else:
            dropped.append(0) # nothing was dropped!
        # https://mne.tools/stable/generated/mne.io.Raw.html#mne.io.Raw.filter
        # skip_by_annotation="BAD_" filters each good span independently,
        # leaving the annotated-bad samples untouched.
        print(f"++filter++{len(ra)}", flush=True)
        ra = ra.filter(l_freq=config.l_freq, h_freq=config.h_freq,
                       skip_by_annotation="BAD_", verbose=verbose)
        # https://mne.tools/stable/generated/mne.io.Raw.html#mne.io.Raw.compute_psd
        # n_jobs > #channels -- does not yield any performance benefit
        # https://github.com/mne-tools/mne-python/pull/11298
        # https://github.com/mne-tools/mne-python/issues/11297
        print(f"++compute_psd++{len(ra)}", flush=True)
        spectrum = ra.compute_psd(method="welch", fmin=config.fmin, fmax=config.fmax,
                                  n_fft=2048, n_jobs=1, verbose=verbose)
        psds, freqs0 = spectrum.get_data(return_freqs=True)
        # psds0 is the boolean NaN mask of the PSD, not PSD data,
        # despite the name.
        psds0 = np.isnan(psds)
        if psds0.any():
            # All-NaN is tolerated (channel recorded as bad and skipped);
            # a partial NaN PSD is unexpected and aborts the whole file.
            bad.setdefault("spectrum NaN",[]).append(ch)
            if not psds0.all():
                raise ValueError("edf2psd: some but not all PSD are NaN",edf,ch,psds)
        else:
            if freqs is None:
                # First successful channel fixes the frequency grid;
                # the resolution requirement is checked once, here.
                freqs = freqs0
                if np.diff(freqs).mean() > 0.25:
                    raise ValueError("edf2psd: freq step too high",
                                     edf.info, np.diff(freqs).mean())
            elif not np.array_equal(freqs,freqs0):
                # Every later channel must reproduce the same grid exactly.
                raise ValueError("edf2psd: inconsistent freq",edf,ch,freqs,freqs0)
            psdd[ch] = psds.flatten()
    if not psdd:
        raise ValueError("edf2psd: no useful data", edf, bad)
    df = pd.DataFrame(psdd, index=freqs)
    df.index.name = "Freq"
    elapsed = pu.elapsed(start_time)
    logger.info("edf2psd: %s [%s]", pu.shape(df), elapsed)
    if stats is not None:
        # Accumulate one drop-fraction per attempted channel (including
        # channels skipped above), plus per-file timing and length.
        for d in dropped:
            stats.dropped.add(d)
        stats.run_time.add(elapsed)
        stats.edf_len.add(len(edf))
    if bad:
        # One summary line listing every failure kind with its channels.
        logger.info(" %d failed:%s", sum(map(len,bad.values())), "".join(
            f"\n {kind} {len(chans)}: {chans}" for kind,chans in bad.items()))
    return df