Dear MNE community
I am analyzing EEG microstate using MNE-Pycorstate, and I am going to estimate MetaCreation based on the qulity metrics defined in Pycorstate.
However, for some, I need to calculate the second derivative, like cross-validation or Silhouette; however, I couldn’t find a formula to calculate in Python.
I was wondering if you know how to do it?
Here is my code for the second derivative, and using it in MetaCreation only used for cross-validation. Here I asked AI to help, but I am not sure if this is correct.
Best regards,
Pouya
def compute_cv_criterion(X, A, labels):
"""
How well the cluster centroids explain the assigned EEG maps.
It shows how much variance the model failed to explain.
Higher CV_criterion = worse clustering
Lower CV_criterion = better clustering (more stable, lower residual variance)
If maps match their cluster centroid well = low residual = CV small
If maps are scattered or misassigned = residual high = CV large
Pascual-Marqui cross-validation criterion.
X: (n_channels, n_samples)
A: (n_channels, n_clusters)
labels: (n_samples,)
"""
C, N = X.shape
K = A.shape[1]
# Filter out unassigned samples (Cartool threshold < 0.5)
valid_idx = labels != -1
X = X[:, valid_idx]
labels = labels[valid_idx]
N = X.shape[1]
if N == 0:
# In case all samples were unassigned (extremely rare)
return np.inf, np.inf
# Isolate normalization to copies (does NOT alter originals)
X = X - X.mean(axis=0, keepdims=True)
A = A - A.mean(axis=0, keepdims=True)
A = A / np.linalg.norm(A, axis=0, keepdims=True)
# Vectorized residual variance ---
# Compute squared norm of each sample (||x_n||^2)
x_norm2 = np.sum(X ** 2, axis=0)
# Get the template assigned to each sample
A_sel = A[:, labels] # (C, N)
# Compute projection (a^T x) for each sample
proj = np.sum(A_sel * X, axis=0)
# Residual variance = ||x||^2 - (a^T x)^2
resid = x_norm2 - proj**2
# CV criterion
sigma2 = np.sum(resid) / (N * (C - 1))
cv = sigma2 * ((C - 1) / (C - K - 1))**2
return cv, sigma2
def _second_derivative_discrete(values: np.ndarray) -> np.ndarray:
"""
second derivative on a 1D array of metric values.
values: array ordered by k (ascending).
Returns: array of same length with second derivative at each index.
"""
n = len(values)
d2 = np.empty(n, dtype=float)
for i in range(n):
i_prev = max(0, i - 1)
i_next = min(n - 1, i + 1)
vm1 = values[i_prev]
v0 = values[i]
vp1 = values[i_next]
d2[i] = vm1 + vp1 - 2.0 * v0
return d2
def compute_meta_criterion(df):
"""
Compute the MetaCriterion (Custo et al., 2017) combining multiple clustering metrics.
For each number of clusters (k), metric values are ranked across all k.
The MetaCriterion is defined as:
MetaCriterion(k) = IQM(ranks_k)** 2 / IQR(ranks_k)
Where:
IQM = Interquartile Mean of the metric ranks (central tendency)
IQR = Interquartile Range of the metric ranks (dispersion)
A higher MetaCriterion indicates high overall performance and strong agreement
among metrics, suggesting the most stable and reliable cluster solution.
"""
data = df.copy().sort_values("k").reset_index(drop=True)
# Convert metrics to numeric safely
for col in ["Silhouette", "Calinski-Harabasz", "Dunn",
"Davies-Bouldin", "CV"]:
if col in data.columns:
data[col] = pd.to_numeric(data[col], errors="coerce")
# Invert DB index = higher DB_inv is better
if "Davies-Bouldin" in data.columns:
data["DB_inv"] = 1.0 / (1.0 + data["Davies-Bouldin"])
# Second derivative of CV
if "CV" in data.columns and data["CV"].notna().any():
cv_vals = data["CV"].to_numpy()
data["CV_d2"] = _second_derivative_discrete(cv_vals)
else:
data["CV_d2"] = np.nan
# Select metrics to feed into MetaCriterion
metrics = []
for m in ["Silhouette", "Calinski-Harabasz", "Dunn", "DB_inv", "CV_d2"]:
if m in data.columns and data[m].notna().any():
metrics.append(m)
if len(metrics) == 0:
raise ValueError("No valid metrics available for MetaCriterion.")
# Rank each metric across k (higher rank = better)
ranks = np.vstack([
rankdata(data[m], method="average")
for m in metrics
]).T # shape: (n_k, n_metrics)
# Compute IQM and IQR of ranks (Custo et al., 2017)
iqm, iqr_vals = [], []
for row in ranks:
sorted_r = np.sort(row)
q1, q3 = np.percentile(sorted_r, [25, 75])
in_iqr = sorted_r[(sorted_r >= q1) & (sorted_r <= q3)]
iqm.append(np.mean(in_iqr))
iqr_vals.append(
iqr(sorted_r, rng=(25, 75), nan_policy="omit")
)
iqm = np.array(iqm)
iqr_vals = np.array(iqr_vals)
# Avoid division by zero in degenerate cases
iqr_safe = np.where(iqr_vals == 0, np.nan, iqr_vals)
# Compute MetaCriterion exactly as defined
data["MetaCriterion"] = (iqm ** 2) / iqr_safe
return data