Estimating second derivative in python

Pouyarabiei · May 14, 2026, 12:54pm

Dear MNE community

I am analyzing EEG microstate using MNE-Pycorstate, and I am going to estimate MetaCreation based on the qulity metrics defined in Pycorstate.
However, for some, I need to calculate the second derivative, like cross-validation or Silhouette; however, I couldn’t find a formula to calculate in Python.

I was wondering if you know how to do it?

Here is my code for the second derivative, and using it in MetaCreation only used for cross-validation. Here I asked AI to help, but I am not sure if this is correct.

Best regards,
Pouya

def compute_cv_criterion(X, A, labels):
    """
    How well the cluster centroids explain the assigned EEG maps.
    It shows how much variance the model failed to explain.

    Higher CV_criterion = worse clustering
    Lower CV_criterion = better clustering (more stable, lower residual variance)
    
    If maps match their cluster centroid well = low residual = CV small
    If maps are scattered or misassigned = residual high = CV large
  
    Pascual-Marqui cross-validation criterion.
    X: (n_channels, n_samples)
    A: (n_channels, n_clusters)
    labels: (n_samples,)
    
    """
    C, N = X.shape
    K = A.shape[1]
    
    # Filter out unassigned samples (Cartool threshold < 0.5)
    valid_idx = labels != -1
    X = X[:, valid_idx]
    labels = labels[valid_idx]
    N = X.shape[1]

    if N == 0:
        # In case all samples were unassigned (extremely rare)
        return np.inf, np.inf
    
    # Isolate normalization to copies (does NOT alter originals)
    X = X - X.mean(axis=0, keepdims=True)
    A = A - A.mean(axis=0, keepdims=True)
    A = A / np.linalg.norm(A, axis=0, keepdims=True)

    # Vectorized residual variance ---
    # Compute squared norm of each sample (||x_n||^2)
    x_norm2 = np.sum(X ** 2, axis=0)

    # Get the template assigned to each sample
    A_sel = A[:, labels]  # (C, N)

    # Compute projection (a^T x) for each sample
    proj = np.sum(A_sel * X, axis=0)

    # Residual variance = ||x||^2 - (a^T x)^2
    resid = x_norm2 - proj**2

    # CV criterion
    sigma2 = np.sum(resid) / (N * (C - 1))
    cv = sigma2 * ((C - 1) / (C - K - 1))**2
    
    return cv, sigma2

def _second_derivative_discrete(values: np.ndarray) -> np.ndarray:
    """
    second derivative on a 1D array of metric values.

    values: array ordered by k (ascending).
    Returns: array of same length with second derivative at each index.
    """
    n = len(values)
    d2 = np.empty(n, dtype=float)

    for i in range(n):
        i_prev = max(0, i - 1)
        i_next = min(n - 1, i + 1)

        vm1 = values[i_prev]
        v0  = values[i]
        vp1 = values[i_next]

        d2[i] = vm1 + vp1 - 2.0 * v0

    return d2

def compute_meta_criterion(df):
    """
Compute the MetaCriterion (Custo et al., 2017) combining multiple clustering metrics.

For each number of clusters (k), metric values are ranked across all k.
The MetaCriterion is defined as:

    MetaCriterion(k) = IQM(ranks_k)** 2 / IQR(ranks_k)
    
Where:
    IQM = Interquartile Mean of the metric ranks (central tendency)
    IQR = Interquartile Range of the metric ranks (dispersion)

A higher MetaCriterion indicates high overall performance and strong agreement
among metrics, suggesting the most stable and reliable cluster solution.

   """
    data = df.copy().sort_values("k").reset_index(drop=True)

    # Convert metrics to numeric safely
    for col in ["Silhouette", "Calinski-Harabasz", "Dunn",
                "Davies-Bouldin", "CV"]:
        if col in data.columns:
            data[col] = pd.to_numeric(data[col], errors="coerce")

    # Invert DB index = higher DB_inv is better
    if "Davies-Bouldin" in data.columns:
        data["DB_inv"] = 1.0 / (1.0 + data["Davies-Bouldin"])

    # Second derivative of CV
    if "CV" in data.columns and data["CV"].notna().any():
        cv_vals = data["CV"].to_numpy()
        data["CV_d2"] = _second_derivative_discrete(cv_vals)
    else:
        data["CV_d2"] = np.nan

    # Select metrics to feed into MetaCriterion
    metrics = []
    for m in ["Silhouette", "Calinski-Harabasz", "Dunn", "DB_inv", "CV_d2"]:
        if m in data.columns and data[m].notna().any():
            metrics.append(m)

    if len(metrics) == 0:
        raise ValueError("No valid metrics available for MetaCriterion.")

    # Rank each metric across k (higher rank = better)
    ranks = np.vstack([
        rankdata(data[m], method="average")
        for m in metrics
    ]).T  # shape: (n_k, n_metrics)

    # Compute IQM and IQR of ranks (Custo et al., 2017)
    iqm, iqr_vals = [], []
    for row in ranks:
        sorted_r = np.sort(row)
        q1, q3 = np.percentile(sorted_r, [25, 75])

        in_iqr = sorted_r[(sorted_r >= q1) & (sorted_r <= q3)]
        iqm.append(np.mean(in_iqr))

        iqr_vals.append(
            iqr(sorted_r, rng=(25, 75), nan_policy="omit")
        )

    iqm = np.array(iqm)
    iqr_vals = np.array(iqr_vals)

    # Avoid division by zero in degenerate cases
    iqr_safe = np.where(iqr_vals == 0, np.nan, iqr_vals)

    # Compute MetaCriterion exactly as defined
    data["MetaCriterion"] = (iqm ** 2) / iqr_safe

    return data

Topic		Replies	Views
[Freesurfer] question about how to calculate COV for MNE Mailing List Archive (read-only) list-archive	12	234	April 11, 2014
Get the scores of each trial not only each split Support & Discussions	6	407	May 4, 2021
noise covariance matrix Mailing List Archive (read-only) list-archive	6	295	April 10, 2017
Computing regression on sensor data then transforming to source space Mailing List Archive (read-only) list-archive	12	406	February 25, 2014
TFCE parameters Mailing List Archive (read-only) list-archive	17	598	July 29, 2016

Estimating second derivative in python

Related topics