# Source code for calibrated_explanations.core.validation

"""Validation helpers shared across the core package.

These utilities centralize defensive argument checks while preserving
existing behavior, ensuring future refactors can rely on a consistent
error vocabulary.

ADR-002 compliance: Validation functions use the exception taxonomy
(ValidationError, DataShapeError, NotFittedError, ConfigurationError, etc.)
and accept optional details payloads.
"""

from __future__ import annotations

import sys
from typing import Any, Literal, Type, cast

import numpy as np
import numpy.typing as npt

from ..utils.exceptions import (
    CalibratedError,
    DataShapeError,
    ModelNotSupportedError,
    NotFittedError,
    ValidationError,
)


def validate_not_none(value: Any, name: str) -> None:
    """Reject a missing (``None``) argument.

    Parameters
    ----------
    value : Any
        Object to check. Only identity with ``None`` is tested, so falsy
        values such as ``0``, ``""`` or ``[]`` are accepted.
    name : str
        Argument name interpolated into the error message.

    Raises
    ------
    ValidationError
        If ``value`` is ``None``.
    """
    if value is not None:
        return
    raise ValidationError(f"Argument '{name}' must not be None.")
def validate_type(value: Any, expected_type: type | tuple[type, ...], name: str) -> None:
    """Ensure that ``value`` is an instance of ``expected_type``.

    Parameters
    ----------
    value : Any
        Object to check.
    expected_type : type or tuple of types
        Anything accepted as the second argument of ``isinstance``.
    name : str
        Argument name interpolated into the error message.

    Raises
    ------
    DataShapeError
        If ``value`` is not an instance of ``expected_type``.
    """
    if isinstance(value, expected_type):
        return

    # ``isinstance`` also accepts tuples of types, which have no ``__name__``;
    # build the label defensively so the intended DataShapeError is raised
    # instead of an AttributeError while formatting the message.
    candidates = expected_type if isinstance(expected_type, tuple) else (expected_type,)
    expected_label = " or ".join(
        getattr(candidate, "__name__", repr(candidate)) for candidate in candidates
    )
    raise DataShapeError(
        f"Argument '{name}' must be of type {expected_label}, got {type(value).__name__}."
    )
def validate_non_empty(value: Any, name: str) -> None:
    """Ensure that length-aware inputs are not empty.

    Objects without ``__len__`` are accepted unchanged. Objects that expose
    ``__len__`` but are unsized (for example 0-d NumPy arrays, whose ``len()``
    raises ``TypeError``) are treated as non-empty.

    Parameters
    ----------
    value : Any
        Object to check.
    name : str
        Argument name interpolated into the error message.

    Raises
    ------
    ValidationError
        If ``len(value)`` is ``0``.
    """
    if not hasattr(value, "__len__"):
        return
    try:
        size = len(value)
    except TypeError:
        # Unsized object advertising __len__ (e.g. np.array(5)): not empty.
        return
    if size == 0:
        raise ValidationError(f"Argument '{name}' must not be empty.")
def validate_inputs(
    x: Any,
    y: Any | None = None,
    task: Literal["auto", "classification", "regression"] = "auto",
    allow_nan: bool = False,
    require_y: bool = False,
    n_features: int | None = None,
    class_labels: Any | None = None,
    check_finite: bool = True,
) -> None:
    """Validate input features and target for downstream operations.

    This function provides the primary validation entry point per ADR-002,
    accepting feature matrix x and optional target y with shape and value
    checks. Every raised error carries a structured ``details`` payload.

    Parameters
    ----------
    x : array-like
        Feature matrix of shape (n_samples, n_features). Can be a NumPy array,
        pandas DataFrame, or similar.
    y : array-like, optional
        Target vector. It is flattened to 1D and its length must match the
        number of rows in x. Default is None.
    task : {"auto", "classification", "regression"}, default="auto"
        Task type. Currently informational only: when "auto" the task is
        inferred from y and the result is discarded; no task-specific check
        is performed.
    allow_nan : bool, default=False
        If True, the finiteness check is skipped entirely, so both NaN and
        infinite values are accepted.
    require_y : bool, default=False
        If True, raises ValidationError when y is None.
    n_features : int, optional
        Expected number of features in x. If provided and mismatch occurs,
        raises DataShapeError.
    class_labels : array-like, optional
        Accepted for API compatibility; this function neither validates nor
        stores it.
    check_finite : bool, default=True
        If True (and ``allow_nan`` is False), numeric x and y must contain
        only finite values. Non-numeric arrays are not checked.

    Raises
    ------
    ValidationError
        When y is required but None, or when numeric values contain NaN/inf
        while the finiteness check is active.
    DataShapeError
        When x is not 2D, feature count mismatches, or y length mismatches.

    Examples
    --------
    >>> from calibrated_explanations.core.validation import validate_inputs
    >>> import numpy as np
    >>> x = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> y = np.array([0, 1])
    >>> validate_inputs(x, y, task="classification", require_y=True, n_features=2)
    """
    validate_not_none(x, "x")
    x_arr = _as_2d_array(x)

    if x_arr.ndim != 2:
        raise DataShapeError(
            "Argument 'x' must be 2D (n_samples, n_features).",
            details={"param": "x", "ndim": x_arr.ndim, "expected": 2},
        )

    n_samples = x_arr.shape[0]
    if n_features is not None and x_arr.shape[1] != n_features:
        raise DataShapeError(
            f"Argument 'x' must have {n_features} features, got {x_arr.shape[1]}.",
            details={
                "param": "x",
                "expected_features": n_features,
                "actual_features": x_arr.shape[1],
            },
        )

    if require_y and y is None:
        raise ValidationError(
            "Argument 'y' must be provided when require_y=True.",
            details={"param": "y", "requirement": "required", "task": task},
        )

    # The finiteness check only runs when requested and NaNs are not allowed.
    enforce_finite = check_finite and not allow_nan

    if y is not None:
        y_arr = _as_1d_array(y)
        if y_arr.shape[0] != n_samples:
            raise DataShapeError(
                f"Length of 'y' ({y_arr.shape[0]}) does not match number of samples in x ({n_samples}).",
                details={
                    "param": "y",
                    "y_length": y_arr.shape[0],
                    "x_samples": n_samples,
                },
            )
        if (
            enforce_finite
            and np.issubdtype(y_arr.dtype, np.number)
            and not np.isfinite(y_arr).all()
        ):
            raise ValidationError(
                "Argument 'y' contains NaN or infinite values.",
                details={"param": "y", "check": "finitude", "allow_nan": allow_nan},
            )

    # np.isfinite raises TypeError on non-numeric dtypes (object, str), so x is
    # guarded the same way as y instead of crashing on e.g. categorical frames.
    if (
        enforce_finite
        and np.issubdtype(x_arr.dtype, np.number)
        and not np.isfinite(x_arr).all()
    ):
        raise ValidationError(
            "Argument 'x' contains NaN or infinite values.",
            details={"param": "x", "check": "finitude", "allow_nan": allow_nan},
        )

    # Reserve task inference for future behavior without changing runtime output.
    _ = infer_task(x, y, None) if task == "auto" else task
def infer_task(
    x: Any = None, y: Any = None, model: Any = None
) -> Literal["classification", "regression"]:
    """Guess whether the problem is classification or regression.

    Decision order:

    1. If ``model`` is given, it alone decides: a ``predict_proba`` attribute
       means classification, anything else regression.
    2. Otherwise, if ``y`` is given, a floating-point target dtype means
       regression and every other dtype classification.
    3. With neither available, regression is returned as the fallback.

    ``x`` is accepted for signature symmetry but is not inspected.
    """
    if model is not None:
        return "classification" if hasattr(model, "predict_proba") else "regression"

    if y is None:
        return "regression"

    target_is_float = np.issubdtype(_as_1d_array(y).dtype, np.floating)
    return "regression" if target_is_float else "classification"
def _as_2d_array(x: Any) -> npt.NDArray[np.generic]:
    """Return ``x`` converted to an ``ndarray``.

    Despite the name, no reshaping is performed: the result keeps whatever
    dimensionality ``np.asarray`` produces, and callers check ``ndim``
    themselves. Objects exposing both ``values`` and ``shape`` (DataFrame-like
    containers) are converted through ``x.values``; if that conversion fails,
    ``x`` itself is converted instead.
    """
    if hasattr(x, "values") and hasattr(x, "shape"):
        try:
            return cast(npt.NDArray[np.generic], np.asarray(x.values))
        except Exception:  # pragma: no cover - fallback
            # Best-effort: fall through to converting ``x`` directly.
            # KeyboardInterrupt/SystemExit are not Exceptions and propagate.
            pass
    return cast(npt.NDArray[np.generic], np.asarray(x))


def _as_1d_array(y: Any) -> npt.NDArray[np.generic]:
    """Return ``y`` converted to an ``ndarray`` and flattened to 1D.

    Non-ndarray objects exposing ``values`` (Series-like containers) are
    unwrapped through that attribute first. The array is then flattened with
    ``reshape(-1)``, so a scalar input becomes a one-element array.
    """
    if hasattr(y, "values") and not isinstance(y, np.ndarray):
        y = y.values
    arr = cast(npt.NDArray[np.generic], np.asarray(y))
    return cast(npt.NDArray[np.generic], arr.reshape(-1))
def validate_inputs_matrix(
    x: Any,
    y: Any | None = None,
    *,
    task: Literal["auto", "classification", "regression"] = "auto",
    allow_nan: bool = False,
    require_y: bool = False,
    n_features: int | None = None,
    check_finite: bool = True,
) -> None:
    """Validate a feature/target matrix pair for downstream operations.

    - Ensure ``x`` is 2D and matches the expected feature count when provided.
    - Confirm that ``y`` has the same number of samples when supplied.
    - Guard against NaN or infinite values unless explicitly allowed.

    Parameters
    ----------
    x : array-like
        Feature matrix; must convert to a 2D array.
    y : array-like, optional
        Target values; flattened to 1D before its length is compared with the
        number of rows in ``x``.
    task : {"auto", "classification", "regression"}, default="auto"
        Currently informational only; when "auto" the task is inferred from
        ``y`` and the result is discarded.
    allow_nan : bool, default=False
        If True, the finiteness check is skipped entirely (NaN and inf pass).
    require_y : bool, default=False
        If True, ``y`` must not be None.
    n_features : int, optional
        Expected number of columns in ``x``.
    check_finite : bool, default=True
        If True (and ``allow_nan`` is False), numeric ``x`` and ``y`` must be
        finite. Non-numeric arrays are not checked.

    Raises
    ------
    ValidationError
        When ``x`` is None, ``y`` is required but missing, or numeric values
        contain NaN/inf while the finiteness check is active.
    DataShapeError
        When ``x`` is not 2D, the feature count mismatches, or the length of
        ``y`` differs from the number of samples.
    """
    validate_not_none(x, "x")
    x_arr = _as_2d_array(x)

    if x_arr.ndim != 2:
        raise DataShapeError("Argument 'x' must be 2D (n_samples, n_features).")

    n_samples = x_arr.shape[0]
    if n_features is not None and x_arr.shape[1] != n_features:
        raise DataShapeError(f"Argument 'x' must have {n_features} features, got {x_arr.shape[1]}.")

    if require_y and y is None:
        raise ValidationError("Argument 'y' must be provided when require_y=True.")

    # The finiteness check only runs when requested and NaNs are not allowed.
    enforce_finite = check_finite and not allow_nan

    if y is not None:
        y_arr = _as_1d_array(y)
        if y_arr.shape[0] != n_samples:
            raise DataShapeError(
                f"Length of 'y' ({y_arr.shape[0]}) does not match number of samples in x ({n_samples})."
            )
        if (
            enforce_finite
            and np.issubdtype(y_arr.dtype, np.number)
            and not np.isfinite(y_arr).all()
        ):
            raise ValidationError("Argument 'y' contains NaN or infinite values.")

    # np.isfinite raises TypeError on non-numeric dtypes (object, str), so x is
    # guarded the same way as y instead of crashing on e.g. categorical frames.
    if (
        enforce_finite
        and np.issubdtype(x_arr.dtype, np.number)
        and not np.isfinite(x_arr).all()
    ):
        raise ValidationError("Argument 'x' contains NaN or infinite values.")

    # Reserve task inference for future behavior without changing runtime output.
    _ = infer_task(x, y, None) if task == "auto" else task
def validate_model(model: Any) -> None:
    """Validate minimal model protocol requirements.

    Parameters
    ----------
    model : Any
        Candidate learner. It must not be ``None`` and must expose a callable
        ``predict`` attribute.

    Raises
    ------
    ValidationError
        If ``model`` is ``None`` (raised by ``validate_not_none``).
    ModelNotSupportedError
        If ``model`` has no ``predict`` attribute or that attribute is not
        callable.
    """
    validate_not_none(model, "model")
    # A bare hasattr check would accept ``predict = None`` or other data
    # attributes; the contract in the message is a *method*, so require callable.
    if not callable(getattr(model, "predict", None)):
        raise ModelNotSupportedError("Model must implement a 'predict' method.")
def validate_fit_state(obj: Any, *, require: bool = True) -> None:
    """Guard stateful operations behind the object's ``fitted`` flag.

    Parameters
    ----------
    obj : Any
        Estimator or explainer to inspect. Objects that do not define a
        ``fitted`` attribute are accepted without complaint.
    require : bool, default=True
        When False the check is skipped entirely.

    Raises
    ------
    NotFittedError
        If ``require`` is True, ``obj`` defines ``fitted``, and that attribute
        is falsy.
    """
    must_check = require and hasattr(obj, "fitted")
    if must_check and not obj.fitted:
        raise NotFittedError("Operation requires a fitted estimator/explainer.")
def validate(
    condition: bool,
    exc_cls: Type[CalibratedError],
    message: str,
    *,
    details: dict[str, Any] | None = None,
) -> None:
    """Raise ``exc_cls`` unless ``condition`` holds.

    A compact guard-clause helper: callers state the invariant, the exception
    type, and the message in a single call.

    Parameters
    ----------
    condition : bool
        Invariant to enforce. Any falsy value triggers the exception.
    exc_cls : Type[CalibratedError]
        Exception class to instantiate; it is called as
        ``exc_cls(message, details=details)``.
    message : str
        Error message passed to the exception.
    details : dict, optional
        Structured error details forwarded to the exception.

    Raises
    ------
    exc_cls
        If ``condition`` is falsy.

    Examples
    --------
    >>> from calibrated_explanations.core.validation import validate
    >>> from calibrated_explanations.utils.exceptions import ValidationError
    >>> validate(len(x) > 0, ValidationError, "x must not be empty", details={"param": "x"})
    """
    if condition:
        return
    raise exc_cls(message, details=details)
# Public names exported by ``from calibrated_explanations.core.validation import *``.
# The underscore-prefixed array helpers (``_as_2d_array``, ``_as_1d_array``) are not listed.
__all__ = [ "validate_inputs", "validate_not_none", "validate_type", "validate_non_empty", "validate_inputs_matrix", "validate_model", "validate_fit_state", "infer_task", "validate", ]