# Copyright 2022 - 2026 The PyMC Labs Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Inverse propensity weighting
"""
import warnings
from typing import Any, Literal
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
from patsy import dmatrices
from sklearn.linear_model import LinearRegression as sk_lin_reg
from causalpy.custom_exceptions import DataException
from causalpy.pymc_models import PropensityScore
from causalpy.reporting import EffectSummary
from .base import BaseExperiment
[docs]
class InversePropensityWeighting(BaseExperiment):
"""A class to analyse inverse propensity weighting experiments.
Parameters
----------
data : pd.DataFrame
A pandas dataframe.
formula : str
A statistical model formula for the propensity model.
outcome_variable : str
A string denoting the outcome variable in data to be reweighted.
weighting_scheme : str
A string denoting which weighting scheme to use among: 'raw',
'robust', 'doubly_robust' or 'overlap'. See Aronow and Miller
"Foundations of Agnostic Statistics" for discussion and computation
of these weighting schemes.
model : PropensityScore, optional
A PyMC model. Defaults to PropensityScore.
Example
--------
>>> import causalpy as cp
>>> df = cp.load_data("nhefs")
>>> seed = 42
>>> result = cp.InversePropensityWeighting(
... df,
... formula="trt ~ 1 + age + race",
... outcome_variable="outcome",
... weighting_scheme="robust",
... model=cp.pymc_models.PropensityScore(
... sample_kwargs={
... "draws": 100,
... "target_accept": 0.95,
... "random_seed": seed,
... "progressbar": False,
... },
... ),
... )
"""
supports_ols = False
supports_bayes = True
_default_model_class = PropensityScore
[docs]
def __init__(
    self,
    data: pd.DataFrame,
    formula: str,
    outcome_variable: str,
    weighting_scheme: str,
    model: PropensityScore | None = None,
    **kwargs: dict,
) -> None:
    """Store configuration, validate inputs, build design matrices, and fit.

    Parameters
    ----------
    data : pd.DataFrame
        The observational dataset containing treatment, outcome and covariates.
    formula : str
        Patsy formula for the propensity model (treatment ~ covariates).
    outcome_variable : str
        Column name in ``data`` holding the outcome to be reweighted.
    weighting_scheme : str
        One of 'raw', 'robust', 'doubly_robust' or 'overlap'.
    model : PropensityScore | None, optional
        The PyMC propensity score model; the base class supplies the default
        when ``None``.
    **kwargs : dict
        Accepted for interface compatibility.
        # NOTE(review): kwargs are currently unused here — confirm whether
        # they should be forwarded to the base class.
    """
    super().__init__(model=model)
    self.expt_type = "Inverse Propensity Score Weighting"
    # Attributes must be set before input_validation(), which checks them.
    self.data = data
    self.formula = formula
    self.outcome_variable = outcome_variable
    self.weighting_scheme = weighting_scheme
    self.input_validation()
    self._build_design_matrices()
    # Fit the propensity score model immediately on construction.
    self.algorithm()
def _build_design_matrices(self) -> None:
    """Build design matrices for the propensity score model.

    Uses the patsy formula stored in ``self.formula`` to construct treatment
    and covariate matrices from ``self.data``. Also creates an augmented
    design matrix (``self.X_outcome``) that includes the treatment indicator,
    for use with doubly-robust outcome modelling.
    """
    t, X = dmatrices(self.formula, self.data)
    # Keep design info for BOTH matrices. The previous code assigned
    # X.design_info to self._t_design_info twice, clobbering the treatment
    # design info and never populating the covariate design info.
    self._t_design_info = t.design_info
    self._x_design_info = X.design_info
    self.labels = X.design_info.column_names
    self.t, self.X = np.asarray(t), np.asarray(X)
    self.y = self.data[self.outcome_variable]
    COORDS = {"obs_ind": list(range(self.X.shape[0])), "coeffs": self.labels}
    self.coords = COORDS
    # Augmented design matrix (covariates + treatment) for doubly-robust use.
    self.X_outcome = pd.DataFrame(self.X, columns=self.labels)
    self.X_outcome["trt"] = self.t
    self.coords["outcome_coeffs"] = self.X_outcome.columns
[docs]
def algorithm(self) -> None:
    """Run the experiment algorithm by fitting the propensity score model.

    Delegates to ``self.model.fit`` with the covariate matrix ``self.X``,
    treatment vector ``self.t``, and coordinate metadata ``self.coords``.
    """
    self.model.fit(X=self.X, t=self.t, coords=self.coords)  # type: ignore[call-arg]
def _prepare_ps(self, ps: np.ndarray, eps: float = 1e-6) -> np.ndarray:
"""Check for extreme propensity scores and clip them for numerical stability.
Parameters
----------
ps : np.ndarray
Raw propensity scores for each observation, expected in [0, 1].
eps : float, optional
Clipping tolerance. Scores are capped to the interval
``[eps, 1 - eps]``. Defaults to 1e-6.
Returns
-------
np.ndarray
Clipped propensity scores in ``[eps, 1 - eps]``.
Warns
-----
UserWarning
If any scores fall outside ``[eps, 1 - eps]`` before clipping.
"""
if np.any((ps < eps) | (ps > 1 - eps)):
warnings.warn(
f"Extreme propensity scores detected (outside [{eps}, {1 - eps}]). "
"Capping values to prevent numerical instability.",
UserWarning,
stacklevel=2,
)
return np.clip(ps, eps, 1 - eps)
[docs]
def make_robust_adjustments(
self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, int, int]:
"""Compute inverse-propensity-weighted outcomes using the robust (Horvitz-Thompson) scheme.
This estimator is discussed in Aronow and Miller's
*Foundations of Agnostic Statistics* as being related to the
Horvitz-Thompson method.
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple[pd.Series, pd.Series, int, int]
A tuple of ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)``
where the weighted outcomes are the IPW-adjusted outcome values for the
control and treated groups, and ``n_ntrt`` / ``n_trt`` are the
corresponding group sizes used for normalisation.
"""
ps = self._prepare_ps(ps) # <--- Safety Check
X = pd.DataFrame(self.X, columns=self.labels)
X["ps"] = ps
X[self.outcome_variable] = self.y
t = self.t.flatten()
p_of_t = np.mean(t)
X["i_ps"] = np.where(t == 1, (p_of_t / X["ps"]), (1 - p_of_t) / (1 - X["ps"]))
n_ntrt = X[t == 0].shape[0]
n_trt = X[t == 1].shape[0]
outcome_trt = X[t == 1][self.outcome_variable]
outcome_ntrt = X[t == 0][self.outcome_variable]
i_propensity0 = X[t == 0]["i_ps"]
i_propensity1 = X[t == 1]["i_ps"]
weighted_outcome1 = outcome_trt * i_propensity1
weighted_outcome0 = outcome_ntrt * i_propensity0
return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
[docs]
def make_raw_adjustments(
self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, int, int]:
"""Compute inverse-propensity-weighted outcomes using the raw (basic) scheme.
This is the simplest form of inverse propensity weighting, as discussed
in Aronow and Miller's *Foundations of Agnostic Statistics*. Each
observation is weighted by the reciprocal of its propensity score (or
``1 - ps`` for the control group).
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple[pd.Series, pd.Series, int, int]
A tuple of ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)``
where the weighted outcomes are the IPW-adjusted outcome values for the
control and treated groups. ``n_ntrt`` and ``n_trt`` are both equal
to the total number of observations (the raw scheme normalises by
the full sample size).
"""
ps = self._prepare_ps(ps) # <--- Safety Check
X = pd.DataFrame(self.X, columns=self.labels)
X["ps"] = ps
X[self.outcome_variable] = self.y
t = self.t.flatten()
X["ps"] = np.where(t, X["ps"], 1 - X["ps"])
X["i_ps"] = 1 / X["ps"]
n_ntrt = n_trt = len(X)
outcome_trt = X[t == 1][self.outcome_variable]
outcome_ntrt = X[t == 0][self.outcome_variable]
i_propensity0 = X[t == 0]["i_ps"]
i_propensity1 = X[t == 1]["i_ps"]
weighted_outcome1 = outcome_trt * i_propensity1
weighted_outcome0 = outcome_ntrt * i_propensity0
return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
[docs]
def make_overlap_adjustments(
self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, pd.Series, pd.Series]:
"""Compute inverse-propensity-weighted outcomes using the overlap scheme.
This weighting scheme was adapted from Lucy D’Agostino McGowan’s blog
on propensity score weights, referenced in the primary CausalPy
explainer notebook. Overlap weights target the population for which
there is clinical equipoise (i.e., where propensity scores are near
0.5), reducing sensitivity to extreme scores.
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple[pd.Series, pd.Series, pd.Series, pd.Series]
A tuple of ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)``
where the weighted outcomes and normalisation terms are all
``pd.Series`` (unlike the raw/robust schemes which return integer
counts).
"""
ps = self._prepare_ps(ps) # <--- Safety Check
X = pd.DataFrame(self.X, columns=self.labels)
X["ps"] = ps
X[self.outcome_variable] = self.y
t = self.t.flatten()
X["i_ps"] = np.where(t, (1 - X["ps"]) * t, X["ps"] * (1 - t))
n_ntrt = (1 - t[t == 0]) * X[t == 0]["i_ps"]
n_trt = t[t == 1] * X[t == 1]["i_ps"]
outcome_trt = X[t == 1][self.outcome_variable]
outcome_ntrt = X[t == 0][self.outcome_variable]
i_propensity0 = X[t == 0]["i_ps"]
i_propensity1 = X[t == 1]["i_ps"]
weighted_outcome1 = t[t == 1] * outcome_trt * i_propensity1
weighted_outcome0 = (1 - t[t == 0]) * outcome_ntrt * i_propensity0
return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
[docs]
def make_doubly_robust_adjustment(
    self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, None, None]:
    """Compute doubly-robust adjusted outcomes.

    As discussed in Aronow and Miller's *Foundations of Agnostic
    Statistics*. The outcome model is fixed to ordinary least squares, so
    the compromise between outcome model and propensity model is always
    performed with a linear regression.

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple[pd.Series, pd.Series, None, None]
        ``(weighted_outcome0, weighted_outcome1, None, None)``. The two
        ``None`` values keep the interface consistent with the other
        adjustment methods; no group sizes are needed because the
        doubly-robust estimator averages over all observations.
    """
    ps = self._prepare_ps(ps)  # guard against extreme scores
    design = pd.DataFrame(self.X, columns=self.labels)
    design["ps"] = ps
    treated = self.t.flatten()
    # Fit separate OLS outcome models on the control and treated subsets.
    model_ctrl = sk_lin_reg().fit(design[treated == 0].astype(float), self.y[treated == 0])
    model_trt = sk_lin_reg().fit(design[treated == 1].astype(float), self.y[treated == 1])
    pred_ctrl = model_ctrl.predict(design)
    pred_trt = model_trt.predict(design)
    # Augmented IPW: propensity-weighted residuals anchored on the
    # outcome-model predictions (the "compromise" between the two models).
    weighted_outcome0 = (1 - treated) * (self.y - pred_ctrl) / (1 - design["ps"]) + pred_ctrl
    weighted_outcome1 = treated * (self.y - pred_trt) / design["ps"] + pred_trt
    return weighted_outcome0, weighted_outcome1, None, None
def _compute_ate_robust(
self, ps: np.ndarray
) -> tuple[float | np.floating, float | np.floating, float | np.floating]:
"""Compute ATE using the robust (Horvitz-Thompson) weighting scheme.
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple
A tuple of (ate, trt, ntrt) where:
- ate: Average Treatment Effect
- trt: Weighted mean outcome for treated group
- ntrt: Weighted mean outcome for non-treated group
"""
(
weighted_outcome_ntrt,
weighted_outcome_trt,
n_ntrt,
n_trt,
) = self.make_robust_adjustments(ps)
ntrt = weighted_outcome_ntrt.sum() / n_ntrt
trt = weighted_outcome_trt.sum() / n_trt
ate = trt - ntrt
return ate, trt, ntrt
def _compute_ate_raw(
self, ps: np.ndarray
) -> tuple[float | np.floating, float | np.floating, float | np.floating]:
"""Compute ATE using the raw inverse propensity weighting scheme.
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple
A tuple of (ate, trt, ntrt) where:
- ate: Average Treatment Effect
- trt: Weighted mean outcome for treated group
- ntrt: Weighted mean outcome for non-treated group
"""
(
weighted_outcome_ntrt,
weighted_outcome_trt,
n_ntrt,
n_trt,
) = self.make_raw_adjustments(ps)
ntrt = weighted_outcome_ntrt.sum() / n_ntrt
trt = weighted_outcome_trt.sum() / n_trt
ate = trt - ntrt
return ate, trt, ntrt
def _compute_ate_overlap(
self, ps: np.ndarray
) -> tuple[float | np.floating, float | np.floating, float | np.floating]:
"""Compute ATE using the overlap weighting scheme.
Parameters
----------
ps : np.ndarray
Propensity scores for each observation.
Returns
-------
tuple
A tuple of (ate, trt, ntrt) where:
- ate: Average Treatment Effect
- trt: Weighted mean outcome for treated group
- ntrt: Weighted mean outcome for non-treated group
"""
(
weighted_outcome_ntrt,
weighted_outcome_trt,
n_ntrt,
n_trt,
) = self.make_overlap_adjustments(ps) # type: ignore[assignment]
ntrt = np.sum(weighted_outcome_ntrt) / np.sum(n_ntrt) # type: ignore[arg-type]
trt = np.sum(weighted_outcome_trt) / np.sum(n_trt) # type: ignore[arg-type]
ate = trt - ntrt
return ate, trt, ntrt
def _compute_ate_doubly_robust(
    self, ps: np.ndarray
) -> tuple[float | np.floating, float | np.floating, float | np.floating]:
    """Compute the ATE under the doubly robust weighting scheme.

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple
        ``(ate, trt, ntrt)``: the average treatment effect and the
        weighted mean outcomes for treated and non-treated groups.
    """
    w_ntrt, w_trt, _, _ = self.make_doubly_robust_adjustment(ps)  # type: ignore[assignment]
    # The doubly-robust estimator averages the adjusted outcomes over
    # ALL observations, so no explicit group sizes are needed.
    trt = np.mean(w_trt)
    ntrt = np.mean(w_ntrt)
    return trt - ntrt, trt, ntrt
[docs]
def get_ate(
self, i: int, idata: az.InferenceData, method: str = "doubly_robust"
) -> list[float | np.floating]:
"""Compute the Average Treatment Effect for a single posterior sample.
Post-processes the sample posterior distribution for propensity scores,
one sample at a time, using the specified weighting method.
Parameters
----------
i : int
Index of the posterior sample to process.
idata : az.InferenceData
ArviZ InferenceData object containing the posterior samples.
method : str, optional
Weighting scheme to use. One of 'robust', 'raw', 'overlap',
or 'doubly_robust'. Defaults to 'doubly_robust'.
Returns
-------
list[float]
A list of [ate, trt, ntrt] where:
- ate: Average Treatment Effect
- trt: Weighted mean outcome for treated group
- ntrt: Weighted mean outcome for non-treated group
"""
ps = idata["posterior"]["p"].stack(z=("chain", "draw"))[:, i].values
ate_methods = {
"robust": self._compute_ate_robust,
"raw": self._compute_ate_raw,
"overlap": self._compute_ate_overlap,
"doubly_robust": self._compute_ate_doubly_robust,
}
compute_fn = ate_methods.get(method, self._compute_ate_doubly_robust)
ate, trt, ntrt = compute_fn(ps)
return [ate, trt, ntrt]
[docs]
def plot_ate(
    self,
    idata: az.InferenceData | None = None,
    method: str | None = None,
    prop_draws: int = 100,
    ate_draws: int = 300,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Plot the Average Treatment Effect and propensity score distributions.

    Produces a three-panel figure:

    * **Top panel** -- Weighted and unweighted histograms of posterior
      propensity scores for treated and control groups, drawn from
      ``prop_draws`` posterior samples. ``"raw"`` and ``"overlap"`` use
      their own weight branches; all other methods (``"robust"``,
      ``"doubly_robust"``) share a common IPW-weighted histogram branch.
    * **Bottom-left panel** -- Histograms of the reweighted potential
      outcomes E[Y(1)] and E[Y(0)].
    * **Bottom-right panel** -- Histogram of the ATE distribution with a
      vertical line at its posterior mean.

    Parameters
    ----------
    idata : az.InferenceData | None, optional
        ArviZ InferenceData with posterior propensity score samples.
        If ``None``, uses ``self.model.idata``.
    method : str | None, optional
        Weighting scheme: ``'robust'``, ``'raw'``, ``'overlap'`` or
        ``'doubly_robust'``. If ``None``, falls back to
        ``self.weighting_scheme``.
    prop_draws : int, optional
        Number of posterior draws for the propensity histogram. Defaults
        to 100.
    ate_draws : int, optional
        Number of posterior draws used to compute ATE samples. Defaults
        to 300.

    Returns
    -------
    tuple[plt.Figure, list[plt.Axes]]
        The matplotlib Figure and a list of three Axes objects.
    """
    if idata is None:
        idata = self.model.idata
    if method is None:
        method = self.weighting_scheme

    def plot_weights(bins, top0, top1, ax, color="population"):
        # Mirror-histogram: control counts above zero, treated below.
        colors_dict = {
            "population": ["orange", "skyblue", 0.6],
            "pseudo_population": ["grey", "grey", 0.1],
        }
        ax.axhline(0, c="gray", linewidth=1)
        bars0 = ax.bar(
            bins[:-1] + 0.025,
            top0,
            width=0.04,
            facecolor=colors_dict[color][0],
            alpha=colors_dict[color][2],
        )
        bars1 = ax.bar(
            bins[:-1] + 0.025,
            -top1,
            width=0.04,
            facecolor=colors_dict[color][1],
            alpha=colors_dict[color][2],
        )
        for bars in (bars0, bars1):
            for bar in bars:
                bar.set_edgecolor("black")

    def make_hists(idata, i, axs, method=method):
        # One posterior draw of the propensity scores.
        p_i = az.extract(idata)["p"][:, i].values
        if method == "raw":
            weight0 = 1 / (1 - p_i[self.t.flatten() == 0])
            weight1 = 1 / (p_i[self.t.flatten() == 1])
        elif method == "overlap":
            t = self.t.flatten()
            weight1 = (1 - p_i[t == 1]) * t[t == 1]
            weight0 = p_i[t == 0] * (1 - t[t == 0])
        else:
            # robust / doubly_robust share the Horvitz-Thompson weights.
            t = self.t.flatten()
            p_of_t = np.mean(t)
            weight1 = p_of_t / p_i[t == 1]
            weight0 = (1 - p_of_t) / (1 - p_i[t == 0])
        bins = np.arange(0.025, 0.99, 0.005)
        # Unweighted (raw population) histograms.
        top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins)
        top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins)
        plot_weights(bins, top0, top1, axs[0])
        # Reweighted (pseudo-population) histograms.
        top0, _ = np.histogram(
            p_i[self.t.flatten() == 0], bins=bins, weights=weight0
        )
        top1, _ = np.histogram(
            p_i[self.t.flatten() == 1], bins=bins, weights=weight1
        )
        plot_weights(bins, top0, top1, axs[0], color="pseudo_population")

    mosaic = """AAAAAA
    BBBBCC"""
    fig, axs = plt.subplot_mosaic(mosaic, figsize=(20, 13))
    axs = [axs[k] for k in axs]
    axs[0].axvline(
        0.1, linestyle="--", label="Low Extreme Propensity Scores", color="black"
    )
    axs[0].axvline(
        0.9, linestyle="--", label="Hi Extreme Propensity Scores", color="black"
    )
    axs[0].set_title(
        "Weighted and Unweighted Draws from the Posterior \n Propensity Scores Distribution",
        fontsize=20,
    )
    axs[0].set_ylabel("Counts of Observations")
    axs[0].set_xlabel("Propensity Scores")
    custom_lines = [
        Line2D([0], [0], color="skyblue", lw=2),
        Line2D([0], [0], color="orange", lw=2),
        Line2D([0], [0], color="grey", lw=2),
        Line2D([0], [0], color="black", lw=2, linestyle="--"),
    ]
    axs[0].legend(
        custom_lines,
        ["Treatment PS", "Control PS", "Weighted Pseudo Population", "Extreme PS"],
    )
    # Explicit loop (previously a list comprehension used purely for its
    # side effects, which built and discarded a throwaway list).
    for i in range(prop_draws):
        make_hists(idata, i, axs)
    ate_df = pd.DataFrame(
        [self.get_ate(i, idata, method=method) for i in range(ate_draws)],
        columns=["ATE", "Y(1)", "Y(0)"],
    )
    axs[1].hist(
        ate_df["Y(1)"],
        label="E(Y(1))",
        ec="black",
        bins=10,
        alpha=0.6,
        color="skyblue",
    )
    axs[1].hist(
        ate_df["Y(0)"],
        label="E(Y(0))",
        ec="black",
        bins=10,
        alpha=0.6,
        color="orange",
    )
    axs[1].legend()
    axs[1].set_xlabel(self.outcome_variable)
    axs[1].set_title(
        f"The Outcomes \n Under the {method} re-weighting scheme", fontsize=20
    )
    axs[2].hist(
        ate_df["ATE"],
        label="ATE",
        ec="black",
        bins=10,
        color="grey",
        alpha=0.6,
    )
    axs[2].set_xlabel("Difference")
    axs[2].axvline(ate_df["ATE"].mean(), label="E(ATE)")
    axs[2].legend()
    axs[2].set_title("Average Treatment Effect", fontsize=20)
    return fig, axs
[docs]
def weighted_percentile(
    self, data: np.ndarray, weights: np.ndarray, perc: float
) -> float:
    """Compute a weighted percentile of the data.

    Sorts ``data`` and ``weights`` together, builds a weighted empirical
    CDF, and linearly interpolates to find the value at the requested
    percentile.

    Parameters
    ----------
    data : np.ndarray
        One-dimensional array of data values.
    weights : np.ndarray
        Non-negative weights corresponding to each element of ``data``.
    perc : float
        Desired percentile as a fraction in ``[0, 1]`` (e.g. 0.5 for the
        median).

    Returns
    -------
    float
        The interpolated data value at the given weighted percentile.

    Raises
    ------
    ValueError
        If ``perc`` is not between 0 and 1.
    """
    if not 0 <= perc <= 1:
        raise ValueError("Percentile must be between 0 and 1.")
    order = np.argsort(data)
    sorted_data = data[order]
    sorted_weights = weights[order]
    # Weighted empirical CDF evaluated at the midpoint of each weight step.
    cdf = (np.cumsum(sorted_weights) - 0.5 * sorted_weights) / np.sum(sorted_weights)
    return np.interp(perc, cdf, sorted_data)
[docs]
def plot_balance_ecdf(
    self,
    covariate: str,
    idata: az.InferenceData | None = None,
    weighting_scheme: str | None = None,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Plot the empirical CDF of a covariate before and after IPW adjustment.

    Produces a two-panel figure comparing the raw (unweighted) ECDFs of
    the treated and control groups with the reweighted ECDFs. This serves
    as a visual balance diagnostic: well-balanced covariates should show
    overlapping ECDFs in the right-hand panel.

    Parameters
    ----------
    covariate : str
        Name of the covariate column (must be one of the model's design
        matrix labels) to check for balance.
    idata : az.InferenceData | None, optional
        ArviZ InferenceData containing posterior propensity score samples.
        If ``None``, uses ``self.model.idata``.
    weighting_scheme : str | None, optional
        Weighting scheme to apply: ``'raw'``, ``'robust'``, or anything
        else for overlap weights. If ``None``, falls back to
        ``self.weighting_scheme``.

    Returns
    -------
    tuple[plt.Figure, list[plt.Axes]]
        The matplotlib Figure and a list of two Axes objects (raw ECDF on
        the left, weighted ECDF on the right).
    """
    if idata is None:
        idata = self.model.idata
    if weighting_scheme is None:
        weighting_scheme = self.weighting_scheme
    # Posterior-mean propensity score per observation.
    ps = az.extract(idata)["p"].mean(dim="sample").values
    X = pd.DataFrame(self.X, columns=self.labels)
    X["ps"] = ps
    t = self.t.flatten()
    # Per-group weights mirror the schemes used elsewhere in this class:
    # raw = 1/ps, robust = P(T)/ps (Horvitz-Thompson), otherwise overlap.
    if weighting_scheme == "raw":
        w1 = 1 / ps[t == 1]
        w0 = 1 / (1 - ps[t == 0])
    elif weighting_scheme == "robust":
        p_of_t = np.mean(t)
        w1 = p_of_t / (ps[t == 1])
        w0 = (1 - p_of_t) / (1 - ps[t == 0])
    else:
        w1 = (1 - ps[t == 1]) * t[t == 1]
        w0 = ps[t == 0] * (1 - t[t == 0])
    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    # Unweighted ECDFs: unit weights over a 1000-point quantile grid.
    raw_trt = [
        self.weighted_percentile(
            X[t == 1][covariate].values, np.ones(len(X[t == 1])), p
        )
        for p in np.linspace(0, 1, 1000)
    ]
    raw_ntrt = [
        self.weighted_percentile(
            X[t == 0][covariate].values, np.ones(len(X[t == 0])), p
        )
        for p in np.linspace(0, 1, 1000)
    ]
    # Reweighted ECDFs using the scheme-specific weights.
    w_trt = [
        self.weighted_percentile(X[t == 1][covariate].values, w1, p)
        for p in np.linspace(0, 1, 1000)
    ]
    w_ntrt = [
        self.weighted_percentile(X[t == 0][covariate].values, w0, p)
        for p in np.linspace(0, 1, 1000)
    ]
    axs[0].plot(
        np.linspace(0, 1, 1000), raw_trt, color="skyblue", label="Raw Treated"
    )
    axs[0].plot(
        np.linspace(0, 1, 1000), raw_ntrt, color="orange", label="Raw Control"
    )
    axs[0].set_title(f"ECDF \n Raw: {covariate}")
    axs[1].set_title(
        f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}"
    )
    axs[1].plot(
        np.linspace(0, 1, 1000), w_trt, color="skyblue", label="Reweighted Treated"
    )
    axs[1].plot(
        np.linspace(0, 1, 1000),
        w_ntrt,
        color="orange",
        label="Reweighted Control",
    )
    axs[1].set_xlabel("Quantiles")
    axs[0].set_xlabel("Quantiles")
    axs[1].legend()
    axs[0].legend()
    return fig, list(axs)
[docs]
def effect_summary(
    self,
    *,
    window: Literal["post"] | tuple | slice = "post",
    direction: Literal["increase", "decrease", "two-sided"] = "increase",
    alpha: float = 0.05,
    cumulative: bool = True,
    relative: bool = True,
    min_effect: float | None = None,
    treated_unit: str | None = None,
    period: Literal["intervention", "post", "comparison"] | None = None,
    prefix: str = "Post-period",
    **kwargs: Any,
) -> EffectSummary:
    """Generate a decision-ready summary of causal effects.

    .. note::
        This method is not yet implemented for
        ``InversePropensityWeighting`` experiments. Calling it will always
        raise ``NotImplementedError``. The signature mirrors the shared
        ``effect_summary`` interface of other experiment classes so that
        callers can use a uniform API once it is implemented.

    Parameters
    ----------
    window : Literal["post"] | tuple | slice, optional
        Time window for analysis. Defaults to ``"post"``.
    direction : ``"increase"`` | ``"decrease"`` | ``"two-sided"``, optional
        Direction for tail probability calculation. Defaults to
        ``"increase"``.
    alpha : float, optional
        Significance level for HDI/CI intervals. Defaults to 0.05.
    cumulative : bool, optional
        Whether to include cumulative effect statistics. Defaults to
        ``True``.
    relative : bool, optional
        Whether to include relative effect statistics. Defaults to
        ``True``.
    min_effect : float | None, optional
        ROPE threshold for practical equivalence. Defaults to ``None``.
    treated_unit : str | None, optional
        For multi-unit experiments, the unit to analyse. Defaults to
        ``None``.
    period : ``"intervention"`` | ``"post"`` | ``"comparison"`` | None, optional
        Period to summarise for multi-period experiments. Defaults to
        ``None``.
    prefix : str, optional
        Label prefix for prose generation. Defaults to ``"Post-period"``.
    **kwargs : Any
        Additional keyword arguments (currently unused).

    Raises
    ------
    NotImplementedError
        Always raised; this method is a placeholder for future work.
    """
    raise NotImplementedError(
        "effect_summary is not yet implemented for InversePropensityWeighting experiments."
    )