Source code for causalpy.experiments.inverse_propensity_weighting

#   Copyright 2022 - 2026 The PyMC Labs Developers
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
"""
Inverse propensity weighting
"""

import warnings
from typing import Any, Literal

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
from patsy import dmatrices
from sklearn.linear_model import LinearRegression as sk_lin_reg

from causalpy.custom_exceptions import DataException
from causalpy.pymc_models import PropensityScore
from causalpy.reporting import EffectSummary

from .base import BaseExperiment


class InversePropensityWeighting(BaseExperiment):
    """A class to analyse inverse propensity weighting experiments.

    Parameters
    ----------
    data : pd.DataFrame
        A pandas dataframe.
    formula : str
        A statistical model formula for the propensity model.
    outcome_variable : str
        A string denoting the outcome variable in data to be reweighted.
    weighting_scheme : str
        A string denoting which weighting scheme to use among: 'raw',
        'robust', 'doubly_robust' or 'overlap'. See Aronow and Miller
        "Foundations of Agnostic Statistics" for discussion and computation
        of these weighting schemes.
    model : PropensityScore, optional
        A PyMC model. Defaults to PropensityScore.

    Example
    --------
    >>> import causalpy as cp
    >>> df = cp.load_data("nhefs")
    >>> seed = 42
    >>> result = cp.InversePropensityWeighting(
    ...     df,
    ...     formula="trt ~ 1 + age + race",
    ...     outcome_variable="outcome",
    ...     weighting_scheme="robust",
    ...     model=cp.pymc_models.PropensityScore(
    ...         sample_kwargs={
    ...             "draws": 100,
    ...             "target_accept": 0.95,
    ...             "random_seed": seed,
    ...             "progressbar": False,
    ...         },
    ...     ),
    ... )
    """

    # Propensity-score experiments are Bayesian-only: no OLS backend exists.
    supports_ols = False
    supports_bayes = True
    # Model class used when the caller does not supply one.
    _default_model_class = PropensityScore
def __init__(
    self,
    data: pd.DataFrame,
    formula: str,
    outcome_variable: str,
    weighting_scheme: str,
    model: PropensityScore | None = None,
    **kwargs: dict,
) -> None:
    """Initialise the experiment, validate inputs, and fit the propensity model.

    Parameters
    ----------
    data : pd.DataFrame
        The observational dataset containing treatment, outcome and covariates.
    formula : str
        Patsy formula for the propensity model (treatment on the left-hand side).
    outcome_variable : str
        Column in ``data`` holding the outcome to be reweighted.
    weighting_scheme : str
        One of 'raw', 'robust', 'doubly_robust' or 'overlap'.
    model : PropensityScore | None, optional
        The PyMC propensity model; the base class substitutes the default
        model class when ``None``.
    **kwargs : dict
        Additional keyword arguments (currently unused).
    """
    super().__init__(model=model)
    self.expt_type = "Inverse Propensity Score Weighting"
    self.data = data
    self.formula = formula
    self.outcome_variable = outcome_variable
    self.weighting_scheme = weighting_scheme
    # Order matters: validate raw inputs first, then build matrices the
    # fitting step depends on, then fit the propensity model.
    self.input_validation()
    self._build_design_matrices()
    self.algorithm()
def _build_design_matrices(self) -> None:
    """Build design matrices for the propensity score model.

    Uses the patsy formula stored in ``self.formula`` to construct treatment
    and covariate matrices from ``self.data``. Also creates an augmented
    design matrix (``self.X_outcome``) that includes the treatment
    indicator, for use with doubly-robust outcome modelling.
    """
    t, X = dmatrices(self.formula, self.data)
    # BUG FIX: the second assignment previously re-assigned
    # ``self._t_design_info``, clobbering the treatment design info with
    # the covariate design info. Store each under its own attribute.
    self._t_design_info = t.design_info
    self._X_design_info = X.design_info
    self.labels = X.design_info.column_names
    self.t, self.X = np.asarray(t), np.asarray(X)
    self.y = self.data[self.outcome_variable]
    COORDS = {"obs_ind": list(range(self.X.shape[0])), "coeffs": self.labels}
    self.coords = COORDS
    # Augmented design matrix with the treatment column appended; used by
    # the doubly-robust outcome models.
    self.X_outcome = pd.DataFrame(self.X, columns=self.labels)
    self.X_outcome["trt"] = self.t
    self.coords["outcome_coeffs"] = self.X_outcome.columns
def algorithm(self) -> None:
    """Execute the experiment by fitting the propensity score model.

    Passes the covariate matrix ``self.X``, the treatment vector ``self.t``
    and the coordinate metadata ``self.coords`` through to the model's
    ``fit`` method.
    """
    design, treatment = self.X, self.t
    self.model.fit(X=design, t=treatment, coords=self.coords)  # type: ignore[call-arg]
def input_validation(self) -> None:
    """Check that the data and formula are mutually consistent.

    Verifies that the treatment variable (left-hand side of the formula)
    and ``self.outcome_variable`` both exist as columns of ``self.data``,
    and that the treatment takes at most two distinct values. A constant
    (single-value) treatment passes this check, although downstream
    estimators may then fail or be undefined.

    Raises
    ------
    DataException
        If either variable is absent from the data, or if the treatment
        variable has more than two unique values.
    """
    treatment = self.formula.split("~")[0]
    treatment_name = treatment.strip()
    has_treatment = treatment_name in self.data.columns
    has_outcome = self.outcome_variable in self.data.columns
    if not (has_treatment and has_outcome):
        raise DataException(
            f"""
            The treatment variable:
            {treatment} must appear in the data to be used
            as an outcome variable. And {self.outcome_variable}
            must also be available in the data to be re-weighted
            """
        )
    treatment_values = self.data[treatment_name]
    if len(np.unique(treatment_values)) > 2:
        raise DataException(
            """Warning. The treatment variable is not 0-1 Binary.
            """
        )
def _prepare_ps(self, ps: np.ndarray, eps: float = 1e-6) -> np.ndarray: """Check for extreme propensity scores and clip them for numerical stability. Parameters ---------- ps : np.ndarray Raw propensity scores for each observation, expected in [0, 1]. eps : float, optional Clipping tolerance. Scores are capped to the interval ``[eps, 1 - eps]``. Defaults to 1e-6. Returns ------- np.ndarray Clipped propensity scores in ``[eps, 1 - eps]``. Warns ----- UserWarning If any scores fall outside ``[eps, 1 - eps]`` before clipping. """ if np.any((ps < eps) | (ps > 1 - eps)): warnings.warn( f"Extreme propensity scores detected (outside [{eps}, {1 - eps}]). " "Capping values to prevent numerical instability.", UserWarning, stacklevel=2, ) return np.clip(ps, eps, 1 - eps)
def make_robust_adjustments(
    self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, int, int]:
    """Apply the robust (Horvitz-Thompson-style) IPW adjustment.

    Discussed in Aronow and Miller's *Foundations of Agnostic Statistics*
    as related to the Horvitz-Thompson estimator: weights are scaled by
    the marginal treatment probability.

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple[pd.Series, pd.Series, int, int]
        ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)`` — the
        IPW-adjusted outcomes for control and treated units, plus the two
        group sizes used for normalisation.
    """
    ps = self._prepare_ps(ps)  # guard against extreme scores
    frame = pd.DataFrame(self.X, columns=self.labels)
    frame["ps"] = ps
    frame[self.outcome_variable] = self.y
    treated = self.t.flatten()
    treated_share = np.mean(treated)
    # Horvitz-Thompson style weights, scaled by P(T=1) / P(T=0).
    frame["i_ps"] = np.where(
        treated == 1,
        treated_share / frame["ps"],
        (1 - treated_share) / (1 - frame["ps"]),
    )
    is_control = treated == 0
    is_treated = treated == 1
    n_ntrt = frame[is_control].shape[0]
    n_trt = frame[is_treated].shape[0]
    weighted_outcome1 = (
        frame[is_treated][self.outcome_variable] * frame[is_treated]["i_ps"]
    )
    weighted_outcome0 = (
        frame[is_control][self.outcome_variable] * frame[is_control]["i_ps"]
    )
    return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
def make_raw_adjustments(
    self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, int, int]:
    """Apply the raw (basic) inverse propensity weighting adjustment.

    The simplest IPW scheme, per Aronow and Miller's *Foundations of
    Agnostic Statistics*: each unit is weighted by the reciprocal of its
    propensity score (``1 / ps`` for treated, ``1 / (1 - ps)`` for control).

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple[pd.Series, pd.Series, int, int]
        ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)``. Both
        counts equal the full sample size, since the raw scheme
        normalises by the whole sample.
    """
    ps = self._prepare_ps(ps)  # guard against extreme scores
    frame = pd.DataFrame(self.X, columns=self.labels)
    frame["ps"] = ps
    frame[self.outcome_variable] = self.y
    treated = self.t.flatten()
    # Use ps for treated units and (1 - ps) for controls, then invert.
    frame["ps"] = np.where(treated, frame["ps"], 1 - frame["ps"])
    frame["i_ps"] = 1 / frame["ps"]
    n_ntrt = n_trt = len(frame)
    is_control = treated == 0
    is_treated = treated == 1
    weighted_outcome1 = (
        frame.loc[is_treated, self.outcome_variable] * frame.loc[is_treated, "i_ps"]
    )
    weighted_outcome0 = (
        frame.loc[is_control, self.outcome_variable] * frame.loc[is_control, "i_ps"]
    )
    return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
def make_overlap_adjustments(
    self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, pd.Series, pd.Series]:
    """Apply the overlap weighting adjustment.

    Adapted from Lucy D’Agostino McGowan’s blog on propensity score
    weights, referenced in the primary CausalPy explainer notebook.
    Overlap weights emphasise units whose propensity scores are near 0.5
    (the region of clinical equipoise), which reduces sensitivity to
    extreme scores.

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple[pd.Series, pd.Series, pd.Series, pd.Series]
        ``(weighted_outcome0, weighted_outcome1, n_ntrt, n_trt)``. Unlike
        the raw/robust schemes, the normalisation terms here are
        ``pd.Series`` of per-unit weights rather than integer counts.
    """
    ps = self._prepare_ps(ps)  # guard against extreme scores
    frame = pd.DataFrame(self.X, columns=self.labels)
    frame["ps"] = ps
    frame[self.outcome_variable] = self.y
    treated = self.t.flatten()
    # Overlap weights: (1 - ps) for treated units, ps for controls.
    frame["i_ps"] = np.where(
        treated, (1 - frame["ps"]) * treated, frame["ps"] * (1 - treated)
    )
    ctrl_mask = treated == 0
    trt_mask = treated == 1
    ctrl_weights = frame[ctrl_mask]["i_ps"]
    trt_weights = frame[trt_mask]["i_ps"]
    n_ntrt = (1 - treated[ctrl_mask]) * ctrl_weights
    n_trt = treated[trt_mask] * trt_weights
    weighted_outcome1 = (
        treated[trt_mask] * frame[trt_mask][self.outcome_variable] * trt_weights
    )
    weighted_outcome0 = (
        (1 - treated[ctrl_mask])
        * frame[ctrl_mask][self.outcome_variable]
        * ctrl_weights
    )
    return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
def make_doubly_robust_adjustment(
    self, ps: np.ndarray
) -> tuple[pd.Series, pd.Series, None, None]:
    """Apply the doubly-robust adjustment.

    Discussed in Aronow and Miller's *Foundations of Agnostic Statistics*.
    The outcome model is fixed to ordinary least squares (OLS), so the
    compromise between the outcome model and the propensity model is
    always made with a linear regression.

    Parameters
    ----------
    ps : np.ndarray
        Propensity scores for each observation.

    Returns
    -------
    tuple[pd.Series, pd.Series, None, None]
        ``(weighted_outcome0, weighted_outcome1, None, None)``. The two
        ``None`` values keep the interface consistent with the other
        adjustment methods; the doubly-robust estimator averages over all
        observations, so no group sizes are needed.
    """
    ps = self._prepare_ps(ps)  # guard against extreme scores
    covariates = pd.DataFrame(self.X, columns=self.labels)
    covariates["ps"] = ps
    treated = self.t.flatten()
    # Fit separate OLS outcome models on the control and treated subsets.
    model_ctrl = sk_lin_reg().fit(
        covariates[treated == 0].astype(float), self.y[treated == 0]
    )
    model_trt = sk_lin_reg().fit(
        covariates[treated == 1].astype(float), self.y[treated == 1]
    )
    pred_ctrl = model_ctrl.predict(covariates)
    pred_trt = model_trt.predict(covariates)
    ## Compromise between outcome and treatment assignment model
    weighted_outcome0 = (
        (1 - treated) * (self.y - pred_ctrl) / (1 - covariates["ps"]) + pred_ctrl
    )
    weighted_outcome1 = treated * (self.y - pred_trt) / covariates["ps"] + pred_trt
    return weighted_outcome0, weighted_outcome1, None, None
def _compute_ate_robust( self, ps: np.ndarray ) -> tuple[float | np.floating, float | np.floating, float | np.floating]: """Compute ATE using the robust (Horvitz-Thompson) weighting scheme. Parameters ---------- ps : np.ndarray Propensity scores for each observation. Returns ------- tuple A tuple of (ate, trt, ntrt) where: - ate: Average Treatment Effect - trt: Weighted mean outcome for treated group - ntrt: Weighted mean outcome for non-treated group """ ( weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt, ) = self.make_robust_adjustments(ps) ntrt = weighted_outcome_ntrt.sum() / n_ntrt trt = weighted_outcome_trt.sum() / n_trt ate = trt - ntrt return ate, trt, ntrt def _compute_ate_raw( self, ps: np.ndarray ) -> tuple[float | np.floating, float | np.floating, float | np.floating]: """Compute ATE using the raw inverse propensity weighting scheme. Parameters ---------- ps : np.ndarray Propensity scores for each observation. Returns ------- tuple A tuple of (ate, trt, ntrt) where: - ate: Average Treatment Effect - trt: Weighted mean outcome for treated group - ntrt: Weighted mean outcome for non-treated group """ ( weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt, ) = self.make_raw_adjustments(ps) ntrt = weighted_outcome_ntrt.sum() / n_ntrt trt = weighted_outcome_trt.sum() / n_trt ate = trt - ntrt return ate, trt, ntrt def _compute_ate_overlap( self, ps: np.ndarray ) -> tuple[float | np.floating, float | np.floating, float | np.floating]: """Compute ATE using the overlap weighting scheme. Parameters ---------- ps : np.ndarray Propensity scores for each observation. 
Returns ------- tuple A tuple of (ate, trt, ntrt) where: - ate: Average Treatment Effect - trt: Weighted mean outcome for treated group - ntrt: Weighted mean outcome for non-treated group """ ( weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt, ) = self.make_overlap_adjustments(ps) # type: ignore[assignment] ntrt = np.sum(weighted_outcome_ntrt) / np.sum(n_ntrt) # type: ignore[arg-type] trt = np.sum(weighted_outcome_trt) / np.sum(n_trt) # type: ignore[arg-type] ate = trt - ntrt return ate, trt, ntrt def _compute_ate_doubly_robust( self, ps: np.ndarray ) -> tuple[float | np.floating, float | np.floating, float | np.floating]: """Compute ATE using the doubly robust weighting scheme. Parameters ---------- ps : np.ndarray Propensity scores for each observation. Returns ------- tuple A tuple of (ate, trt, ntrt) where: - ate: Average Treatment Effect - trt: Weighted mean outcome for treated group - ntrt: Weighted mean outcome for non-treated group """ ( weighted_outcome_ntrt, weighted_outcome_trt, _n_ntrt, _n_trt, ) = self.make_doubly_robust_adjustment(ps) # type: ignore[assignment] trt = np.mean(weighted_outcome_trt) ntrt = np.mean(weighted_outcome_ntrt) ate = trt - ntrt return ate, trt, ntrt
def get_ate(
    self, i: int, idata: az.InferenceData, method: str = "doubly_robust"
) -> list[float | np.floating]:
    """Compute the Average Treatment Effect for a single posterior sample.

    Extracts the ``i``-th posterior draw of the propensity scores from
    ``idata`` and dispatches to the requested weighting scheme's
    ATE computation.

    Parameters
    ----------
    i : int
        Index of the posterior sample to process.
    idata : az.InferenceData
        ArviZ InferenceData object containing the posterior samples.
    method : str, optional
        Weighting scheme to use: 'robust', 'raw', 'overlap', or
        'doubly_robust'. Defaults to 'doubly_robust'.

    Returns
    -------
    list[float]
        ``[ate, trt, ntrt]`` — the average treatment effect plus the
        weighted mean outcomes of the treated and non-treated groups.
    """
    # Stack (chain, draw) into one sample axis and pick the i-th draw.
    ps = idata["posterior"]["p"].stack(z=("chain", "draw"))[:, i].values
    dispatch = {
        "robust": self._compute_ate_robust,
        "raw": self._compute_ate_raw,
        "overlap": self._compute_ate_overlap,
        "doubly_robust": self._compute_ate_doubly_robust,
    }
    # Unrecognised method names silently fall back to doubly-robust.
    ate, trt, ntrt = dispatch.get(method, self._compute_ate_doubly_robust)(ps)
    return [ate, trt, ntrt]
def plot_ate(
    self,
    idata: az.InferenceData | None = None,
    method: str | None = None,
    prop_draws: int = 100,
    ate_draws: int = 300,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Plot the Average Treatment Effect and propensity score distributions.

    Produces a three-panel figure:

    * **Top panel** -- Weighted and unweighted histograms of posterior
      propensity scores for treated and control groups, drawn from
      ``prop_draws`` posterior samples. The ``"raw"`` method shows
      unweighted counts; ``"overlap"`` uses overlap weights; all other
      methods (``"robust"``, ``"doubly_robust"``) share a common
      IPW-weighted histogram branch.
    * **Bottom-left panel** -- Histograms of the reweighted potential
      outcomes E[Y(1)] and E[Y(0)].
    * **Bottom-right panel** -- Histogram of the ATE distribution with a
      vertical line at its posterior mean.

    Parameters
    ----------
    idata : az.InferenceData | None, optional
        ArviZ InferenceData with posterior propensity score samples.
        If ``None``, uses ``self.model.idata``.
    method : str | None, optional
        Weighting scheme to apply. One of ``'robust'``, ``'raw'``,
        ``'overlap'``, or ``'doubly_robust'``. If ``None``, falls back to
        ``self.weighting_scheme``.
    prop_draws : int, optional
        Number of posterior draws used for the propensity score
        histogram. Defaults to 100.
    ate_draws : int, optional
        Number of posterior draws used to compute ATE samples.
        Defaults to 300.

    Returns
    -------
    tuple[plt.Figure, list[plt.Axes]]
        The matplotlib Figure and a list of three Axes objects.
    """
    if idata is None:
        idata = self.model.idata
    if method is None:
        method = self.weighting_scheme

    def plot_weights(bins, top0, top1, ax, color="population"):
        # Mirrored histogram: control counts above the axis, treated below.
        # Each entry is [control facecolor, treated facecolor, alpha].
        colors_dict = {
            "population": ["orange", "skyblue", 0.6],
            "pseudo_population": ["grey", "grey", 0.1],
        }
        ax.axhline(0, c="gray", linewidth=1)
        bars0 = ax.bar(
            bins[:-1] + 0.025,
            top0,
            width=0.04,
            facecolor=colors_dict[color][0],
            alpha=colors_dict[color][2],
        )
        bars1 = ax.bar(
            bins[:-1] + 0.025,
            -top1,  # negated so the treated histogram hangs below zero
            width=0.04,
            facecolor=colors_dict[color][1],
            alpha=colors_dict[color][2],
        )
        for bars in (bars0, bars1):
            for bar in bars:
                bar.set_edgecolor("black")

    def make_hists(idata, i, axs, method=method):
        # One posterior draw of the propensity scores.
        p_i = az.extract(idata)["p"][:, i].values
        if method == "raw":
            weight0 = 1 / (1 - p_i[self.t.flatten() == 0])
            weight1 = 1 / (p_i[self.t.flatten() == 1])
        elif method == "overlap":
            t = self.t.flatten()
            weight1 = (1 - p_i[t == 1]) * t[t == 1]
            weight0 = p_i[t == 0] * (1 - t[t == 0])
        else:
            # 'robust' and 'doubly_robust' share the same weighted view.
            t = self.t.flatten()
            p_of_t = np.mean(t)
            weight1 = p_of_t / p_i[t == 1]
            weight0 = (1 - p_of_t) / (1 - p_i[t == 0])
        bins = np.arange(0.025, 0.99, 0.005)
        # Unweighted counts first ("population"), ...
        top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins)
        top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins)
        plot_weights(bins, top0, top1, axs[0])
        # ... then the reweighted pseudo-population overlaid faintly.
        top0, _ = np.histogram(
            p_i[self.t.flatten() == 0], bins=bins, weights=weight0
        )
        top1, _ = np.histogram(
            p_i[self.t.flatten() == 1], bins=bins, weights=weight1
        )
        plot_weights(bins, top0, top1, axs[0], color="pseudo_population")

    # Wide top panel (A) over two bottom panels (B, C).
    mosaic = """AAAAAA
    BBBBCC"""
    fig, axs = plt.subplot_mosaic(mosaic, figsize=(20, 13))
    axs = [axs[k] for k in axs]
    # Conventional thresholds beyond which propensity scores are "extreme".
    axs[0].axvline(
        0.1, linestyle="--", label="Low Extreme Propensity Scores", color="black"
    )
    axs[0].axvline(
        0.9, linestyle="--", label="Hi Extreme Propensity Scores", color="black"
    )
    axs[0].set_title(
        "Weighted and Unweighted Draws from the Posterior \n Propensity Scores Distribution",
        fontsize=20,
    )
    axs[0].set_ylabel("Counts of Observations")
    axs[0].set_xlabel("Propensity Scores")
    custom_lines = [
        Line2D([0], [0], color="skyblue", lw=2),
        Line2D([0], [0], color="orange", lw=2),
        Line2D([0], [0], color="grey", lw=2),
        Line2D([0], [0], color="black", lw=2, linestyle="--"),
    ]
    axs[0].legend(
        custom_lines,
        ["Treatment PS", "Control PS", "Weighted Pseudo Population", "Extreme PS"],
    )
    # Draw one mirrored histogram per posterior draw.
    [make_hists(idata, i, axs) for i in range(prop_draws)]
    ate_df = pd.DataFrame(
        [self.get_ate(i, idata, method=method) for i in range(ate_draws)],
        columns=["ATE", "Y(1)", "Y(0)"],
    )
    axs[1].hist(
        ate_df["Y(1)"],
        label="E(Y(1))",
        ec="black",
        bins=10,
        alpha=0.6,
        color="skyblue",
    )
    axs[1].hist(
        ate_df["Y(0)"],
        label="E(Y(0))",
        ec="black",
        bins=10,
        alpha=0.6,
        color="orange",
    )
    axs[1].legend()
    axs[1].set_xlabel(self.outcome_variable)
    axs[1].set_title(
        f"The Outcomes \n Under the {method} re-weighting scheme", fontsize=20
    )
    axs[2].hist(
        ate_df["ATE"],
        label="ATE",
        ec="black",
        bins=10,
        color="grey",
        alpha=0.6,
    )
    axs[2].set_xlabel("Difference")
    axs[2].axvline(ate_df["ATE"].mean(), label="E(ATE)")
    axs[2].legend()
    axs[2].set_title("Average Treatment Effect", fontsize=20)
    return fig, axs
def weighted_percentile(
    self, data: np.ndarray, weights: np.ndarray, perc: float
) -> float:
    """Compute a weighted percentile of ``data``.

    Sorts the data/weight pairs, forms a midpoint-adjusted weighted
    empirical CDF, and linearly interpolates the value at the requested
    percentile.

    Parameters
    ----------
    data : np.ndarray
        One-dimensional array of values.
    weights : np.ndarray
        Non-negative weight for each element of ``data``.
    perc : float
        Target percentile as a fraction in ``[0, 1]`` (0.5 = median).

    Returns
    -------
    float
        The interpolated value at the given weighted percentile.

    Raises
    ------
    ValueError
        If ``perc`` lies outside ``[0, 1]``.
    """
    if not (0 <= perc <= 1):
        raise ValueError("Percentile must be between 0 and 1.")
    order = np.argsort(data)
    sorted_data = data[order]
    sorted_weights = weights[order]
    # Midpoint-adjusted weighted empirical CDF ('like' a CDF function).
    cdf = (np.cumsum(sorted_weights) - 0.5 * sorted_weights) / np.sum(sorted_weights)
    return np.interp(perc, cdf, sorted_data)
def plot_balance_ecdf(
    self,
    covariate: str,
    idata: az.InferenceData | None = None,
    weighting_scheme: str | None = None,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Plot the empirical CDF of a covariate before and after IPW adjustment.

    Produces a two-panel figure comparing the raw (unweighted) ECDFs of
    the treated and control groups with the reweighted ECDFs. This serves
    as a visual balance diagnostic: well-balanced covariates should show
    overlapping ECDFs in the right-hand panel.

    Parameters
    ----------
    covariate : str
        Name of the covariate column (must be one of the model's design
        matrix labels) to check for balance.
    idata : az.InferenceData | None, optional
        ArviZ InferenceData containing posterior propensity score samples.
        If ``None``, uses ``self.model.idata``.
    weighting_scheme : str | None, optional
        Weighting scheme to apply. One of ``'raw'``, ``'robust'``, or
        ``'overlap'``. If ``None``, falls back to ``self.weighting_scheme``.

    Returns
    -------
    tuple[plt.Figure, list[plt.Axes]]
        The matplotlib Figure and a list of two Axes objects (raw ECDF on
        the left, weighted ECDF on the right).
    """
    if idata is None:
        idata = self.model.idata
    if weighting_scheme is None:
        weighting_scheme = self.weighting_scheme
    # Posterior-mean propensity score per observation.
    ps = az.extract(idata)["p"].mean(dim="sample").values
    X = pd.DataFrame(self.X, columns=self.labels)
    X["ps"] = ps
    t = self.t.flatten()
    if weighting_scheme == "raw":
        w1 = 1 / ps[t == 1]
        w0 = 1 / (1 - ps[t == 0])
    elif weighting_scheme == "robust":
        p_of_t = np.mean(t)
        w1 = p_of_t / (ps[t == 1])
        w0 = (1 - p_of_t) / (1 - ps[t == 0])
    else:
        # Overlap weights; any other scheme name also falls through here.
        w1 = (1 - ps[t == 1]) * t[t == 1]
        w0 = ps[t == 0] * (1 - t[t == 0])
    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    # Unweighted ECDFs: uniform weights across each group.
    raw_trt = [
        self.weighted_percentile(
            X[t == 1][covariate].values, np.ones(len(X[t == 1])), p
        )
        for p in np.linspace(0, 1, 1000)
    ]
    raw_ntrt = [
        self.weighted_percentile(
            X[t == 0][covariate].values, np.ones(len(X[t == 0])), p
        )
        for p in np.linspace(0, 1, 1000)
    ]
    # Reweighted ECDFs using the scheme-specific weights computed above.
    w_trt = [
        self.weighted_percentile(X[t == 1][covariate].values, w1, p)
        for p in np.linspace(0, 1, 1000)
    ]
    w_ntrt = [
        self.weighted_percentile(X[t == 0][covariate].values, w0, p)
        for p in np.linspace(0, 1, 1000)
    ]
    axs[0].plot(
        np.linspace(0, 1, 1000), raw_trt, color="skyblue", label="Raw Treated"
    )
    axs[0].plot(
        np.linspace(0, 1, 1000), raw_ntrt, color="orange", label="Raw Control"
    )
    axs[0].set_title(f"ECDF \n Raw: {covariate}")
    axs[1].set_title(
        f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}"
    )
    axs[1].plot(
        np.linspace(0, 1, 1000), w_trt, color="skyblue", label="Reweighted Treated"
    )
    axs[1].plot(
        np.linspace(0, 1, 1000),
        w_ntrt,
        color="orange",
        label="Reweighted Control",
    )
    axs[1].set_xlabel("Quantiles")
    axs[0].set_xlabel("Quantiles")
    axs[1].legend()
    axs[0].legend()
    return fig, list(axs)
def effect_summary(
    self,
    *,
    window: Literal["post"] | tuple | slice = "post",
    direction: Literal["increase", "decrease", "two-sided"] = "increase",
    alpha: float = 0.05,
    cumulative: bool = True,
    relative: bool = True,
    min_effect: float | None = None,
    treated_unit: str | None = None,
    period: Literal["intervention", "post", "comparison"] | None = None,
    prefix: str = "Post-period",
    **kwargs: Any,
) -> EffectSummary:
    """Generate a decision-ready summary of causal effects.

    .. note::
        This method is not yet implemented for
        ``InversePropensityWeighting`` experiments. Calling it will raise
        ``NotImplementedError``. The signature mirrors the base-class
        contract so that callers can program against a uniform interface.

    Parameters
    ----------
    window : Literal["post"] | tuple | slice, optional
        Time window for analysis. Defaults to ``"post"``.
    direction : ``"increase"`` | ``"decrease"`` | ``"two-sided"``, optional
        Direction for tail probability calculation. Defaults to
        ``"increase"``.
    alpha : float, optional
        Significance level for HDI/CI intervals. Defaults to 0.05.
    cumulative : bool, optional
        Whether to include cumulative effect statistics. Defaults to
        ``True``.
    relative : bool, optional
        Whether to include relative effect statistics. Defaults to
        ``True``.
    min_effect : float | None, optional
        ROPE threshold for practical equivalence. Defaults to ``None``.
    treated_unit : str | None, optional
        For multi-unit experiments, the unit to analyse. Defaults to
        ``None``.
    period : ``"intervention"`` | ``"post"`` | ``"comparison"`` | None, optional
        Period to summarise for multi-period experiments. Defaults to
        ``None``.
    prefix : str, optional
        Label prefix for prose generation. Defaults to ``"Post-period"``.
    **kwargs : Any
        Additional keyword arguments (currently unused).

    Raises
    ------
    NotImplementedError
        Always raised; this method is a placeholder for future work.
    """
    raise NotImplementedError(
        "effect_summary is not yet implemented for InversePropensityWeighting experiments."
    )