"""
Wild cluster bootstrap for inference with few clusters.
This module implements the wild cluster bootstrap method for more reliable
inference when the number of clusters is small. The method is particularly
useful in difference-in-differences settings where standard cluster-robust
standard errors may perform poorly.
The wild cluster bootstrap is recommended when:
- Number of clusters G < 30
- Cluster sizes are unbalanced
- Few treated clusters
Key features:
- Full enumeration mode for exact p-values when G <= 12
- Optional integration with wildboottest package for optimized computation
- Multiple weight distributions: Rademacher, Mammen, Webb
Notes
-----
The Rademacher weights are the simplest and most commonly used. Mammen weights
match the first three moments of the error distribution. Webb weights provide
better performance with very few clusters (G < 10).
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal
import warnings
from itertools import product
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import brentq
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Check if wildboottest package is available.
try:
from wildboottest.wildboottest import wildboottest as _wildboottest
HAS_WILDBOOTTEST = True
except ImportError:
HAS_WILDBOOTTEST = False
_wildboottest = None
[docs]
@dataclass
class WildClusterBootstrapResult:
"""
Result of wild cluster bootstrap inference.
This class contains the results of wild cluster bootstrap,
which provides more reliable inference when the number of
clusters is small.
Attributes
----------
att : float
Point estimate of ATT.
se_bootstrap : float
Bootstrap standard error.
ci_lower : float
Lower bound of bootstrap confidence interval.
ci_upper : float
Upper bound of bootstrap confidence interval.
pvalue : float
Bootstrap p-value (two-sided).
n_clusters : int
Number of clusters.
n_bootstrap : int
Number of bootstrap replications.
weight_type : str
Type of bootstrap weights used.
t_stat_original : float
Original t-statistic.
t_stats_bootstrap : np.ndarray
Bootstrap t-statistics.
rejection_rate : float
Proportion of bootstrap t-stats exceeding original.
ci_method : str
Method used for CI construction ('percentile_t' or 'test_inversion').
"""
att: float
se_bootstrap: float
ci_lower: float
ci_upper: float
pvalue: float
n_clusters: int
n_bootstrap: int
weight_type: str
t_stat_original: float
t_stats_bootstrap: np.ndarray
rejection_rate: float
ci_method: str = 'percentile_t'
[docs]
def summary(self) -> str:
"""
Generate human-readable summary of bootstrap results.
Returns
-------
str
Formatted summary string.
"""
sig = "***" if self.pvalue < 0.01 else "**" if self.pvalue < 0.05 else "*" if self.pvalue < 0.1 else ""
return (
f"Wild Cluster Bootstrap Results\n"
f"{'='*50}\n"
f"ATT: {self.att:.4f} {sig}\n"
f"Bootstrap SE: {self.se_bootstrap:.4f}\n"
f"95% CI: [{self.ci_lower:.4f}, {self.ci_upper:.4f}]\n"
f"P-value: {self.pvalue:.4f}\n"
f"N clusters: {self.n_clusters}\n"
f"N bootstrap: {self.n_bootstrap}\n"
f"Weight type: {self.weight_type}\n"
f"{'='*50}"
)
def _generate_bootstrap_weights(n_clusters: int, weight_type: str) -> np.ndarray:
"""
Generate bootstrap weights for each cluster.
Parameters
----------
n_clusters : int
Number of clusters.
weight_type : str
Type of bootstrap weights:
- 'rademacher': P(w=1) = P(w=-1) = 0.5
- 'mammen': Two-point distribution matching skewness
- 'webb': Six-point distribution (Webb 2014)
Returns
-------
np.ndarray
Array of bootstrap weights, one per cluster.
Notes
-----
Rademacher weights are the simplest and most commonly used.
Mammen weights match the first three moments of the error distribution.
Webb weights provide better performance with very few clusters.
"""
if weight_type == 'rademacher':
# P(w=1) = P(w=-1) = 0.5
# E[w] = 0, E[w²] = 1
return np.random.choice([-1, 1], size=n_clusters)
elif weight_type == 'mammen':
# Two-point distribution matching first three moments:
# P(w = -(√5-1)/2) = (√5+1)/(2√5)
# P(w = (√5+1)/2) = (√5-1)/(2√5)
# E[w] = 0, E[w²] = 1, E[w³] = 1
sqrt5 = np.sqrt(5)
p = (sqrt5 + 1) / (2 * sqrt5)
w1 = -(sqrt5 - 1) / 2 # ≈ -0.618
w2 = (sqrt5 + 1) / 2 # ≈ 1.618
return np.where(np.random.random(n_clusters) < p, w1, w2)
elif weight_type == 'webb':
# Six-point distribution (Webb 2014)
# Values: ±√(1/2), ±√(2/2), ±√(3/2)
# Each with probability 1/6
# E[w] = 0, E[w²] = 1
values = np.array([
-np.sqrt(3/2), -np.sqrt(2/2), -np.sqrt(1/2),
np.sqrt(1/2), np.sqrt(2/2), np.sqrt(3/2)
])
return np.random.choice(values, size=n_clusters)
else:
raise ValueError(
f"Unknown weight_type: {weight_type}. "
f"Must be one of: 'rademacher', 'mammen', 'webb'"
)
def _generate_all_rademacher_weights(n_clusters: int) -> np.ndarray:
"""
Generate all possible Rademacher weight combinations for full enumeration.
For G clusters, there are 2^G possible combinations of {-1, +1} weights.
Full enumeration eliminates randomness and produces deterministic results
with exact p-values.
Parameters
----------
n_clusters : int
Number of clusters.
Returns
-------
np.ndarray
Array of shape (2^G, G) containing all weight combinations.
Notes
-----
This is only practical for G <= 12 (4096 combinations).
For G > 12, use random sampling instead.
"""
# Generate all {-1, +1}^G combinations.
return np.array(list(product([-1, 1], repeat=n_clusters)))
def _estimate_ols_for_bootstrap(
data: pd.DataFrame,
y_var: str,
d_var: str,
cluster_var: str,
controls: list[str] | None = None,
) -> dict:
"""
Estimate OLS regression for bootstrap procedure.
Parameters
----------
data : pd.DataFrame
Regression data.
y_var : str
Outcome variable name.
d_var : str
Treatment indicator variable name.
cluster_var : str
Clustering variable name.
controls : list[str], optional
Control variable names.
Returns
-------
dict
Dictionary containing:
- att: Treatment effect estimate
- se: Cluster-robust standard error
- residuals: OLS residuals
- fitted: Fitted values
- X: Design matrix
- beta: All coefficient estimates
"""
import statsmodels.api as sm
# Construct design matrix.
y = data[y_var].values
X_vars = [d_var]
if controls:
X_vars.extend(controls)
X = data[X_vars].values
X = sm.add_constant(X)
# OLS estimation.
model = sm.OLS(y, X)
# Cluster-robust standard errors.
cluster_ids = data[cluster_var].values
results = model.fit(cov_type='cluster', cov_kwds={'groups': cluster_ids})
# Extract results.
att = results.params[1] # Treatment effect coefficient.
se = results.bse[1]
residuals = results.resid
fitted = results.fittedvalues
return {
'att': att,
'se': se,
'residuals': residuals,
'fitted': fitted,
'X': X,
'beta': results.params
}
[docs]
def wild_cluster_bootstrap(
data: pd.DataFrame,
y_transformed: str,
d: str,
cluster_var: str,
controls: list[str] | None = None,
n_bootstrap: int = 999,
weight_type: str = 'rademacher',
alpha: float = 0.05,
seed: int | None = None,
impose_null: bool = True,
full_enumeration: bool | None = None,
use_wildboottest: bool = False,
) -> WildClusterBootstrapResult:
"""
Perform wild cluster bootstrap for inference with few clusters.
Implements the wild cluster bootstrap of Cameron, Gelbach, and Miller (2008).
This method provides more reliable inference when the number of clusters
is small (< 20-30).
**Algorithm:**
1. Estimate original model and obtain residuals
2. For each bootstrap replication: (a) Generate cluster-level weights
(Rademacher/Mammen/Webb); (b) Construct bootstrap residuals u*_ic = w_c * u_ic;
(c) Construct bootstrap outcome y*_ic = X_ic * beta_hat + u*_ic (if impose_null)
or y*_ic = y_hat_ic + u*_ic (if not impose_null); (d) Re-estimate model and
compute t-statistic
3. Compute bootstrap p-value and confidence interval
Parameters
----------
data : pd.DataFrame
Regression data.
y_transformed : str
Transformed outcome variable (e.g., after within-transformation).
d : str
Treatment indicator variable.
cluster_var : str
Clustering variable.
controls : list[str], optional
Control variables.
n_bootstrap : int, default 999
Number of bootstrap replications. Should be odd for symmetric
p-value calculation.
weight_type : str, default 'rademacher'
Bootstrap weight distribution:
- 'rademacher': P(w=1) = P(w=-1) = 0.5 (simplest, most common)
- 'mammen': Two-point distribution matching skewness
- 'webb': Six-point distribution (best for very few clusters)
alpha : float, default 0.05
Significance level for confidence interval.
seed : int, optional
Random seed for reproducibility.
impose_null : bool, default True
Whether to impose null hypothesis (H0: τ = 0) when constructing
bootstrap samples. Recommended for hypothesis testing.
full_enumeration : bool, optional
Whether to use full enumeration of all 2^G Rademacher weight
combinations instead of random sampling. If None (default),
automatically enabled when G <= 12 and weight_type='rademacher'.
Full enumeration produces deterministic results and is the most
faithful implementation of the algorithm principle, as it computes
the exact p-value without Monte Carlo error.
use_wildboottest : bool, default False
Whether to use the wildboottest package for fast algorithm.
Requires wildboottest to be installed. When True, uses the
optimized implementation that matches Stata boottest exactly.
Note: wildboottest uses slightly different boundary handling,
resulting in p-values about 0.002 lower than the algorithm
principle definition.
Returns
-------
WildClusterBootstrapResult
Bootstrap results containing point estimate, standard error,
confidence interval, p-value, and diagnostic information.
Notes
-----
The wild cluster bootstrap is particularly useful when:
- Number of clusters G < 30
- Cluster sizes are unbalanced
- Few treated clusters
The method works by:
1. Estimating the original model to get residuals
2. Generating cluster-level random weights
3. Creating bootstrap residuals by multiplying original residuals by weights
4. Re-estimating the model with bootstrap outcomes
5. Computing the bootstrap distribution of t-statistics
When impose_null=True (recommended for hypothesis testing), the bootstrap
outcome is constructed under the null hypothesis that the treatment effect
is zero. This provides better size control.
See Also
--------
diagnose_clustering : Diagnose clustering structure.
recommend_clustering_level : Get recommendation for clustering level.
"""
# Set random seed.
if seed is not None:
np.random.seed(seed)
# Validate inputs.
if y_transformed not in data.columns:
raise ValueError(f"Outcome variable '{y_transformed}' not found in data")
if d not in data.columns:
raise ValueError(f"Treatment variable '{d}' not found in data")
if cluster_var not in data.columns:
raise ValueError(f"Cluster variable '{cluster_var}' not found in data")
if weight_type not in ['rademacher', 'mammen', 'webb']:
raise ValueError(
f"Unknown weight_type: {weight_type}. "
f"Must be one of: 'rademacher', 'mammen', 'webb'"
)
# Get number of clusters.
G = data[cluster_var].nunique()
# Determine whether to use full enumeration.
if full_enumeration is None:
# Auto-enable when G <= 12 and using Rademacher weights.
full_enumeration = (G <= 12 and weight_type == 'rademacher')
# If wildboottest package is requested.
if use_wildboottest:
if not HAS_WILDBOOTTEST:
raise ImportError(
"wildboottest package is not installed. "
"Install it with: pip install wildboottest"
)
return _wild_cluster_bootstrap_wildboottest(
data=data,
y_transformed=y_transformed,
d=d,
cluster_var=cluster_var,
controls=controls,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
alpha=alpha,
seed=seed,
impose_null=impose_null,
full_enumeration=full_enumeration,
)
# Step 1: Estimate original model.
original_result = _estimate_ols_for_bootstrap(
data, y_transformed, d, cluster_var, controls
)
att_original = original_result['att']
se_original = original_result['se']
# Handle degenerate cases where SE is zero or NaN.
if se_original == 0 or np.isnan(se_original):
# Return degenerate result.
return WildClusterBootstrapResult(
att=att_original,
se_bootstrap=np.nan,
ci_lower=np.nan,
ci_upper=np.nan,
pvalue=np.nan,
n_clusters=data[cluster_var].nunique(),
n_bootstrap=n_bootstrap,
weight_type=weight_type,
t_stat_original=np.nan,
t_stats_bootstrap=np.array([]),
rejection_rate=np.nan
)
t_stat_original = att_original / se_original
residuals = original_result['residuals']
fitted = original_result['fitted']
X = original_result['X']
cluster_ids = data[cluster_var].values
unique_clusters = np.unique(cluster_ids)
G = len(unique_clusters)
# Create cluster-to-index mapping.
cluster_to_idx = {c: i for i, c in enumerate(unique_clusters)}
obs_cluster_idx = np.array([cluster_to_idx[c] for c in cluster_ids])
# When impose_null=True, re-estimate under H0 to obtain restricted residuals.
if impose_null:
# Restricted model: y = alpha + epsilon (intercept only, no treatment).
y_values = data[y_transformed].values
X_restricted = np.ones((len(data), 1))
model_restricted = sm.OLS(y_values, X_restricted)
results_restricted = model_restricted.fit()
fitted_restricted = results_restricted.fittedvalues
residuals_restricted = results_restricted.resid
# Step 2: Bootstrap loop.
# Determine whether to use full enumeration or random sampling.
if full_enumeration and weight_type == 'rademacher':
# Full enumeration: generate all 2^G Rademacher weight combinations.
all_weights = _generate_all_rademacher_weights(G)
actual_n_bootstrap = len(all_weights)
use_full_enum = True
else:
actual_n_bootstrap = n_bootstrap
use_full_enum = False
t_stats_bootstrap = np.zeros(actual_n_bootstrap)
att_bootstrap = np.zeros(actual_n_bootstrap)
for b in range(actual_n_bootstrap):
# Generate cluster-level weights.
if use_full_enum:
weights = all_weights[b]
else:
weights = _generate_bootstrap_weights(G, weight_type)
# Map weights to observations.
obs_weights = weights[obs_cluster_idx]
# Construct bootstrap residuals and outcome variable.
if impose_null:
# Use restricted model residuals under the null hypothesis.
# y* = alpha_hat_r + w * epsilon_hat_r
u_star = obs_weights * residuals_restricted
y_star = fitted_restricted + u_star
else:
# When not imposing null, use unrestricted model residuals.
u_star = obs_weights * residuals
y_star = fitted + u_star
# Create bootstrap data.
data_boot = data.copy()
data_boot[y_transformed] = y_star
# Re-estimate and compute t-statistic.
try:
boot_result = _estimate_ols_for_bootstrap(
data_boot, y_transformed, d, cluster_var, controls
)
if boot_result['se'] > 0 and not np.isnan(boot_result['se']):
t_stats_bootstrap[b] = boot_result['att'] / boot_result['se']
att_bootstrap[b] = boot_result['att']
else:
t_stats_bootstrap[b] = np.nan
att_bootstrap[b] = np.nan
except Exception:
t_stats_bootstrap[b] = np.nan
att_bootstrap[b] = np.nan
# Remove NaN values.
valid_mask = ~np.isnan(t_stats_bootstrap)
t_stats_valid = t_stats_bootstrap[valid_mask]
att_valid = att_bootstrap[valid_mask]
if len(t_stats_valid) == 0:
# All bootstrap replications failed.
return WildClusterBootstrapResult(
att=att_original,
se_bootstrap=np.nan,
ci_lower=np.nan,
ci_upper=np.nan,
pvalue=np.nan,
n_clusters=G,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
t_stat_original=t_stat_original,
t_stats_bootstrap=t_stats_bootstrap,
rejection_rate=np.nan
)
# Step 3: Compute bootstrap p-value and confidence interval.
#
# The p-value is computed as: p = P(|t*| >= |t_orig| | H0)
#
# Using >= instead of > ensures that:
# 1. In full enumeration, the all +1 weights combination produces t* = t_orig.
# 2. This combination is a valid realization under the null and should be included.
# 3. Excluding any valid combination would bias the p-value estimate.
pvalue = np.mean(np.abs(t_stats_valid) >= np.abs(t_stat_original))
# Bootstrap SE (standard deviation of bootstrap estimates).
se_bootstrap = np.std(att_valid)
# Confidence interval construction.
# Use symmetric CI based on |t*| distribution:
# CI = [att - t*_crit * se, att + t*_crit * se]
# where t*_crit is the (1-alpha) quantile of |t*|.
if impose_null:
# Symmetric CI based on (1-alpha) quantile of |t*|.
t_abs_crit = np.percentile(np.abs(t_stats_valid), 100 * (1 - alpha))
ci_lower = att_original - t_abs_crit * se_original
ci_upper = att_original + t_abs_crit * se_original
else:
# When not imposing null, use percentile CI.
ci_lower = np.percentile(att_valid, 100 * alpha / 2)
ci_upper = np.percentile(att_valid, 100 * (1 - alpha / 2))
return WildClusterBootstrapResult(
att=att_original,
se_bootstrap=se_bootstrap,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalue=pvalue,
n_clusters=G,
n_bootstrap=actual_n_bootstrap,
weight_type=weight_type,
t_stat_original=t_stat_original,
t_stats_bootstrap=t_stats_bootstrap,
rejection_rate=pvalue,
ci_method='percentile_t'
)
def _wild_cluster_bootstrap_wildboottest(
data: pd.DataFrame,
y_transformed: str,
d: str,
cluster_var: str,
controls: list[str] | None = None,
n_bootstrap: int = 999,
weight_type: str = 'rademacher',
alpha: float = 0.05,
seed: int | None = None,
impose_null: bool = True,
full_enumeration: bool = False,
) -> WildClusterBootstrapResult:
"""
Perform wild cluster bootstrap using the wildboottest package.
This function wraps the wildboottest package to provide an optimized
implementation of the wild cluster bootstrap algorithm.
Parameters
----------
data : pd.DataFrame
Regression data.
y_transformed : str
Outcome variable name.
d : str
Treatment variable name.
cluster_var : str
Clustering variable name.
controls : list[str], optional
Control variables.
n_bootstrap : int, default 999
Number of bootstrap replications (ignored when using full enumeration).
weight_type : str, default 'rademacher'
Type of bootstrap weights.
alpha : float, default 0.05
Significance level.
seed : int, optional
Random seed for reproducibility.
impose_null : bool, default True
Whether to impose the null hypothesis.
full_enumeration : bool, default False
Whether to use full enumeration.
Returns
-------
WildClusterBootstrapResult
Bootstrap results.
"""
if not HAS_WILDBOOTTEST:
raise ImportError("wildboottest package is not installed")
# Construct formula.
if controls:
formula = f"{y_transformed} ~ {d} + {' + '.join(controls)}"
else:
formula = f"{y_transformed} ~ {d}"
# Create OLS model using statsmodels.
# Note: wildboottest requires an unfitted model object.
model = smf.ols(formula, data=data)
# Get number of clusters.
G = data[cluster_var].nunique()
# Determine bootstrap type and number of replications.
if full_enumeration and weight_type == 'rademacher':
# Full enumeration: set B > 2^G to trigger automatic full enumeration.
actual_n_bootstrap = 2 ** G
B_param = actual_n_bootstrap * 2 # Set > 2^G to trigger full enumeration.
else:
actual_n_bootstrap = n_bootstrap
B_param = n_bootstrap
# Map weight type.
weight_map = {
'rademacher': 'rademacher',
'mammen': 'mammen',
'webb': 'webb'
}
# Call wildboottest.
boot_result = _wildboottest(
model=model,
cluster=data[cluster_var],
param=d,
B=B_param,
weights_type=weight_map[weight_type],
impose_null=impose_null,
bootstrap_type='11', # WCR11 is the default type.
seed=seed,
)
# Fit model to obtain coefficients.
results = model.fit()
# Extract results - wildboottest returns a DataFrame.
att = results.params[d]
se_original = results.bse[d]
# Extract p-value and t-statistic from DataFrame.
if isinstance(boot_result, pd.DataFrame):
pvalue = boot_result['p-value'].values[0]
t_stat = boot_result['statistic'].values[0]
else:
# Fallback for older versions.
t_stat = boot_result.t_stat if hasattr(boot_result, 't_stat') else att / se_original
pvalue = boot_result.pvalue if hasattr(boot_result, 'pvalue') else np.nan
# Confidence interval using t-distribution approximation.
from scipy import stats as scipy_stats
t_crit = scipy_stats.t.ppf(1 - alpha/2, G - 1)
ci_lower = att - t_crit * se_original
ci_upper = att + t_crit * se_original
# Bootstrap SE.
se_bootstrap = se_original
return WildClusterBootstrapResult(
att=att,
se_bootstrap=se_bootstrap,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalue=pvalue,
n_clusters=G,
n_bootstrap=actual_n_bootstrap,
weight_type=weight_type,
t_stat_original=t_stat,
t_stats_bootstrap=np.array([]), # wildboottest does not return full distribution.
rejection_rate=pvalue,
ci_method='wildboottest'
)
def _compute_bootstrap_pvalue_at_null(
data: pd.DataFrame,
y_var: str,
d_var: str,
cluster_var: str,
null_value: float,
att_original: float,
se_original: float,
n_bootstrap: int,
weight_type: str,
controls: list[str] | None = None,
seed: int | None = None,
) -> float:
"""
Compute bootstrap p-value at a given null hypothesis value.
Internal function used for test inversion confidence intervals.
Parameters
----------
data : pd.DataFrame
Regression data.
y_var : str
Outcome variable name.
d_var : str
Treatment variable name.
cluster_var : str
Clustering variable name.
null_value : float
Null hypothesis value theta (H0: beta = theta).
att_original : float
Original ATT estimate.
se_original : float
Original cluster-robust standard error.
n_bootstrap : int
Number of bootstrap replications.
weight_type : str
Type of bootstrap weights.
controls : list[str], optional
Control variables.
seed : int, optional
Random seed for reproducibility.
Returns
-------
float
Bootstrap p-value.
"""
if seed is not None:
np.random.seed(seed)
# Original t-statistic (relative to null_value).
t_original = (att_original - null_value) / se_original
# Cluster information.
cluster_ids = data[cluster_var].values
unique_clusters = np.unique(cluster_ids)
G = len(unique_clusters)
cluster_to_idx = {c: i for i, c in enumerate(unique_clusters)}
obs_cluster_idx = np.array([cluster_to_idx[c] for c in cluster_ids])
# Restricted model: y - theta*d = alpha + epsilon.
y_restricted = data[y_var].values - null_value * data[d_var].values
X_r = np.ones((len(data), 1))
model_r = sm.OLS(y_restricted, X_r)
results_r = model_r.fit()
fitted_r = results_r.fittedvalues
residuals_r = results_r.resid
t_stats = []
for b in range(n_bootstrap):
# Generate weights.
weights = _generate_bootstrap_weights(G, weight_type)
obs_weights = weights[obs_cluster_idx]
# Bootstrap sample: y* = alpha_hat + theta*d + w*epsilon_hat.
u_star = obs_weights * residuals_r
y_star = fitted_r + null_value * data[d_var].values + u_star
# Re-estimate.
data_boot = data.copy()
data_boot[y_var] = y_star
try:
boot_result = _estimate_ols_for_bootstrap(
data_boot, y_var, d_var, cluster_var, controls
)
if boot_result['se'] > 0 and not np.isnan(boot_result['se']):
# t* = (beta* - theta) / se*
t_stats.append((boot_result['att'] - null_value) / boot_result['se'])
except Exception:
pass
if len(t_stats) == 0:
return np.nan
t_stats = np.array(t_stats)
# Two-sided p-value.
pvalue = np.mean(np.abs(t_stats) >= np.abs(t_original))
return pvalue
[docs]
def wild_cluster_bootstrap_test_inversion(
data: pd.DataFrame,
y_transformed: str,
d: str,
cluster_var: str,
controls: list[str] | None = None,
n_bootstrap: int = 999,
weight_type: str = 'rademacher',
alpha: float = 0.05,
seed: int | None = None,
grid_points: int = 25,
ci_tol: float = 0.01,
) -> WildClusterBootstrapResult:
"""
Compute wild cluster bootstrap confidence interval using test inversion.
This method constructs confidence intervals by inverting hypothesis tests:
CI = {theta : p(theta) >= alpha}
That is, the CI consists of all null hypothesis values for which the
bootstrap p-value exceeds the significance level.
Parameters
----------
data : pd.DataFrame
Regression data.
y_transformed : str
Transformed outcome variable.
d : str
Treatment variable.
cluster_var : str
Clustering variable.
controls : list[str], optional
Control variables.
n_bootstrap : int, default 999
Number of bootstrap replications.
weight_type : str, default 'rademacher'
Type of bootstrap weights.
alpha : float, default 0.05
Significance level.
seed : int, optional
Random seed for reproducibility.
grid_points : int, default 25
Number of grid points for initial search.
ci_tol : float, default 0.01
Tolerance for CI boundary precision.
Returns
-------
WildClusterBootstrapResult
Results containing test inversion CI.
Notes
-----
Advantages of test inversion CI:
1. Can be more accurate than percentile-t CI in some settings.
2. Handles asymmetric distributions appropriately.
Disadvantages:
1. More computationally intensive (requires multiple bootstrap runs).
2. Requires numerical optimization to find boundaries.
"""
# Set random seed.
rng_state = np.random.get_state() if seed is None else None
if seed is not None:
np.random.seed(seed)
# Validate inputs.
if y_transformed not in data.columns:
raise ValueError(f"Outcome variable '{y_transformed}' not found in data")
if d not in data.columns:
raise ValueError(f"Treatment variable '{d}' not found in data")
if cluster_var not in data.columns:
raise ValueError(f"Cluster variable '{cluster_var}' not found in data")
# Step 1: Estimate original model.
original_result = _estimate_ols_for_bootstrap(
data, y_transformed, d, cluster_var, controls
)
att_original = original_result['att']
se_original = original_result['se']
if se_original == 0 or np.isnan(se_original):
return WildClusterBootstrapResult(
att=att_original,
se_bootstrap=np.nan,
ci_lower=np.nan,
ci_upper=np.nan,
pvalue=np.nan,
n_clusters=data[cluster_var].nunique(),
n_bootstrap=n_bootstrap,
weight_type=weight_type,
t_stat_original=np.nan,
t_stats_bootstrap=np.array([]),
rejection_rate=np.nan,
ci_method='test_inversion'
)
t_stat_original = att_original / se_original
G = data[cluster_var].nunique()
# Step 2: Compute p-value at theta=0 (for reporting).
pvalue_at_zero = _compute_bootstrap_pvalue_at_null(
data, y_transformed, d, cluster_var,
null_value=0,
att_original=att_original,
se_original=se_original,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
controls=controls,
seed=seed
)
# Step 3: Find CI boundaries using test inversion.
# First perform coarse grid search.
# Search range: ATT +/- 4 * SE.
search_range = 4 * se_original
theta_min = att_original - search_range
theta_max = att_original + search_range
theta_grid = np.linspace(theta_min, theta_max, grid_points)
pvalues_grid = []
for theta in theta_grid:
pval = _compute_bootstrap_pvalue_at_null(
data, y_transformed, d, cluster_var,
null_value=theta,
att_original=att_original,
se_original=se_original,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
controls=controls,
seed=seed
)
pvalues_grid.append(pval)
pvalues_grid = np.array(pvalues_grid)
# Find region where p >= alpha.
ci_mask = pvalues_grid >= alpha
if not np.any(ci_mask):
# No region found with p >= alpha, use percentile-t as fallback.
ci_lower = att_original - 1.96 * se_original
ci_upper = att_original + 1.96 * se_original
else:
# Coarse boundaries.
ci_thetas = theta_grid[ci_mask]
ci_lower_approx = ci_thetas.min()
ci_upper_approx = ci_thetas.max()
# Use bisection to precisely locate boundaries.
def pvalue_minus_alpha(theta):
pval = _compute_bootstrap_pvalue_at_null(
data, y_transformed, d, cluster_var,
null_value=theta,
att_original=att_original,
se_original=se_original,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
controls=controls,
seed=seed
)
return pval - alpha
# Find lower bound.
try:
# Search for p < alpha point to the left of ATT.
lower_search_min = theta_min
lower_search_max = att_original
ci_lower = brentq(pvalue_minus_alpha, lower_search_min, lower_search_max, xtol=ci_tol)
except ValueError:
ci_lower = ci_lower_approx
# Find upper bound.
try:
# Search for p < alpha point to the right of ATT.
upper_search_min = att_original
upper_search_max = theta_max
ci_upper = brentq(pvalue_minus_alpha, upper_search_min, upper_search_max, xtol=ci_tol)
except ValueError:
ci_upper = ci_upper_approx
# Compute bootstrap SE (simplified using CI width estimate).
se_bootstrap = (ci_upper - ci_lower) / (2 * 1.96)
# Restore random state.
if rng_state is not None:
np.random.set_state(rng_state)
return WildClusterBootstrapResult(
att=att_original,
se_bootstrap=se_bootstrap,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalue=pvalue_at_zero,
n_clusters=G,
n_bootstrap=n_bootstrap,
weight_type=weight_type,
t_stat_original=t_stat_original,
t_stats_bootstrap=np.array([]), # Test inversion does not save t distribution.
rejection_rate=pvalue_at_zero,
ci_method='test_inversion'
)