Source code for lwdid.inference.wild_bootstrap

"""
Wild cluster bootstrap for inference with few clusters.

This module implements the wild cluster bootstrap method for more reliable
inference when the number of clusters is small. The method is particularly
useful in difference-in-differences settings where standard cluster-robust
standard errors may perform poorly.

The wild cluster bootstrap is recommended when:

- Number of clusters G < 30
- Cluster sizes are unbalanced
- Few treated clusters

Key features:

- Full enumeration mode for exact p-values when G <= 12
- Optional integration with wildboottest package for optimized computation
- Multiple weight distributions: Rademacher, Mammen, Webb

Notes
-----
The Rademacher weights are the simplest and most commonly used. Mammen weights
match the first three moments of the error distribution. Webb weights provide
better performance with very few clusters (G < 10).
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Literal
import warnings
from itertools import product

import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import brentq
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Check if wildboottest package is available.
try:
    from wildboottest.wildboottest import wildboottest as _wildboottest
    HAS_WILDBOOTTEST = True
except ImportError:
    HAS_WILDBOOTTEST = False
    _wildboottest = None


[docs] @dataclass class WildClusterBootstrapResult: """ Result of wild cluster bootstrap inference. This class contains the results of wild cluster bootstrap, which provides more reliable inference when the number of clusters is small. Attributes ---------- att : float Point estimate of ATT. se_bootstrap : float Bootstrap standard error. ci_lower : float Lower bound of bootstrap confidence interval. ci_upper : float Upper bound of bootstrap confidence interval. pvalue : float Bootstrap p-value (two-sided). n_clusters : int Number of clusters. n_bootstrap : int Number of bootstrap replications. weight_type : str Type of bootstrap weights used. t_stat_original : float Original t-statistic. t_stats_bootstrap : np.ndarray Bootstrap t-statistics. rejection_rate : float Proportion of bootstrap t-stats exceeding original. ci_method : str Method used for CI construction ('percentile_t' or 'test_inversion'). """ att: float se_bootstrap: float ci_lower: float ci_upper: float pvalue: float n_clusters: int n_bootstrap: int weight_type: str t_stat_original: float t_stats_bootstrap: np.ndarray rejection_rate: float ci_method: str = 'percentile_t'
[docs] def summary(self) -> str: """ Generate human-readable summary of bootstrap results. Returns ------- str Formatted summary string. """ sig = "***" if self.pvalue < 0.01 else "**" if self.pvalue < 0.05 else "*" if self.pvalue < 0.1 else "" return ( f"Wild Cluster Bootstrap Results\n" f"{'='*50}\n" f"ATT: {self.att:.4f} {sig}\n" f"Bootstrap SE: {self.se_bootstrap:.4f}\n" f"95% CI: [{self.ci_lower:.4f}, {self.ci_upper:.4f}]\n" f"P-value: {self.pvalue:.4f}\n" f"N clusters: {self.n_clusters}\n" f"N bootstrap: {self.n_bootstrap}\n" f"Weight type: {self.weight_type}\n" f"{'='*50}" )
def _generate_bootstrap_weights(n_clusters: int, weight_type: str) -> np.ndarray: """ Generate bootstrap weights for each cluster. Parameters ---------- n_clusters : int Number of clusters. weight_type : str Type of bootstrap weights: - 'rademacher': P(w=1) = P(w=-1) = 0.5 - 'mammen': Two-point distribution matching skewness - 'webb': Six-point distribution (Webb 2014) Returns ------- np.ndarray Array of bootstrap weights, one per cluster. Notes ----- Rademacher weights are the simplest and most commonly used. Mammen weights match the first three moments of the error distribution. Webb weights provide better performance with very few clusters. """ if weight_type == 'rademacher': # P(w=1) = P(w=-1) = 0.5 # E[w] = 0, E[w²] = 1 return np.random.choice([-1, 1], size=n_clusters) elif weight_type == 'mammen': # Two-point distribution matching first three moments: # P(w = -(√5-1)/2) = (√5+1)/(2√5) # P(w = (√5+1)/2) = (√5-1)/(2√5) # E[w] = 0, E[w²] = 1, E[w³] = 1 sqrt5 = np.sqrt(5) p = (sqrt5 + 1) / (2 * sqrt5) w1 = -(sqrt5 - 1) / 2 # ≈ -0.618 w2 = (sqrt5 + 1) / 2 # ≈ 1.618 return np.where(np.random.random(n_clusters) < p, w1, w2) elif weight_type == 'webb': # Six-point distribution (Webb 2014) # Values: ±√(1/2), ±√(2/2), ±√(3/2) # Each with probability 1/6 # E[w] = 0, E[w²] = 1 values = np.array([ -np.sqrt(3/2), -np.sqrt(2/2), -np.sqrt(1/2), np.sqrt(1/2), np.sqrt(2/2), np.sqrt(3/2) ]) return np.random.choice(values, size=n_clusters) else: raise ValueError( f"Unknown weight_type: {weight_type}. " f"Must be one of: 'rademacher', 'mammen', 'webb'" ) def _generate_all_rademacher_weights(n_clusters: int) -> np.ndarray: """ Generate all possible Rademacher weight combinations for full enumeration. For G clusters, there are 2^G possible combinations of {-1, +1} weights. Full enumeration eliminates randomness and produces deterministic results with exact p-values. Parameters ---------- n_clusters : int Number of clusters. Returns ------- np.ndarray Array of shape (2^G, G) containing all weight combinations. Notes ----- This is only practical for G <= 12 (4096 combinations). For G > 12, use random sampling instead. """ # Generate all {-1, +1}^G combinations. return np.array(list(product([-1, 1], repeat=n_clusters))) def _estimate_ols_for_bootstrap( data: pd.DataFrame, y_var: str, d_var: str, cluster_var: str, controls: list[str] | None = None, ) -> dict: """ Estimate OLS regression for bootstrap procedure. Parameters ---------- data : pd.DataFrame Regression data. y_var : str Outcome variable name. d_var : str Treatment indicator variable name. cluster_var : str Clustering variable name. controls : list[str], optional Control variable names. Returns ------- dict Dictionary containing: - att: Treatment effect estimate - se: Cluster-robust standard error - residuals: OLS residuals - fitted: Fitted values - X: Design matrix - beta: All coefficient estimates """ import statsmodels.api as sm # Construct design matrix. y = data[y_var].values X_vars = [d_var] if controls: X_vars.extend(controls) X = data[X_vars].values X = sm.add_constant(X) # OLS estimation. model = sm.OLS(y, X) # Cluster-robust standard errors. cluster_ids = data[cluster_var].values results = model.fit(cov_type='cluster', cov_kwds={'groups': cluster_ids}) # Extract results. att = results.params[1] # Treatment effect coefficient. se = results.bse[1] residuals = results.resid fitted = results.fittedvalues return { 'att': att, 'se': se, 'residuals': residuals, 'fitted': fitted, 'X': X, 'beta': results.params }
[docs] def wild_cluster_bootstrap( data: pd.DataFrame, y_transformed: str, d: str, cluster_var: str, controls: list[str] | None = None, n_bootstrap: int = 999, weight_type: str = 'rademacher', alpha: float = 0.05, seed: int | None = None, impose_null: bool = True, full_enumeration: bool | None = None, use_wildboottest: bool = False, ) -> WildClusterBootstrapResult: """ Perform wild cluster bootstrap for inference with few clusters. Implements the wild cluster bootstrap of Cameron, Gelbach, and Miller (2008). This method provides more reliable inference when the number of clusters is small (< 20-30). **Algorithm:** 1. Estimate original model and obtain residuals 2. For each bootstrap replication: (a) Generate cluster-level weights (Rademacher/Mammen/Webb); (b) Construct bootstrap residuals u*_ic = w_c * u_ic; (c) Construct bootstrap outcome y*_ic = X_ic * beta_hat + u*_ic (if impose_null) or y*_ic = y_hat_ic + u*_ic (if not impose_null); (d) Re-estimate model and compute t-statistic 3. Compute bootstrap p-value and confidence interval Parameters ---------- data : pd.DataFrame Regression data. y_transformed : str Transformed outcome variable (e.g., after within-transformation). d : str Treatment indicator variable. cluster_var : str Clustering variable. controls : list[str], optional Control variables. n_bootstrap : int, default 999 Number of bootstrap replications. Should be odd for symmetric p-value calculation. weight_type : str, default 'rademacher' Bootstrap weight distribution: - 'rademacher': P(w=1) = P(w=-1) = 0.5 (simplest, most common) - 'mammen': Two-point distribution matching skewness - 'webb': Six-point distribution (best for very few clusters) alpha : float, default 0.05 Significance level for confidence interval. seed : int, optional Random seed for reproducibility. impose_null : bool, default True Whether to impose null hypothesis (H0: τ = 0) when constructing bootstrap samples. Recommended for hypothesis testing. full_enumeration : bool, optional Whether to use full enumeration of all 2^G Rademacher weight combinations instead of random sampling. If None (default), automatically enabled when G <= 12 and weight_type='rademacher'. Full enumeration produces deterministic results and is the most faithful implementation of the algorithm principle, as it computes the exact p-value without Monte Carlo error. use_wildboottest : bool, default False Whether to use the wildboottest package for fast algorithm. Requires wildboottest to be installed. When True, uses the optimized implementation that matches Stata boottest exactly. Note: wildboottest uses slightly different boundary handling, resulting in p-values about 0.002 lower than the algorithm principle definition. Returns ------- WildClusterBootstrapResult Bootstrap results containing point estimate, standard error, confidence interval, p-value, and diagnostic information. Notes ----- The wild cluster bootstrap is particularly useful when: - Number of clusters G < 30 - Cluster sizes are unbalanced - Few treated clusters The method works by: 1. Estimating the original model to get residuals 2. Generating cluster-level random weights 3. Creating bootstrap residuals by multiplying original residuals by weights 4. Re-estimating the model with bootstrap outcomes 5. Computing the bootstrap distribution of t-statistics When impose_null=True (recommended for hypothesis testing), the bootstrap outcome is constructed under the null hypothesis that the treatment effect is zero. This provides better size control. See Also -------- diagnose_clustering : Diagnose clustering structure. recommend_clustering_level : Get recommendation for clustering level. """ # Set random seed. if seed is not None: np.random.seed(seed) # Validate inputs. if y_transformed not in data.columns: raise ValueError(f"Outcome variable '{y_transformed}' not found in data") if d not in data.columns: raise ValueError(f"Treatment variable '{d}' not found in data") if cluster_var not in data.columns: raise ValueError(f"Cluster variable '{cluster_var}' not found in data") if weight_type not in ['rademacher', 'mammen', 'webb']: raise ValueError( f"Unknown weight_type: {weight_type}. " f"Must be one of: 'rademacher', 'mammen', 'webb'" ) # Get number of clusters. G = data[cluster_var].nunique() # Determine whether to use full enumeration. if full_enumeration is None: # Auto-enable when G <= 12 and using Rademacher weights. full_enumeration = (G <= 12 and weight_type == 'rademacher') # If wildboottest package is requested. if use_wildboottest: if not HAS_WILDBOOTTEST: raise ImportError( "wildboottest package is not installed. " "Install it with: pip install wildboottest" ) return _wild_cluster_bootstrap_wildboottest( data=data, y_transformed=y_transformed, d=d, cluster_var=cluster_var, controls=controls, n_bootstrap=n_bootstrap, weight_type=weight_type, alpha=alpha, seed=seed, impose_null=impose_null, full_enumeration=full_enumeration, ) # Step 1: Estimate original model. original_result = _estimate_ols_for_bootstrap( data, y_transformed, d, cluster_var, controls ) att_original = original_result['att'] se_original = original_result['se'] # Handle degenerate cases where SE is zero or NaN. if se_original == 0 or np.isnan(se_original): # Return degenerate result. return WildClusterBootstrapResult( att=att_original, se_bootstrap=np.nan, ci_lower=np.nan, ci_upper=np.nan, pvalue=np.nan, n_clusters=data[cluster_var].nunique(), n_bootstrap=n_bootstrap, weight_type=weight_type, t_stat_original=np.nan, t_stats_bootstrap=np.array([]), rejection_rate=np.nan ) t_stat_original = att_original / se_original residuals = original_result['residuals'] fitted = original_result['fitted'] X = original_result['X'] cluster_ids = data[cluster_var].values unique_clusters = np.unique(cluster_ids) G = len(unique_clusters) # Create cluster-to-index mapping. cluster_to_idx = {c: i for i, c in enumerate(unique_clusters)} obs_cluster_idx = np.array([cluster_to_idx[c] for c in cluster_ids]) # When impose_null=True, re-estimate under H0 to obtain restricted residuals. if impose_null: # Restricted model: y = alpha + epsilon (intercept only, no treatment). y_values = data[y_transformed].values X_restricted = np.ones((len(data), 1)) model_restricted = sm.OLS(y_values, X_restricted) results_restricted = model_restricted.fit() fitted_restricted = results_restricted.fittedvalues residuals_restricted = results_restricted.resid # Step 2: Bootstrap loop. # Determine whether to use full enumeration or random sampling. if full_enumeration and weight_type == 'rademacher': # Full enumeration: generate all 2^G Rademacher weight combinations. all_weights = _generate_all_rademacher_weights(G) actual_n_bootstrap = len(all_weights) use_full_enum = True else: actual_n_bootstrap = n_bootstrap use_full_enum = False t_stats_bootstrap = np.zeros(actual_n_bootstrap) att_bootstrap = np.zeros(actual_n_bootstrap) for b in range(actual_n_bootstrap): # Generate cluster-level weights. if use_full_enum: weights = all_weights[b] else: weights = _generate_bootstrap_weights(G, weight_type) # Map weights to observations. obs_weights = weights[obs_cluster_idx] # Construct bootstrap residuals and outcome variable. if impose_null: # Use restricted model residuals under the null hypothesis. # y* = alpha_hat_r + w * epsilon_hat_r u_star = obs_weights * residuals_restricted y_star = fitted_restricted + u_star else: # When not imposing null, use unrestricted model residuals. u_star = obs_weights * residuals y_star = fitted + u_star # Create bootstrap data. data_boot = data.copy() data_boot[y_transformed] = y_star # Re-estimate and compute t-statistic. try: boot_result = _estimate_ols_for_bootstrap( data_boot, y_transformed, d, cluster_var, controls ) if boot_result['se'] > 0 and not np.isnan(boot_result['se']): t_stats_bootstrap[b] = boot_result['att'] / boot_result['se'] att_bootstrap[b] = boot_result['att'] else: t_stats_bootstrap[b] = np.nan att_bootstrap[b] = np.nan except Exception: t_stats_bootstrap[b] = np.nan att_bootstrap[b] = np.nan # Remove NaN values. valid_mask = ~np.isnan(t_stats_bootstrap) t_stats_valid = t_stats_bootstrap[valid_mask] att_valid = att_bootstrap[valid_mask] if len(t_stats_valid) == 0: # All bootstrap replications failed. return WildClusterBootstrapResult( att=att_original, se_bootstrap=np.nan, ci_lower=np.nan, ci_upper=np.nan, pvalue=np.nan, n_clusters=G, n_bootstrap=n_bootstrap, weight_type=weight_type, t_stat_original=t_stat_original, t_stats_bootstrap=t_stats_bootstrap, rejection_rate=np.nan ) # Step 3: Compute bootstrap p-value and confidence interval. # # The p-value is computed as: p = P(|t*| >= |t_orig| | H0) # # Using >= instead of > ensures that: # 1. In full enumeration, the all +1 weights combination produces t* = t_orig. # 2. This combination is a valid realization under the null and should be included. # 3. Excluding any valid combination would bias the p-value estimate. pvalue = np.mean(np.abs(t_stats_valid) >= np.abs(t_stat_original)) # Bootstrap SE (standard deviation of bootstrap estimates). se_bootstrap = np.std(att_valid) # Confidence interval construction. # Use symmetric CI based on |t*| distribution: # CI = [att - t*_crit * se, att + t*_crit * se] # where t*_crit is the (1-alpha) quantile of |t*|. if impose_null: # Symmetric CI based on (1-alpha) quantile of |t*|. t_abs_crit = np.percentile(np.abs(t_stats_valid), 100 * (1 - alpha)) ci_lower = att_original - t_abs_crit * se_original ci_upper = att_original + t_abs_crit * se_original else: # When not imposing null, use percentile CI. ci_lower = np.percentile(att_valid, 100 * alpha / 2) ci_upper = np.percentile(att_valid, 100 * (1 - alpha / 2)) return WildClusterBootstrapResult( att=att_original, se_bootstrap=se_bootstrap, ci_lower=ci_lower, ci_upper=ci_upper, pvalue=pvalue, n_clusters=G, n_bootstrap=actual_n_bootstrap, weight_type=weight_type, t_stat_original=t_stat_original, t_stats_bootstrap=t_stats_bootstrap, rejection_rate=pvalue, ci_method='percentile_t' )
def _wild_cluster_bootstrap_wildboottest( data: pd.DataFrame, y_transformed: str, d: str, cluster_var: str, controls: list[str] | None = None, n_bootstrap: int = 999, weight_type: str = 'rademacher', alpha: float = 0.05, seed: int | None = None, impose_null: bool = True, full_enumeration: bool = False, ) -> WildClusterBootstrapResult: """ Perform wild cluster bootstrap using the wildboottest package. This function wraps the wildboottest package to provide an optimized implementation of the wild cluster bootstrap algorithm. Parameters ---------- data : pd.DataFrame Regression data. y_transformed : str Outcome variable name. d : str Treatment variable name. cluster_var : str Clustering variable name. controls : list[str], optional Control variables. n_bootstrap : int, default 999 Number of bootstrap replications (ignored when using full enumeration). weight_type : str, default 'rademacher' Type of bootstrap weights. alpha : float, default 0.05 Significance level. seed : int, optional Random seed for reproducibility. impose_null : bool, default True Whether to impose the null hypothesis. full_enumeration : bool, default False Whether to use full enumeration. Returns ------- WildClusterBootstrapResult Bootstrap results. """ if not HAS_WILDBOOTTEST: raise ImportError("wildboottest package is not installed") # Construct formula. if controls: formula = f"{y_transformed} ~ {d} + {' + '.join(controls)}" else: formula = f"{y_transformed} ~ {d}" # Create OLS model using statsmodels. # Note: wildboottest requires an unfitted model object. model = smf.ols(formula, data=data) # Get number of clusters. G = data[cluster_var].nunique() # Determine bootstrap type and number of replications. if full_enumeration and weight_type == 'rademacher': # Full enumeration: set B > 2^G to trigger automatic full enumeration. actual_n_bootstrap = 2 ** G B_param = actual_n_bootstrap * 2 # Set > 2^G to trigger full enumeration. else: actual_n_bootstrap = n_bootstrap B_param = n_bootstrap # Map weight type. weight_map = { 'rademacher': 'rademacher', 'mammen': 'mammen', 'webb': 'webb' } # Call wildboottest. boot_result = _wildboottest( model=model, cluster=data[cluster_var], param=d, B=B_param, weights_type=weight_map[weight_type], impose_null=impose_null, bootstrap_type='11', # WCR11 is the default type. seed=seed, ) # Fit model to obtain coefficients. results = model.fit() # Extract results - wildboottest returns a DataFrame. att = results.params[d] se_original = results.bse[d] # Extract p-value and t-statistic from DataFrame. if isinstance(boot_result, pd.DataFrame): pvalue = boot_result['p-value'].values[0] t_stat = boot_result['statistic'].values[0] else: # Fallback for older versions. t_stat = boot_result.t_stat if hasattr(boot_result, 't_stat') else att / se_original pvalue = boot_result.pvalue if hasattr(boot_result, 'pvalue') else np.nan # Confidence interval using t-distribution approximation. from scipy import stats as scipy_stats t_crit = scipy_stats.t.ppf(1 - alpha/2, G - 1) ci_lower = att - t_crit * se_original ci_upper = att + t_crit * se_original # Bootstrap SE. se_bootstrap = se_original return WildClusterBootstrapResult( att=att, se_bootstrap=se_bootstrap, ci_lower=ci_lower, ci_upper=ci_upper, pvalue=pvalue, n_clusters=G, n_bootstrap=actual_n_bootstrap, weight_type=weight_type, t_stat_original=t_stat, t_stats_bootstrap=np.array([]), # wildboottest does not return full distribution. rejection_rate=pvalue, ci_method='wildboottest' ) def _compute_bootstrap_pvalue_at_null( data: pd.DataFrame, y_var: str, d_var: str, cluster_var: str, null_value: float, att_original: float, se_original: float, n_bootstrap: int, weight_type: str, controls: list[str] | None = None, seed: int | None = None, ) -> float: """ Compute bootstrap p-value at a given null hypothesis value. Internal function used for test inversion confidence intervals. Parameters ---------- data : pd.DataFrame Regression data. y_var : str Outcome variable name. d_var : str Treatment variable name. cluster_var : str Clustering variable name. null_value : float Null hypothesis value theta (H0: beta = theta). att_original : float Original ATT estimate. se_original : float Original cluster-robust standard error. n_bootstrap : int Number of bootstrap replications. weight_type : str Type of bootstrap weights. controls : list[str], optional Control variables. seed : int, optional Random seed for reproducibility. Returns ------- float Bootstrap p-value. """ if seed is not None: np.random.seed(seed) # Original t-statistic (relative to null_value). t_original = (att_original - null_value) / se_original # Cluster information. cluster_ids = data[cluster_var].values unique_clusters = np.unique(cluster_ids) G = len(unique_clusters) cluster_to_idx = {c: i for i, c in enumerate(unique_clusters)} obs_cluster_idx = np.array([cluster_to_idx[c] for c in cluster_ids]) # Restricted model: y - theta*d = alpha + epsilon. y_restricted = data[y_var].values - null_value * data[d_var].values X_r = np.ones((len(data), 1)) model_r = sm.OLS(y_restricted, X_r) results_r = model_r.fit() fitted_r = results_r.fittedvalues residuals_r = results_r.resid t_stats = [] for b in range(n_bootstrap): # Generate weights. weights = _generate_bootstrap_weights(G, weight_type) obs_weights = weights[obs_cluster_idx] # Bootstrap sample: y* = alpha_hat + theta*d + w*epsilon_hat. u_star = obs_weights * residuals_r y_star = fitted_r + null_value * data[d_var].values + u_star # Re-estimate. data_boot = data.copy() data_boot[y_var] = y_star try: boot_result = _estimate_ols_for_bootstrap( data_boot, y_var, d_var, cluster_var, controls ) if boot_result['se'] > 0 and not np.isnan(boot_result['se']): # t* = (beta* - theta) / se* t_stats.append((boot_result['att'] - null_value) / boot_result['se']) except Exception: pass if len(t_stats) == 0: return np.nan t_stats = np.array(t_stats) # Two-sided p-value. pvalue = np.mean(np.abs(t_stats) >= np.abs(t_original)) return pvalue
[docs] def wild_cluster_bootstrap_test_inversion( data: pd.DataFrame, y_transformed: str, d: str, cluster_var: str, controls: list[str] | None = None, n_bootstrap: int = 999, weight_type: str = 'rademacher', alpha: float = 0.05, seed: int | None = None, grid_points: int = 25, ci_tol: float = 0.01, ) -> WildClusterBootstrapResult: """ Compute wild cluster bootstrap confidence interval using test inversion. This method constructs confidence intervals by inverting hypothesis tests: CI = {theta : p(theta) >= alpha} That is, the CI consists of all null hypothesis values for which the bootstrap p-value exceeds the significance level. Parameters ---------- data : pd.DataFrame Regression data. y_transformed : str Transformed outcome variable. d : str Treatment variable. cluster_var : str Clustering variable. controls : list[str], optional Control variables. n_bootstrap : int, default 999 Number of bootstrap replications. weight_type : str, default 'rademacher' Type of bootstrap weights. alpha : float, default 0.05 Significance level. seed : int, optional Random seed for reproducibility. grid_points : int, default 25 Number of grid points for initial search. ci_tol : float, default 0.01 Tolerance for CI boundary precision. Returns ------- WildClusterBootstrapResult Results containing test inversion CI. Notes ----- Advantages of test inversion CI: 1. Can be more accurate than percentile-t CI in some settings. 2. Handles asymmetric distributions appropriately. Disadvantages: 1. More computationally intensive (requires multiple bootstrap runs). 2. Requires numerical optimization to find boundaries. """ # Set random seed. rng_state = np.random.get_state() if seed is None else None if seed is not None: np.random.seed(seed) # Validate inputs. if y_transformed not in data.columns: raise ValueError(f"Outcome variable '{y_transformed}' not found in data") if d not in data.columns: raise ValueError(f"Treatment variable '{d}' not found in data") if cluster_var not in data.columns: raise ValueError(f"Cluster variable '{cluster_var}' not found in data") # Step 1: Estimate original model. original_result = _estimate_ols_for_bootstrap( data, y_transformed, d, cluster_var, controls ) att_original = original_result['att'] se_original = original_result['se'] if se_original == 0 or np.isnan(se_original): return WildClusterBootstrapResult( att=att_original, se_bootstrap=np.nan, ci_lower=np.nan, ci_upper=np.nan, pvalue=np.nan, n_clusters=data[cluster_var].nunique(), n_bootstrap=n_bootstrap, weight_type=weight_type, t_stat_original=np.nan, t_stats_bootstrap=np.array([]), rejection_rate=np.nan, ci_method='test_inversion' ) t_stat_original = att_original / se_original G = data[cluster_var].nunique() # Step 2: Compute p-value at theta=0 (for reporting). pvalue_at_zero = _compute_bootstrap_pvalue_at_null( data, y_transformed, d, cluster_var, null_value=0, att_original=att_original, se_original=se_original, n_bootstrap=n_bootstrap, weight_type=weight_type, controls=controls, seed=seed ) # Step 3: Find CI boundaries using test inversion. # First perform coarse grid search. # Search range: ATT +/- 4 * SE. search_range = 4 * se_original theta_min = att_original - search_range theta_max = att_original + search_range theta_grid = np.linspace(theta_min, theta_max, grid_points) pvalues_grid = [] for theta in theta_grid: pval = _compute_bootstrap_pvalue_at_null( data, y_transformed, d, cluster_var, null_value=theta, att_original=att_original, se_original=se_original, n_bootstrap=n_bootstrap, weight_type=weight_type, controls=controls, seed=seed ) pvalues_grid.append(pval) pvalues_grid = np.array(pvalues_grid) # Find region where p >= alpha. ci_mask = pvalues_grid >= alpha if not np.any(ci_mask): # No region found with p >= alpha, use percentile-t as fallback. ci_lower = att_original - 1.96 * se_original ci_upper = att_original + 1.96 * se_original else: # Coarse boundaries. ci_thetas = theta_grid[ci_mask] ci_lower_approx = ci_thetas.min() ci_upper_approx = ci_thetas.max() # Use bisection to precisely locate boundaries. def pvalue_minus_alpha(theta): pval = _compute_bootstrap_pvalue_at_null( data, y_transformed, d, cluster_var, null_value=theta, att_original=att_original, se_original=se_original, n_bootstrap=n_bootstrap, weight_type=weight_type, controls=controls, seed=seed ) return pval - alpha # Find lower bound. try: # Search for p < alpha point to the left of ATT. lower_search_min = theta_min lower_search_max = att_original ci_lower = brentq(pvalue_minus_alpha, lower_search_min, lower_search_max, xtol=ci_tol) except ValueError: ci_lower = ci_lower_approx # Find upper bound. try: # Search for p < alpha point to the right of ATT. upper_search_min = att_original upper_search_max = theta_max ci_upper = brentq(pvalue_minus_alpha, upper_search_min, upper_search_max, xtol=ci_tol) except ValueError: ci_upper = ci_upper_approx # Compute bootstrap SE (simplified using CI width estimate). se_bootstrap = (ci_upper - ci_lower) / (2 * 1.96) # Restore random state. if rng_state is not None: np.random.set_state(rng_state) return WildClusterBootstrapResult( att=att_original, se_bootstrap=se_bootstrap, ci_lower=ci_lower, ci_upper=ci_upper, pvalue=pvalue_at_zero, n_clusters=G, n_bootstrap=n_bootstrap, weight_type=weight_type, t_stat_original=t_stat_original, t_stats_bootstrap=np.array([]), # Test inversion does not save t distribution. rejection_rate=pvalue_at_zero, ci_method='test_inversion' )