#!/usr/bin/env python3
"""
Fix 01: Augmented Dickey-Fuller Unit Root Tests for Liberty Series
==================================================================

Tests whether the Liberty score series contains a unit root (non-stationary)
or is stationary (mean-reverting). Uses ADF regression with 1-3 lags.
Also implements Im-Pesaran-Shin panel unit root test.

All stdlib only: csv, math, statistics, collections
"""

import csv
import math
import statistics
from collections import defaultdict


# ===================================================================
# DATA LOADING
# ===================================================================

def load_csv(path):
    """Load the governance topology CSV.

    Returns a list of dicts (one per row) with 'year' coerced to int and
    'liberty' coerced to float. Rows whose 'year'/'liberty' fields are
    missing or non-numeric are silently skipped.
    """
    rows = []
    with open(path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                row['year'] = int(row['year'])
                row['liberty'] = float(row['liberty'])
            except (ValueError, KeyError):
                # Malformed row: drop it rather than abort the whole load.
                continue
            rows.append(row)
    return rows


# ===================================================================
# LINEAR ALGEBRA UTILITIES
# ===================================================================

def ols_multi(X_rows, y):
    """
    Multivariate OLS via normal equations (X'X)^{-1} X'y.

    X_rows: list of lists (each row is [1, x1, x2, ...]).
    y:      list of floats, same length as X_rows.

    Returns (betas, se_betas, r2, n, residuals, sigma2).

    If X'X is (numerically) singular, returns degenerate values
    (zero betas / infinite standard errors) instead of raising.
    """
    n = len(y)
    k = len(X_rows[0])

    # X'X (k x k Gram matrix)
    XtX = [[0.0] * k for _ in range(k)]
    for row in X_rows:
        for i in range(k):
            for j in range(k):
                XtX[i][j] += row[i] * row[j]

    # X'y (k-vector)
    Xty = [0.0] * k
    for row, yi in zip(X_rows, y):
        for i in range(k):
            Xty[i] += row[i] * yi

    # Solve (X'X) b = X'y via Gauss-Jordan with partial pivoting.
    # NOTE: aug rows are *copies* of XtX rows, so XtX itself is left
    # intact for the inversion pass below (no recompute needed).
    aug = [XtX[i][:] + [Xty[i]] for i in range(k)]
    for col in range(k):
        max_row = max(range(col, k), key=lambda r: abs(aug[r][col]))
        aug[col], aug[max_row] = aug[max_row], aug[col]
        pivot = aug[col][col]
        if abs(pivot) < 1e-14:
            # Singular design matrix: degenerate result.
            return ([0] * k, [float('inf')] * k, 0, n, [0] * n, float('inf'))
        for j in range(col, k + 1):
            aug[col][j] /= pivot
        for i in range(k):
            if i != col:
                factor = aug[i][col]
                for j in range(col, k + 1):
                    aug[i][j] -= factor * aug[col][j]
    betas = [aug[i][k] for i in range(k)]

    # Residuals, R2, and residual variance sigma^2 = SSR / (n - k).
    ybar = sum(y) / n
    ss_tot = sum((yi - ybar) ** 2 for yi in y)
    residuals = []
    ss_res = 0.0
    for row, yi in zip(X_rows, y):
        yhat = sum(b * x for b, x in zip(betas, row))
        r = yi - yhat
        residuals.append(r)
        ss_res += r ** 2
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0
    sigma2 = ss_res / (n - k) if n > k else float('inf')

    # Invert X'X via Gauss-Jordan on [XtX | I] for standard errors.
    inv_aug = [XtX[i][:] + [1.0 if i == j else 0.0 for j in range(k)]
               for i in range(k)]
    for col in range(k):
        max_row = max(range(col, k), key=lambda r: abs(inv_aug[r][col]))
        inv_aug[col], inv_aug[max_row] = inv_aug[max_row], inv_aug[col]
        pivot = inv_aug[col][col]
        if abs(pivot) < 1e-14:
            # Can't invert: report betas but infinite SEs.
            return (betas, [float('inf')] * k, r2, n, residuals, sigma2)
        for j in range(2 * k):
            inv_aug[col][j] /= pivot
        for i in range(k):
            if i != col:
                factor = inv_aug[i][col]
                for j in range(2 * k):
                    inv_aug[i][j] -= factor * inv_aug[col][j]
    XtX_inv = [[inv_aug[i][k + j] for j in range(k)] for i in range(k)]
    # SE(b_i) = sqrt(sigma2 * (X'X)^{-1}_ii); guard non-positive products
    # (perfect fit or numerical noise) with inf.
    se_betas = [math.sqrt(sigma2 * XtX_inv[i][i])
                if sigma2 * XtX_inv[i][i] > 0 else float('inf')
                for i in range(k)]

    return (betas, se_betas, r2, n, residuals, sigma2)


# ===================================================================
# ADF TEST IMPLEMENTATION
# ===================================================================

def adf_test(series, max_lag=3, include_trend=False):
    """
    Augmented Dickey-Fuller test for a single time series.

    Model: DeltaL(t) = alpha + [delta*t] + beta * L(t-1)
                       + sum(gamma_j * DeltaL(t-j)) + eps
    H0: beta = 0 (unit root)
    H1: beta < 0 (stationary)

    Returns a dict mapping each lag specification p (1, ..., max_lag) to a
    dict of regression results, or None if the series is too short.
    """
    T = len(series)
    if T < max_lag + 5:
        return None

    results = {}
    for p in range(1, max_lag + 1):
        # First differences: delta_L[t-1] = L(t) - L(t-1), t = 1..T-1.
        delta_L = [series[t] - series[t - 1] for t in range(1, T)]

        # We need p lagged differences plus one lagged level, so usable
        # observations are t = p+1 .. T-1 (delta_L indices p .. T-2).
        y_vec = []
        X_mat = []
        for t_idx in range(p, len(delta_L)):
            actual_t = t_idx + 1  # time index in original series
            y_vec.append(delta_L[t_idx])
            row = [1.0]  # constant
            if include_trend:
                row.append(float(actual_t))  # deterministic trend
            row.append(series[actual_t - 1])  # L(t-1) -- beta coefficient
            # Lagged differences DeltaL(t-1), ..., DeltaL(t-p).
            for j in range(1, p + 1):
                row.append(delta_L[t_idx - j])
            X_mat.append(row)

        # Need a couple of degrees of freedom beyond the parameter count.
        if len(y_vec) < len(X_mat[0]) + 2:
            continue

        betas, se_betas, r2, n, residuals, sigma2 = ols_multi(X_mat, y_vec)

        # beta coefficient is at index 1 (constant-only) or 2 (with trend).
        beta_idx = 2 if include_trend else 1
        beta_coef = betas[beta_idx]
        se_beta = se_betas[beta_idx]
        t_stat = (beta_coef / se_beta
                  if se_beta > 0 and se_beta != float('inf') else 0)

        # AIC/BIC for lag selection.
        if sigma2 > 0 and sigma2 != float('inf'):
            k_params = len(betas)
            aic = n * math.log(sigma2) + 2 * k_params
            bic = n * math.log(sigma2) + k_params * math.log(n)
        else:
            aic = float('inf')
            bic = float('inf')

        results[p] = {
            'beta': beta_coef,
            'se_beta': se_beta,
            't_stat': t_stat,
            'n': n,
            'r2': r2,
            'sigma2': sigma2,
            'aic': aic,
            'bic': bic,
            'lags': p,
            'all_betas': betas,
            'all_ses': se_betas,
        }
    return results


# ===================================================================
# CRITICAL VALUES
# ===================================================================

# ADF critical values for constant-only model (no trend), finite samples.
# MacKinnon (1994) approximate values.
ADF_CRITICAL = {
    '1%': -3.43,
    '5%': -2.86,
    '10%': -2.57,
}

# For constant + trend model.
ADF_CRITICAL_TREND = {
    '1%': -3.96,
    '5%': -3.41,
    '10%': -3.12,
}

# IPS (2003) critical values for panel test (W_tbar statistic).
# Standard normal critical values (asymptotic).
IPS_CRITICAL = {
    '1%': -2.326,
    '5%': -1.645,
    '10%': -1.282,
}


# ===================================================================
# MAIN ANALYSIS
# ===================================================================

def main():
    """Run all unit-root analyses and write a markdown report."""
    CSV_PATH = "/tmp/pt-data/political-topology-flat.csv"

    print("=" * 70)
    print("FIX 01: AUGMENTED DICKEY-FULLER UNIT ROOT TESTS")
    print("=" * 70)
    print()

    print("Loading data...")
    data_rows = load_csv(CSV_PATH)
    print(f" Loaded {len(data_rows)} rows")

    # Build per-country (year, liberty) time series, sorted by year.
    country_ts = defaultdict(list)
    for row in data_rows:
        country_ts[row['country']].append((row['year'], row['liberty']))
    for c in country_ts:
        country_ts[c].sort()

    # Filter countries with 5+ observations.
    eligible = {c: ts for c, ts in country_ts.items() if len(ts) >= 5}
    print(f" Countries with 5+ observations: {len(eligible)}")
    print()

    results = []
    results.append("# Fix 01: Augmented Dickey-Fuller Unit Root Tests")
    results.append("")
    results.append("Tests whether Liberty score series contain a unit root (non-stationary)")
    results.append("or are mean-reverting (stationary). Key question: is the AR(1) beta=0.956")
    results.append("evidence of a unit root, or strong persistence with mean reversion?")
    results.append("")

    # ===============================================================
    # SECTION 1: Country-by-Country ADF Tests
    # ===============================================================
    results.append("## 1. Country-by-Country ADF Tests")
    results.append("")
    results.append("ADF regression: DL(t) = alpha + beta*L(t-1) + sum(gamma_j*DL(t-j)) + eps")
    results.append("H0: beta=0 (unit root). H1: beta<0 (stationary, mean-reverting).")
    results.append("Critical values (constant-only): 1%=-3.43, 5%=-2.86, 10%=-2.57")
    results.append("")

    country_results = {}
    t_stats_all = []
    t_stats_post72 = []

    # Detailed table header.
    results.append("| Country | N | Best Lag | ADF t-stat | Reject 5%? | Reject 10%? | Beta |")
    results.append("|---------|---|----------|------------|------------|-------------|------|")

    for country in sorted(eligible.keys()):
        ts = eligible[country]
        series = [lib for yr, lib in ts]
        # Cap the lag order by series length so short series stay testable.
        adf_res = adf_test(series, max_lag=min(3, len(series) // 3 - 1))
        if adf_res is None or len(adf_res) == 0:
            continue
        # Select best lag by BIC.
        best_lag = min(adf_res.keys(), key=lambda p: adf_res[p]['bic'])
        best = adf_res[best_lag]
        reject_5 = best['t_stat'] < ADF_CRITICAL['5%']
        reject_10 = best['t_stat'] < ADF_CRITICAL['10%']
        country_results[country] = best
        t_stats_all.append(best['t_stat'])
        results.append(
            f"| {country[:30]:30s} | {best['n']:3d} | {best_lag} | "
            f"{best['t_stat']:+7.3f} | {'YES' if reject_5 else 'no':10s} | "
            f"{'YES' if reject_10 else 'no':11s} | {best['beta']:+.4f} |"
        )
    results.append("")

    # Summary statistics.
    n_countries = len(country_results)
    n_reject_1 = sum(1 for r in country_results.values() if r['t_stat'] < ADF_CRITICAL['1%'])
    n_reject_5 = sum(1 for r in country_results.values() if r['t_stat'] < ADF_CRITICAL['5%'])
    n_reject_10 = sum(1 for r in country_results.values() if r['t_stat'] < ADF_CRITICAL['10%'])

    results.append("### Summary of Country-Level ADF Tests")
    results.append("")
    results.append(f"- Total countries tested: {n_countries}")
    # Guard the n_countries == 0 degenerate case (empty/unusable data).
    results.append(f"- Reject unit root at 1%: {n_reject_1} ({100*n_reject_1/n_countries:.1f}%)" if n_countries > 0 else "- N/A")
    results.append(f"- Reject unit root at 5%: {n_reject_5} ({100*n_reject_5/n_countries:.1f}%)" if n_countries > 0 else "- N/A")
    results.append(f"- Reject unit root at 10%: {n_reject_10} ({100*n_reject_10/n_countries:.1f}%)" if n_countries > 0 else "- N/A")
    if t_stats_all:
        results.append(f"- Mean ADF t-statistic: {statistics.mean(t_stats_all):.3f}")
        results.append(f"- Median ADF t-statistic: {statistics.median(t_stats_all):.3f}")
    if len(t_stats_all) >= 2:  # stdev requires at least two data points
        results.append(f"- Std dev of t-statistics: {statistics.stdev(t_stats_all):.3f}")
    results.append("")

    # ===============================================================
    # SECTION 2: Post-1972 Subset
    # ===============================================================
    results.append("---")
    results.append("")
    results.append("## 2. Post-1972 Subset ADF Tests")
    results.append("")
    results.append("Restricting to post-1972 data (more regular spacing, better data quality).")
    results.append("")

    # Build post-1972 series.
    eligible_post72 = {}
    for c, ts in country_ts.items():
        ts72 = [(yr, lib) for yr, lib in ts if yr >= 1972]
        if len(ts72) >= 5:
            eligible_post72[c] = ts72

    print(f" Countries with 5+ post-1972 observations: {len(eligible_post72)}")

    country_results_72 = {}
    results.append(f"Countries with 5+ post-1972 observations: {len(eligible_post72)}")
    results.append("")
    results.append("| Country | N | Best Lag | ADF t-stat | Reject 5%? | Beta |")
    results.append("|---------|---|----------|------------|------------|------|")

    for country in sorted(eligible_post72.keys()):
        ts72 = eligible_post72[country]
        series72 = [lib for yr, lib in ts72]
        adf_res = adf_test(series72, max_lag=min(3, len(series72) // 3 - 1))
        if adf_res is None or len(adf_res) == 0:
            continue
        best_lag = min(adf_res.keys(), key=lambda p: adf_res[p]['bic'])
        best = adf_res[best_lag]
        reject_5 = best['t_stat'] < ADF_CRITICAL['5%']
        country_results_72[country] = best
        t_stats_post72.append(best['t_stat'])
        results.append(
            f"| {country[:30]:30s} | {best['n']:3d} | {best_lag} | "
            f"{best['t_stat']:+7.3f} | {'YES' if reject_5 else 'no':10s} | "
            f"{best['beta']:+.4f} |"
        )
    results.append("")

    n72 = len(country_results_72)
    n72_reject_1 = sum(1 for r in country_results_72.values() if r['t_stat'] < ADF_CRITICAL['1%'])
    n72_reject_5 = sum(1 for r in country_results_72.values() if r['t_stat'] < ADF_CRITICAL['5%'])
    n72_reject_10 = sum(1 for r in country_results_72.values() if r['t_stat'] < ADF_CRITICAL['10%'])

    results.append("### Summary of Post-1972 ADF Tests")
    results.append("")
    results.append(f"- Total countries tested: {n72}")
    results.append(f"- Reject unit root at 1%: {n72_reject_1} ({100*n72_reject_1/n72:.1f}%)" if n72 > 0 else "- N/A")
    results.append(f"- Reject unit root at 5%: {n72_reject_5} ({100*n72_reject_5/n72:.1f}%)" if n72 > 0 else "- N/A")
    results.append(f"- Reject unit root at 10%: {n72_reject_10} ({100*n72_reject_10/n72:.1f}%)" if n72 > 0 else "- N/A")
    if t_stats_post72:
        results.append(f"- Mean ADF t-statistic: {statistics.mean(t_stats_post72):.3f}")
        results.append(f"- Median ADF t-statistic: {statistics.median(t_stats_post72):.3f}")
    results.append("")

    # ===============================================================
    # SECTION 3: Im-Pesaran-Shin Panel Unit Root Test
    # ===============================================================
    results.append("---")
    results.append("")
    results.append("## 3. Im-Pesaran-Shin (IPS) Panel Unit Root Test")
    results.append("")
    results.append("The IPS test averages individual ADF t-statistics and standardizes")
    results.append("by sqrt(N). Under H0 (all series have unit roots), the standardized")
    results.append("statistic W_tbar is asymptotically N(0,1).")
    results.append("")
    results.append("W_tbar = sqrt(N) * (t_bar - E[t]) / sqrt(Var[t])")
    results.append("")
    results.append("Where E[t] and Var[t] are the expected value and variance of individual")
    results.append("ADF t-statistics under the null (tabulated by IPS 2003).")
    results.append("")

    # IPS uses the mean and variance of the individual ADF t-statistics
    # under H0. These moments depend on T and the lag order; we use the
    # tabulated approximations for small T (T~10-20 with 1 lag).
    E_t_null = -1.510   # IPS Table 3, T=10, p=1
    Var_t_null = 0.870  # IPS Table 3, T=10, p=1

    def ips_test(t_stats_list, E_t, Var_t):
        """Compute IPS W_tbar statistic; (None, None) if fewer than 2 series."""
        N = len(t_stats_list)
        if N < 2:
            return None, None
        t_bar = statistics.mean(t_stats_list)
        W_tbar = math.sqrt(N) * (t_bar - E_t) / math.sqrt(Var_t)
        return t_bar, W_tbar

    # Full sample IPS.
    t_bar_full, W_full = ips_test(t_stats_all, E_t_null, Var_t_null)
    results.append("### Full Sample IPS Test")
    results.append("")
    results.append(f"- N (countries): {len(t_stats_all)}")
    # Guard against the degenerate (None, None) return: formatting None
    # with :.4f would raise.
    if W_full is not None:
        results.append(f"- t-bar (mean individual ADF): {t_bar_full:.4f}")
        results.append(f"- E[t] under H0: {E_t_null:.3f}")
        results.append(f"- Var[t] under H0: {Var_t_null:.3f}")
        results.append(f"- W_tbar statistic: {W_full:.4f}")
        results.append(f"- Critical values: 1%={IPS_CRITICAL['1%']}, 5%={IPS_CRITICAL['5%']}, 10%={IPS_CRITICAL['10%']}")
    reject_ips_1 = W_full < IPS_CRITICAL['1%'] if W_full is not None else False
    reject_ips_5 = W_full < IPS_CRITICAL['5%'] if W_full is not None else False
    reject_ips_10 = W_full < IPS_CRITICAL['10%'] if W_full is not None else False
    results.append(f"- Reject H0 at 1%: {'YES' if reject_ips_1 else 'NO'}")
    results.append(f"- Reject H0 at 5%: {'YES' if reject_ips_5 else 'NO'}")
    results.append(f"- Reject H0 at 10%: {'YES' if reject_ips_10 else 'NO'}")
    results.append("")

    # Post-1972 IPS.
    if t_stats_post72:
        t_bar_72, W_72 = ips_test(t_stats_post72, E_t_null, Var_t_null)
        results.append("### Post-1972 IPS Test")
        results.append("")
        results.append(f"- N (countries): {len(t_stats_post72)}")
        if W_72 is not None:
            results.append(f"- t-bar: {t_bar_72:.4f}")
            results.append(f"- W_tbar statistic: {W_72:.4f}")
        reject_ips72_1 = W_72 < IPS_CRITICAL['1%'] if W_72 is not None else False
        reject_ips72_5 = W_72 < IPS_CRITICAL['5%'] if W_72 is not None else False
        reject_ips72_10 = W_72 < IPS_CRITICAL['10%'] if W_72 is not None else False
        results.append(f"- Reject H0 at 1%: {'YES' if reject_ips72_1 else 'NO'}")
        results.append(f"- Reject H0 at 5%: {'YES' if reject_ips72_5 else 'NO'}")
        results.append(f"- Reject H0 at 10%: {'YES' if reject_ips72_10 else 'NO'}")
        results.append("")

    # ===============================================================
    # SECTION 4: Pooled ADF (All Countries Concatenated + Dummies)
    # ===============================================================
    results.append("---")
    results.append("")
    results.append("## 4. Pooled ADF Test (Panel Concatenation with Country Fixed Effects)")
    results.append("")
    results.append("Concatenate all country series and run ADF with country dummies.")
    results.append("This is the Levin-Lin-Chu style approach (homogeneous beta).")
    results.append("")

    # Build pooled panel: for each country, compute DeltaL and lagged level.
    pooled_y = []
    pooled_X = []
    country_list = sorted(eligible.keys())
    country_to_idx = {c: i for i, c in enumerate(country_list)}
    n_countries_pooled = len(country_list)

    for country in country_list:
        ts = eligible[country]
        series = [lib for yr, lib in ts]
        T = len(series)
        if T < 4:
            continue
        for t in range(2, T):  # need t-1 for level and t-2 for lagged diff
            delta_L = series[t] - series[t - 1]
            level_lag = series[t - 1]
            delta_lag1 = series[t - 1] - series[t - 2]
            pooled_y.append(delta_L)
            row = [0.0] * n_countries_pooled  # country dummies
            row[country_to_idx[country]] = 1.0
            row.append(level_lag)   # beta coefficient
            row.append(delta_lag1)  # gamma_1
            pooled_X.append(row)

    print(f" Pooled panel: {len(pooled_y)} observations, {n_countries_pooled} country dummies")

    if len(pooled_y) > n_countries_pooled + 5:
        betas_pooled, se_pooled, r2_pooled, n_pooled, res_pooled, sigma2_pooled = ols_multi(pooled_X, pooled_y)
        # beta is at index n_countries_pooled (first slot after all dummies).
        beta_idx = n_countries_pooled
        beta_pooled = betas_pooled[beta_idx]
        se_beta_pooled = se_pooled[beta_idx]
        t_pooled = beta_pooled / se_beta_pooled if se_beta_pooled > 0 and se_beta_pooled != float('inf') else 0
        results.append(f"- N observations: {n_pooled}")
        results.append(f"- N country dummies: {n_countries_pooled}")
        results.append(f"- ADF lag: 1")
        results.append(f"- Beta (pooled): {beta_pooled:.6f}")
        results.append(f"- SE(beta): {se_beta_pooled:.6f}")
        results.append(f"- ADF t-statistic: {t_pooled:.4f}")
        results.append(f"- R-squared: {r2_pooled:.4f}")
        results.append("")
        reject_pooled_1 = t_pooled < ADF_CRITICAL['1%']
        reject_pooled_5 = t_pooled < ADF_CRITICAL['5%']
        reject_pooled_10 = t_pooled < ADF_CRITICAL['10%']
        results.append(f"- Reject unit root at 1%: {'YES' if reject_pooled_1 else 'NO'} (cv={ADF_CRITICAL['1%']})")
        results.append(f"- Reject unit root at 5%: {'YES' if reject_pooled_5 else 'NO'} (cv={ADF_CRITICAL['5%']})")
        results.append(f"- Reject unit root at 10%: {'YES' if reject_pooled_10 else 'NO'} (cv={ADF_CRITICAL['10%']})")
        results.append("")

    # ===============================================================
    # SECTION 4b: Pooled ADF Post-1972
    # ===============================================================
    results.append("### Pooled ADF -- Post-1972 Only")
    results.append("")

    pooled_y_72 = []
    pooled_X_72 = []
    country_list_72 = sorted(eligible_post72.keys())
    country_to_idx_72 = {c: i for i, c in enumerate(country_list_72)}
    n_countries_72 = len(country_list_72)

    for country in country_list_72:
        ts72 = eligible_post72[country]
        series72 = [lib for yr, lib in ts72]
        T = len(series72)
        if T < 4:
            continue
        for t in range(2, T):
            delta_L = series72[t] - series72[t - 1]
            level_lag = series72[t - 1]
            delta_lag1 = series72[t - 1] - series72[t - 2]
            pooled_y_72.append(delta_L)
            row = [0.0] * n_countries_72
            row[country_to_idx_72[country]] = 1.0
            row.append(level_lag)
            row.append(delta_lag1)
            pooled_X_72.append(row)

    print(f" Pooled panel (post-1972): {len(pooled_y_72)} observations, {n_countries_72} country dummies")

    if len(pooled_y_72) > n_countries_72 + 5:
        betas_p72, se_p72, r2_p72, n_p72, res_p72, sigma2_p72 = ols_multi(pooled_X_72, pooled_y_72)
        beta_idx_72 = n_countries_72
        beta_p72 = betas_p72[beta_idx_72]
        se_bp72 = se_p72[beta_idx_72]
        t_p72 = beta_p72 / se_bp72 if se_bp72 > 0 and se_bp72 != float('inf') else 0
        results.append(f"- N observations: {n_p72}")
        results.append(f"- N country dummies: {n_countries_72}")
        results.append(f"- Beta (pooled, post-72): {beta_p72:.6f}")
        results.append(f"- SE(beta): {se_bp72:.6f}")
        results.append(f"- ADF t-statistic: {t_p72:.4f}")
        results.append(f"- R-squared: {r2_p72:.4f}")
        results.append("")
        reject_p72_5 = t_p72 < ADF_CRITICAL['5%']
        results.append(f"- Reject unit root at 5%: {'YES' if reject_p72_5 else 'NO'} (cv={ADF_CRITICAL['5%']})")
        results.append("")

    # ===============================================================
    # SECTION 5: Distribution of t-statistics
    # ===============================================================
    results.append("---")
    results.append("")
    results.append("## 5. Distribution of Individual ADF t-Statistics")
    results.append("")

    if t_stats_all:
        sorted_t = sorted(t_stats_all)
        pctiles = [10, 25, 50, 75, 90]
        results.append("### Full Sample")
        results.append("")
        results.append("| Percentile | t-statistic |")
        results.append("|------------|-------------|")
        for p in pctiles:
            idx = min(int(len(sorted_t) * p / 100), len(sorted_t) - 1)
            results.append(f"| {p}th | {sorted_t[idx]:+.3f} |")
        results.append("")

        # Histogram of t-statistics, bin edges aligned with critical values.
        bins = [(-10, -4), (-4, -3.43), (-3.43, -2.86), (-2.86, -2.57),
                (-2.57, -2), (-2, -1), (-1, 0), (0, 2), (2, 10)]
        results.append("### t-Statistic Histogram")
        results.append("")
        results.append("| Range | Count | % | Interpretation |")
        results.append("|-------|-------|---|----------------|")
        for lo, hi in bins:
            cnt = sum(1 for t in t_stats_all if lo <= t < hi)
            pct = 100 * cnt / len(t_stats_all)
            if hi <= -3.43:
                interp = "Reject at 1%"
            elif hi <= -2.86:
                interp = "Reject at 5%"
            elif hi <= -2.57:
                interp = "Reject at 10%"
            else:
                interp = "Fail to reject"
            results.append(f"| [{lo:+.2f}, {hi:+.2f}) | {cnt:3d} | {pct:5.1f}% | {interp} |")
        results.append("")

    # ===============================================================
    # SECTION 6: Interpretation and Implications
    # ===============================================================
    results.append("---")
    results.append("")
    results.append("## 6. Interpretation and Implications for the Thesis")
    results.append("")

    # Determine overall verdict.
    pct_reject_5 = 100 * n_reject_5 / n_countries if n_countries > 0 else 0

    results.append("### Unit Root Verdict")
    results.append("")
    if pct_reject_5 > 50:
        results.append(f"**MAJORITY REJECT** ({pct_reject_5:.0f}% reject at 5%): The Liberty series")
        results.append("are predominantly **stationary** (mean-reverting). The AR(1) beta=0.956")
        results.append("reflects strong persistence but NOT a unit root for most countries.")
    elif pct_reject_5 > 20:
        results.append(f"**MIXED** ({pct_reject_5:.0f}% reject at 5%): Evidence is divided. Some")
        results.append("countries show stationarity while others appear to have unit roots.")
        results.append("This is consistent with a **heterogeneous panel** where different")
        results.append("countries have different long-run dynamics.")
    else:
        results.append(f"**MOSTLY FAIL TO REJECT** ({pct_reject_5:.0f}% reject at 5%): Most countries")
        results.append("show unit root behavior. The Liberty series may be I(1) for individual")
        results.append("countries, though panel tests may still reject.")
    results.append("")

    results.append("### Panel Test Verdict")
    results.append("")
    if W_full is not None:
        if W_full < IPS_CRITICAL['5%']:
            results.append(f"The IPS panel test (W_tbar = {W_full:.2f}) **REJECTS** the null of")
            results.append("unit roots in all series at the 5% level. This means at least SOME")
            results.append("fraction of countries have stationary Liberty processes, even though")
            results.append("many individual country tests fail to reject.")
        else:
            results.append(f"The IPS panel test (W_tbar = {W_full:.2f}) **FAILS TO REJECT** the")
            results.append("null of unit roots. Even pooling information across countries, there")
            results.append("is insufficient evidence of stationarity.")
    results.append("")

    results.append("### Implications for AR(1) Model (beta = 0.956)")
    results.append("")
    results.append("- If **unit root is present** (beta = 1): The AR(1) model's mean reversion")
    results.append(" toward L* = 81.6 is an artifact. Liberty shocks are permanent and")
    results.append(" the long-run forecast is the current level, not 81.6.")
    results.append("- If **stationary** (beta < 1): Mean reversion is real but slow (half-life")
    results.append(" ~16 years). The basin structure is valid: countries are pulled toward")
    results.append(" attractor basins, just very slowly.")
    results.append("- **Near-unit-root**: Even if stationary, beta=0.956 is close enough to 1")
    results.append(" that finite-sample bias and low power make the distinction practically")
    results.append(" irrelevant over policy-relevant horizons (5-20 years).")
    results.append("")

    results.append("### Implications for Attractor Basin Structure")
    results.append("")
    results.append("The unit root question does NOT invalidate the tristable/quadristable")
    results.append("basin structure. A process can have:")
    results.append("- **Local stationarity within basins** (mean reversion toward basin center)")
    results.append("- **Regime switching between basins** (nonlinear dynamics)")
    results.append("- **Near-unit-root behavior globally** (because the cross-basin process")
    results.append(" is highly persistent)")
    results.append("")
    results.append("The ADF test assumes a LINEAR AR process. The governance topology model is")
    results.append("fundamentally NONLINEAR (multiple attractors). A linear ADF test will tend")
    results.append("to fail to reject unit root for a nonlinear mean-reverting process because")
    results.append("the effective beta varies by regime. This is a well-known limitation of")
    results.append("ADF tests applied to threshold/regime-switching processes (Enders & Granger, 1998).")
    results.append("")
    results.append("---")
    results.append("")
    results.append("*Generated by fix01_unit_root_tests.py. All computations use stdlib only.*")

    # Write results.
    output = "\n".join(results)
    output_path = "/tmp/pt-data/fix01-unit-root-results.md"
    with open(output_path, 'w') as f:
        f.write(output)

    print()
    print(output)
    print()
    print(f"Results saved to {output_path}")


if __name__ == "__main__":
    main()