#!/usr/bin/env python3
"""
RP-09: HCI Weighting Sensitivity & RP-05: Pre-1900 Sample Bias

Comprehensive quantitative analysis of the Human Capabilities Index and its
relationship with Liberty scores across historical eras.

Data sources:
- /tmp/pt-data/human_capabilities_index.xlsx (15 indicators, 91 countries, 1800-2023)
- /tmp/pt-data/political-topology-flat.csv (Liberty scores, ~1656 observations)
"""

import os
import csv
import math
import random
import statistics
from collections import defaultdict, OrderedDict

import openpyxl

random.seed(42)

# ===========================================================================
# CONFIGURATION
# ===========================================================================

HCI_PATH = '/tmp/pt-data/human_capabilities_index.xlsx'
CSV_PATH = '/tmp/pt-data/political-topology-flat.csv'
OUTPUT_PATH = '/tmp/pt-data/rp09-rp05-results.md'

# The 15 indicators and their sheets
INDICATOR_SHEETS = [
    '1. Life Expectancy',
    '2. Infant Mortality',
    '3. Maternal Mortality',
    '4. Stunting Prevalence',
    '5. Adult Literacy',
    '6. Mean Years Schooling',
    '7. Expected Years Schooling',
    '8. GDP per Capita',
    '9. Extreme Poverty',
    '10. Life Satisfaction',
    '11. Suicide Rate',
    '12. Safe Water Access',
    '13. Electricity Access',
    '14. Gender Development',
    '15. Voter Turnout',
]

# Domain mapping: domain_name -> list of indicator sheet names
DOMAINS = OrderedDict([
    ('D1: Survival & Longevity', ['1. Life Expectancy', '2. Infant Mortality']),
    ('D2: Maternal & Child Health', ['3. Maternal Mortality', '4. Stunting Prevalence']),
    ('D3: Knowledge & Education', ['5. Adult Literacy', '6. Mean Years Schooling',
                                   '7. Expected Years Schooling']),
    ('D4: Material Living Standard', ['8. GDP per Capita', '9. Extreme Poverty']),
    ('D5: Psychological Well-Being', ['10. Life Satisfaction', '11. Suicide Rate']),
    ('D6: Basic Infrastructure', ['12. Safe Water Access', '13. Electricity Access']),
    ('D7: Agency & Equality', ['14. Gender Development', '15. Voter Turnout']),
])

# Indicators where HIGHER raw values = WORSE outcomes (need inversion for normalization)
INVERTED_INDICATORS = {
    '2. Infant Mortality',     # deaths per 1000 -- lower is better
    '3. Maternal Mortality',   # deaths per 100k -- lower is better
    '4. Stunting Prevalence',  # % stunted -- lower is better
    '9. Extreme Poverty',      # % below poverty line -- lower is better
    '11. Suicide Rate',        # per 100k -- lower is better
}

# Time periods in both datasets
HCI_YEARS = ['1800', '1850', '1900', '1913', '1929', '1945', '1960', '1975',
             '1990', '2000', '2010', '2020', '2023']
CSV_PERIODS = ['1800-1899', '1900-1971', '1972-2005', '2006-2025']

# Mapping from CSV period to closest HCI year(s)
PERIOD_TO_HCI_YEAR = {
    '1800-1899': '1850',  # midpoint
    '1900-1971': '1945',  # midpoint
    '1972-2005': '1990',  # midpoint
    '2006-2025': '2023',  # latest
}
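

# Consistency check for the configuration above (illustrative only; never called
# by the analysis). The seven domains should partition the fifteen indicator
# sheets exactly, with no indicator missing or double-counted.
def _check_domain_coverage():
    """Assert that DOMAINS covers INDICATOR_SHEETS exactly once each."""
    members = [s for sheets in DOMAINS.values() for s in sheets]
    assert sorted(members) == sorted(INDICATOR_SHEETS), "domain mapping out of sync"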


# ===========================================================================
# DATA LOADING
# ===========================================================================

def load_hci_data():
    """Load all HCI data from Excel.

    Returns:
    - composite: {country: {year: score}}
    - indicators: {sheet_name: {country: {year: raw_value}}}
    - regions_hci: {country: region}
    - analysis_sheet: list of dicts from the Liberty x HCI Analysis sheet
    """
    wb = openpyxl.load_workbook(HCI_PATH, data_only=True)

    # Load composite scores
    ws = wb['Composite HCI Score']
    headers = [cell.value for cell in ws[5]]
    years = headers[2:]  # ['1800', '1850', ...]
    composite = {}
    regions_hci = {}
    for row in ws.iter_rows(min_row=6, max_row=96, values_only=True):
        country = row[0]
        if country is None:
            continue
        region = row[1]
        regions_hci[country] = region
        composite[country] = {}
        for i, yr in enumerate(years):
            val = row[i + 2]
            if val is not None:
                composite[country][str(yr)] = float(val)

    # Load individual indicator sheets
    indicators = {}
    for sname in INDICATOR_SHEETS:
        ws = wb[sname]
        # Header is at row 6 for all indicator sheets
        headers = [cell.value for cell in ws[6]]
        ind_years = [str(h) for h in headers[2:]]
        indicators[sname] = {}
        for row in ws.iter_rows(min_row=7, max_row=98, values_only=True):
            country = row[0]
            if country is None:
                continue
            indicators[sname][country] = {}
            for i, yr in enumerate(ind_years):
                val = row[i + 2]
                if val is not None:
                    try:
                        indicators[sname][country][yr] = float(val)
                    except (ValueError, TypeError):
                        pass

    # Load Liberty x HCI Analysis sheet
    ws2 = wb['Liberty × HCI Analysis']
    analysis = []
    for row in ws2.iter_rows(min_row=5, max_row=107, values_only=True):
        if row[0] is None:
            continue
        analysis.append({
            'country': row[0],
            'region': row[1],
            'liberty': float(row[2]) if row[2] is not None else None,
            'hci': float(row[3]) if row[3] is not None else None,
            'gap': float(row[4]) if row[4] is not None else None,
            'quadrant': row[5],
        })

    return composite, indicators, regions_hci, analysis


def load_csv_data():
    """Load the governance topology CSV and return a list of row dicts."""
    rows = []
    with open(CSV_PATH, 'r') as f:
        reader = csv.DictReader(f)
        for r in reader:
            r['liberty'] = float(r['liberty']) if r['liberty'] else None
            r['year'] = int(r['year']) if r['year'] else None
            rows.append(r)
    return rows


# ===========================================================================
# STATISTICAL UTILITIES
# ===========================================================================

def pearson_r(x, y):
    """Pearson correlation coefficient."""
    n = len(x)
    if n < 3:
        return float('nan')
    mx = sum(x) / n
    my = sum(y) / n
    sx = math.sqrt(sum((xi - mx) ** 2 for xi in x) / (n - 1)) if n > 1 else 0
    sy = math.sqrt(sum((yi - my) ** 2 for yi in y) / (n - 1)) if n > 1 else 0
    if sx == 0 or sy == 0:
        return float('nan')
    cov = sum((xi - mx) * (yi - my) for xi, yi in zip(x, y)) / (n - 1)
    return cov / (sx * sy)


def spearman_rho(x, y):
    """Spearman rank correlation."""
    def rank(data):
        indexed = sorted(enumerate(data), key=lambda t: t[1])
        ranks = [0.0] * len(data)
        i = 0
        while i < len(indexed):
            j = i
            while j < len(indexed) and indexed[j][1] == indexed[i][1]:
                j += 1
            avg_rank = (i + j + 1) / 2.0
            for k in range(i, j):
                ranks[indexed[k][0]] = avg_rank
            i = j
        return ranks

    rx = rank(x)
    ry = rank(y)
    return pearson_r(rx, ry)


def p_value_approx(r, n):
    """Approximate two-tailed p-value for Pearson r using a t-distribution approximation."""
    if n < 3 or math.isnan(r) or abs(r) >= 1.0:
        return float('nan')
    t = r * math.sqrt((n - 2) / (1 - r * r))
    # Approximate p-value using the normal distribution (adequate for large n)
    df = n - 2
    # Using a simple approximation
    p = 2 * (1 - _norm_cdf(abs(t) * math.sqrt(df / (df + t * t))))
    return p


def _norm_cdf(x):
    """Approximation of the normal CDF."""
    return 0.5 * (1 + math.erf(x / math.sqrt(2)))


def geometric_mean(values):
    """Geometric mean of positive values."""
    if not values or any(v <= 0 for v in values):
        return float('nan')
    log_sum = sum(math.log(v) for v in values)
    return math.exp(log_sum / len(values))
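

# Optional sanity check (illustrative; not part of the analysis pipeline). If
# SciPy happens to be installed, scipy.stats.pearsonr can be used to cross-check
# pearson_r and the normal-approximation p-value above. This is an assumption
# about the environment: the analysis itself deliberately depends only on the
# standard library plus openpyxl.
def _crosscheck_pearson_with_scipy(x, y):
    """Return (r, p) from scipy.stats.pearsonr, or None if SciPy is unavailable."""
    try:
        from scipy import stats  # optional dependency; may not be installed
    except ImportError:
        return None
    r, p = stats.pearsonr(x, y)
    return r, p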


# ===========================================================================
# NORMALIZATION
# ===========================================================================

def compute_normalization_params(indicators):
    """Compute min/max for each indicator across all countries and years for normalization."""
    params = {}
    for sname in INDICATOR_SHEETS:
        all_vals = []
        if sname in indicators:
            for country_data in indicators[sname].values():
                all_vals.extend(country_data.values())
        if all_vals:
            params[sname] = (min(all_vals), max(all_vals))
        else:
            params[sname] = (0, 1)
    return params


def normalize_value(raw, sname, params):
    """Normalize a raw indicator value to a 0-100 scale."""
    mn, mx = params[sname]
    if mx == mn:
        return 50.0
    if sname in INVERTED_INDICATORS:
        # Higher raw = worse, so invert
        return ((mx - raw) / (mx - mn)) * 100.0
    else:
        return ((raw - mn) / (mx - mn)) * 100.0
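

# Worked example (illustrative only; never called by the analysis). With assumed
# min/max bounds of (20, 800) for '3. Maternal Mortality', an inverted indicator,
# a raw value at the minimum should normalize to 100 and a value at the maximum
# to 0.
def _normalization_self_check():
    """Tiny self-check of normalize_value on a hypothetical inverted indicator."""
    demo_params = {'3. Maternal Mortality': (20.0, 800.0)}  # hypothetical bounds
    assert abs(normalize_value(20.0, '3. Maternal Mortality', demo_params) - 100.0) < 1e-9
    assert abs(normalize_value(800.0, '3. Maternal Mortality', demo_params)) < 1e-9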


# ===========================================================================
# RP-09: HCI WEIGHTING SENSITIVITY
# ===========================================================================

def rp09_t1_geometric_mean(composite, indicators, norm_params):
    """T1: Recompute HCI using geometric mean instead of arithmetic mean.

    Compare rankings via Spearman rho against the original composite.
    """
    out = []
    out.append("### RP-09 T1: Geometric Mean vs Arithmetic Mean\n")
    out.append("Recompute HCI using geometric mean of normalized indicators (shifted by +1")
    out.append("to handle zeros) and compare country rankings against the original composite.\n")

    year = '2023'  # Use the most complete year
    original_scores = {}
    geomean_scores = {}
    for country in composite:
        if year not in composite[country]:
            continue
        original_scores[country] = composite[country][year]
        # Gather normalized indicator values for this country at this year
        normed = []
        for sname in INDICATOR_SHEETS:
            if sname in indicators and country in indicators[sname]:
                if year in indicators[sname][country]:
                    val = normalize_value(indicators[sname][country][year], sname, norm_params)
                    normed.append(val)
        if len(normed) >= 3:
            # Geometric mean: shift by +1 to avoid zero issues, then shift back
            shifted = [v + 1 for v in normed]
            gm = geometric_mean(shifted) - 1
            geomean_scores[country] = max(0, gm)  # floor at 0

    # Find common countries
    common = sorted(set(original_scores.keys()) & set(geomean_scores.keys()))
    if len(common) < 5:
        out.append(f"Insufficient common countries ({len(common)}). Cannot compute.\n")
        return '\n'.join(out)

    orig_vals = [original_scores[c] for c in common]
    geom_vals = [geomean_scores[c] for c in common]
    rho = spearman_rho(orig_vals, geom_vals)
    r = pearson_r(orig_vals, geom_vals)

    out.append("| Metric | Value |")
    out.append("|--------|-------|")
    out.append(f"| Countries compared | {len(common)} |")
    out.append(f"| Spearman rho (rank correlation) | {rho:.4f} |")
    out.append(f"| Pearson r (linear correlation) | {r:.4f} |")
    out.append(f"| Robust if rho > 0.95? | **{'YES' if rho > 0.95 else 'NO'}** |")
    out.append("")

    # Show top 10 divergences
    divergences = [(c, original_scores[c], geomean_scores[c],
                    abs(original_scores[c] - geomean_scores[c])) for c in common]
    divergences.sort(key=lambda x: x[3], reverse=True)
    out.append("**Top 10 Divergences (Arithmetic vs Geometric):**\n")
    out.append("| Country | Arithmetic HCI | Geometric HCI | Abs Diff |")
    out.append("|---------|---------------|--------------|----------|")
    for c, a, g, d in divergences[:10]:
        out.append(f"| {c} | {a:.1f} | {g:.1f} | {d:.1f} |")
    out.append("")

    if rho > 0.95:
        out.append("**VERDICT:** Rankings are robust to aggregation method. The geometric mean")
        out.append("penalizes countries with extreme lows in any indicator, but the overall")
        out.append("rank ordering is preserved.\n")
    else:
        out.append(f"**VERDICT:** Rankings shift meaningfully (rho={rho:.3f}). The geometric mean")
        out.append("penalizes uneven development profiles, reshuffling countries with")
        out.append("extreme indicator disparities.\n")

    return '\n'.join(out)


def rp09_t2_leave_one_domain_out(composite, indicators, norm_params, csv_data):
    """T2: Leave-One-Domain-Out Jackknife. Recompute HCI 7 times, each dropping one domain."""
    out = []
    out.append("### RP-09 T2: Leave-One-Domain-Out Jackknife\n")
    out.append("Recompute HCI dropping each of 7 domains in turn. Test whether any single")
    out.append("domain removal changes the Liberty-HCI correlation by >0.05 or the crossover")
    out.append("count (capable autocracies) by >3.\n")

    year = '2023'

    # Get Liberty scores for matching — use latest observation per country from CSV
    liberty_by_country = {}
    for row in csv_data:
        c = row['country']
        if row['liberty'] is not None:
            if c not in liberty_by_country or row['year'] > liberty_by_country[c][1]:
                liberty_by_country[c] = (row['liberty'], row['year'])
    liberty_latest = {c: v[0] for c, v in liberty_by_country.items()}

    # Baseline: full HCI correlation with Liberty
    baseline_pairs = []
    for country in composite:
        if year in composite[country] and country in liberty_latest:
            baseline_pairs.append((composite[country][year], liberty_latest[country]))
    if len(baseline_pairs) < 10:
        out.append("Insufficient matched data for baseline correlation.\n")
        return '\n'.join(out)

    baseline_r = pearson_r([p[0] for p in baseline_pairs], [p[1] for p in baseline_pairs])
    baseline_med = statistics.median([p[0] for p in baseline_pairs])
    baseline_crossover = sum(1 for h, l in baseline_pairs if h > baseline_med and l < 50)
    out.append(f"**Baseline (full HCI, {year}):** r = {baseline_r:.4f}, N = {len(baseline_pairs)}, crossover count = {baseline_crossover}\n")

    out.append("| Domain Dropped | r(Liberty,HCI_reduced) | delta_r | Crossover Count | delta_crossover |")
    out.append("|---------------|----------------------|---------|-----------------|-----------------|")

    any_sensitive = False
    for domain_name, domain_indicators in DOMAINS.items():
        # Recompute HCI without this domain's indicators
        remaining_indicators = [s for s in INDICATOR_SHEETS if s not in domain_indicators]
        reduced_hci = {}
        for country in composite:
            normed = []
            for sname in remaining_indicators:
                if sname in indicators and country in indicators[sname]:
                    if year in indicators[sname][country]:
                        val = normalize_value(indicators[sname][country][year], sname, norm_params)
                        normed.append(val)
            if len(normed) >= 2:
                reduced_hci[country] = sum(normed) / len(normed)

        # Compute correlation with Liberty
        pairs = []
        for country in reduced_hci:
            if country in liberty_latest:
                pairs.append((reduced_hci[country], liberty_latest[country]))
        if len(pairs) < 10:
            out.append(f"| {domain_name} | N/A | N/A | N/A | N/A |")
            continue

        r_reduced = pearson_r([p[0] for p in pairs], [p[1] for p in pairs])
        med_hci = statistics.median([p[0] for p in pairs])
        crossover = sum(1 for h, l in pairs if h > med_hci and l < 50)
        delta_r = r_reduced - baseline_r
        delta_cross = crossover - baseline_crossover
        sensitive_r = abs(delta_r) > 0.05
        sensitive_c = abs(delta_cross) > 3
        flag = " **!" if (sensitive_r or sensitive_c) else ""
        if sensitive_r or sensitive_c:
            any_sensitive = True
        out.append(f"| {domain_name} | {r_reduced:.4f} | {delta_r:+.4f}{flag} | {crossover} | {delta_cross:+d}{' **!' if sensitive_c else ''} |")
    out.append("")

    if any_sensitive:
        out.append("**VERDICT:** At least one domain removal exceeds sensitivity thresholds.")
        out.append("The HCI is NOT fully robust to domain composition — specific domains")
        out.append("drive the Liberty-HCI relationship.\n")
    else:
        out.append("**VERDICT:** No single domain removal exceeds sensitivity thresholds.")
        out.append("The HCI-Liberty correlation is robust to domain composition.\n")

    return '\n'.join(out)
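

# Reference definition, spelled out for clarity (illustrative; the jackknife above
# and the threshold test below compute this inline): a "crossover" country has an
# HCI above the sample median while its Liberty score is below 50.
def _crossover_count(pairs, liberty_cutoff=50.0):
    """Count (hci, liberty) pairs with above-median HCI and Liberty below the cutoff."""
    med = statistics.median([h for h, _ in pairs])
    return sum(1 for h, l in pairs if h > med and l < liberty_cutoff)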


def rp09_t3_minimum_indicator_sensitivity(composite, indicators, norm_params, csv_data):
    """T3: Test what happens when requiring different minimum indicator counts."""
    out = []
    out.append("### RP-09 T3: Minimum Indicator Count Sensitivity\n")
    out.append("The composite HCI requires min 3 indicators. Test thresholds of 3, 5, 7, 10.\n")

    year = '2023'

    # Get Liberty
    liberty_by_country = {}
    for row in csv_data:
        c = row['country']
        if row['liberty'] is not None:
            if c not in liberty_by_country or row['year'] > liberty_by_country[c][1]:
                liberty_by_country[c] = (row['liberty'], row['year'])
    liberty_latest = {c: v[0] for c, v in liberty_by_country.items()}

    out.append("| Min Indicators | N Countries | r(Liberty,HCI) | Crossover Count | Mean HCI |")
    out.append("|---------------|-------------|----------------|-----------------|----------|")

    for min_ind in [3, 5, 7, 10]:
        hci_scores = {}
        for country in composite:
            normed = []
            for sname in INDICATOR_SHEETS:
                if sname in indicators and country in indicators[sname]:
                    if year in indicators[sname][country]:
                        val = normalize_value(indicators[sname][country][year], sname, norm_params)
                        normed.append(val)
            if len(normed) >= min_ind:
                hci_scores[country] = sum(normed) / len(normed)

        pairs = []
        for c in hci_scores:
            if c in liberty_latest:
                pairs.append((hci_scores[c], liberty_latest[c]))
        if len(pairs) < 5:
            out.append(f"| {min_ind} | {len(hci_scores)} | N/A | N/A | N/A |")
            continue

        r = pearson_r([p[0] for p in pairs], [p[1] for p in pairs])
        med = statistics.median([p[0] for p in pairs])
        crossover = sum(1 for h, l in pairs if h > med and l < 50)
        mean_hci = statistics.mean([p[0] for p in pairs])
        out.append(f"| {min_ind} | {len(hci_scores)} | {r:.4f} | {crossover} | {mean_hci:.1f} |")
    out.append("")

    out.append("**INTERPRETATION:** If crossover count and correlation are stable across")
    out.append("thresholds, findings are not driven by countries with sparse indicator coverage.\n")

    return '\n'.join(out)


def rp09_t4_capable_autocracies(analysis_sheet):
    """T4: List all 'Capable Autocracy' countries from the analysis sheet."""
    out = []
    out.append("### RP-09 T4: The Capable Autocracies (Great Decoupling Cases)\n")
    out.append("Countries where HCI is above median AND Liberty is below 50.")
    out.append("These represent the central puzzle: high human capabilities without political freedom.\n")

    # Get all entries from the analysis sheet
    capable = [e for e in analysis_sheet if e['quadrant'] == 'Capable Autocracy']
    capable.sort(key=lambda x: x['gap'] if x['gap'] is not None else 0, reverse=True)

    # Also compute median HCI from the full dataset
    all_hci = [e['hci'] for e in analysis_sheet if e['hci'] is not None]
    median_hci = statistics.median(all_hci)

    out.append(f"**Median HCI across all {len(all_hci)} countries:** {median_hci:.1f}")
    out.append(f"**Number of Capable Autocracies:** {len(capable)}\n")

    out.append("| # | Country | Region | Liberty | HCI | Gap (HCI-L) |")
    out.append("|---|---------|--------|---------|-----|-------------|")
    for i, e in enumerate(capable, 1):
        out.append(f"| {i} | {e['country']} | {e['region']} | {e['liberty']:.0f} | {e['hci']:.1f} | {e['gap']:.1f} |")
    out.append("")

    # Regional breakdown of capable autocracies
    region_counts = defaultdict(int)
    for e in capable:
        region_counts[e['region']] += 1
    out.append("**Regional Distribution of Capable Autocracies:**\n")
    out.append("| Region | Count | % of Total |")
    out.append("|--------|-------|-----------|")
    for reg in sorted(region_counts, key=region_counts.get, reverse=True):
        pct = 100 * region_counts[reg] / len(capable)
        out.append(f"| {reg} | {region_counts[reg]} | {pct:.1f}% |")
    out.append("")

    # Quadrant summary
    quad_counts = defaultdict(int)
    for e in analysis_sheet:
        if e['quadrant']:
            quad_counts[e['quadrant']] += 1
    out.append("**Full Quadrant Distribution:**\n")
    out.append("| Quadrant | Count |")
    out.append("|----------|-------|")
    for q in sorted(quad_counts, key=quad_counts.get, reverse=True):
        out.append(f"| {q} | {quad_counts[q]} |")
    out.append("")

    return '\n'.join(out)


def rp09_t5_era_by_era_correlation(composite, csv_data):
    """T5: Compute the HCI-Liberty correlation for each era. Test the r=0.79 -> 0.57 decline claim."""
    out = []
    out.append("### RP-09 T5: Era-by-Era HCI-Liberty Correlation\n")
    out.append("The thesis claims r=0.79 (pre-1900) declining to r=0.57 (post-1990).")
    out.append("We test this by matching HCI composite scores to Liberty scores per era.\n")

    # For each CSV period, get the latest observation per country in that period
    # and match to the closest HCI year
    out.append("| Era (CSV Period) | HCI Year Used | N Countries | r(HCI,Liberty) | p-value (approx) | Mean L | Std L |")
    out.append("|-----------------|---------------|-------------|----------------|------------------|--------|-------|")

    correlations_by_era = {}
    for period in CSV_PERIODS:
        hci_year = PERIOD_TO_HCI_YEAR[period]

        # Get latest Liberty observation per country in this period
        country_liberty = {}
        for row in csv_data:
            if row['data_source_period'] == period and row['liberty'] is not None:
                c = row['country']
                if c not in country_liberty or row['year'] > country_liberty[c][1]:
                    country_liberty[c] = (row['liberty'], row['year'])

        # Match with HCI
        pairs = []
        for country, (lib, yr) in country_liberty.items():
            if country in composite and hci_year in composite[country]:
                pairs.append((composite[country][hci_year], lib, country))

        if len(pairs) < 5:
            out.append(f"| {period} | {hci_year} | {len(pairs)} | N/A | N/A | N/A | N/A |")
            continue

        hci_vals = [p[0] for p in pairs]
        lib_vals = [p[1] for p in pairs]
        r = pearson_r(hci_vals, lib_vals)
        p = p_value_approx(r, len(pairs))
        mean_l = statistics.mean(lib_vals)
        std_l = statistics.stdev(lib_vals) if len(lib_vals) > 1 else 0
        correlations_by_era[period] = (r, len(pairs))
        p_str = f"{p:.4f}" if not math.isnan(p) else "N/A"
        out.append(f"| {period} | {hci_year} | {len(pairs)} | {r:.4f} | {p_str} | {mean_l:.1f} | {std_l:.1f} |")
    out.append("")

    # Test the specific claim
    early = correlations_by_era.get('1800-1899')
    late = correlations_by_era.get('2006-2025')
    if early and late:
        out.append("**Claimed:** r=0.79 (pre-1900) -> r=0.57 (post-1990)")
        out.append(f"**Observed:** r={early[0]:.4f} (1800-1899, N={early[1]}) -> r={late[0]:.4f} (2006-2025, N={late[1]})")
        decline = early[0] - late[0]
        out.append(f"**Decline magnitude:** {decline:.4f}")
        if decline > 0:
            out.append(f"**VERDICT:** Decline is {'CONFIRMED' if decline > 0.10 else 'PARTIALLY CONFIRMED (decline < 0.10)'}.")
        else:
            out.append("**VERDICT:** No decline observed — correlation actually INCREASED.")
    out.append("")

    # Also try with more granular HCI year matching
    out.append("**Sensitivity: Alternative HCI Year Matching**\n")
    out.append("Testing with multiple HCI years per era to check robustness:\n")
    alt_mappings = {
        '1800-1899': ['1800', '1850', '1900'],
        '1900-1971': ['1913', '1929', '1945', '1960'],
        '1972-2005': ['1975', '1990', '2000'],
        '2006-2025': ['2010', '2020', '2023'],
    }
    out.append("| Era | HCI Year | N | r |")
    out.append("|-----|----------|---|---|")
    for period in CSV_PERIODS:
        country_liberty = {}
        for row in csv_data:
            if row['data_source_period'] == period and row['liberty'] is not None:
                c = row['country']
                if c not in country_liberty or row['year'] > country_liberty[c][1]:
                    country_liberty[c] = (row['liberty'], row['year'])
        for hci_year in alt_mappings[period]:
            pairs = []
            for country, (lib, yr) in country_liberty.items():
                if country in composite and hci_year in composite[country]:
                    pairs.append((composite[country][hci_year], lib))
            if len(pairs) >= 5:
                r = pearson_r([p[0] for p in pairs], [p[1] for p in pairs])
                out.append(f"| {period} | {hci_year} | {len(pairs)} | {r:.4f} |")
            else:
                out.append(f"| {period} | {hci_year} | {len(pairs)} | N/A (too few) |")
    out.append("")

    return '\n'.join(out)


# ===========================================================================
# RP-05: PRE-1900 SAMPLE BIAS
# ===========================================================================

def rp05_t1_constant_panel(composite, csv_data):
    """T1: Identify countries present in ALL eras. Compute the correlation for the constant panel."""
    out = []
    out.append("### RP-05 T1: Constant Panel Test\n")
    out.append("Identify countries present in ALL data_source_period eras. If correlation")
    out.append("declines even within this constant panel, the decorrelation is genuine.\n")

    # Find countries present in each era
    countries_by_era = defaultdict(set)
    country_liberty_by_era = defaultdict(dict)
    for row in csv_data:
        if row['liberty'] is not None:
            era = row['data_source_period']
            c = row['country']
            countries_by_era[era].add(c)
            if c not in country_liberty_by_era[era] or row['year'] > country_liberty_by_era[era][c][1]:
                country_liberty_by_era[era][c] = (row['liberty'], row['year'])

    # Constant panel = intersection of all eras
    constant_panel = None
    for era in CSV_PERIODS:
        if constant_panel is None:
            constant_panel = countries_by_era[era].copy()
        else:
            constant_panel &= countries_by_era[era]

    out.append("**Countries per era:**\n")
    for era in CSV_PERIODS:
        out.append(f" - {era}: {len(countries_by_era[era])} countries")
    out.append(f"\n**Constant panel (present in ALL eras): {len(constant_panel)} countries**\n")
    if len(constant_panel) > 0:
        out.append("Countries in constant panel: " + ", ".join(sorted(constant_panel)))
    out.append("")

    if len(constant_panel) < 5:
        out.append("Constant panel too small for meaningful correlation. Testing with")
        out.append("countries present in at least 3 of 4 eras instead.\n")
        # Find countries present in at least 3 eras
        country_era_count = defaultdict(int)
        for era in CSV_PERIODS:
            for c in countries_by_era[era]:
                country_era_count[c] += 1
        relaxed_panel = {c for c, cnt in country_era_count.items() if cnt >= 3}
        out.append(f"**Relaxed panel (>= 3 eras): {len(relaxed_panel)} countries**\n")
        panel = relaxed_panel
    else:
        panel = constant_panel

    # Compute correlation for the panel in each era
    out.append("| Era | HCI Year | N (panel matched) | r(HCI,Liberty) panel | r(HCI,Liberty) full era |")
    out.append("|-----|----------|-------------------|---------------------|------------------------|")
    for period in CSV_PERIODS:
        hci_year = PERIOD_TO_HCI_YEAR[period]

        # Panel correlation
        panel_pairs = []
        for c in panel:
            if c in country_liberty_by_era[period] and c in composite and hci_year in composite[c]:
                panel_pairs.append((composite[c][hci_year], country_liberty_by_era[period][c][0]))

        # Full era correlation
        full_pairs = []
        for c in country_liberty_by_era[period]:
            if c in composite and hci_year in composite[c]:
                full_pairs.append((composite[c][hci_year], country_liberty_by_era[period][c][0]))

        r_panel = pearson_r([p[0] for p in panel_pairs], [p[1] for p in panel_pairs]) if len(panel_pairs) >= 5 else float('nan')
        r_full = pearson_r([p[0] for p in full_pairs], [p[1] for p in full_pairs]) if len(full_pairs) >= 5 else float('nan')
        r_panel_str = f"{r_panel:.4f}" if not math.isnan(r_panel) else "N/A"
        r_full_str = f"{r_full:.4f}" if not math.isnan(r_full) else "N/A"
        out.append(f"| {period} | {hci_year} | {len(panel_pairs)} | {r_panel_str} | {r_full_str} |")
    out.append("")

    out.append("**INTERPRETATION:** If the panel correlation also declines across eras,")
    out.append("the decorrelation is genuine and not an artifact of changing sample composition.\n")

    return '\n'.join(out)
Testing with") out.append("countries present in at least 3 of 4 eras instead.\n") # Find countries present in at least 3 eras country_era_count = defaultdict(int) for era in CSV_PERIODS: for c in countries_by_era[era]: country_era_count[c] += 1 relaxed_panel = {c for c, cnt in country_era_count.items() if cnt >= 3} out.append(f"**Relaxed panel (>= 3 eras): {len(relaxed_panel)} countries**\n") panel = relaxed_panel else: panel = constant_panel # Compute correlation for the panel in each era out.append("| Era | HCI Year | N (panel matched) | r(HCI,Liberty) panel | r(HCI,Liberty) full era |") out.append("|-----|----------|-------------------|---------------------|------------------------|") for period in CSV_PERIODS: hci_year = PERIOD_TO_HCI_YEAR[period] # Panel correlation panel_pairs = [] for c in panel: if c in country_liberty_by_era[period] and c in composite and hci_year in composite[c]: panel_pairs.append((composite[c][hci_year], country_liberty_by_era[period][c][0])) # Full era correlation full_pairs = [] for c in country_liberty_by_era[period]: if c in composite and hci_year in composite[c]: full_pairs.append((composite[c][hci_year], country_liberty_by_era[period][c][0])) r_panel = pearson_r([p[0] for p in panel_pairs], [p[1] for p in panel_pairs]) if len(panel_pairs) >= 5 else float('nan') r_full = pearson_r([p[0] for p in full_pairs], [p[1] for p in full_pairs]) if len(full_pairs) >= 5 else float('nan') r_panel_str = f"{r_panel:.4f}" if not math.isnan(r_panel) else "N/A" r_full_str = f"{r_full:.4f}" if not math.isnan(r_full) else "N/A" out.append(f"| {period} | {hci_year} | {len(panel_pairs)} | {r_panel_str} | {r_full_str} |") out.append("") out.append("**INTERPRETATION:** If the panel correlation also declines across eras,") out.append("the decorrelation is genuine and not an artifact of changing sample composition.\n") return '\n'.join(out) def rp05_t2_regional_decomposition(composite, csv_data): """T2: Compute era-by-era correlations separately by region.""" out = [] out.append("### RP-05 T2: Regional Decomposition\n") out.append("Does Europe's HCI-Liberty correlation also decline? 


def rp05_t3_sample_size_effect(composite, csv_data):
    """T3: Bootstrap test — draw N=78 from the post-2006 era, compute the correlation, repeat 10k times."""
    out = []
    out.append("### RP-05 T3: Sample Size Effect (Bootstrap)\n")
    out.append("Could the high pre-1900 r just be small-sample noise? Draw N=78 from post-2006")
    out.append("data 10,000 times and compute the correlation distribution.\n")

    # Get the pre-1900 sample size
    pre1900_countries = set()
    pre1900_liberty = {}
    for row in csv_data:
        if row['data_source_period'] == '1800-1899' and row['liberty'] is not None:
            c = row['country']
            pre1900_countries.add(c)
            if c not in pre1900_liberty or row['year'] > pre1900_liberty[c][1]:
                pre1900_liberty[c] = (row['liberty'], row['year'])

    # Match pre-1900 with HCI
    pre1900_matched = []
    hci_year_early = '1850'
    for c in pre1900_liberty:
        if c in composite and hci_year_early in composite[c]:
            pre1900_matched.append((composite[c][hci_year_early], pre1900_liberty[c][0]))
    pre1900_n = len(pre1900_matched)
    if pre1900_n >= 5:
        pre1900_r = pearson_r([p[0] for p in pre1900_matched], [p[1] for p in pre1900_matched])
    else:
        pre1900_r = float('nan')
    out.append(f"**Pre-1900 matched sample:** N = {pre1900_n}, r = {pre1900_r:.4f}\n")

    # Get post-2006 full sample
    post2006_liberty = {}
    for row in csv_data:
        if row['data_source_period'] == '2006-2025' and row['liberty'] is not None:
            c = row['country']
            if c not in post2006_liberty or row['year'] > post2006_liberty[c][1]:
                post2006_liberty[c] = (row['liberty'], row['year'])
    hci_year_late = '2023'
    post2006_pairs = []
    for c in post2006_liberty:
        if c in composite and hci_year_late in composite[c]:
            post2006_pairs.append((composite[c][hci_year_late], post2006_liberty[c][0], c))
    full_r = pearson_r([p[0] for p in post2006_pairs], [p[1] for p in post2006_pairs])
    out.append(f"**Post-2006 full sample:** N = {len(post2006_pairs)}, r = {full_r:.4f}\n")

    # Bootstrap: repeatedly draw a pre-1900-sized subsample (without replacement)
    # from the post-2006 pairs and record the correlation
    n_boot = 10000
    sample_size = min(pre1900_n, len(post2006_pairs))
    if sample_size < 5:
        out.append("Sample too small for bootstrap test.\n")
        return '\n'.join(out)

    boot_rs = []
    for _ in range(n_boot):
        sample = random.sample(post2006_pairs, sample_size)
        r = pearson_r([p[0] for p in sample], [p[1] for p in sample])
        if not math.isnan(r):
            boot_rs.append(r)

    if not boot_rs:
        out.append("Bootstrap produced no valid correlations.\n")
        return '\n'.join(out)

    boot_rs.sort()
    mean_r = statistics.mean(boot_rs)
    std_r = statistics.stdev(boot_rs)
    p5 = boot_rs[int(0.05 * len(boot_rs))]
    p25 = boot_rs[int(0.25 * len(boot_rs))]
    p50 = boot_rs[int(0.50 * len(boot_rs))]
    p75 = boot_rs[int(0.75 * len(boot_rs))]
    p95 = boot_rs[int(0.95 * len(boot_rs))]

    out.append(f"**Bootstrap Distribution (N={sample_size} drawn from post-2006, {n_boot} draws):**\n")
    out.append("| Statistic | Value |")
    out.append("|-----------|-------|")
    out.append(f"| Mean r | {mean_r:.4f} |")
    out.append(f"| Std r | {std_r:.4f} |")
    out.append(f"| 5th percentile | {p5:.4f} |")
    out.append(f"| 25th percentile | {p25:.4f} |")
    out.append(f"| Median | {p50:.4f} |")
    out.append(f"| 75th percentile | {p75:.4f} |")
    out.append(f"| 95th percentile | {p95:.4f} |")
    out.append(f"| Observed pre-1900 r | {pre1900_r:.4f} |")
    out.append("")

    # What fraction of bootstrap samples have r >= pre1900_r?
    if not math.isnan(pre1900_r):
        frac_above = sum(1 for r in boot_rs if r >= pre1900_r) / len(boot_rs)
        out.append(f"**Fraction of bootstrap samples with r >= {pre1900_r:.4f}: {frac_above:.4f} ({frac_above*100:.1f}%)**\n")
        if frac_above < 0.05:
            out.append("**VERDICT:** The pre-1900 correlation is significantly higher than what")
            out.append("small-sample noise from the modern distribution would produce (p<0.05).")
            out.append("The high early r is NOT just a sample-size artifact.\n")
        else:
            out.append(f"**VERDICT:** {frac_above*100:.1f}% of small-sample draws from modern data")
            out.append("match or exceed the pre-1900 r. The high early correlation COULD be")
            out.append("partly a small-sample artifact.\n")

    return '\n'.join(out)


def rp05_t4_composition_analysis(csv_data, composite):
    """T4: For each era, report N, regional breakdown, mean L, std L."""
    out = []
    out.append("### RP-05 T4: Sample Composition Analysis\n")
    out.append("How does the sample diversify over time?\n")

    for period in CSV_PERIODS:
        era_rows = [r for r in csv_data if r['data_source_period'] == period]

        # Get unique countries (latest observation per country)
        country_data = {}
        for row in era_rows:
            c = row['country']
            if row['liberty'] is not None:
                if c not in country_data or row['year'] > country_data[c]['year']:
                    country_data[c] = row

        n = len(country_data)
        liberties = [country_data[c]['liberty'] for c in country_data if country_data[c]['liberty'] is not None]

        out.append(f"#### Era: {period}\n")
        out.append(f"- **N countries:** {n}")
        out.append(f"- **Mean Liberty:** {statistics.mean(liberties):.1f}" if liberties else "- **Mean Liberty:** N/A")
        out.append(f"- **Std Liberty:** {statistics.stdev(liberties):.1f}" if len(liberties) > 1 else "- **Std Liberty:** N/A")
        out.append(f"- **Min/Max Liberty:** {min(liberties):.0f} / {max(liberties):.0f}" if liberties else "")

        # Regional breakdown
        region_counts = defaultdict(int)
        for c in country_data:
            region_counts[country_data[c]['region']] += 1
        out.append("\n| Region | Count | % |")
        out.append("|--------|-------|---|")
        for reg in sorted(region_counts, key=region_counts.get, reverse=True):
            pct = 100 * region_counts[reg] / n
            out.append(f"| {reg} | {region_counts[reg]} | {pct:.1f}% |")

        # HCI coverage
        hci_year = PERIOD_TO_HCI_YEAR[period]
        hci_matched = sum(1 for c in country_data if c in composite and hci_year in composite[c])
        out.append(f"\nHCI matched at {hci_year}: {hci_matched}/{n} ({100*hci_matched/n:.0f}%)")
        out.append("")

    return '\n'.join(out)


def rp05_t5_survivor_bias(composite, csv_data, regions_hci):
    """T5: Assess survivor bias in pre-1900 data. What's missing? Simulate adding missing polities."""
    out = []
    out.append("### RP-05 T5: Survivor Bias Assessment\n")
    out.append("What regions/country types are present vs missing in pre-1900 data?")
    out.append("If we added plausible 'missing' polities, how much would r drop?\n")

    # Pre-1900 countries in CSV
    pre1900_countries = set()
    pre1900_liberty = {}
    for row in csv_data:
        if row['data_source_period'] == '1800-1899' and row['liberty'] is not None:
            c = row['country']
            pre1900_countries.add(c)
            if c not in pre1900_liberty or row['year'] > pre1900_liberty[c][1]:
                pre1900_liberty[c] = (row['liberty'], row['year'], row['region'])

    # Pre-1900 HCI coverage
    hci_year = '1850'
    pre1900_with_hci = {}
    for c in pre1900_liberty:
        if c in composite and hci_year in composite[c]:
            pre1900_with_hci[c] = (composite[c][hci_year], pre1900_liberty[c][0])

    out.append(f"**Pre-1900 countries in CSV:** {len(pre1900_countries)}")
    out.append(f"**Pre-1900 countries with HCI at {hci_year}:** {len(pre1900_with_hci)}\n")

    # Regional breakdown of pre-1900 sample
    region_present = defaultdict(list)
    for c in pre1900_liberty:
        region_present[pre1900_liberty[c][2]].append(c)
    out.append("**Pre-1900 Sample by Region:**\n")
    out.append("| Region | Countries | Count |")
    out.append("|--------|-----------|-------|")
    for reg in sorted(region_present, key=lambda r: len(region_present[r]), reverse=True):
        countries = sorted(region_present[reg])
        display = ", ".join(countries[:10])
        if len(countries) > 10:
            display += f"... (+{len(countries)-10})"
        out.append(f"| {reg} | {display} | {len(countries)} |")
    out.append("")

    # Countries in later eras but missing from pre-1900
    all_countries = set()
    for row in csv_data:
        all_countries.add(row['country'])
    missing_from_pre1900 = all_countries - pre1900_countries

    missing_regions = defaultdict(list)
    for row in csv_data:
        if row['country'] in missing_from_pre1900:
            if row['country'] not in [c for r in missing_regions.values() for c in r]:
                missing_regions[row['region']].append(row['country'])

    out.append("**Missing from Pre-1900 by Region:**\n")
    out.append("| Region | Missing Count | Examples |")
    out.append("|--------|--------------|----------|")
    for reg in sorted(missing_regions, key=lambda r: len(missing_regions[r]), reverse=True):
        examples = ", ".join(sorted(missing_regions[reg])[:5])
        if len(missing_regions[reg]) > 5:
            examples += f"... (+{len(missing_regions[reg])-5})"
        out.append(f"| {reg} | {len(missing_regions[reg])} | {examples} |")
    out.append("")

    # Simulation: Add 10 plausible missing polities with estimated scores
    # These would be low-L, low-HCI cases (African kingdoms, Ottoman provinces, Central Asian khanates)
    if len(pre1900_with_hci) >= 5:
        baseline_r = pearson_r(
            [v[0] for v in pre1900_with_hci.values()],
            [v[1] for v in pre1900_with_hci.values()]
        )
        out.append(f"**Baseline pre-1900 r (N={len(pre1900_with_hci)}):** {baseline_r:.4f}\n")

        # Simulate adding missing polities
        # Plausible missing polities: low HCI (10-25), low Liberty (5-15)
        missing_polities = [
            ("Zulu Kingdom", 15, 8),
            ("Ashanti Empire", 18, 10),
            ("Ethiopia (historical)", 12, 5),
            ("Ottoman Empire (provinces)", 22, 7),
            ("Qajar Persia", 20, 5),
            ("Siam (historical)", 20, 12),
            ("Korea (Joseon)", 25, 5),
            ("Bukhara Khanate", 12, 5),
            ("Madagascar Kingdom", 14, 8),
            ("Moroccan Sultanate", 18, 7),
        ]
        out.append("**Simulated Missing Polities (estimated scores):**\n")
        out.append("| Polity | Est. HCI | Est. Liberty |")
        out.append("|--------|----------|-------------|")
        for name, hci, lib in missing_polities:
            out.append(f"| {name} | {hci} | {lib} |")
        out.append("")

        # Compute r with added polities
        for n_add in [3, 5, 7, 10]:
            augmented_hci = [v[0] for v in pre1900_with_hci.values()] + [p[1] for p in missing_polities[:n_add]]
            augmented_lib = [v[1] for v in pre1900_with_hci.values()] + [p[2] for p in missing_polities[:n_add]]
            r_aug = pearson_r(augmented_hci, augmented_lib)
            n_total = len(pre1900_with_hci) + n_add
            out.append(f"- Adding {n_add} missing polities: r = {r_aug:.4f} (N={n_total}, delta = {r_aug - baseline_r:+.4f})")
        out.append("")

        # Also test: what if missing polities are HIGH-HCI, LOW-Liberty (like modern capable autocracies)?
out.append("**Counter-scenario: Missing capable autocracies (high HCI, low L):**\n") missing_capable = [ ("China (Qing)", 25, 5), ("Ottoman Empire (core)", 30, 8), ("Russia (Imperial)", 28, 10), # Actually may be present ] for n_add in [3]: augmented_hci = [v[0] for v in pre1900_with_hci.values()] + [p[1] for p in missing_capable[:n_add]] augmented_lib = [v[1] for v in pre1900_with_hci.values()] + [p[2] for p in missing_capable[:n_add]] r_aug = pearson_r(augmented_hci, augmented_lib) out.append(f"- Adding {n_add} capable autocracies: r = {r_aug:.4f} (delta = {r_aug - baseline_r:+.4f})") out.append("") out.append("**VERDICT:** The impact of plausible missing polities on the pre-1900") out.append("correlation depends on whether they cluster in the low-HCI/low-Liberty") out.append("quadrant (maintaining r) or the high-HCI/low-Liberty quadrant (reducing r).\n") else: out.append("Insufficient pre-1900 HCI data for simulation.\n") return '\n'.join(out) # =========================================================================== # MAIN EXECUTION # =========================================================================== def main(): print("Loading HCI Excel data...") composite, indicators, regions_hci, analysis_sheet = load_hci_data() print(f" Loaded {len(composite)} countries with composite HCI scores") print(f" Loaded {len(INDICATOR_SHEETS)} indicator sheets") print(f" Analysis sheet: {len(analysis_sheet)} entries") print("Loading CSV data...") csv_data = load_csv_data() print(f" Loaded {len(csv_data)} observations") print("Computing normalization parameters...") norm_params = compute_normalization_params(indicators) # Verify normalization by recomputing HCI for 2023 and comparing print("Verifying normalization against composite scores...") diffs = [] for country in composite: if '2023' in composite[country]: normed = [] for sname in INDICATOR_SHEETS: if sname in indicators and country in indicators[sname]: if '2023' in indicators[sname][country]: val = normalize_value(indicators[sname][country]['2023'], sname, norm_params) normed.append(val) if len(normed) >= 3: recomputed = sum(normed) / len(normed) original = composite[country]['2023'] diffs.append(abs(recomputed - original)) if diffs: print(f" Mean abs diff (recomputed vs original): {statistics.mean(diffs):.2f}") print(f" Max abs diff: {max(diffs):.2f}") print(f" (Some difference expected due to different normalization scales)") # Build report report = [] report.append("# RP-09 & RP-05: HCI Weighting Sensitivity and Pre-1900 Sample Bias\n") report.append("## Executive Summary\n") report.append("This analysis tests the robustness of the Human Capabilities Index (HCI)") report.append("and its relationship with Liberty scores. 

    # RP-09: HCI WEIGHTING SENSITIVITY
    report.append("## RP-09: HCI Weighting Sensitivity\n")
    print("\nRunning RP-09 T1: Geometric Mean...")
    report.append(rp09_t1_geometric_mean(composite, indicators, norm_params))
    print("Running RP-09 T2: Leave-One-Domain-Out...")
    report.append(rp09_t2_leave_one_domain_out(composite, indicators, norm_params, csv_data))
    print("Running RP-09 T3: Minimum Indicator Sensitivity...")
    report.append(rp09_t3_minimum_indicator_sensitivity(composite, indicators, norm_params, csv_data))
    print("Running RP-09 T4: Capable Autocracies...")
    report.append(rp09_t4_capable_autocracies(analysis_sheet))
    print("Running RP-09 T5: Era-by-Era Correlation...")
    report.append(rp09_t5_era_by_era_correlation(composite, csv_data))
    report.append("---\n")

    # RP-05: PRE-1900 SAMPLE BIAS
    report.append("## RP-05: Pre-1900 Sample Bias\n")
    print("Running RP-05 T1: Constant Panel...")
    report.append(rp05_t1_constant_panel(composite, csv_data))
    print("Running RP-05 T2: Regional Decomposition...")
    report.append(rp05_t2_regional_decomposition(composite, csv_data))
    print("Running RP-05 T3: Sample Size Effect...")
    report.append(rp05_t3_sample_size_effect(composite, csv_data))
    print("Running RP-05 T4: Composition Analysis...")
    report.append(rp05_t4_composition_analysis(csv_data, composite))
    print("Running RP-05 T5: Survivor Bias...")
    report.append(rp05_t5_survivor_bias(composite, csv_data, regions_hci))

    # Write report
    report_text = '\n'.join(report)
    with open(OUTPUT_PATH, 'w') as f:
        f.write(report_text)
    print(f"\nResults written to {OUTPUT_PATH}")
    print(f"Report length: {len(report_text)} characters")

    # Print the full report to stdout too
    print("\n" + "=" * 80)
    print(report_text)


if __name__ == '__main__':
    main()