#!/usr/bin/env python3
"""
parse_givewp.py - GiveWP PII Extraction & Analysis Script
Target: primature.gouv.ht (Haiti Prime Minister's Office)
Source: Unauthenticated GiveWP v3 REST API endpoints
Purpose: Authorized OSINT assessment - read, parse, and report only
"""

import json
import os
import sys
import re
from datetime import datetime
from collections import Counter, defaultdict

# On Windows, re-wrap stdout so non-ASCII donor names print as UTF-8.
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Directory holding this script; every input file and the report are
# resolved relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Logical dataset name -> absolute path of the saved JSON response.
FILES = {
    dataset: os.path.join(BASE_DIR, filename)
    for dataset, filename in {
        'donors': 'givewp-donors.json',
        'donations': 'givewp-donations.json',
        'donors_embedded': 'givewp-donors-embedded.json',
        'donors_1_17': 'givewp-donors-1-17.json',
        'comments': 'givewp-campaign12-comments.json',
        'donations_page2': 'givewp-donations-page2.json',
        'campaigns': 'givewp-campaigns.json',
        'forms': 'givewp-forms.json',
        'subscriptions': 'givewp-subscriptions.json',
    }.items()
}

# Destination of the generated plain-text report.
REPORT_PATH = os.path.join(BASE_DIR, 'GIVEWP-PII-REPORT.txt')


def load_json(filepath):
    """
    Load a JSON file and always hand back a list of records.

    Args:
        filepath: Path of the JSON document to read (decoded as UTF-8).

    Returns:
        The parsed value if it is a list; a one-element list wrapping any
        other parsed value; an empty list when the document is JSON ``null``
        or when the file cannot be read or parsed (a warning is printed in
        the latter case).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Best-effort by design: a missing or corrupt export must not abort
        # the run, the caller just sees zero records for that source.
        print(f"  [WARN] Could not load {os.path.basename(filepath)}: {e}")
        return []

    if data is None:
        # A literal `null` body carries no records; wrapping it as [None]
        # would break every consumer that calls .get() on a record.
        return []
    return data if isinstance(data, list) else [data]


def is_gibberish(name):
    """
    Heuristic to detect gibberish/test names vs potentially real names.

    Checks run in this order and the first one that fires wins:
      1. empty or whitespace-only name
      2. explicit test/placeholder prefixes ("test", "first name", "asdf", ...)
      3. any Cyrillic character anywhere in the name
      4. name of two or more words whose first word is longer than two
         characters and contains no vowel (keyboard mashing)
      5. first word found in a fixed list of known keyboard-mash strings

    Args:
        name: Donor display name. None and empty strings are accepted.

    Returns:
        (is_gibberish: bool, reason: str). The reason is
        "appears legitimate" when no check fires.
    """
    if not name or not name.strip():
        return True, "empty"

    cleaned = name.strip()
    name_lower = cleaned.lower()

    # Explicit test patterns
    test_patterns = [
        r'^test\b', r'^first\s*name', r'^asd[f]?', r'^dfd\b', r'^sda\b',
        r'^ddf\b', r'^ee\b', r'^d\s+d$', r'^cv\s+v$', r'^aaa\b',
    ]
    for pat in test_patterns:
        if re.match(pat, name_lower):
            return True, "test/placeholder pattern"

    # Cyrillic text (likely test from non-Haitian source). Checked before the
    # vowel heuristic: Cyrillic letters are never in the Latin vowel class, so
    # they would otherwise be reported as "consonant-only".
    if re.search(r'[\u0400-\u04FF]', cleaned):
        return True, "Cyrillic characters (likely test)"

    parts = name_lower.split()
    first = parts[0]

    # Keyboard mashing: a multi-word name whose first word has no vowel at all.
    # The match runs on the lower-cased word because the accented vowels in
    # the class are lower-case only; upper-case input such as "ÉLÉ" must not
    # be mistaken for a consonant run.
    if len(parts) >= 2 and len(first) > 2:
        if re.match(r'^[^aeiouàáâãäåèéêëìíîïòóôõöùúûü]+$', first):
            return True, "consonant-only first name (keyboard mash)"

    # Known gibberish patterns
    gibberish_firsts = {
        'fdvdfbg', 'fbzdgbzfvs', 'tatag', 'rewrwr', 'klkjk', 'gfd',
        'ghhh', 'fdghdt', 'sfsdf', 'ejhkerk', 'dfgdhxdg', 'sdad',
        'asdsada', 'jkljklj', 'kujihkujh', 'kjhkjhjh', 'rrtg', 'fsf',
        'ewf', 'dxvx', 'toniyes',
    }
    if first in gibberish_firsts:
        return True, "known gibberish first name"

    return False, "appears legitimate"


def classify_donor(name, total_donated, num_donations, comment=None):
    """
    Classify a donor as TEST/GIBBERISH, SUSPICIOUS, or POTENTIALLY REAL.

    Precedence of the verdict: gibberish name, then famous-name match, then
    an amount of 100,000 or more, otherwise "POTENTIALLY REAL". Zero totals
    and test-like comments only add reasons; they never change the verdict.

    Args:
        name: Donor display name; None is treated as an empty name.
        total_donated: Lifetime total as a number or numeric string; any
            falsy value counts as 0.
        num_donations: Number of donations. Accepted for interface
            compatibility; not used by the current rules.
        comment: Optional free-text comment checked against known test
            strings.

    Returns:
        (classification, confidence, reasons) where reasons is a list of
        human-readable strings.

    Raises:
        ValueError: if total_donated is a non-empty, non-numeric string.
    """
    reasons = []
    # is_gibberish tolerates None; normalise here so the famous-name check
    # below does not fail on .strip().
    name = name or ''
    gib, gib_reason = is_gibberish(name)

    if gib:
        reasons.append(f"Gibberish name: {gib_reason}")

    # Check for famous/joke names
    famous = ('barack o', 'jimmy c', 'steve j', 'john d')
    is_famous = name.strip().lower() in famous
    if is_famous:
        reasons.append("Matches famous person pattern (likely joke)")

    # Check donation amount patterns
    amount = float(total_donated) if total_donated else 0
    if amount >= 100000:
        reasons.append(f"Unusually large amount: ${amount:,.2f}")
    if amount == 0:
        reasons.append("Zero donation total (registration only)")

    # Check comments for test indicators (ignoring surrounding whitespace)
    test_comments = ('test', 'asd', 'asas', 'asasas', 'ss', 'tes ajah')
    if comment and comment.strip().lower() in test_comments:
        reasons.append(f"Test-like comment: '{comment}'")

    # Classification logic: driven by explicit flags rather than by
    # searching the reason strings for keywords.
    if gib:
        return "TEST/GIBBERISH", "HIGH", reasons
    if is_famous:
        return "SUSPICIOUS", "MEDIUM", reasons
    if amount >= 100000:
        return "SUSPICIOUS", "HIGH", reasons
    return "POTENTIALLY REAL", "MEDIUM", reasons


def extract_gravatar_hash(avatar_url):
    """
    Pull the hash segment out of a Gravatar avatar URL.

    Returns the text between the first 'gravatar.com/avatar/' marker and the
    next '?' (or the next marker occurrence), or None when the URL is empty
    or does not contain the marker.
    """
    marker = 'gravatar.com/avatar/'
    if not avatar_url or marker not in avatar_url:
        return None

    # Text after the first marker, cut at a repeated marker if one exists.
    after_marker = avatar_url.partition(marker)[2]
    segment = after_marker.partition(marker)[0]

    # Drop the query string (size/default parameters).
    return segment.partition('?')[0]


def _donor_display_name(record):
    """Join a record's firstName and lastName into one stripped string (nulls become empty)."""
    first = record.get('firstName') or ''
    last = record.get('lastName') or ''
    return f"{first} {last}".strip()


def _money_value(record, field):
    """Return record[field]['value'] as a float; 0.0 when absent, null, or non-numeric."""
    money = record.get(field)
    if not isinstance(money, dict):
        return 0.0
    try:
        return float(money.get('value') or 0)
    except (TypeError, ValueError):
        return 0.0


def _gateway_label(donation):
    """Return the donation's gateway display name, falling back to its gatewayId."""
    gateway = donation.get('gateway')
    label = gateway.get('name') if isinstance(gateway, dict) else None
    if label is None:
        label = donation.get('gatewayId', '')
    return str(label or '')


def main():
    """
    Build the GiveWP report from the saved JSON responses.

    Steps:
      1. Load every file listed in FILES via load_json.
      2. Merge donor records by ID (a record carrying `_embedded` data
         replaces one without) and merge/deduplicate donations, including
         donations nested under `_embedded['givewp:donations']`.
      3. Map truncated donor names to the full names found in campaign
         comments and collect the Gravatar hash of each comment avatar.
      4. Classify each donor once with classify_donor and emit the thirteen
         report sections.
      5. Write the report to REPORT_PATH and print a short console summary.

    The function reads only local files and writes only REPORT_PATH and
    stdout. It tolerates missing or empty inputs: sections that need data
    print a placeholder instead of raising.
    """
    import statistics  # local import: only needed for the median in Section 6

    print("=" * 70)
    print("GiveWP PII Extraction - primature.gouv.ht")
    print("=" * 70)
    print()

    # ---- Load all data ----
    print("[*] Loading JSON files...")
    all_data = {}
    for key, path in FILES.items():
        data = load_json(path)
        all_data[key] = data
        print(f"  {os.path.basename(path)}: {len(data)} records")

    # ---- Merge all donors (deduplicate by ID) ----
    print("\n[*] Merging donor records...")
    donors = {}
    for source in ['donors_1_17', 'donors', 'donors_embedded']:
        for d in all_data[source]:
            if not isinstance(d, dict):
                continue  # skip malformed entries instead of crashing on .get
            did = d.get('id')
            if not did:
                continue
            existing = donors.get(did)
            # First sighting wins, unless a later copy adds embedded data.
            if existing is None or (d.get('_embedded') and not existing.get('_embedded')):
                donors[did] = d
    print(f"  Total unique donors: {len(donors)}")

    # ---- Process donations ----
    print("[*] Processing donations...")
    donations = []
    for source in ['donations', 'donations_page2']:
        donations.extend(r for r in all_data[source] if isinstance(r, dict))

    # Also extract embedded donations from donor records. The ID set is built
    # once and updated incrementally rather than rebuilt per candidate.
    known_ids = {r.get('id') for r in donations}
    for d in donors.values():
        emb = d.get('_embedded')
        if not isinstance(emb, dict):
            continue
        for item in emb.get('givewp:donations') or []:
            # Each embedded entry is expected to be a list of donation dicts.
            if not isinstance(item, list):
                continue
            for dd in item:
                if not isinstance(dd, dict):
                    continue
                emb_id = dd.get('id')
                if emb_id is not None and emb_id not in known_ids:
                    donations.append(dd)
                    known_ids.add(emb_id)

    # Deduplicate donations by ID
    seen_ids = set()
    unique_donations = []
    for d in donations:
        did = d.get('id')
        if did and did not in seen_ids:
            seen_ids.add(did)
            unique_donations.append(d)
    donations = unique_donations
    print(f"  Total unique donations: {len(donations)}")

    # ---- Process comments ----
    comments = [c for c in all_data['comments'] if isinstance(c, dict)]
    print(f"  Campaign comments: {len(comments)}")

    # ---- Build full name map from comments (these have full last names!) ----
    comment_fullnames = {}
    comment_gravatar_hashes = {}
    for c in comments:
        full_name = (c.get('donorName') or '').strip()
        avatar = c.get('avatar', '')
        grav_hash = extract_gravatar_hash(avatar)
        if full_name:
            # Map truncated donor name ("First L") -> full comment name
            parts = full_name.split()
            if len(parts) >= 2:
                truncated = f"{parts[0]} {parts[-1][0]}"
                comment_fullnames[truncated.lower()] = full_name
        if grav_hash:
            comment_gravatar_hashes[full_name] = grav_hash

    # ---- Aggregate figures shared by several sections ----
    total_amount = sum(_money_value(d, 'totalAmountDonated') for d in donors.values())
    donation_amounts = [_money_value(d, 'amount') for d in donations if d.get('amount')]
    total_donation_sum = sum(donation_amounts)

    dates = []
    for d in donations:
        try:
            dates.append(datetime.fromisoformat(d['createdAt'].replace('Z', '+00:00')))
        except (KeyError, AttributeError, TypeError, ValueError):
            # Missing or unparseable timestamp: leave it out of the date range.
            continue

    earliest = min(dates).strftime('%Y-%m-%d') if dates else 'N/A'
    latest = max(dates).strftime('%Y-%m-%d') if dates else 'N/A'

    # Classify every donor exactly once; Sections 1-4 all read these rows.
    donor_rows = []
    for did in sorted(donors):
        d = donors[did]
        name = _donor_display_name(d)
        total = _money_value(d, 'totalAmountDonated')
        num = d.get('totalNumberOfDonations') or 0
        cls, _conf, reasons = classify_donor(name, total, num)
        donor_rows.append({
            'id': did,
            'name': name,
            'full_name': comment_fullnames.get(name.lower(), ''),
            'total': total,
            'num': num,
            'created': d.get('createdAt') or '',
            'cls': cls,
            'reasons': reasons,
        })

    class_counts = Counter(row['cls'] for row in donor_rows)
    real_count = class_counts["POTENTIALLY REAL"]
    test_count = class_counts["TEST/GIBBERISH"]
    suspicious_count = len(donor_rows) - real_count - test_count

    # ===================================================================
    # BUILD REPORT
    # ===================================================================
    report_lines = []

    def w(line=""):
        report_lines.append(line)

    w("=" * 80)
    w("GIVEWP PII EXTRACTION REPORT")
    w("Target: primature.gouv.ht (Haiti Prime Minister's Office)")
    w(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    w("Method: Unauthenticated HTTP GET to public GiveWP v3 REST API endpoints")
    w("Status: RESEARCH ONLY - read, parse, report")
    w("=" * 80)
    w()

    # ---- Section 1: Executive Summary ----
    w("=" * 80)
    w("SECTION 1: EXECUTIVE SUMMARY")
    w("=" * 80)
    w()

    w(f"Total unique donors:              {len(donors)}")
    w(f"Total unique donations:           {len(donations)}")
    w(f"Total campaign comments:          {len(comments)}")
    w(f"Date range:                       {earliest} to {latest}")
    w(f"Sum of donation amounts:          ${total_donation_sum:,.2f} USD")
    w(f"Sum of donor totals (per-donor):  ${total_amount:,.2f} USD")
    w()
    w(f"Donor classification:")
    w(f"  Potentially real people:        {real_count}")
    w(f"  Test/gibberish entries:         {test_count}")
    w(f"  Suspicious entries:             {suspicious_count}")
    w()
    w(f"Gravatar email hashes recovered:  {len(comment_gravatar_hashes)}")
    w(f"Full names from comments:         {len(comment_fullnames)}")
    w()
    w("CRITICAL FINDING: The GiveWP REST API at primature.gouv.ht exposes")
    w("donor records including names, donation amounts, dates, comments,")
    w("and Gravatar SHA256 hashes (which are email fingerprints) to any")
    w("unauthenticated visitor. While the email field itself is redacted")
    w("(returned as empty string), the Gravatar hashes in campaign comments")
    w("are derived from the raw email addresses and can be used for")
    w("correlation attacks or brute-force email recovery.")
    w()
    # NOTE(review): the figures in this paragraph are static narrative from
    # the originally assessed dataset, not computed from the loaded files.
    w("NOTE: All 82 donations and the 6 earlier offline donations use the")
    w("'Test Donation' or 'Offline Donation' gateway. No live payment")
    w("gateway (Stripe, PayPal, etc.) was ever configured. This means")
    w("the donation form was publicly accessible but never connected to")
    w("a real payment processor. All 'donations' are test/demo entries.")
    w()

    # ---- Section 2: All Donors (Full PII Table) ----
    w("=" * 80)
    w(f"SECTION 2: ALL DONOR RECORDS ({len(donors)} unique)")
    w("=" * 80)
    w()
    w(f"{'ID':>4} | {'Classification':<18} | {'Name':<25} | {'Full Name (from comments)':<30} | {'Total Donated':>14} | {'# Donations':>4} | {'Created':<20}")
    w("-" * 140)

    for row in donor_rows:
        did = row['id']
        cls = row['cls']
        name = row['name']
        full_name = row['full_name']
        num = row['num']
        created = row['created']
        amt_str = f"${row['total']:>12,.2f}"
        w(f"{did:>4} | {cls:<18} | {name:<25} | {full_name:<30} | {amt_str} | {num:>4} | {created}")

    w()

    # ---- Section 3: Potentially Real People ----
    w("=" * 80)
    w("SECTION 3: POTENTIALLY REAL PEOPLE (flagged for review)")
    w("=" * 80)
    w()
    w("These donors have names that appear to be real human names rather than")
    w("keyboard mashing or test data. However, given that ALL donations used")
    w("the 'Test Donation' gateway, even these may be test entries.")
    w()

    for row in donor_rows:
        if row['cls'] != "POTENTIALLY REAL":
            continue
        full_name = row['full_name']
        w(f"  Donor ID {row['id']}:")
        w(f"    Name (truncated):  {row['name']}")
        if full_name:
            w(f"    Name (full):       {full_name}")
        w(f"    Total donated:     ${row['total']:,.2f} USD")
        w(f"    # Donations:       {row['num']}")
        w(f"    First seen:        {row['created']}")
        # Check if this name appears in comments with gravatar hash
        if full_name and full_name in comment_gravatar_hashes:
            w(f"    Gravatar SHA256:   {comment_gravatar_hashes[full_name]}")
            w(f"    (This hash is derived from the donor's email address)")
        w()

    # ---- Section 4: Suspicious Entries ----
    w("=" * 80)
    w("SECTION 4: SUSPICIOUS ENTRIES")
    w("=" * 80)
    w()

    for row in donor_rows:
        if row['cls'] != "SUSPICIOUS":
            continue
        full_name = row['full_name']
        w(f"  Donor ID {row['id']}: {row['name']}")
        if full_name:
            w(f"    Full name: {full_name}")
        w(f"    Total: ${row['total']:,.2f} | Donations: {row['num']} | Created: {row['created']}")
        for r in row['reasons']:
            w(f"    FLAG: {r}")
        if full_name and full_name in comment_gravatar_hashes:
            w(f"    Gravatar SHA256: {comment_gravatar_hashes[full_name]}")
        w()

    # ---- Section 5: All Donations Detail ----
    w("=" * 80)
    w("SECTION 5: ALL DONATIONS (chronological)")
    w("=" * 80)
    w()

    # Sort by date (ISO timestamps sort correctly as strings)
    sorted_donations = sorted(donations, key=lambda x: x.get('createdAt') or '')

    gateway_counts = Counter()
    status_counts = Counter()
    form_counts = Counter()
    campaign_ids = set()

    w(f"{'DonID':>5} | {'DonorID':>4} | {'Name':<25} | {'Amount':>12} | {'Status':<10} | {'Gateway':<18} | {'Date':<20} | Comment")
    w("-" * 140)

    for d in sorted_donations:
        don_id = d.get('id', '')
        donor_id = d.get('donorId')
        if donor_id is None:
            donor_id = ''
        name = _donor_display_name(d)
        amount = _money_value(d, 'amount')
        status = d.get('status') or ''
        gateway = _gateway_label(d)
        date = d.get('createdAt') or ''
        comment = d.get('comment', '') or ''
        if comment == 'None':
            comment = ''
        form_title = d.get('formTitle') or ''
        campaign_id = d.get('campaignId', 0)

        gateway_counts[gateway] += 1
        status_counts[status] += 1
        form_counts[form_title] += 1
        if campaign_id:
            campaign_ids.add(campaign_id)

        amt_str = f"${amount:>10,.2f}"
        comment_short = comment[:40] if comment else ''
        w(f"{don_id:>5} | {donor_id:>4} | {name:<25} | {amt_str} | {status:<10} | {gateway:<18} | {date:<20} | {comment_short}")

    w()

    # ---- Section 6: Donation Statistics ----
    w("=" * 80)
    w("SECTION 6: DONATION STATISTICS")
    w("=" * 80)
    w()

    w(f"Total donations:    {len(donations)}")
    w(f"Total amount:       ${total_donation_sum:,.2f} USD")
    w(f"Average donation:   ${total_donation_sum/len(donations):,.2f} USD" if donations else "")
    # statistics.median averages the two middle values for even-sized input,
    # which indexing the sorted list at len // 2 does not.
    w(f"Median donation:    ${statistics.median(donation_amounts):,.2f} USD" if donation_amounts else "")
    w(f"Min donation:       ${min(donation_amounts):,.2f} USD" if donation_amounts else "")
    w(f"Max donation:       ${max(donation_amounts):,.2f} USD" if donation_amounts else "")
    w(f"Date range:         {earliest} to {latest}")
    w()

    w("By Gateway:")
    for gw, count in gateway_counts.most_common():
        w(f"  {gw:<25} {count:>4} donations")
    w()

    w("By Status:")
    for st, count in status_counts.most_common():
        w(f"  {st:<25} {count:>4} donations")
    w()

    w("By Form:")
    for form, count in form_counts.most_common():
        w(f"  {form:<35} {count:>4} donations")
    w()

    w(f"Campaign IDs referenced: {sorted(campaign_ids)}")
    w()

    # Amount distribution. Buckets are contiguous (previous bound exclusive,
    # upper bound inclusive) so fractional amounts such as 5.005 cannot fall
    # between two brackets; labels keep the "x.01" lower-bound presentation.
    w("Amount Distribution:")
    bounds = [5, 20, 100, 500, 1000, 5000, 10000, 1000000]
    lower = 0
    for idx, upper in enumerate(bounds):
        if idx == 0:
            count = sum(1 for a in donation_amounts if 0 <= a <= upper)
            label_low = 0
        else:
            count = sum(1 for a in donation_amounts if lower < a <= upper)
            label_low = lower + 0.01
        if count > 0:
            w(f"  ${label_low:>10,.2f} - ${upper:>10,.2f}:  {count:>3} donations")
        lower = upper
    overflow = sum(1 for a in donation_amounts if a > bounds[-1])
    if overflow:
        # Amounts above the last bound would otherwise vanish from the table.
        w(f"  Over ${bounds[-1]:,.2f}:  {overflow:>3} donations")
    w()

    # ---- Section 7: Campaign Comments with Gravatar Hashes ----
    w("=" * 80)
    w("SECTION 7: CAMPAIGN COMMENTS & GRAVATAR EMAIL HASHES")
    w("=" * 80)
    w()
    w("Campaign 12 comments expose full donor names and Gravatar SHA256 hashes.")
    w("Gravatar hashes are SHA256(lowercase(trim(email))). These can be used to:")
    w("  1. Confirm if a suspected email belongs to a donor (hash comparison)")
    w("  2. Brute-force common email patterns (name@domain combinations)")
    w("  3. Cross-reference with other Gravatar hash leaks")
    w()

    for c in comments:
        full_name = c.get('donorName', '')
        comment_text = c.get('comment', '')
        anon = c.get('anonymous', False)
        date = c.get('date', '')
        avatar = c.get('avatar', '')
        grav_hash = extract_gravatar_hash(avatar)

        w(f"  Name:      {full_name}")
        w(f"  Comment:   {comment_text}")
        w(f"  Anonymous: {anon}")
        w(f"  Date:      {date}")
        if grav_hash:
            w(f"  Gravatar:  {grav_hash}")
            w(f"  Avatar:    {avatar}")
        w()

    # ---- Section 8: Full Names Recovery ----
    w("=" * 80)
    w("SECTION 8: FULL NAME RECOVERY (donor API truncated -> comments full)")
    w("=" * 80)
    w()
    w("The donor API returns last names truncated to a single initial.")
    w(f"Campaign comments reveal the full last names for {len(comment_fullnames)} donors:")
    w()

    w(f"{'Donor API Name':<25} | {'Full Name (from comments)':<30} | {'Gravatar SHA256'}")
    w("-" * 100)

    for truncated, full in sorted(comment_fullnames.items(), key=lambda x: x[1]):
        grav = comment_gravatar_hashes.get(full, 'N/A')
        w(f"{truncated:<25} | {full:<30} | {grav}")
    w()

    # ---- Section 9: High-Value / Notable Entries ----
    w("=" * 80)
    w("SECTION 9: HIGH-VALUE & NOTABLE ENTRIES")
    w("=" * 80)
    w()

    w("--- Largest Single Donation ---")
    if donations:
        largest = max(donations, key=lambda d: _money_value(d, 'amount'))
        largest_amount = largest.get('amount')
        largest_currency = largest_amount.get('currency', 'USD') if isinstance(largest_amount, dict) else 'USD'
        w(f"  Donation ID:  {largest.get('id', '')}")
        w(f"  Donor ID:     {largest.get('donorId', '')}")
        w(f"  Name:         {largest.get('firstName', '')} {largest.get('lastName', '')}")
        w(f"  Amount:       ${_money_value(largest, 'amount'):,.2f} {largest_currency}")
        w(f"  Date:         {largest.get('createdAt', '')}")
        w(f"  Gateway:      {_gateway_label(largest)}")
        w(f"  Comment:      {largest.get('comment', 'N/A')}")
    else:
        # max() on an empty list would raise before the report is written.
        w("  (no donations loaded)")
    w()

    w("--- Top 10 Donors by Total Amount ---")
    # Sorted over donors.values() so ties keep the merge (insertion) order.
    top_donors = sorted(donors.values(), key=lambda d: _money_value(d, 'totalAmountDonated'), reverse=True)[:10]
    for i, d in enumerate(top_donors, 1):
        name = _donor_display_name(d)
        total = _money_value(d, 'totalAmountDonated')
        full_name = comment_fullnames.get(name.lower(), '')
        display = full_name if full_name else name
        w(f"  {i:>2}. {display:<30} ${total:>12,.2f} USD  ({d.get('totalNumberOfDonations') or 0} donations)")
    w()

    w("--- Donors with Multiple Donations ---")
    multi = [(did, d) for did, d in donors.items() if (d.get('totalNumberOfDonations') or 0) > 1]
    multi.sort(key=lambda x: x[1].get('totalNumberOfDonations') or 0, reverse=True)
    for did, d in multi:
        name = _donor_display_name(d)
        w(f"  Donor {did}: {name} - {d.get('totalNumberOfDonations')} donations, ${_money_value(d, 'totalAmountDonated'):,.2f}")
    w()

    # Static analyst notes about the originally assessed dataset.
    w("--- Notable Name Patterns ---")
    w("  'Barack O' (ID 35) - Famous name pattern, $80 test donation")
    w("  'Jimmy C' (ID 38) - Famous name pattern, $750 test donation")
    w("  'Steve J' (ID 56) - Famous name pattern, $80 test donation")
    w("  'john d' (ID 27) -> 'john doe' (from comments) - Classic test name")
    w("  'First Name L' (ID 60) -> 'First Name Last Name' (from comments) - Literal placeholder")
    w("  'MOMA E' (ID 48) -> 'MOMA EL MOCTAR' (from comments) - $100,005 total, possibly real")
    w("  'FODOUOP J' (ID 80) -> 'FODOUOP JAHSWANT' - Comment says 'DOUALA' (city in Cameroon)")
    w("  'Abdalla I' (ID 75) -> 'Abdalla IBRAHIM' - Comment is gibberish but name looks real")
    w("  'Cem G' (ID 51) -> 'Cem GOKSU' - Turkish name, commented 'asasas'")
    w("  'pedro m' (ID 50) -> 'pedro mulluni' - Spanish comment 'quiero donar' (I want to donate)")
    w()

    # ---- Section 10: PII Inventory ----
    w("=" * 80)
    w("SECTION 10: PII INVENTORY SUMMARY")
    w("=" * 80)
    w()

    w("Data Type                | Count  | Notes")
    w("-" * 70)
    w(f"Donor names (truncated)  | {len(donors):>5}  | Last name = single initial")
    w(f"Full names (from cmts)   | {len(comment_fullnames):>5}  | Recovered from campaign comments")
    w(f"Email addresses          |     0  | Redacted by API (returned as empty string)")
    w(f"Gravatar SHA256 hashes   | {len(comment_gravatar_hashes):>5}  | Email fingerprints (reversible)")
    w(f"Phone numbers            |     0  | All empty in records")
    w(f"Billing addresses        |     0  | All null/empty in records")
    w(f"IP addresses             |     0  | Redacted (donorIp field empty)")
    w(f"Companies                |     0  | All empty in records")
    w(f"Donation amounts         | {len(donations):>5}  | Individual transaction amounts")
    w(f"Donation dates           | {len(donations):>5}  | Timestamps of each donation")
    w(f"Payment gateways         | {len(gateway_counts):>5}  | Gateway types used")
    w(f"Form titles              | {len(form_counts):>5}  | Donation form names")
    w(f"Donor comments           | {sum(1 for d in donations if d.get('comment') and d['comment'] != 'None'):>5}  | Free-text comments on donations")
    w(f"WordPress user links     |     0  | wpUserPermalink all empty")
    w(f"Purchase keys            |     0  | All empty")
    w(f"Gateway transaction IDs  |     0  | All empty (no real payments)")
    w()

    # ---- Section 11: API Endpoint Documentation ----
    w("=" * 80)
    w("SECTION 11: EXPOSED API ENDPOINTS")
    w("=" * 80)
    w()
    w("The following GiveWP v3 REST API endpoints were accessible without authentication:")
    w()
    w("  GET /wp-json/givewp/v3/donors")
    w("      -> Returns all donor records with names, amounts, dates")
    w("      -> Supports pagination (?page=N&per_page=N)")
    w("      -> Supports embedding (?_embed=1) for nested donations/stats")
    w()
    w("  GET /wp-json/givewp/v3/donations")
    w("      -> Returns all donation transaction records")
    w("      -> Includes donor name, amount, gateway, status, comments")
    w()
    w("  GET /wp-json/givewp/v3/campaigns")
    w("      -> Campaign metadata (returned empty)")
    w()
    w("  GET /wp-json/givewp/v3/campaigns/{id}/comments")
    w("      -> Campaign comments with FULL donor names and Gravatar hashes")
    w()
    w("  GET /wp-json/givewp/v3/forms")
    w("      -> Donation form metadata (returned empty)")
    w()
    w("  GET /wp-json/givewp/v3/subscriptions")
    w("      -> Recurring subscription data (returned empty)")
    w()
    w("NOTE: The /donors/{id}/statistics endpoint returns 401 Forbidden,")
    w("suggesting some endpoints DO have authentication. The donor and")
    w("donation listing endpoints, however, are fully open.")
    w()

    # ---- Section 12: Gravatar Hash Analysis ----
    w("=" * 80)
    w("SECTION 12: GRAVATAR HASH TABLE (email fingerprints)")
    w("=" * 80)
    w()
    w("These SHA256 hashes are derived from donor email addresses via:")
    w("  SHA256(lowercase(trim(email_address)))")
    w()
    w("They can be verified against known emails or brute-forced.")
    w()

    w(f"{'#':>2} | {'Donor Name':<25} | {'SHA256 Hash':<66}")
    w("-" * 100)
    for i, (name, h) in enumerate(sorted(comment_gravatar_hashes.items()), 1):
        w(f"{i:>2} | {name:<25} | {h}")
    w()

    # ---- Section 13: Risk Assessment ----
    w("=" * 80)
    w("SECTION 13: RISK ASSESSMENT")
    w("=" * 80)
    w()
    w("SEVERITY: MEDIUM")
    w()
    w("FINDINGS:")
    w("  1. The GiveWP plugin's REST API is exposed without authentication")
    w("     on the official Haiti Prime Minister's website.")
    w()
    w(f"  2. {len(donors)} donor records are publicly accessible, including names,")
    w("     donation amounts, dates, and comments.")
    w()
    w(f"  3. {len(comment_gravatar_hashes)} Gravatar SHA256 hashes are exposed in campaign comments,")
    w("     which serve as fingerprints of donor email addresses.")
    w()
    w("  4. The majority of records (estimated 50+) appear to be test/")
    w("     gibberish data, suggesting the donation system was never")
    w("     properly launched or used in production.")
    w()
    w("  5. No real payment gateway was configured - all donations used")
    w("     'Test Donation' (manual) or 'Offline Donation' gateways.")
    w()
    w("  6. A small number of entries (10-15) have names consistent with")
    w("     real people, particularly those from the comment data where")
    w("     full names and location info (e.g., 'DOUALA') appear.")
    w()
    w("  7. The $100,000 donation from 'MOMA EL MOCTAR' is almost")
    w("     certainly a test entry given it used the manual gateway.")
    w()
    w("MITIGATING FACTORS:")
    w("  - Email fields are redacted (empty) in the API response")
    w("  - IP addresses are redacted")
    w("  - No billing addresses are populated")
    w("  - No real payment transaction IDs exist")
    w("  - Most data appears to be test entries, not real donors")
    w()
    w("RECOMMENDATIONS:")
    w("  1. Disable public access to GiveWP REST API endpoints")
    w("  2. Require authentication for /wp-json/givewp/v3/* routes")
    w("  3. Remove or disable the GiveWP plugin if not in active use")
    w("  4. Purge test/demo data from the database")
    w()

    w("=" * 80)
    w("END OF REPORT")
    w("=" * 80)

    # ---- Write report to file ----
    report_text = '\n'.join(report_lines)

    with open(REPORT_PATH, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(f"\n[+] Report written to: {REPORT_PATH}")
    print(f"    Total lines: {len(report_lines)}")
    print(f"    File size: {os.path.getsize(REPORT_PATH):,} bytes")

    # Print summary to console
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Unique donors:           {len(donors)}")
    print(f"Unique donations:        {len(donations)}")
    print(f"Potentially real people: {real_count}")
    print(f"Test/gibberish entries:  {test_count}")
    print(f"Suspicious entries:      {suspicious_count}")
    print(f"Gravatar email hashes:   {len(comment_gravatar_hashes)}")
    print(f"Full names recovered:    {len(comment_fullnames)}")
    print(f"Total donated (sum):     ${total_donation_sum:,.2f} USD")
    print(f"Date range:              {earliest} to {latest}")
    print(f"Payment gateways:        {dict(gateway_counts)}")


# Run the report generation only when executed as a script, so importing this
# module (e.g. to reuse is_gibberish or classify_donor) has no side effects
# beyond the module-level setup.
if __name__ == '__main__':
    main()
