"""
Scan Other Agencies - Discovers what other agencies have data on repodatos.atdt.gob.mx
"""
import requests
from pathlib import Path
import time
import string

# Root of the API being scanned; check_agency() appends "<name>/" to this URL.
BASE_URL = "https://repodatos.atdt.gob.mx/api_update/"

# Headers sent with every probe request (a browser-style User-Agent string).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}

# Text file, relative to the current working directory, where main() writes
# the list of discovered endpoints.
OUTPUT_FILE = Path("discovered_agencies.txt")

# Known Mexican government agency acronyms/names to probe.
# main() passes each entry to check_agency() in phase 1, which requests
# BASE_URL + "<entry>/". Entries are grouped by sector; the last group holds
# generic path words rather than agency names.
KNOWN_AGENCIES = [
    # Immigration/Security
    "inm",
    "inami",
    "migracion",
    "segob",
    "cisen",
    "cni",
    "sspc",
    "guardia_nacional",
    "policia_federal",
    "aduanas",
    "sat",

    # Foreign affairs
    "sre",
    "relaciones_exteriores",
    "consulados",
    "embajadas",

    # Justice/Legal
    "fgr",
    "pgr",
    "fiscalia",
    "justicia",
    "tribunales",
    "scjn",

    # Health
    "salud",
    "ssa",
    "imss",
    "issste",
    "cofepris",

    # Education
    "sep",
    "educacion",
    "conacyt",
    "unam",
    "ipn",

    # Economy/Finance
    "shcp",
    "hacienda",
    "banxico",
    "economia",
    "se",

    # Labor/Social
    "stps",
    "trabajo",
    "bienestar",
    "sedesol",

    # Environment/Resources
    "semarnat",
    "medio_ambiente",
    "conagua",
    "pemex",
    "cfe",

    # Communications/Transport
    "sct",
    "comunicaciones",
    "transportes",

    # Agriculture
    "sagarpa",
    "agricultura",
    "sader",

    # Defense
    "sedena",
    "semar",
    "defensa",
    "marina",

    # Other common patterns (generic path words, not agency names)
    "datos",
    "data",
    "api",
    "v1",
    "v2",
    "public",
    "publico",
    "open",
    "abiertos",
    "catalogo",
    "catalog",
]


def check_agency(name):
    """Probe ``BASE_URL + name + "/"`` and report whether an endpoint exists.

    Args:
        name: Path segment appended to BASE_URL (an agency acronym or keyword).

    Returns:
        A ``(url, count, data)`` tuple:

        * ``(url, len(data), data)`` when the server answers HTTP 200 with a
          body that parses as a JSON list.
        * ``(url, -1, None)`` when the server answers HTTP 200 but the body is
          not a JSON list (not JSON at all, or JSON of another type).
        * ``(None, 0, None)`` for any other status code, or when the request
          itself fails (connection error, timeout, invalid URL, ...).
    """
    url = f"{BASE_URL}{name}/"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
    except requests.RequestException:
        # Best-effort probe: a failed request just means "nothing found here".
        # Catching only requests' own errors (instead of a bare ``except``)
        # lets KeyboardInterrupt/SystemExit propagate so a long scan can be
        # aborted.
        return None, 0, None

    if r.status_code != 200:
        return None, 0, None

    try:
        data = r.json()
    except ValueError:
        # Body is not valid JSON; every JSON decode error raised by
        # Response.json() is a ValueError subclass.
        return url, -1, None

    if isinstance(data, list):
        return url, len(data), data
    return url, -1, None  # Exists but not JSON list


def brute_force_short_names():
    """Probe every 2- and 3-letter lowercase ASCII name under BASE_URL.

    Each candidate is passed to check_agency(); hits are printed as they are
    found. This issues 26**2 + 26**3 = 18,252 requests, pausing 0.1 s after
    each 2-letter probe and 0.05 s after each 3-letter probe, so a full run
    takes a long time.

    Returns:
        A list of ``(url, count)`` tuples, one per name for which
        check_agency() returned a URL, in the order they were probed.
    """
    from itertools import product

    found = []
    chars = string.ascii_lowercase

    def probe(name, delay):
        # Check one candidate, record and report a hit, then throttle.
        url, count, _ = check_agency(name)
        if url:
            print(f"[FOUND] {url} ({count} items)")
            found.append((url, count))
        time.sleep(delay)

    print("\n[BRUTE FORCE] Trying 2-3 letter combinations...")

    # 2 letter
    for a, b in product(chars, repeat=2):
        probe(f"{a}{b}", 0.1)

    # 3 letter, grouped by first letter so progress can be reported per group
    for a in chars:
        for b, c in product(chars, repeat=2):
            probe(f"{a}{b}{c}", 0.05)
        print(f"[PROGRESS] Completed 3-letter combinations starting with '{a}'")

    return found


def main():
    """Run the two-phase scan and write the discovered endpoints to disk.

    Phase 1 probes every name in KNOWN_AGENCIES, printing a preview of up to
    five items for each endpoint that returns a non-empty JSON list. Phase 2
    brute-forces all 2- and 3-letter names. The deduplicated, sorted results
    are then written to OUTPUT_FILE and echoed to stdout.
    """
    print("=" * 70)
    print("Agency Discovery Scanner - repodatos.atdt.gob.mx")
    print("=" * 70)
    print(f"Base URL: {BASE_URL}")
    print("=" * 70)

    all_found = []

    # Phase 1: Check known agency names
    print("\n[PHASE 1] Checking known agency names...")
    for agency in KNOWN_AGENCIES:
        url, count, data = check_agency(agency)
        if url:
            print(f"[FOUND] {url}")
            if count > 0:
                print(f"        Contains {count} items")
                if data:
                    for item in data[:5]:  # Show first 5 items
                        # check_agency() only guarantees a list; its items
                        # may be strings/numbers rather than dicts.
                        if isinstance(item, dict):
                            label = item.get('name', item)
                        else:
                            label = item
                        print(f"        - {label}")
                    if count > 5:
                        print(f"        ... and {count - 5} more")
            all_found.append((url, count))
        time.sleep(0.3)

    # Phase 2: Brute force short names
    print("\n[PHASE 2] Brute forcing short agency codes...")
    brute_found = brute_force_short_names()
    all_found.extend(brute_found)

    # Short names in KNOWN_AGENCIES (e.g. "sat", "sre") are probed again in
    # phase 2, so deduplicate before counting and reporting.
    unique_found = sorted(set(all_found))

    # Save results
    print("\n" + "=" * 70)
    print(f"COMPLETE: Found {len(unique_found)} accessible endpoints")
    print("=" * 70)

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("# Discovered Agency Endpoints\n")
        f.write(f"# Base: {BASE_URL}\n\n")
        for url, count in unique_found:
            f.write(f"{url} ({count} items)\n")

    print(f"Results saved to: {OUTPUT_FILE}")

    for url, count in unique_found:
        print(f"  {url} ({count} items)")


# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
