"""
Simple Parallel Scanner - Just works
"""
import requests
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import sys
import time

# Replace the builtin print with an auto-flushing version so output is
# unbuffered (visible immediately even when stdout is piped/redirected).
# Unlike a plain lambda join, this forwards sep/end/file and returns None
# like the real print.
_builtin_print = print


def print(*args, **kwargs):
    """Drop-in replacement for print that defaults flush to True."""
    kwargs.setdefault("flush", True)
    _builtin_print(*args, **kwargs)

# Base endpoint of the INM open-data API being enumerated.
BASE_URL = "https://repodatos.atdt.gob.mx/api_update/inm/"

# Local destination for every successfully fetched file.
DOWNLOAD_DIR = Path(r"C:\Users\Squir\Desktop\Mexico- Second Look\inm_downloads")
# parents=True: don't crash at startup if an ancestor directory is missing.
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

# Append-only log of every URL that returned a non-empty 200 response.
HITS_LOG = Path(r"C:\Users\Squir\Desktop\Mexico- Second Look\hits.txt")
# Guards the shared counters and the hits log across worker threads.
lock = threading.Lock()

HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

# Candidate API subdirectories; "" probes the API root itself.
DIRS = ["capacitacion_servidores_publicos_inm", "estadisticas_control_verificacion_migratoria",
        "estadisticas_regulacion_migratoria", "migrantes_atendidos_grupos_beta_proteccion_migrantes",
        "personas_extranjeras_victimas_delito", "programa_paisano",
        "programa_pesca_deportiva_turismo_nautico", "salida_menores_pais", ""]

# Candidate filename stems appended after the numeric ID (as "<id>_<suffix>").
SUFFIXES = ["menores_viajan", "tramites_migratorios", "documentos_migratorios", "visitas_verificacion",
            "revisiones_migratorias", "atendidos_grupo_beta", "victimas_delito", "peticiones_paisano",
            "atenciones_programa_paisanos", "quejas_paisano", "pesca_deportiva", "servidores_capacitados",
            "datos", "registros", "estadisticas", "reporte", "informe"]

# File extensions tried for every directory/stem combination.
EXTS = [".csv", ".json", ".xlsx", ".pdf", ".xml", ".zip", ".doc", ".docx", ".txt", ".xls"]

# Shared progress counters, mutated by worker threads under `lock`.
hits = 0
tried = 0


def download(args):
    """Fetch one candidate URL and save it locally if it exists.

    Args:
        args: a ``(url, path)`` tuple — the URL to probe and the local
            ``Path`` to write the response body to on success.

    Returns:
        ``(success, url)`` — ``success`` is True only for a 200 response
        with a non-empty body; every other outcome counts as a miss.
    """
    global hits, tried
    url, path = args

    try:
        r = requests.get(url, headers=HEADERS, timeout=8)
        if r.status_code == 200 and r.content:
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_bytes(r.content)
            with lock:
                hits += 1
                with open(HITS_LOG, "a") as f:
                    f.write(f"{url} -> {path}\n")
            return True, url
    except (requests.RequestException, OSError):
        # Timeouts / connection errors / disk errors are expected while
        # probing and are treated as misses. Deliberately NOT a bare
        # except: KeyboardInterrupt/SystemExit must propagate so a
        # 30-worker run can still be interrupted.
        pass

    with lock:
        tried += 1
    return False, url


def generate_batch(start_id, end_id):
    """Build every (url, local_path) candidate pair for IDs in [start_id, end_id].

    For each ID and each directory (including the API root when the
    directory is empty), tries every "<id>_<suffix><ext>" name followed
    by every bare "<id><ext>" name.
    """
    pairs = []
    for file_id in range(start_id, end_id + 1):
        for directory in DIRS:
            prefix = f"{BASE_URL}{directory}/" if directory else BASE_URL
            target_dir = DOWNLOAD_DIR / directory if directory else DOWNLOAD_DIR
            # Suffixed names first, then bare-ID names — same order as always.
            names = [f"{file_id}_{suffix}{ext}" for suffix in SUFFIXES for ext in EXTS]
            names.extend(f"{file_id}{ext}" for ext in EXTS)
            pairs.extend((f"{prefix}{name}", target_dir / name) for name in names)
    return pairs


print("=" * 60)
print("SIMPLE PARALLEL SCANNER - 30 WORKERS")
print("=" * 60)

# Scan IDs in fixed-size batches so progress is reported regularly.
CHUNK_SIZE = 100
MAX_ID = 10000

for first_id in range(1, MAX_ID + 1, CHUNK_SIZE):
    last_id = min(first_id + CHUNK_SIZE - 1, MAX_ID)
    batch = generate_batch(first_id, last_id)

    print(f"[CHUNK] IDs {first_id}-{last_id} ({len(batch)} URLs)")

    # Fan the batch out over 30 threads; report hits as futures complete.
    with ThreadPoolExecutor(max_workers=30) as pool:
        pending = [pool.submit(download, item) for item in batch]
        for done in as_completed(pending):
            ok, hit_url = done.result()
            if ok:
                print(f"[HIT] {hit_url}")

    # `tried` only counts misses, so total attempts = tried + hits.
    print(f"[STATUS] Tried: {tried + hits}, Hits: {hits}")

print("=" * 60)
print(f"DONE - Total hits: {hits}")
print("=" * 60)
