"""
Aggressive Scanner - Fast timeouts, connection pooling, 50 workers
"""
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import sys
import time

# Unbuffered output
class Unbuffered:
    """File-like wrapper that flushes the wrapped stream after every write.

    Only the writing methods are overridden; every other attribute
    (``encoding``, ``isatty``, ``fileno``, ...) is delegated to the wrapped
    stream so the wrapper can stand in for ``sys.stdout`` without breaking
    code that inspects the stream.
    """

    def __init__(self, stream):
        """Wrap *stream*, which must provide ``write`` and ``flush``."""
        self.stream = stream

    def write(self, data):
        """Write *data*, flush immediately, and return the stream's result."""
        written = self.stream.write(data)
        self.stream.flush()
        return written

    def writelines(self, lines):
        """Write every item of *lines*, then flush once."""
        self.stream.writelines(lines)
        self.stream.flush()

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()

    def __getattr__(self, name):
        """Delegate any attribute not defined here to the wrapped stream."""
        # __getattr__ only runs when normal lookup fails.  If ``stream`` itself
        # is missing (e.g. on an instance built without __init__), fail cleanly
        # rather than recursing forever through self.stream.
        if name == "stream":
            raise AttributeError(name)
        return getattr(self.stream, name)

# Route all prints through the flushing wrapper so progress lines show up
# immediately even when stdout is piped or redirected.
sys.stdout = Unbuffered(sys.stdout)

# Remote directory that is probed, and the local locations results go to.
BASE_URL = "https://repodatos.atdt.gob.mx/api_update/inm/"
DOWNLOAD_DIR = Path(r"C:\Users\Squir\Desktop\Mexico- Second Look\inm_downloads")
# parents=True: without it mkdir raises FileNotFoundError when the enclosing
# project folder does not exist yet (it also holds HITS_LOG below).
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
HITS_LOG = Path(r"C:\Users\Squir\Desktop\Mexico- Second Look\hits.txt")

# Guards the shared hit/checked counters and appends to HITS_LOG, all of
# which are touched from worker threads.
lock = threading.Lock()
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Create session with connection pooling.  The pool is sized to match the
# 50 worker threads; max_retries=0 means a failed request is not retried.
session = requests.Session()
adapter = HTTPAdapter(pool_connections=50, pool_maxsize=50, max_retries=0)
session.mount('https://', adapter)
session.mount('http://', adapter)

# Remote sub-directories to probe.  The trailing empty string stands for the
# API root itself (no sub-directory).
DIRS = [
    "capacitacion_servidores_publicos_inm",
    "estadisticas_control_verificacion_migratoria",
    "estadisticas_regulacion_migratoria",
    "migrantes_atendidos_grupos_beta_proteccion_migrantes",
    "personas_extranjeras_victimas_delito",
    "programa_paisano",
    "programa_pesca_deportiva_turismo_nautico",
    "salida_menores_pais",
    "",
]

# Name fragments appended to a numeric ID as "<id>_<suffix><ext>".
SUFFIXES = [
    "menores_viajan",
    "tramites_migratorios",
    "documentos_migratorios",
    "visitas_verificacion",
    "revisiones_migratorias",
    "atendidos_grupo_beta",
    "victimas_delito",
    "peticiones_paisano",
    "quejas_paisano",
    "pesca_deportiva",
    "servidores_capacitados",
    "datos",
    "estadisticas",
]

# File extensions tried for every candidate name.
EXTS = [
    ".csv",
    ".json",
    ".xlsx",
    ".pdf",
    ".xml",
    ".zip",
]

# Run-wide progress counters (updated by the workers) and the wall-clock
# start used for the requests-per-second figure.
hits = checked = 0
start_time = time.time()


def check(url_path):
    """Probe one candidate URL and save its body if it looks like a real file.

    Args:
        url_path: ``(url, path)`` pair - the URL to request and the local
            ``Path`` the body is written to on a hit.

    Returns:
        ``(True, url, size_in_bytes)`` when the server answered 200 with more
        than 100 bytes and the file was saved; ``(False, url, 0)`` otherwise,
        including on any network or file-system error.

    Side effects:
        Increments the global ``checked`` counter exactly once per call and,
        on a hit, increments ``hits`` and appends a line to ``HITS_LOG``.
    """
    global hits, checked
    url, path = url_path

    try:
        # With stream=True the connection is only released once the body has
        # been consumed or the response closed.  The with-block guarantees
        # the close for non-200 answers, whose body is never read.
        with session.get(url, headers=HEADERS, timeout=3, stream=True) as r:
            if r.status_code != 200:
                return False, url, 0
            # Download content
            content = r.content

        if len(content) <= 100:  # Empty/error page
            return False, url, 0

        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(content)
        with lock:
            hits += 1
            with open(HITS_LOG, "a", encoding="utf-8") as f:
                f.write(f"{url} -> {path}\n")
        return True, url, len(content)
    except Exception:
        # Deliberately best-effort: a timeout, connection error or failed
        # write just counts as a miss.  Unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return False, url, 0
    finally:
        # Count every attempt exactly once, whatever the outcome (previously
        # a failure after the request had succeeded was counted twice).
        with lock:
            checked += 1


print("=" * 60)
print("AGGRESSIVE SCANNER - 50 WORKERS, 3s TIMEOUT")
print("=" * 60)
print(f"Directories: {len(DIRS)}")
print(f"Suffixes: {len(SUFFIXES)}")
print(f"Extensions: {len(EXTS)}")

# Process IDs in batches
BATCH_SIZE = 50
MAX_ID = 10000


def _batch_targets(first_id, last_id):
    """Yield ``(url, local_path)`` for every candidate file in an ID range.

    For each ID in ``first_id..last_id`` (inclusive) and each entry of DIRS,
    this produces "<id>_<suffix><ext>" for every suffix/extension pair,
    followed by the bare "<id><ext>" names.
    """
    for fid in range(first_id, last_id + 1):
        for dirname in DIRS:
            # The empty DIRS entry means "directly under BASE_URL".
            base = f"{BASE_URL}{dirname}/" if dirname else BASE_URL
            local_dir = DOWNLOAD_DIR / dirname

            for s in SUFFIXES:
                for e in EXTS:
                    fn = f"{fid}_{s}{e}"
                    yield f"{base}{fn}", local_dir / fn
            for e in EXTS:
                fn = f"{fid}{e}"
                yield f"{base}{fn}", local_dir / fn


# One pool for the whole run: the worker threads are reused across batches
# instead of being spawned and joined again for every batch.
with ThreadPoolExecutor(max_workers=50) as ex:
    for batch_start in range(1, MAX_ID + 1, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE - 1, MAX_ID)

        futures = [ex.submit(check, target)
                   for target in _batch_targets(batch_start, batch_end)]
        # Draining as_completed() finishes the whole batch before the
        # progress line below is printed.
        for f in as_completed(futures):
            success, url, size = f.result()
            if success:
                print(f"[HIT] {url} ({size:,} bytes)")

        elapsed = time.time() - start_time
        rate = checked / elapsed if elapsed > 0 else 0
        print(f"[{batch_start}-{batch_end}] Checked: {checked:,} | Hits: {hits} | Rate: {rate:.0f}/sec")

print("\n" + "=" * 60)
print(f"DONE - Hits: {hits} | Total checked: {checked:,}")
print("=" * 60)
