import requests, json, sys, os, concurrent.futures, urllib3
urllib3.disable_warnings()

TARGETS = {
    # MILITARY
    "armada-sigeda": "https://sigeda.armada.mil.co",
    "armada-dicodarc": "https://dicodarcgisarc.armada.mil.co",
    "armada-registro": "https://registro.armada.mil.co",
    "fac-sigsa": "https://sigsa.fac.mil.co",
    # POLICE GIS (24 subdomains)
    "policia-gisponal": "https://gisponal.policia.gov.co",
    "policia-maps-analytics": "https://maps.analytics.ia.policia.gov.co",
    "policia-srvgis1": "https://srvgis1.policia.gov.co",
    "policia-srvgis2": "https://srvgis2.policia.gov.co",
    "policia-srvgis3": "https://srvgis3.policia.gov.co",
    "policia-srvgis4": "https://srvgis4.policia.gov.co",
    "policia-srvgis5": "https://srvgis5.policia.gov.co",
    "policia-srvgis6": "https://srvgis6.policia.gov.co",
    "policia-srvgis7": "https://srvgis7.policia.gov.co",
    "policia-srvgis8": "https://srvgis8.policia.gov.co",
    "policia-portalgis1": "https://srvportalgis1.policia.gov.co",
    "policia-portalgis2": "https://srvportalgis2.policia.gov.co",
    "policia-sigadmin": "https://srvsigadmin.policia.gov.co",
    "policia-sigadmin1": "https://srvsigadmin1.policia.gov.co",
    "policia-sigaplica": "https://srvsigaplica.policia.gov.co",
    "policia-sigcarto": "https://srvsigcarto.policia.gov.co",
    "policia-sigexterno": "https://srvsigexterno.policia.gov.co",
    "policia-sigponal1": "https://srvsigponal1.policia.gov.co",
    "policia-sigponal2": "https://srvsigponal2.policia.gov.co",
    "policia-sigponalapp1": "https://srvsigponalapp1.policia.gov.co",
    "policia-sigponalapp2": "https://srvsigponalapp2.policia.gov.co",
    "policia-sigponalext1": "https://srvsigponalext1.policia.gov.co",
    "policia-sigponalext2": "https://srvsigponalext2.policia.gov.co",
    # DANE - Statistics
    "dane-geoportal": "https://geoportal.dane.gov.co",
    "dane-geoserver": "https://geoserverportal.dane.gov.co",
    "dane-sige": "https://sige.dane.gov.co",
    # ANM - Mining
    "anm-arcgis": "https://arcgisserver.anm.gov.co",
    # IDEAM - Meteorological
    "ideam-geoservicio": "https://geoservicio.smartmetospa.ideam.gov.co",
    "ideam-geoweb": "https://geoweb.smartmetospa.ideam.gov.co",
    # CAR - Environmental
    "car-sig": "https://sig.car.gov.co",
    "car-geoambiental": "https://geoambiental.car.gov.co",
    "car-geourbana": "https://geourbana.car.gov.co",
    "car-sigci": "https://sigci.car.gov.co",
    "car-sigu": "https://sigu.car.gov.co",
    "car-sigriobogota": "https://sigriobogota.car.gov.co",
    "car-datosgeograficos": "https://datosgeograficos.car.gov.co",
    # Transport
    "mintransporte-sigt": "https://sigt.mintransporte.gov.co",
    "mintransporte-sigtdev": "https://sigtdev.mintransporte.gov.co",
    # Geological Survey
    "sgc-geored": "https://geored.sgc.gov.co",
    "sgc-geored2": "https://geored2.sgc.gov.co",
    # Hydrocarbons
    "anh-geoportal": "https://geoportal.anh.gov.co",
    "anh-geovisor": "https://geovisor.anh.gov.co",
    "anh-dataroom": "https://geovisordataroom.anh.gov.co",
    # Roads
    "invias-sigpav": "https://sigpav.invias.gov.co",
    "invias-sigpinspeccion": "https://sigpinspeccion.invias.gov.co",
    # Agricultural Institute
    "ica-sigma": "https://sigma.ica.gov.co",
    "ica-sigeco": "https://sigeco.ica.gov.co",
    # Land Restitution
    "restitucion-geoserver": "https://geoserver.restituciondetierras.gov.co",
    "restitucion-portalmapas": "https://portalmapas.restituciondetierras.gov.co",
    "restitucion-portalmapasurt": "https://portalmapasurt.restituciondetierras.gov.co",
    # Land Agency
    "agenciatierras-geoportal": "https://geoportal.agenciadetierras.gov.co",
    "agenciatierras-sig": "https://sig.agenciadetierras.gov.co",
    # Bogota Cadastre
    "catastrobogota-serviciosgis": "https://serviciosgis.catastrobogota.gov.co",
    "catastrobogota-sig": "https://sig.catastrobogota.gov.co",
    # Fiscalia GIS
    "fiscalia-geoapp": "https://geoappfgn.fiscalia.gov.co",
    "fiscalia-sigob": "https://sigob-web.fiscalia.gov.co",
}

PATHS = [
    "/arcgis/rest/services?f=json",
    "/server/rest/services?f=json",
    "/arcgis/rest/services?f=pjson",
    "/geoserver/web/",
    "/arcgis/rest/info?f=json",
]

results = {"arcgis_open": [], "geoserver": [], "alive": [], "dead": []}

def probe(name, base_url):
    hits = []
    for path in PATHS:
        url = base_url + path
        try:
            r = requests.get(url, timeout=10, verify=False, allow_redirects=True)
            body = r.text[:2000]
            if r.status_code == 200:
                if '"services"' in body or '"folders"' in body or '"currentVersion"' in body:
                    hits.append({"name": name, "url": url, "status": r.status_code, "type": "arcgis", "size": len(body), "snippet": body[:500]})
                elif "GeoServer" in body or "geoserver" in body.lower():
                    hits.append({"name": name, "url": url, "status": r.status_code, "type": "geoserver", "size": len(body)})
                elif len(body) < 500 or "blocked" in body.lower() or "denied" in body.lower():
                    hits.append({"name": name, "url": url, "status": r.status_code, "type": "maybe_waf", "size": len(body)})
                else:
                    hits.append({"name": name, "url": url, "status": r.status_code, "type": "other_200", "size": len(body)})
            elif r.status_code in [301, 302, 307, 308]:
                hits.append({"name": name, "url": url, "status": r.status_code, "type": "redirect", "location": r.headers.get("Location", "")})
            elif r.status_code == 403:
                hits.append({"name": name, "url": url, "status": 403, "type": "forbidden"})
            elif r.status_code == 401:
                hits.append({"name": name, "url": url, "status": 401, "type": "auth_required"})
            else:
                hits.append({"name": name, "url": url, "status": r.status_code, "type": f"http_{r.status_code}"})
        except requests.exceptions.ConnectTimeout:
            hits.append({"name": name, "url": url, "type": "timeout"})
        except requests.exceptions.ConnectionError as e:
            hits.append({"name": name, "url": url, "type": "conn_error", "error": str(e)[:100]})
        except Exception as e:
            hits.append({"name": name, "url": url, "type": "error", "error": str(e)[:100]})
    return name, hits

print(f"Probing {len(TARGETS)} targets across {len(PATHS)} paths ({len(TARGETS)*len(PATHS)} requests)...")
sys.stdout.flush()

with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    futures = {executor.submit(probe, name, url): name for name, url in TARGETS.items()}
    for future in concurrent.futures.as_completed(futures):
        name, hits = future.result()
        arcgis_hit = any(h.get("type") == "arcgis" for h in hits)
        geo_hit = any(h.get("type") == "geoserver" for h in hits)
        if arcgis_hit:
            results["arcgis_open"].append({"name": name, "hits": [h for h in hits if h.get("type") == "arcgis"]})
            print(f"  [ARCGIS OPEN] {name}")
        elif geo_hit:
            results["geoserver"].append({"name": name, "hits": [h for h in hits if h.get("type") == "geoserver"]})
            print(f"  [GEOSERVER] {name}")
        elif any(h.get("type") in ["other_200", "maybe_waf", "forbidden", "auth_required"] for h in hits):
            results["alive"].append({"name": name, "hits": hits})
            best = next((h for h in hits if h.get("type") in ["other_200", "maybe_waf", "forbidden", "auth_required"]), hits[0])
            print(f"  [ALIVE-{best.get('type', '?')}] {name} ({best.get('status', '?')})")
        else:
            results["dead"].append({"name": name, "hits": hits})
            err = hits[0].get("type", "?") if hits else "?"
            print(f"  [DEAD] {name} ({err})")
        sys.stdout.flush()

print(f"\n{'='*60}")
print(f"=== RESULTS SUMMARY ===")
print(f"{'='*60}")
print(f"ArcGIS Open (dumpable): {len(results['arcgis_open'])}")
print(f"GeoServer found:        {len(results['geoserver'])}")
print(f"Alive (other):          {len(results['alive'])}")
print(f"Dead/Unreachable:       {len(results['dead'])}")

print(f"\n=== ARCGIS OPEN (READY TO DUMP) ===")
for r in results["arcgis_open"]:
    for h in r["hits"]:
        print(f"  {r['name']}: {h['url']}")
        if "snippet" in h:
            print(f"    Preview: {h['snippet'][:200]}")

print(f"\n=== GEOSERVER FOUND ===")
for r in results["geoserver"]:
    for h in r["hits"]:
        print(f"  {r['name']}: {h['url']}")

print(f"\n=== ALIVE (NEEDS INVESTIGATION) ===")
for r in results["alive"]:
    best = next((h for h in r["hits"] if h.get("type") in ["other_200", "maybe_waf", "forbidden", "auth_required"]), r["hits"][0])
    print(f"  {r['name']}: type={best.get('type', '')} status={best.get('status', '?')} url={best.get('url', '')}")

with open("mass_probe_results.json", "w") as f:
    json.dump(results, f, indent=2, default=str)
print("\nFull results saved to mass_probe_results.json")
