#!/usr/bin/env python3
"""Wave 2 ArcGIS Dumper - CAR, Bogota Cadastre, ANH Hydrocarbons"""
import requests, json, os, sys, time, urllib3
urllib3.disable_warnings()

# Registry of ArcGIS REST servers to dump.  Each key doubles as the CLI target
# name and as the output sub-directory created under DUMP_DIR.  Each entry has:
#   "base_urls": list of (services-root URL, endpoint name) pairs; the endpoint
#                name prefixes the saved catalog file and service directories.
#   "label":     human-readable name printed in the progress banner.
SERVERS = {
    "arcgis-car": {
        "base_urls": [("https://sig.car.gov.co/arcgis/rest/services", "arcgis")],
        "label": "CAR Environmental Authority"
    },
    "arcgis-catastro-bogota": {
        # This host exposes two services roots; both are crawled.
        "base_urls": [
            ("https://sig.catastrobogota.gov.co/arcgis/rest/services", "arcgis"),
            ("https://sig.catastrobogota.gov.co/server/rest/services", "server"),
        ],
        "label": "Bogota Cadastre"
    },
    "arcgis-anh": {
        "base_urls": [("https://geovisor.anh.gov.co/server/rest/services", "server")],
        "label": "ANH Hydrocarbons"
    },
}

# Output root: the directory that contains this script.
DUMP_DIR = os.path.dirname(os.path.abspath(__file__))
# Page size requested per feature query (sent as resultRecordCount).
BATCH = 5000
# Catalog folders that are never crawled.
SKIP_FOLDERS = ["Utilities"]

def safe_name(s):
    """Return *s* made safe for use as a file or directory name.

    Alphanumeric characters, dots, underscores, hyphens and spaces are kept;
    every other character is replaced by a single underscore.
    """
    allowed_punctuation = "._- "
    cleaned = []
    for ch in s:
        keep = ch.isalnum() or ch in allowed_punctuation
        cleaned.append(ch if keep else "_")
    return "".join(cleaned)

def get_json(url, retries=3):
    """Fetch *url* with GET and return the decoded JSON body, or ``None``.

    Args:
        url: Fully-formed request URL (query string included).
        retries: Maximum number of attempts for transient failures.

    Returns:
        The parsed JSON value on an HTTP 200 response with a JSON body.
        ``None`` when the server answers 403, when a 200 body is not valid
        JSON, or when every attempt failed (network error or any other HTTP
        status).  The reason is printed in each of those cases.

    Transient failures (network errors and non-200/403 statuses) are retried
    after a 2 second pause; 403 and undecodable bodies are not retried
    because repeating the request would not change the outcome.
    """
    last_problem = None
    for attempt in range(retries):
        try:
            # Certificate verification is intentionally off, matching the
            # urllib3.disable_warnings() call at module import.
            r = requests.get(url, timeout=60, verify=False)
        except requests.RequestException as e:
            last_problem = e
        else:
            if r.status_code == 200:
                try:
                    return r.json()
                except ValueError:
                    # requests raises a ValueError subclass for non-JSON
                    # bodies (e.g. an HTML error page served with 200).
                    print(f"    Invalid JSON response: {url}")
                    return None
            if r.status_code == 403:
                print(f"    403 Forbidden: {url}")
                return None
            last_problem = f"HTTP {r.status_code}: {url}"

        # Back off before the next attempt, but not after the final one.
        if attempt < retries - 1:
            time.sleep(2)

    if last_problem is not None:
        print(f"    Error: {last_problem}")
    return None

def _fetch_all_features(layer_url, total):
    """Page through a layer's ``/query`` endpoint and return its features.

    Requests pages of ``BATCH`` records using ``resultOffset`` /
    ``resultRecordCount`` and keeps going while the response sets
    ``exceededTransferLimit``.  The loop also stops when *total* features
    have been collected or when a page is identical to the previous one
    (which happens if the server ignores ``resultOffset``); without those
    guards such a server would make the loop run forever.

    Args:
        layer_url: URL of the layer (``.../<service>/<type>/<id>``).
        total: Feature count reported by the server for this layer.

    Returns:
        List of feature dicts, possibly shorter than *total* if a request
        failed part-way through.
    """
    collected = []
    offset = 0
    previous_page = None
    while offset < total:
        q = f"{layer_url}/query?where=1%3D1&outFields=*&f=json&resultOffset={offset}&resultRecordCount={BATCH}"
        data = get_json(q)
        if not data or "features" not in data:
            if data and "error" in data:
                print(f"      Query error at offset {offset}: {data['error']}")
            break

        features = data["features"]
        if not features:
            break

        if features == previous_page:
            print("      Server returned the same page twice; stopping pagination")
            break

        collected.extend(features)
        offset += len(features)
        previous_page = features

        if not data.get("exceededTransferLimit", False):
            break

        print(f"      ...{offset}/{total}")
    return collected


def dump_features(service_url, out_dir, service_name):
    """Dump metadata and all features of one MapServer/FeatureServer service.

    Writes into *out_dir*:
      * ``service_info.json`` - the service description,
      * ``layer_<id>_<name>_info.json`` - one per layer/table,
      * ``<name>_features.json`` - the feature list of every queryable,
        non-empty layer.  If two layers of the service sanitize to the same
        name, later ones are written as ``<name>_<id>_features.json`` so
        they do not overwrite the first.

    Args:
        service_url: Service URL (``.../<name>/<MapServer|FeatureServer>``).
        out_dir: Existing directory that receives the output files.
        service_name: Name used for the fallback layer when the service
            description lists no layers or tables.

    Returns:
        None.  Failures are reported on stdout and the affected service or
        layer is skipped.
    """
    info = get_json(f"{service_url}?f=json")
    if not info:
        return

    with open(os.path.join(out_dir, "service_info.json"), "w", encoding="utf-8") as f:
        json.dump(info, f, ensure_ascii=False, indent=2)

    # ``or []`` also covers keys that are present but null.
    layers = (info.get("layers") or []) + (info.get("tables") or [])
    if not layers:
        # Try as single layer (layer 0)
        layers = [{"id": 0, "name": service_name}]

    used_stems = set()  # feature-file stems already written for this service
    for layer in layers:
        lid = layer.get("id", 0)
        lname = layer.get("name", f"layer_{lid}")
        safe_lname = safe_name(lname)

        layer_url = f"{service_url}/{lid}"
        layer_info = get_json(f"{layer_url}?f=json")
        if not layer_info:
            continue

        with open(os.path.join(out_dir, f"layer_{lid}_{safe_lname}_info.json"), "w", encoding="utf-8") as f:
            json.dump(layer_info, f, ensure_ascii=False, indent=2)

        # Check if queryable
        caps = layer_info.get("capabilities") or ""
        if "Query" not in caps:
            continue

        # Count features
        count_url = f"{layer_url}/query?where=1%3D1&returnCountOnly=true&f=json"
        count_data = get_json(count_url)
        total = (count_data.get("count") or 0) if count_data else 0

        if total == 0:
            continue

        print(f"    Layer {lid} ({lname}): {total} features")

        all_features = _fetch_all_features(layer_url, total)
        if not all_features:
            continue

        # Keep the plain name when it is unique; disambiguate with the layer
        # id only on a clash so existing file names stay stable.
        stem = safe_lname
        if stem in used_stems:
            stem = f"{safe_lname}_{lid}"
        used_stems.add(stem)

        fname = os.path.join(out_dir, f"{stem}_features.json")
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(all_features, f, ensure_ascii=False)
        print(f"    Saved {len(all_features)} features")
        if len(all_features) < total:
            print(f"    WARNING: incomplete dump ({len(all_features)}/{total})")

def _dump_services(services, base_url, endpoint_name, base_dir, used_dirs, indent):
    """Dump every MapServer/FeatureServer entry of one catalog listing.

    Args:
        services: ``services`` list from a root or folder catalog response.
        base_url: Services-root URL the catalog was fetched from.
        endpoint_name: Prefix for the per-service output directories.
        base_dir: Directory that receives the per-service directories.
        used_dirs: Set of directory names already assigned during this
            server dump; updated in place.
        indent: Leading whitespace for the progress line.
    """
    for svc in services:
        sname = svc.get("name", "unknown")
        stype = svc.get("type", "MapServer")
        if stype not in ("MapServer", "FeatureServer"):
            continue

        safe_sname = safe_name(sname.replace("/", "_"))
        dir_name = f"{endpoint_name}_{safe_sname}"
        # The same service name can be listed under both service types; give
        # the later one its own directory instead of overwriting the first.
        if dir_name in used_dirs:
            dir_name = f"{dir_name}_{stype}"
        used_dirs.add(dir_name)

        out_dir = os.path.join(base_dir, dir_name)
        os.makedirs(out_dir, exist_ok=True)

        svc_url = f"{base_url}/{sname}/{stype}"
        print(f"{indent}Service: {sname}/{stype}")
        dump_features(svc_url, out_dir, safe_sname)


def dump_server(server_key, config):
    """Crawl every services root of one configured server and dump it.

    For each ``(base_url, endpoint_name)`` pair in ``config["base_urls"]``
    the root catalog is saved as ``catalog-<endpoint_name>.json``; then the
    root-level services and the services of every folder not listed in
    ``SKIP_FOLDERS`` are dumped into ``<DUMP_DIR>/<server_key>/``.

    Args:
        server_key: Name of the output sub-directory under ``DUMP_DIR``.
        config: Server entry with ``"label"`` and ``"base_urls"`` keys.

    Returns:
        None.  Unreachable catalogs are reported on stdout and skipped.
    """
    print(f"\n{'='*60}")
    print(f"DUMPING: {config['label']} -> {server_key}/")
    print(f"{'='*60}")

    base_dir = os.path.join(DUMP_DIR, server_key)
    os.makedirs(base_dir, exist_ok=True)

    used_dirs = set()  # service directory names handed out in this run
    for base_url, endpoint_name in config["base_urls"]:
        print(f"\nEndpoint: {base_url}")

        catalog = get_json(f"{base_url}?f=json")
        if not catalog:
            print(f"  Failed to get catalog")
            continue

        with open(os.path.join(base_dir, f"catalog-{endpoint_name}.json"), "w", encoding="utf-8") as f:
            json.dump(catalog, f, ensure_ascii=False, indent=2)

        # ``or []`` also covers keys that are present but null.
        folders = catalog.get("folders") or []
        services = catalog.get("services") or []

        print(f"  Folders: {len(folders)}, Root services: {len(services)}")

        # Dump root services
        _dump_services(services, base_url, endpoint_name, base_dir, used_dirs, "  ")

        # Dump folder services
        for folder in folders:
            if folder in SKIP_FOLDERS:
                continue

            print(f"\n  Folder: {folder}")
            folder_catalog = get_json(f"{base_url}/{folder}?f=json")
            if not folder_catalog:
                print(f"    Failed to get folder catalog")
                continue

            folder_services = folder_catalog.get("services") or []
            print(f"    Services: {len(folder_services)}")

            _dump_services(folder_services, base_url, endpoint_name, base_dir, used_dirs, "    ")

if __name__ == "__main__":
    # Usage: script.py [<server key> | all]; with no argument every
    # configured server is dumped.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "all"

    # Reject anything that is neither "all" nor a configured server key.
    if target != "all" and target not in SERVERS:
        print(f"Unknown target: {target}")
        print(f"Available: {', '.join(SERVERS.keys())}, all")
        sys.exit(1)

    selected_keys = list(SERVERS) if target == "all" else [target]
    for server_key in selected_keys:
        dump_server(server_key, SERVERS[server_key])

    print("\n\nDONE!")
