import json, os, time, urllib.request, ssl, sys

# Root URL of the target ArcGIS REST services catalog.
BASE = "https://ergit.presidencia.gov.co/server/rest/services"
# Local directory tree where every fetched JSON document is written.
DUMP = "C:/Users/Squir/Desktop/COLOMBIA/DUMP 2_25_2026/arcgis"

# Skip SSL verification
# NOTE(review): disabling hostname checks and certificate validation makes
# the transfer vulnerable to man-in-the-middle interception; acceptable only
# for a one-off dump of a server with a broken certificate chain.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

def fetch(url, retries=2, delay=2.0):
    """GET *url* and return the response body as text, or None on failure.

    Retries up to *retries* additional times, sleeping *delay* seconds
    between attempts.  All errors (DNS, TLS, HTTP status, timeout) are
    reported to stderr but never raised, so callers can treat a None
    return as "unavailable" and continue.

    Parameters:
        url:     full URL to request.
        retries: extra attempts after the first failure (default 2).
        delay:   seconds to sleep between attempts (default 2.0).
    """
    last_err = None
    for attempt in range(retries + 1):
        try:
            # Some servers reject the default Python-urllib user agent.
            req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
            with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
                # errors="replace" tolerates mis-declared encodings.
                return resp.read().decode("utf-8", errors="replace")
        except Exception as e:
            last_err = e
            if attempt < retries:
                time.sleep(delay)
    # All attempts failed; report the final error instead of failing silently.
    print(f"  fetch failed for {url}: {last_err}", file=sys.stderr)
    return None

def fetch_json(url):
    """Fetch *url* as ArcGIS JSON (appends ``?f=json``) and parse it.

    Returns the decoded object, or None when the request fails or the
    body is not valid JSON (e.g. an HTML error page).
    """
    raw = fetch(url + "?f=json")
    if not raw:
        return None
    try:
        return json.loads(raw)
    # json.JSONDecodeError subclasses ValueError; a bare except here would
    # also swallow KeyboardInterrupt/SystemExit.
    except ValueError:
        return None

def save_json(path, data):
    """Write *data* as pretty-printed UTF-8 JSON to *path*.

    Parent directories are created as needed.  A bare filename (no
    directory component) is written to the current directory —
    os.makedirs("") would otherwise raise FileNotFoundError.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps Spanish accented characters readable.
        json.dump(data, f, indent=2, ensure_ascii=False)

# Step 1: Get root catalog — lists top-level folders and root services.
print("=== Fetching root catalog ===")
root = fetch_json(BASE)
if not root:
    print("FATAL: Cannot reach ArcGIS server")
    sys.exit(1)

save_json(f"{DUMP}/catalog.json", root)
folders = root.get("folders", [])
root_services = root.get("services", [])
print(f"Folders: {len(folders)}, Root services: {len(root_services)}")

# Accumulates every service descriptor found at the root and, below,
# inside each folder.  list() copies instead of a manual append loop.
all_services = list(root_services)

# Step 2: Enumerate all folders, saving each folder's catalog and
# collecting its service descriptors into all_services.
for folder in folders:
    print(f"\n--- Folder: {folder} ---")
    folder_data = fetch_json(f"{BASE}/{folder}")
    if folder_data:
        folder_dir = f"{DUMP}/folders/{folder}"
        os.makedirs(folder_dir, exist_ok=True)
        save_json(f"{folder_dir}/_catalog.json", folder_data)
        svcs = folder_data.get("services", [])
        print(f"  Services: {len(svcs)}")
        # extend() replaces the original element-by-element append loop.
        all_services.extend(svcs)
    time.sleep(0.5)  # throttle: be polite to the server

print(f"\n=== Total services to dump: {len(all_services)} ===\n")

# Step 3: Dump each service: its metadata, every layer/table's schema,
# and up to 5000 features per layer.
dumped = 0
errors = 0

def _safe_component(name):
    """Sanitize a service/layer name for use as a file or directory name."""
    return name.replace("/", "_").replace("\\", "_").replace(":", "_")

for idx, svc in enumerate(all_services, start=1):
    name = svc.get("name", "unknown")
    stype = svc.get("type", "MapServer")
    # Consistent with layer-name sanitization (the original only stripped
    # '/' for service names but '/', '\' and ':' for layer names).
    safe_name = _safe_component(name)

    svc_url = f"{BASE}/{name}/{stype}"
    # enumerate() keeps the progress index monotonic; the original
    # dumped-based counter repeated numbers after a failed service.
    print(f"[{idx}/{len(all_services)}] {name}/{stype}")

    # Get service metadata (layer/table list, capabilities, ...)
    svc_data = fetch_json(svc_url)
    if svc_data:
        svc_dir = f"{DUMP}/services/{safe_name}"
        os.makedirs(svc_dir, exist_ok=True)
        save_json(f"{svc_dir}/_info.json", svc_data)

        layers = svc_data.get("layers", [])
        tables = svc_data.get("tables", [])
        print(f"  Layers: {len(layers)}, Tables: {len(tables)}")

        # Dump each layer's/table's schema and (first 5000) features.
        for layer in layers + tables:
            lid = layer.get("id", 0)
            lname = layer.get("name", f"layer_{lid}")
            safe_lname = _safe_component(lname)

            layer_info = fetch_json(f"{svc_url}/{lid}")
            if layer_info:
                save_json(f"{svc_dir}/{safe_lname}_info.json", layer_info)

                # Query features; the server caps the page at resultRecordCount.
                query_url = f"{svc_url}/{lid}/query?f=json&where=1%3D1&outFields=*&returnGeometry=true&resultRecordCount=5000"
                features = fetch(query_url)
                if features:
                    try:
                        fdata = json.loads(features)
                    except ValueError:
                        # Not JSON (HTML error page, etc.): keep a truncated
                        # raw copy for later inspection.
                        with open(f"{svc_dir}/{safe_lname}_data_raw.txt", "w", encoding="utf-8") as f:
                            f.write(features[:100000])
                    else:
                        feat_count = len(fdata.get("features", []))
                        exceeded = fdata.get("exceededTransferLimit", False)
                        save_json(f"{svc_dir}/{safe_lname}_data.json", fdata)
                        marker = " [MORE AVAILABLE]" if exceeded else ""
                        print(f"    Layer {lid} '{lname}': {feat_count} features{marker}")

                        # Page was truncated: also record the true total count.
                        if exceeded:
                            count_url = f"{svc_url}/{lid}/query?f=json&where=1%3D1&returnCountOnly=true"
                            count_raw = fetch(count_url)
                            if count_raw:
                                try:
                                    cdata = json.loads(count_raw)
                                except ValueError:
                                    pass  # best-effort: count is informational only
                                else:
                                    total = cdata.get("count", "?")
                                    print(f"    -> Total records: {total}")
                                    save_json(f"{svc_dir}/{safe_lname}_count.json", cdata)
            time.sleep(0.3)  # throttle per-layer requests

        dumped += 1
    else:
        print(f"  ERROR: Could not fetch service info")
        errors += 1

    time.sleep(0.5)  # throttle per-service requests

print(f"\n=== COMPLETE: {dumped} services dumped, {errors} errors ===")
