import requests
import json
import os
import time
import urllib3
# The target server uses TLS certificates that requests rejects by default
# (session.verify = False below), so silence the InsecureRequestWarning spam.
urllib3.disable_warnings()

# ArcGIS REST catalog roots to crawl; the second tuple element is a short
# prefix used in output directory/file names to keep the two sources apart.
BASE_URLS = [
    ("https://sig.upra.gov.co/arcgis/rest/services", "arcgis"),
    ("https://sig.upra.gov.co/server/rest/services", "server"),
]
# All output lands in an "arcgis-upra" directory next to this script.
# NOTE: created at import time as a module-level side effect.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "arcgis-upra")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Shared HTTP session: TLS verification disabled (see warning suppression
# above) and a browser-like User-Agent, reused for every request.
session = requests.Session()
session.verify = False
session.headers.update({"User-Agent": "Mozilla/5.0"})

def safe_name(name):
    """Return *name* with '/', '\\' and spaces replaced by underscores.

    Used to turn ArcGIS service/layer/folder names into filesystem-safe
    file and directory names.
    """
    # One C-level pass instead of three chained .replace() calls.
    return name.translate(str.maketrans({"/": "_", "\\": "_", " ": "_"}))

def fetch_json(url, params=None):
    """GET *url* through the shared session and return the parsed JSON.

    Args:
        url: ArcGIS REST endpoint URL.
        params: optional query parameters; ``f=json`` is always added.

    Returns:
        The decoded JSON object, or ``None`` on any request, HTTP-status,
        or decode failure (this crawler is deliberately best-effort).
    """
    # Fix: build a local copy instead of writing "f" back into the caller's
    # dict — the original mutated the params argument as a side effect.
    query = dict(params) if params else {}
    query["f"] = "json"
    try:
        r = session.get(url, params=query, timeout=30)
        # Surface HTTP errors explicitly rather than trying to JSON-decode
        # an error page; the except below still turns this into None.
        r.raise_for_status()
        return r.json()
    except Exception as e:
        # Broad catch is intentional: log and continue so one bad endpoint
        # doesn't abort the whole crawl.
        print(f"  ERROR fetching {url}: {e}")
        return None

def dump_features(url, layer_name, output_dir, max_records=5000):
    """Download every feature of an ArcGIS layer/table and save them as JSON.

    Pages through ``{url}/query`` with ``resultOffset`` until the server no
    longer reports ``exceededTransferLimit``. Writes
    ``<safe_name(layer_name)>_features.json`` into *output_dir* (only when at
    least one feature was retrieved).

    Args:
        url: layer endpoint (e.g. ``.../MapServer/0``).
        layer_name: human-readable layer name, sanitized for the filename.
        output_dir: existing directory to write the output file into.
        max_records: page size requested per query.
    """
    count_data = fetch_json(f"{url}/query", {"where": "1=1", "returnCountOnly": "true"})
    if count_data is None:
        # Fix: the original treated a *failed* count query as "0 features"
        # and returned early, silently skipping the layer on a transient
        # error. Count is informational only — attempt the download anyway.
        print(f"    Layer: {layer_name} - count unknown")
    else:
        total = count_data.get("count", 0)
        print(f"    Layer: {layer_name} - {total} features")
        if total == 0:
            # Confirmed empty layer: nothing to fetch.
            return

    all_features = []
    offset = 0
    while True:
        data = fetch_json(f"{url}/query", {
            "where": "1=1",
            "outFields": "*",
            "resultOffset": str(offset),
            "resultRecordCount": str(max_records),
            "returnGeometry": "true",
        })
        if not data or "features" not in data:
            # Request failed or server returned an error payload; keep
            # whatever was collected so far.
            break
        page = data["features"]
        if not page:
            break
        all_features.extend(page)
        offset += len(page)
        # ArcGIS sets exceededTransferLimit when more pages remain;
        # its absence means this was the last page.
        if not data.get("exceededTransferLimit", False):
            break
        time.sleep(0.3)  # small delay between pages to be polite

    if all_features:
        out_file = os.path.join(output_dir, f"{safe_name(layer_name)}_features.json")
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump({"count": len(all_features), "features": all_features}, f, ensure_ascii=False)
        print(f"    Saved {len(all_features)} features to {out_file}")

def process_service(base_url, service_name, service_type, prefix):
    """Dump one ArcGIS service: its metadata, each layer's info, and features.

    Creates ``OUTPUT_DIR/<prefix>_<service_name>/`` containing
    ``service_info.json``, per-layer ``layer_<id>_<name>_info.json`` files,
    and per-layer feature dumps via ``dump_features``.
    """
    service_url = f"{base_url}/{service_name}/{service_type}"
    service_dir = os.path.join(OUTPUT_DIR, f"{prefix}_{safe_name(service_name)}")
    os.makedirs(service_dir, exist_ok=True)

    print(f"\n[{prefix}] Processing: {service_name} ({service_type})")

    metadata = fetch_json(service_url)
    if not metadata:
        return

    info_path = os.path.join(service_dir, "service_info.json")
    with open(info_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, ensure_ascii=False, indent=2)

    # Layers and standalone tables are crawled identically.
    for entry in metadata.get("layers", []) + metadata.get("tables", []):
        layer_id = entry["id"]
        layer_name = entry.get("name", f"layer_{layer_id}")
        layer_url = f"{service_url}/{layer_id}"

        details = fetch_json(layer_url)
        if details:
            details_path = os.path.join(
                service_dir, f"layer_{layer_id}_{safe_name(layer_name)}_info.json"
            )
            with open(details_path, "w", encoding="utf-8") as f:
                json.dump(details, f, ensure_ascii=False, indent=2)

        dump_features(layer_url, layer_name, service_dir)
        time.sleep(0.2)  # throttle between layers

def main():
    """Crawl each configured ArcGIS endpoint: root services, then folders."""
    for base_url, prefix in BASE_URLS:
        rule = "=" * 60
        print(f"\n{rule}")
        print(f"Processing {prefix}: {base_url}")
        print(rule)

        catalog = fetch_json(base_url)
        if not catalog:
            continue

        # Services listed directly at the catalog root.
        for service in catalog.get("services", []):
            process_service(base_url, service["name"], service["type"], prefix)

        # Services nested inside folders ("Utilities" is skipped:
        # it only holds system tooling services).
        for folder in catalog.get("folders", []):
            if folder == "Utilities":
                continue
            print(f"\n--- Folder: {folder} ---")
            listing = fetch_json(f"{base_url}/{folder}")
            if not listing:
                continue

            catalog_path = os.path.join(
                OUTPUT_DIR, f"{prefix}_{safe_name(folder)}_catalog.json"
            )
            with open(catalog_path, "w", encoding="utf-8") as f:
                json.dump(listing, f, ensure_ascii=False, indent=2)

            for service in listing.get("services", []):
                process_service(base_url, service["name"], service["type"], prefix)
                time.sleep(0.5)  # throttle between services

    print("\n\nDONE!")

# Entry point: run the full crawl only when executed as a script.
if __name__ == "__main__":
    main()
