#!/bin/bash
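# Download every remaining all_data agency directory not already covered by
# the targeted downloads (e.g. conapo, coneval, secretaria_hacienda,
# secretaria_economia), plus the s_* secretariat mirrors and the prueba
# directories.
# Run this after the targeted downloads have finished.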

# Local mirror root, remote base URL, and the log file for this run.
DEST="/c/Users/Squir/Desktop/MEXICO/V A U L T/repodatos.atdt.gob.mx"
BASE="https://repodatos.atdt.gob.mx"
LOG="$DEST/remaining-download.log"

# The log lives inside DEST, so DEST must exist before the first write;
# otherwise tee cannot open the log and the opening lines are lost.
mkdir -p "$DEST"

# Plain tee truncates any previous log; every later write appends with -a.
echo "=== Downloading remaining all_data datasets ===" | tee "$LOG"
echo "=== $(date) ===" | tee -a "$LOG"

# download_dir URL LOCAL_DIR [DEPTH]
#
# Recursively mirror one remote directory listing into LOCAL_DIR.
#
# Arguments:
#   $1  URL of the directory listing. It must end with "/" because entry
#       names are appended to it directly.
#   $2  Local directory to mirror into (created if missing).
#   $3  Current recursion depth (defaults to 0); recursion stops past depth 8.
# Globals:
#   LOG (read) - log file that progress lines are appended to. Falls back to
#                /dev/null when unset so a missing log never blocks a download.
# Outputs:
#   [DIR], [DL], [WARN] and [FAIL] lines on stdout and in LOG; [SKIP] lines
#   in LOG only.
#
# The listing is expected to be a JSON array of objects with "type", "name"
# and "size" keys; entries that do not fit that shape are ignored.
download_dir() {
    local url="$1"
    local local_dir="$2"
    local depth="${3:-0}"
    local log="${LOG:-/dev/null}"
    local json target partial existing_size rc

    # Recursion guard against unexpectedly deep (or cyclic) listings.
    if [ "$depth" -gt 8 ]; then
        return
    fi
    mkdir -p -- "$local_dir"

    # NOTE(review): -k disables TLS certificate verification. It is kept
    # because the original script relied on it; drop it if the host's
    # certificate chain validates.
    # -f makes HTTP errors yield an empty body instead of an HTML error page.
    json=$(curl -fskL --max-time 30 "$url" 2>/dev/null)
    [ -z "$json" ] && return

    # Emit one "type|size|url-encoded-name|name" line per entry. The raw name
    # is the last field so '|' characters inside it do not shift the other
    # fields. Bytes are read and written directly (UTF-8) so the result does
    # not depend on the interpreter's locale encoding or newline translation.
    printf '%s\n' "$json" | python3 -c '
import json, sys
from urllib.parse import quote

try:
    entries = json.loads(sys.stdin.buffer.read())
except ValueError as exc:
    sys.stderr.write("[WARN] listing is not valid JSON: %s\n" % exc)
    sys.exit(0)
if not isinstance(entries, list):
    sys.exit(0)
for entry in entries:
    if not isinstance(entry, dict):
        continue
    name = entry.get("name")
    if not isinstance(name, str) or "\n" in name or "\r" in name:
        continue
    kind = entry.get("type")
    if not isinstance(kind, str):
        kind = ""
    try:
        encoded = quote(name, safe="")
    except UnicodeError:
        continue
    fields = (kind, str(entry.get("size", 0)), encoded, name)
    sys.stdout.buffer.write(("|".join(fields) + "\n").encode("utf-8"))
' 2>>"$log" | while IFS='|' read -r ftype fsize fenc fname; do
        # Names come from the server: never let one escape local_dir.
        case "$fname" in
            '') continue ;;
            .|..|*/*|*\\*)
                echo "[WARN] skipping unsafe entry name: $fname" | tee -a "$log"
                continue
                ;;
        esac

        if [ "$ftype" = "directory" ]; then
            echo "[DIR] $local_dir/$fname/" | tee -a "$log"
            # stdin is detached so nothing in the recursive call can consume
            # the remaining lines of this listing.
            download_dir "${url}${fenc}/" "$local_dir/$fname" $((depth + 1)) < /dev/null
        else
            target="$local_dir/$fname"
            if [ -f "$target" ]; then
                existing_size=$(wc -c < "$target" 2>/dev/null | tr -d ' ')
                # Same byte count as the listing reports: treat as complete.
                if [ "$existing_size" = "$fsize" ]; then
                    echo "[SKIP] $fname" >> "$log"
                    continue
                fi
            fi
            echo "[DL] $fname -> $local_dir/" | tee -a "$log"
            # Download to a side file and rename on success, so a failed or
            # interrupted transfer never replaces or masquerades as the file.
            partial="$target.part"
            if curl -fskL --max-time 1800 -o "$partial" "${url}${fenc}" 2>/dev/null; then
                mv -f -- "$partial" "$target"
            else
                rc=$?
                rm -f -- "$partial"
                echo "[FAIL] $fname (curl exit $rc)" | tee -a "$log"
            fi
        fi
    done
}

# Discover every top-level entry under all_data/ and mirror each one.
# The listing is parsed with python3's json module (as download_dir does)
# rather than grep/sed, so whitespace in the JSON does not matter. Each
# output line is "url-encoded-name|name"; names are then read line by line,
# so spaces or glob characters in a name are never word-split or expanded.
agencies=$(curl -skL --max-time 30 "$BASE/all_data/" 2>/dev/null | python3 -c '
import json, sys
from urllib.parse import quote

try:
    entries = json.loads(sys.stdin.buffer.read())
except ValueError:
    sys.exit(0)
if not isinstance(entries, list):
    sys.exit(0)
for entry in entries:
    name = entry.get("name") if isinstance(entry, dict) else None
    if not isinstance(name, str) or not name or "\n" in name or "\r" in name:
        continue
    try:
        line = quote(name, safe="") + "|" + name + "\n"
    except UnicodeError:
        continue
    sys.stdout.buffer.write(line.encode("utf-8"))
' 2>/dev/null)

if [ -z "$agencies" ]; then
    echo "[WARN] no agencies found at $BASE/all_data/" | tee -a "$LOG"
fi

# The list is fed through fd 3 so commands inside the loop keep their own
# stdin and cannot swallow the remaining agency names.
while IFS='|' read -r agency_enc agency <&3; do
    # Names come from the server: skip anything that could escape DEST.
    case "$agency" in
        ''|.|..|*/*|*\\*) continue ;;
    esac
    echo "=== Agency: $agency ===" | tee -a "$LOG"
    download_dir "$BASE/all_data/$agency_enc/" "$DEST/all_data/$agency" 0
done 3<<< "$agencies"

# Mirror the s_* (secretariat) directories that sit directly under BASE.
# The names are fixed here; they are not discovered from the server.
secretariat_dirs=(
    s_agricultura_des_rural
    s_ciencia_human_tec_inov
    s_cultura
    s_economia
    s_educacion_publica
    s_hacienda_cred_publico
    s_infra_comunic_transportes
    s_medio_ambiente_rec_naturales
    s_salud
    s_trabajo_prev_social
    s_turismo
)
for sdir in "${secretariat_dirs[@]}"; do
    printf '%s\n' "=== Secretariat: $sdir ===" | tee -a "$LOG"
    download_dir "${BASE}/${sdir}/" "${DEST}/${sdir}" 0
done

# Mirror the two prueba ("test") directories that sit directly under BASE.
prueba_dirs=(prueba prueba_rem)
for pdir in "${prueba_dirs[@]}"; do
    printf '%s\n' "=== Test: $pdir ===" | tee -a "$LOG"
    download_dir "${BASE}/${pdir}/" "${DEST}/${pdir}" 0
done

# Closing banner: blank line, completion marker and end timestamp, shown on
# stdout and appended to the log in one write.
{
    echo ""
    echo "=== REMAINING DUMP COMPLETE ==="
    echo "=== $(date) ==="
} | tee -a "$LOG"

# Total on-disk size of the mirror, also recorded in the log.
du -sh "$DEST" | tee -a "$LOG"
