"""
Download From List - Downloads files from a list of URLs (like valid_urls.txt)
Run this AFTER fast_url_checker.py to download discovered files
"""
import requests
from pathlib import Path
import time
import sys
from urllib.parse import urlparse

# Directory that mirrors the remote file tree; created eagerly at import time
# so the rest of the script can assume it exists.
DOWNLOAD_DIR = Path("inm_downloads")
DOWNLOAD_DIR.mkdir(exist_ok=True)

# Browser-like User-Agent so the server treats these requests as an ordinary
# client rather than a script.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}


def download_file(url: str) -> bool:
    """Download a single file from *url* into DOWNLOAD_DIR, mirroring its path.

    The portion of the URL path after "/api_update/inm/" becomes the local
    relative path.  Returns True on success (or when the file already
    exists locally), False on any download error.
    """
    # Parse URL to create the local path under DOWNLOAD_DIR.
    parsed = urlparse(url)
    rel_path = parsed.path.replace("/api_update/inm/", "").strip("/")
    if not rel_path:
        # URL has no usable path component; fall back to a generic name so we
        # never try to open the download directory itself as a file.
        rel_path = "index.html"
    local_path = DOWNLOAD_DIR / rel_path

    if local_path.exists():
        print(f"[SKIP] Already exists: {local_path}")
        return True

    # Stream into a temporary ".part" file first: an interrupted or failed
    # download must never leave a truncated file at local_path, or the
    # exists() check above would mistake it for a complete download next run.
    tmp_path = local_path.with_name(local_path.name + ".part")
    try:
        print(f"[DOWNLOADING] {url}")
        with requests.get(url, headers=HEADERS, stream=True, timeout=60) as r:
            r.raise_for_status()
            local_path.parent.mkdir(parents=True, exist_ok=True)

            # Content-Length may be absent; 0 disables the progress readout.
            total_size = int(r.headers.get('content-length', 0))
            downloaded = 0

            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size:
                        # Lightweight in-place progress indicator.
                        pct = downloaded * 100 // total_size
                        print(f"\r  {pct}% ({downloaded}/{total_size} bytes)",
                              end="", flush=True)
            if total_size:
                print()  # terminate the progress line

        # Atomic on the same filesystem, so local_path is all-or-nothing.
        tmp_path.replace(local_path)
        size_mb = local_path.stat().st_size / (1024 * 1024)
        print(f"[SAVED] {local_path} ({size_mb:.2f} MB)")
        return True

    except Exception as e:
        print(f"[ERROR] {url}: {e}")
        # Drop any partial download so it cannot be mistaken for a good file.
        tmp_path.unlink(missing_ok=True)
        return False


def main():
    """Read a URL list file and download every entry, then print a summary."""
    # URL list file: first CLI argument, defaulting to valid_urls.txt.
    url_file = Path("valid_urls.txt") if len(sys.argv) <= 1 else Path(sys.argv[1])

    if not url_file.exists():
        print(f"ERROR: {url_file} not found")
        print("Run fast_url_checker.py first, or provide a URL list file")
        sys.exit(1)

    # Keep only non-blank lines, stripped of surrounding whitespace.
    urls = []
    for raw_line in url_file.read_text().splitlines():
        cleaned = raw_line.strip()
        if cleaned:
            urls.append(cleaned)

    banner = "=" * 70
    print(banner)
    print(f"Download From List - {len(urls)} URLs")
    print(banner)
    print(f"Source: {url_file}")
    print(f"Destination: {DOWNLOAD_DIR.resolve()}")
    print(banner)

    success = failed = 0
    total = len(urls)

    for index, url in enumerate(urls, start=1):
        print(f"\n[{index}/{total}]", end=" ")
        if download_file(url):
            success += 1
        else:
            failed += 1
        time.sleep(0.5)  # Be polite to the server between requests.

    print("\n" + banner)
    print(f"COMPLETE: {success} downloaded, {failed} failed")
    print(f"Files saved in: {DOWNLOAD_DIR.resolve()}")
    print(banner)


if __name__ == "__main__":
    main()
