// Package engine provides a mode-agnostic scan engine that can be used
// independently of any UI layer. This file implements the worker pool
// that performs concurrent path scanning using errgroup for coordination.
package engine

import (
	"context"
	"crypto/tls"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
	"time"

	"golang.org/x/sync/errgroup"
)

// Retry tuning shared by the request helpers in this file.
const (
	// maxAttempts caps how many times a single URL is requested,
	// counting the first try as well as any retries.
	maxAttempts = 3

	// defaultBackoff is the wait applied when a response carries no
	// usable Retry-After header.
	defaultBackoff = 5 * time.Second
)

// workerPool manages concurrent scanning of targets using errgroup.
// It is internal to the engine package and not exported.
//
// Instances are built with newWorkerPool, which also creates the baseline
// cache; the remaining fields are supplied by the caller.
type workerPool struct {
	cfg       Config           // scan settings; Workers bounds concurrency
	client    *http.Client     // used for every probe and baseline request
	reporter  ProgressReporter // receives progress, finding and error callbacks
	stats     *Stats           // counters updated while the scan runs
	rateLimit *RateLimitState  // consulted before each request, set on HTTP 429
	baselines *baselineCache   // per-domain homepage baselines for soft 404 checks
}

// newWorkerPool wires the supplied collaborators into a fresh workerPool.
// The baseline cache is the only piece the pool creates for itself; every
// other field is stored exactly as passed in.
func newWorkerPool(cfg Config, client *http.Client, reporter ProgressReporter, stats *Stats, rateLimit *RateLimitState) *workerPool {
	pool := new(workerPool)
	pool.cfg = cfg
	pool.client = client
	pool.reporter = reporter
	pool.stats = stats
	pool.rateLimit = rateLimit
	pool.baselines = newBaselineCache()
	return pool
}

// scanTargets scans all targets concurrently using errgroup with bounded concurrency.
// It respects context cancellation and reports progress via the ProgressReporter.
//
// At most cfg.Workers targets are scanned at once. The first error returned
// by a worker cancels the group context, and that error is what scanTargets
// returns after all started workers have finished. The queue-size and
// active-worker stats are reset to zero before returning.
func (wp *workerPool) scanTargets(ctx context.Context, targets []string) error {
	// errgroup treats a limit of zero as "never admit a goroutine", which
	// would make the first g.Go call below block forever. Fall back to a
	// single worker instead of deadlocking on a zero-valued config.
	workers := wp.cfg.Workers
	if workers == 0 {
		workers = 1
	}

	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(workers)

	// Update stats with initial queue size
	wp.stats.SetQueueSize(int64(len(targets)))
	wp.stats.SetWorkersActive(int64(workers))

	for i, target := range targets {
		target := target // capture for goroutine

		g.Go(func() error {
			return wp.scanTarget(ctx, target)
		})

		// g.Go only returns once a worker slot has been claimed for this
		// target, so everything after index i is still waiting. Updating
		// the gauge from this single submitting goroutine avoids the lost
		// updates of a concurrent read-then-set inside each worker.
		wp.stats.SetQueueSize(int64(len(targets) - i - 1))
	}

	err := g.Wait()
	wp.stats.SetWorkersActive(0)
	wp.stats.SetQueueSize(0)
	return err
}

// scanTarget probes every entry of CredentialPaths on one target and reports
// the outcome through the ProgressReporter.
//
// The target may be given with or without an http:// or https:// prefix and
// trailing slashes; both are stripped before use. A StatusStarted update is
// sent first, then each path is checked in order. Per-path errors are
// reported via OnError and do not stop the scan. If ctx is cancelled between
// paths, a StatusError update is sent and the context error is returned.
// Otherwise the final update is StatusFound when at least one exposed finding
// was reported, or StatusCompleted when none was, and the result is nil.
func (wp *workerPool) scanTarget(ctx context.Context, target string) error {
	// Normalise the target down to a bare host (plus any sub-path).
	for _, prefix := range []string{"http://", "https://"} {
		target = strings.TrimPrefix(target, prefix)
	}
	target = strings.TrimRight(target, "/")

	began := time.Now()
	var checked, exposed int

	// progress snapshots the running counters into an update; the caller
	// fills in Status before sending it.
	progress := func(elapsed time.Duration) ProgressUpdate {
		return ProgressUpdate{
			Target:        target,
			PathsChecked:  checked,
			PathsTotal:    len(CredentialPaths),
			FindingsCount: exposed,
			Elapsed:       elapsed,
		}
	}

	started := progress(0)
	started.Status = StatusStarted
	wp.reporter.OnProgress(started)

	for _, path := range CredentialPaths {
		// Bail out before issuing another request once the context is done.
		if ctx.Err() != nil {
			aborted := progress(time.Since(began))
			aborted.Status = StatusError
			wp.reporter.OnProgress(aborted)
			return ctx.Err()
		}

		finding, err := wp.checkPath(ctx, target, path)
		checked++
		wp.stats.IncrementScanned()

		switch {
		case err != nil:
			// A failed path is recorded, then the scan moves on.
			wp.reporter.OnError(ErrorInfo{
				Target: target,
				Path:   path,
				Err:    err,
				At:     time.Now(),
			})
			wp.stats.IncrementErrors()
		case finding != nil && finding.Exposed:
			exposed++
			wp.stats.IncrementFound()
			wp.reporter.OnFinding(*finding)
		}
	}

	finished := progress(time.Since(began))
	finished.Status = StatusCompleted
	if exposed > 0 {
		finished.Status = StatusFound
	}
	wp.reporter.OnProgress(finished)

	return nil
}

// getBaseline returns the homepage baseline for domain, used by the soft 404
// check. A cached entry is returned as-is. Otherwise the homepage is
// requested over HTTPS and then, if that request fails, over HTTP; the number
// of body bytes read (capped at 50KB) from the first scheme that answers is
// stored in the cache. When neither scheme answers, a zero-length baseline is
// cached instead so the lookup is not repeated (fail open).
func (wp *workerPool) getBaseline(ctx context.Context, domain string) baseline {
	if cached, hit := wp.baselines.get(domain); hit {
		return cached
	}

	const (
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
		bodyLimit = 50 * 1024 // same cap checkPath applies to response bodies
	)

	// measured stays zero unless one of the schemes produces a response.
	measured := 0
	for _, scheme := range [...]string{"https", "http"} {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, scheme+"://"+domain+"/", nil)
		if err != nil {
			continue
		}
		req.Header.Set("User-Agent", userAgent)
		req.Header.Set("Accept", "*/*")

		resp, err := wp.client.Do(req)
		if err != nil {
			continue // fall through to the next scheme
		}

		// Bodies shorter than the cap end with EOF/ErrUnexpectedEOF; only
		// the byte count matters here, so the read error is ignored.
		buf := make([]byte, bodyLimit)
		measured, _ = io.ReadFull(resp.Body, buf)
		resp.Body.Close()
		break
	}

	result := baseline{
		contentLength: measured,
		fetchedAt:     time.Now(),
	}
	wp.baselines.set(domain, result)
	return result
}

// parseRetryAfter extracts the retry delay from an HTTP response's
// Retry-After header.
//
// Two header forms are understood: a non-negative number of seconds (most
// common) and an HTTP date. defaultBackoff is returned when resp is nil, when
// the header is missing or unparseable, when the seconds value is negative or
// too large to represent as a time.Duration, or when the HTTP date is not in
// the future. The result is therefore never negative.
func parseRetryAfter(resp *http.Response) time.Duration {
	if resp == nil {
		return defaultBackoff
	}
	retryAfter := resp.Header.Get("Retry-After")
	if retryAfter == "" {
		return defaultBackoff
	}
	// Try numeric seconds first (most common)
	if seconds, err := strconv.Atoi(retryAfter); err == nil {
		// Largest whole-second count that still fits in a time.Duration;
		// anything above it would wrap around when multiplied below.
		const maxSeconds = int64(1<<63-1) / int64(time.Second)
		if seconds < 0 || int64(seconds) > maxSeconds {
			// The header is server-controlled: a negative value would
			// turn into an immediate retry and an oversized one into an
			// arbitrary wrapped duration, so treat both as unparseable.
			return defaultBackoff
		}
		return time.Duration(seconds) * time.Second
	}
	// Try HTTP date format
	if t, err := http.ParseTime(retryAfter); err == nil {
		if delay := time.Until(t); delay > 0 {
			return delay
		}
	}
	return defaultBackoff
}

// checkPath probes a single URL path and returns a Finding if credentials are exposed.
// Uses http.NewRequestWithContext for proper cancellation support.
// Implements retry logic for 429/502/503/504 responses with backoff.
//
// The path is requested over HTTPS first and over HTTP if HTTPS yields no
// usable response. Each scheme gets up to maxAttempts requests: a 429 sets the
// global rate limit and waits for the Retry-After delay, a 502/503/504 waits
// defaultBackoff, and any other status ends the probe. Transport errors are
// not retried; they move straight on to the next scheme.
//
// Returns:
//   - a Finding for any response with a non-retryable status; Exposed is set
//     only when a 200 response passes the content-type, HTML and soft 404
//     filters and matches at least one credential pattern
//   - the context error as soon as ctx is cancelled
//   - an error wrapping the last failure when neither scheme produced a
//     usable response
func (wp *workerPool) checkPath(ctx context.Context, domain, path string) (*Finding, error) {
	// Try HTTPS first, fall back to HTTP
	schemes := []string{"https", "http"}

	wp.stats.IncrementInFlight()
	defer wp.stats.DecrementInFlight()

	// backoff waits for d or until ctx is cancelled, whichever comes first.
	// The timer is stopped on the way out so a cancelled wait does not leave
	// it running.
	backoff := func(d time.Duration) error {
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	// lastErr remembers why the most recent attempt failed so the final
	// error can carry the real cause.
	var lastErr error

	for _, scheme := range schemes {
		probeURL := fmt.Sprintf("%s://%s%s", scheme, domain, path)

		for attempt := 1; attempt <= maxAttempts; attempt++ {
			// Wait if globally rate limited (before each attempt)
			if err := wp.rateLimit.WaitIfLimited(ctx); err != nil {
				return nil, err // Context cancelled
			}

			req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
			if err != nil {
				lastErr = err
				break // Request creation failed, try next scheme
			}

			// Set realistic headers to avoid bot detection
			req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
			req.Header.Set("Accept", "*/*")

			resp, err := wp.client.Do(req)
			if err != nil {
				// A cancelled scan must stop here rather than burn a
				// second, equally doomed request on the other scheme.
				if ctxErr := ctx.Err(); ctxErr != nil {
					return nil, ctxErr
				}
				// Connection error - fail immediately, no retry
				lastErr = err
				break // Try next scheme
			}

			// Check for rate limiting / server errors that warrant retry
			switch resp.StatusCode {
			case http.StatusTooManyRequests:
				delay := parseRetryAfter(resp)
				resp.Body.Close()
				wp.stats.IncrementRateLimited()
				wp.rateLimit.SetLimited(delay) // Global rate limit affects all workers
				lastErr = fmt.Errorf("%s: status %d after %d attempts", probeURL, resp.StatusCode, attempt)
				// Sleeping after the final attempt would only delay the
				// failure; the global limit still gates later requests.
				if attempt < maxAttempts {
					if err := backoff(delay); err != nil {
						return nil, err
					}
				}
				continue // Retry, or fall out of the loop when exhausted

			case http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout:
				resp.Body.Close()
				lastErr = fmt.Errorf("%s: status %d after %d attempts", probeURL, resp.StatusCode, attempt)
				// Fixed backoff for server errors, skipped once no
				// further attempt will follow.
				if attempt < maxAttempts {
					if err := backoff(defaultBackoff); err != nil {
						return nil, err
					}
				}
				continue // Retry, or fall out of the loop when exhausted
			}

			// For all other status codes, process the response
			// Read body (limit to 50KB to avoid huge responses).
			// A body shorter than the buffer ends in EOF/ErrUnexpectedEOF,
			// which is expected, so only the byte count is used.
			bodyBuf := make([]byte, 50*1024)
			n, _ := io.ReadFull(resp.Body, bodyBuf)
			resp.Body.Close()
			body := string(bodyBuf[:n])

			finding := &Finding{
				Domain:      domain,
				Path:        path,
				StatusCode:  resp.StatusCode,
				ContentType: resp.Header.Get("Content-Type"),
				FoundAt:     time.Now(),
			}

			// Only analyze 200 OK responses for credential exposure
			if resp.StatusCode == http.StatusOK {
				// Stage 1: Content-Type validation (Issue #3)
				if !isValidContentType(path, resp.Header.Get("Content-Type")) {
					return finding, nil
				}

				// Stage 2: HTML body rejection (Issue #2)
				if containsHTMLIndicators(body) {
					return finding, nil
				}

				// Stage 3: Soft 404 detection (Issue #1)
				// Lazy fetch: the baseline is only needed after passing
				// Stages 1-2. getBaseline consults the cache itself.
				base := wp.getBaseline(ctx, domain)
				if isSoft404(len(body), base) {
					return finding, nil
				}

				// Stage 4: Context-aware pattern matching (Issue #2)
				matched := matchPatternsContextAware(body)
				specificMatched := matchSpecificPatterns(body)
				matched = append(matched, specificMatched...)

				if len(matched) > 0 {
					finding.Exposed = true
					finding.Patterns = matched
					if len(body) > 500 {
						finding.BodySnippet = body[:500]
					} else {
						finding.BodySnippet = body
					}
				}
			}

			return finding, nil
		}
	}

	if lastErr != nil {
		return nil, fmt.Errorf("failed to connect to %s: %w", domain, lastErr)
	}
	// Only reachable if no attempt was ever made.
	return nil, fmt.Errorf("failed to connect to %s", domain)
}

// createHTTPClient builds the HTTP client used for scanning.
//
// The client skips TLS certificate verification, keeps idle connections
// pooled in proportion to cfg.Workers, applies cfg.Timeout to each request,
// and stops following redirects after three hops, handing back the last
// response instead of an error.
func createHTTPClient(cfg Config) *http.Client {
	const maxRedirects = 3

	// limitRedirects follows redirects until maxRedirects requests have
	// already been made, then tells the client to use the last response.
	limitRedirects := func(_ *http.Request, via []*http.Request) error {
		if len(via) < maxRedirects {
			return nil
		}
		return http.ErrUseLastResponse
	}

	return &http.Client{
		Timeout:       cfg.Timeout,
		CheckRedirect: limitRedirects,
		Transport: &http.Transport{
			// Certificate verification is deliberately disabled.
			TLSClientConfig:     &tls.Config{InsecureSkipVerify: true},
			MaxIdleConns:        4 * cfg.Workers,
			MaxIdleConnsPerHost: 2 * cfg.Workers,
			IdleConnTimeout:     30 * time.Second,
			// DisableKeepAlives (false) and MaxConnsPerHost (0, unlimited)
			// are left at their zero values.
		},
	}
}
