package loader

import (
	"encoding/csv"
	"fmt"
	"io"
	"net/url"
	"os"
	"strings"
)
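
// This file relies on helpers defined elsewhere in the loader package:
// LoadResult (the returned summary type), isValidURL, isPrivateOrLocalhost,
// and normalizeURL.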

// CSVConfig configures CSV loading behavior.
type CSVConfig struct {
	URLColumns    []string // Column names to extract URLs from (default: ["external_link", "resolved_url"])
	StatusColumn  string   // Column with status (default: "status")
	ExcludeStatus string   // Status value to skip (default: "Down")
}

// DefaultCSVConfig returns sensible defaults for Product Hunt datasets.
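//
// Callers whose CSVs use different column names can start from the defaults
// and override individual fields. A small sketch (the column names here are
// illustrative, not part of any fixed schema):
//
//	cfg := DefaultCSVConfig()
//	cfg.URLColumns = []string{"website", "landing_page"}
//	cfg.ExcludeStatus = "offline"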
func DefaultCSVConfig() CSVConfig {
	return CSVConfig{
		URLColumns:    []string{"external_link", "resolved_url"},
		StatusColumn:  "status",
		ExcludeStatus: "Down",
	}
}

// CSVLoader loads targets from CSV files (Product Hunt format).
type CSVLoader struct {
	config CSVConfig
}

// NewCSVLoader creates a CSVLoader with the given configuration.
func NewCSVLoader(config CSVConfig) *CSVLoader {
	return &CSVLoader{config: config}
}

// Load reads targets from a CSV file and returns validated, deduplicated URLs.
// Implements the Loader interface.
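//
// A minimal usage sketch (LoadResult exposes Targets together with the
// TotalRows, Filtered, Duplicates, and InvalidURLs counters incremented
// below; the file name is illustrative):
//
//	loader := NewCSVLoader(DefaultCSVConfig())
//	result, err := loader.Load("products.csv")
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("%d targets from %d rows (%d filtered, %d duplicates)\n",
//		len(result.Targets), result.TotalRows, result.Filtered, result.Duplicates)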
func (l *CSVLoader) Load(path string) (*LoadResult, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	// Disable per-record field count checking; some rows may have a variable number of columns.
	reader.FieldsPerRecord = -1

	// Read header row
	header, err := reader.Read()
	if err != nil {
		return nil, fmt.Errorf("read header: %w", err)
	}

	// Find column indices (case-insensitive)
	colIndex := make(map[string]int)
	for i, name := range header {
		colIndex[strings.ToLower(strings.TrimSpace(name))] = i
	}

	// Find URL columns
	var urlColIndices []int
	for _, colName := range l.config.URLColumns {
		if idx, ok := colIndex[strings.ToLower(colName)]; ok {
			urlColIndices = append(urlColIndices, idx)
		}
	}

	// Find status column
	statusColIdx := -1
	if l.config.StatusColumn != "" {
		if idx, ok := colIndex[strings.ToLower(l.config.StatusColumn)]; ok {
			statusColIdx = idx
		}
	}

	result := &LoadResult{}
	seen := make(map[string]bool)

	// Process data rows
	for {
		row, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Malformed row - count it as filtered and continue
			result.Filtered++
			continue
		}

		result.TotalRows++

		// Check status column
		if statusColIdx >= 0 && statusColIdx < len(row) {
			status := strings.TrimSpace(row[statusColIdx])
			if status == l.config.ExcludeStatus {
				result.Filtered++
				continue
			}
		}

		// Extract URL from columns (try in order)
		var rawURL string
		for _, idx := range urlColIndices {
			if idx < len(row) {
				candidate := strings.TrimSpace(row[idx])
				if candidate != "" {
					rawURL = candidate
					break
				}
			}
		}

		if rawURL == "" {
			result.InvalidURLs++
			continue
		}

		// Validate the URL scheme.
		// If the URL parses and already carries a scheme, it must be http or https.
		if u, err := url.Parse(rawURL); err == nil && u.Scheme != "" {
			// URL has a scheme - check if it's http/https
			scheme := strings.ToLower(u.Scheme)
			if scheme != "http" && scheme != "https" {
				// Non-HTTP scheme (ftp://, file://, etc) - invalid
				result.InvalidURLs++
				continue
			}
		} else if !isValidURL(rawURL) {
			// Parse failed or no scheme - try prepending https://
			withScheme := "https://" + rawURL
			if isValidURL(withScheme) {
				rawURL = withScheme
			} else {
				result.InvalidURLs++
				continue
			}
		}

		// Parse URL to extract host
		u, err := url.Parse(rawURL)
		if err != nil {
			result.InvalidURLs++
			continue
		}

		// Filter private IPs and localhost
		if isPrivateOrLocalhost(u.Host) {
			result.Filtered++
			continue
		}

		// Normalize for deduplication
		normalized := normalizeURL(rawURL)

		// Deduplicate
		if seen[normalized] {
			result.Duplicates++
			continue
		}

		seen[normalized] = true
		result.Targets = append(result.Targets, normalized)
	}

	return result, nil
}
