package handlers import ( "encoding/csv" "fmt" "io" "log" "net/http" "sort" "strconv" "strings" "github.com/patel-mann/poll-system/app/internal/models" "github.com/patel-mann/poll-system/app/internal/utils" ) // AddressMatch represents a potential address match with similarity score type AddressMatch struct { AddressID int Address string CurrentStatus bool SimilarityScore float64 } // CSVUploadResult holds the results of CSV processing type CSVUploadResult struct { TotalRecords int ValidatedCount int NotFoundCount int ErrorCount int ValidatedAddresses []string NotFoundAddresses []string ErrorMessages []string FuzzyMatches []string // New field for fuzzy matches } // Combined CSV Upload Handler - handles both GET (display form) and POST (process CSV) func CSVUploadHandler(w http.ResponseWriter, r *http.Request) { username, _ := models.GetCurrentUserName(r) // Base template data templateData := map[string]interface{}{ "Title": "CSV Address Validation", "IsAuthenticated": true, "ShowAdminNav": true, "ActiveSection": "address", "UserName": username, "Role": "admin", } // Handle GET request - show the upload form if r.Method == http.MethodGet { utils.Render(w, "csv-upload.html", templateData) return } // Handle POST request - process the CSV if r.Method == http.MethodPost { // Parse multipart form (10MB max) err := r.ParseMultipartForm(10 << 20) if err != nil { http.Error(w, "Error parsing form: "+err.Error(), http.StatusBadRequest) return } // Get form values addressColumnStr := r.FormValue("address_column") if addressColumnStr == "" { templateData["Error"] = "Address column is required" utils.Render(w, "csv-upload.html", templateData) return } addressColumn, err := strconv.Atoi(addressColumnStr) if err != nil { templateData["Error"] = "Invalid address column index" utils.Render(w, "csv-upload.html", templateData) return } // Get similarity threshold (optional, default to 0.8) similarityThresholdStr := r.FormValue("similarity_threshold") similarityThreshold := 0.8 // Default threshold if similarityThresholdStr != "" { if threshold, err := strconv.ParseFloat(similarityThresholdStr, 64); err == nil { if threshold >= 0.0 && threshold <= 1.0 { similarityThreshold = threshold } } } // Get uploaded file file, header, err := r.FormFile("csv_file") if err != nil { templateData["Error"] = "Error retrieving file: " + err.Error() utils.Render(w, "csv-upload.html", templateData) return } defer file.Close() // Validate file type if !strings.HasSuffix(strings.ToLower(header.Filename), ".csv") { templateData["Error"] = "Please upload a CSV file" utils.Render(w, "csv-upload.html", templateData) return } // Read and parse CSV reader := csv.NewReader(file) reader.FieldsPerRecord = -1 // Allow variable number of fields var allRows [][]string for { record, err := reader.Read() if err == io.EOF { break } if err != nil { templateData["Error"] = "Error reading CSV: " + err.Error() utils.Render(w, "csv-upload.html", templateData) return } allRows = append(allRows, record) } if len(allRows) <= 1 { templateData["Error"] = "CSV file must contain data rows (header + at least 1 data row)" utils.Render(w, "csv-upload.html", templateData) return } // Validate address column index if addressColumn >= len(allRows[0]) { templateData["Error"] = "Invalid address column index" utils.Render(w, "csv-upload.html", templateData) return } // Process addresses with fuzzy matching result := processAddressValidationWithFuzzyMatching(allRows[1:], addressColumn, similarityThreshold) // Add result to template data templateData["Result"] = result templateData["FileName"] = header.Filename templateData["SimilarityThreshold"] = similarityThreshold // Render the same template with results utils.Render(w, "csv-upload.html", templateData) return } // Method not allowed http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) } // processAddressValidationWithFuzzyMatching processes CSV data with fuzzy string matching func processAddressValidationWithFuzzyMatching(rows [][]string, addressColumn int, threshold float64) CSVUploadResult { result := CSVUploadResult{ TotalRecords: len(rows), } // Pre-load all addresses from database for fuzzy matching dbAddresses, err := loadAllAddressesFromDB() if err != nil { result.ErrorCount = len(rows) result.ErrorMessages = append(result.ErrorMessages, "Failed to load addresses from database: "+err.Error()) return result } for i, row := range rows { // Check if the row has enough columns if addressColumn >= len(row) { result.ErrorCount++ result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("Row %d: Missing address column", i+2)) continue } // Get and normalize address from CSV csvAddress := normalizeAddress(row[addressColumn]) if csvAddress == "" { result.ErrorCount++ result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("Row %d: Empty address", i+2)) continue } // Find best matches using fuzzy string matching matches := findBestMatches(csvAddress, dbAddresses, 5) // Get top 5 matches if len(matches) == 0 { result.NotFoundCount++ result.NotFoundAddresses = append(result.NotFoundAddresses, csvAddress) continue } // Get the best match bestMatch := matches[0] // Check if the best match meets our similarity threshold if bestMatch.SimilarityScore < threshold { result.ErrorCount++ result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("Row %d: No good match found for '%s' (best match: '%s' with score %.2f, threshold: %.2f)", i+2, csvAddress, bestMatch.Address, bestMatch.SimilarityScore, threshold)) continue } // Update validation status if not already validated if !bestMatch.CurrentStatus { err = updateAddressValidation(bestMatch.AddressID) if err != nil { result.ErrorCount++ result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("Row %d: Database update error for address '%s'", i+2, csvAddress)) log.Printf("Error updating address %d: %v", bestMatch.AddressID, err) continue } result.ValidatedCount++ matchInfo := fmt.Sprintf("%s → %s (score: %.2f)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore) result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo) } else { // Address was already validated result.ValidatedCount++ matchInfo := fmt.Sprintf("%s → %s (score: %.2f, already validated)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore) result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo) } // Add fuzzy match info if it's not an exact match if bestMatch.SimilarityScore < 1.0 { fuzzyInfo := fmt.Sprintf("CSV: '%s' matched to DB: '%s' (similarity: %.2f)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore) result.FuzzyMatches = append(result.FuzzyMatches, fuzzyInfo) } } return result } // normalizeAddress trims spaces and converts to lowercase func normalizeAddress(address string) string { return strings.ToLower(strings.TrimSpace(address)) } // loadAllAddressesFromDB loads all addresses from the database for fuzzy matching func loadAllAddressesFromDB() ([]AddressMatch, error) { rows, err := models.DB.Query(` SELECT address_id, address, visited_validated FROM address_database `) if err != nil { return nil, err } defer rows.Close() var addresses []AddressMatch for rows.Next() { var match AddressMatch var rawAddress string err := rows.Scan(&match.AddressID, &rawAddress, &match.CurrentStatus) if err != nil { log.Printf("Error scanning address row: %v", err) continue } // Normalize the address from database match.Address = normalizeAddress(rawAddress) addresses = append(addresses, match) } return addresses, rows.Err() } // findBestMatches finds the top N best matches for a given address func findBestMatches(csvAddress string, dbAddresses []AddressMatch, topN int) []AddressMatch { // Calculate similarity scores for all addresses var matches []AddressMatch for _, dbAddr := range dbAddresses { score := calculateSimilarity(csvAddress, dbAddr.Address) match := AddressMatch{ AddressID: dbAddr.AddressID, Address: dbAddr.Address, CurrentStatus: dbAddr.CurrentStatus, SimilarityScore: score, } matches = append(matches, match) } // Sort by similarity score (descending) sort.Slice(matches, func(i, j int) bool { return matches[i].SimilarityScore > matches[j].SimilarityScore }) // Return top N matches if len(matches) > topN { return matches[:topN] } return matches } // calculateSimilarity calculates Levenshtein distance-based similarity score func calculateSimilarity(s1, s2 string) float64 { if s1 == s2 { return 1.0 } distance := levenshteinDistance(s1, s2) maxLen := max(len(s1), len(s2)) if maxLen == 0 { return 1.0 } similarity := 1.0 - float64(distance)/float64(maxLen) return max(0.0, similarity) } // levenshteinDistance calculates the Levenshtein distance between two strings func levenshteinDistance(s1, s2 string) int { if len(s1) == 0 { return len(s2) } if len(s2) == 0 { return len(s1) } // Create a matrix matrix := make([][]int, len(s1)+1) for i := range matrix { matrix[i] = make([]int, len(s2)+1) } // Initialize first row and column for i := 0; i <= len(s1); i++ { matrix[i][0] = i } for j := 0; j <= len(s2); j++ { matrix[0][j] = j } // Fill the matrix for i := 1; i <= len(s1); i++ { for j := 1; j <= len(s2); j++ { cost := 0 if s1[i-1] != s2[j-1] { cost = 1 } matrix[i][j] = min( matrix[i-1][j]+1, // deletion matrix[i][j-1]+1, // insertion matrix[i-1][j-1]+cost, // substitution ) } } return matrix[len(s1)][len(s2)] } // updateAddressValidation updates an address validation status func updateAddressValidation(addressID int) error { _, err := models.DB.Exec(` UPDATE address_database SET visited_validated = true, updated_at = NOW() WHERE address_id = $1 `, addressID) return err } // Helper functions for different types func minInt(a, b int) int { if a < b { return a } return b } func maxInt(a, b int) int { if a > b { return a } return b } func maxFloat64(a, b float64) float64 { if a > b { return a } return b }