Files
Poll-system/app/internal/handlers/admin_csv_upload.go

392 lines
10 KiB
Go
Raw Permalink Normal View History

2025-08-28 23:27:24 -06:00
package handlers
import (
"encoding/csv"
"fmt"
"io"
"log"
"net/http"
2025-09-03 14:35:47 -06:00
"sort"
2025-08-28 23:27:24 -06:00
"strconv"
"strings"
"github.com/patel-mann/poll-system/app/internal/models"
"github.com/patel-mann/poll-system/app/internal/utils"
)
2025-09-03 14:35:47 -06:00
// AddressMatch represents a potential address match with similarity score
type AddressMatch struct {
AddressID int
Address string
CurrentStatus bool
SimilarityScore float64
}
2025-08-28 23:27:24 -06:00
// CSVUploadResult holds the results of CSV processing
type CSVUploadResult struct {
2025-09-03 14:35:47 -06:00
TotalRecords int
ValidatedCount int
NotFoundCount int
ErrorCount int
2025-08-28 23:27:24 -06:00
ValidatedAddresses []string
NotFoundAddresses []string
2025-09-03 14:35:47 -06:00
ErrorMessages []string
FuzzyMatches []string // New field for fuzzy matches
2025-08-28 23:27:24 -06:00
}
// Combined CSV Upload Handler - handles both GET (display form) and POST (process CSV)
func CSVUploadHandler(w http.ResponseWriter, r *http.Request) {
username, _ := models.GetCurrentUserName(r)
// Base template data
templateData := map[string]interface{}{
"Title": "CSV Address Validation",
"IsAuthenticated": true,
"ShowAdminNav": true,
"ActiveSection": "address",
"UserName": username,
"Role": "admin",
}
// Handle GET request - show the upload form
if r.Method == http.MethodGet {
utils.Render(w, "csv-upload.html", templateData)
return
}
// Handle POST request - process the CSV
if r.Method == http.MethodPost {
// Parse multipart form (10MB max)
err := r.ParseMultipartForm(10 << 20)
if err != nil {
http.Error(w, "Error parsing form: "+err.Error(), http.StatusBadRequest)
return
}
// Get form values
addressColumnStr := r.FormValue("address_column")
if addressColumnStr == "" {
templateData["Error"] = "Address column is required"
utils.Render(w, "csv-upload.html", templateData)
return
}
addressColumn, err := strconv.Atoi(addressColumnStr)
if err != nil {
templateData["Error"] = "Invalid address column index"
utils.Render(w, "csv-upload.html", templateData)
return
}
2025-09-03 14:35:47 -06:00
// Get similarity threshold (optional, default to 0.8)
similarityThresholdStr := r.FormValue("similarity_threshold")
similarityThreshold := 0.8 // Default threshold
if similarityThresholdStr != "" {
if threshold, err := strconv.ParseFloat(similarityThresholdStr, 64); err == nil {
if threshold >= 0.0 && threshold <= 1.0 {
similarityThreshold = threshold
}
}
}
2025-08-28 23:27:24 -06:00
// Get uploaded file
file, header, err := r.FormFile("csv_file")
if err != nil {
templateData["Error"] = "Error retrieving file: " + err.Error()
utils.Render(w, "csv-upload.html", templateData)
return
}
defer file.Close()
// Validate file type
if !strings.HasSuffix(strings.ToLower(header.Filename), ".csv") {
templateData["Error"] = "Please upload a CSV file"
utils.Render(w, "csv-upload.html", templateData)
return
}
// Read and parse CSV
reader := csv.NewReader(file)
reader.FieldsPerRecord = -1 // Allow variable number of fields
var allRows [][]string
for {
record, err := reader.Read()
if err == io.EOF {
break
}
if err != nil {
templateData["Error"] = "Error reading CSV: " + err.Error()
utils.Render(w, "csv-upload.html", templateData)
return
}
allRows = append(allRows, record)
}
if len(allRows) <= 1 {
templateData["Error"] = "CSV file must contain data rows (header + at least 1 data row)"
utils.Render(w, "csv-upload.html", templateData)
return
}
// Validate address column index
if addressColumn >= len(allRows[0]) {
templateData["Error"] = "Invalid address column index"
utils.Render(w, "csv-upload.html", templateData)
return
}
2025-09-03 14:35:47 -06:00
// Process addresses with fuzzy matching
result := processAddressValidationWithFuzzyMatching(allRows[1:], addressColumn, similarityThreshold)
2025-08-28 23:27:24 -06:00
// Add result to template data
templateData["Result"] = result
templateData["FileName"] = header.Filename
2025-09-03 14:35:47 -06:00
templateData["SimilarityThreshold"] = similarityThreshold
2025-08-28 23:27:24 -06:00
// Render the same template with results
utils.Render(w, "csv-upload.html", templateData)
return
}
// Method not allowed
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
}
2025-09-03 14:35:47 -06:00
// processAddressValidationWithFuzzyMatching processes CSV data with fuzzy string matching
func processAddressValidationWithFuzzyMatching(rows [][]string, addressColumn int, threshold float64) CSVUploadResult {
2025-08-28 23:27:24 -06:00
result := CSVUploadResult{
TotalRecords: len(rows),
}
2025-09-03 14:35:47 -06:00
// Pre-load all addresses from database for fuzzy matching
dbAddresses, err := loadAllAddressesFromDB()
if err != nil {
result.ErrorCount = len(rows)
result.ErrorMessages = append(result.ErrorMessages, "Failed to load addresses from database: "+err.Error())
return result
}
2025-08-28 23:27:24 -06:00
for i, row := range rows {
// Check if the row has enough columns
if addressColumn >= len(row) {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: Missing address column", i+2))
continue
}
2025-09-03 14:35:47 -06:00
// Get and normalize address from CSV
csvAddress := normalizeAddress(row[addressColumn])
if csvAddress == "" {
2025-08-28 23:27:24 -06:00
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: Empty address", i+2))
continue
}
2025-09-03 14:35:47 -06:00
// Find best matches using fuzzy string matching
matches := findBestMatches(csvAddress, dbAddresses, 5) // Get top 5 matches
2025-08-28 23:27:24 -06:00
2025-09-03 14:35:47 -06:00
if len(matches) == 0 {
2025-08-28 23:27:24 -06:00
result.NotFoundCount++
2025-09-03 14:35:47 -06:00
result.NotFoundAddresses = append(result.NotFoundAddresses, csvAddress)
2025-08-28 23:27:24 -06:00
continue
}
2025-09-03 14:35:47 -06:00
// Get the best match
bestMatch := matches[0]
// Check if the best match meets our similarity threshold
if bestMatch.SimilarityScore < threshold {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: No good match found for '%s' (best match: '%s' with score %.2f, threshold: %.2f)",
i+2, csvAddress, bestMatch.Address, bestMatch.SimilarityScore, threshold))
continue
}
// Update validation status if not already validated
if !bestMatch.CurrentStatus {
err = updateAddressValidation(bestMatch.AddressID)
2025-08-28 23:27:24 -06:00
if err != nil {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
2025-09-03 14:35:47 -06:00
fmt.Sprintf("Row %d: Database update error for address '%s'", i+2, csvAddress))
log.Printf("Error updating address %d: %v", bestMatch.AddressID, err)
2025-08-28 23:27:24 -06:00
continue
}
result.ValidatedCount++
2025-09-03 14:35:47 -06:00
matchInfo := fmt.Sprintf("%s → %s (score: %.2f)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
2025-08-28 23:27:24 -06:00
} else {
2025-09-03 14:35:47 -06:00
// Address was already validated
2025-08-28 23:27:24 -06:00
result.ValidatedCount++
2025-09-03 14:35:47 -06:00
matchInfo := fmt.Sprintf("%s → %s (score: %.2f, already validated)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
}
// Add fuzzy match info if it's not an exact match
if bestMatch.SimilarityScore < 1.0 {
fuzzyInfo := fmt.Sprintf("CSV: '%s' matched to DB: '%s' (similarity: %.2f)",
csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.FuzzyMatches = append(result.FuzzyMatches, fuzzyInfo)
2025-08-28 23:27:24 -06:00
}
}
return result
}
2025-09-03 14:35:47 -06:00
// normalizeAddress trims spaces and converts to lowercase
func normalizeAddress(address string) string {
return strings.ToLower(strings.TrimSpace(address))
}
// loadAllAddressesFromDB loads all addresses from the database for fuzzy matching
func loadAllAddressesFromDB() ([]AddressMatch, error) {
2025-08-28 23:27:24 -06:00
rows, err := models.DB.Query(`
2025-09-03 14:35:47 -06:00
SELECT address_id, address, visited_validated
FROM address_database
2025-08-28 23:27:24 -06:00
`)
if err != nil {
2025-09-03 14:35:47 -06:00
return nil, err
2025-08-28 23:27:24 -06:00
}
defer rows.Close()
2025-09-03 14:35:47 -06:00
var addresses []AddressMatch
for rows.Next() {
var match AddressMatch
var rawAddress string
err := rows.Scan(&match.AddressID, &rawAddress, &match.CurrentStatus)
if err != nil {
log.Printf("Error scanning address row: %v", err)
continue
}
// Normalize the address from database
match.Address = normalizeAddress(rawAddress)
addresses = append(addresses, match)
}
return addresses, rows.Err()
}
// findBestMatches finds the top N best matches for a given address
func findBestMatches(csvAddress string, dbAddresses []AddressMatch, topN int) []AddressMatch {
// Calculate similarity scores for all addresses
var matches []AddressMatch
for _, dbAddr := range dbAddresses {
score := calculateSimilarity(csvAddress, dbAddr.Address)
match := AddressMatch{
AddressID: dbAddr.AddressID,
Address: dbAddr.Address,
CurrentStatus: dbAddr.CurrentStatus,
SimilarityScore: score,
}
matches = append(matches, match)
}
// Sort by similarity score (descending)
sort.Slice(matches, func(i, j int) bool {
return matches[i].SimilarityScore > matches[j].SimilarityScore
})
// Return top N matches
if len(matches) > topN {
return matches[:topN]
}
return matches
}
// calculateSimilarity calculates Levenshtein distance-based similarity score
func calculateSimilarity(s1, s2 string) float64 {
if s1 == s2 {
return 1.0
}
distance := levenshteinDistance(s1, s2)
maxLen := max(len(s1), len(s2))
if maxLen == 0 {
return 1.0
}
similarity := 1.0 - float64(distance)/float64(maxLen)
return max(0.0, similarity)
}
// levenshteinDistance calculates the Levenshtein distance between two strings
func levenshteinDistance(s1, s2 string) int {
if len(s1) == 0 {
return len(s2)
}
if len(s2) == 0 {
return len(s1)
}
// Create a matrix
matrix := make([][]int, len(s1)+1)
for i := range matrix {
matrix[i] = make([]int, len(s2)+1)
}
// Initialize first row and column
for i := 0; i <= len(s1); i++ {
matrix[i][0] = i
}
for j := 0; j <= len(s2); j++ {
matrix[0][j] = j
2025-08-28 23:27:24 -06:00
}
2025-09-03 14:35:47 -06:00
// Fill the matrix
for i := 1; i <= len(s1); i++ {
for j := 1; j <= len(s2); j++ {
cost := 0
if s1[i-1] != s2[j-1] {
cost = 1
}
matrix[i][j] = min(
matrix[i-1][j]+1, // deletion
matrix[i][j-1]+1, // insertion
matrix[i-1][j-1]+cost, // substitution
)
}
}
return matrix[len(s1)][len(s2)]
}
// updateAddressValidation updates an address validation status
func updateAddressValidation(addressID int) error {
_, err := models.DB.Exec(`
UPDATE address_database
SET visited_validated = true, updated_at = NOW()
WHERE address_id = $1
`, addressID)
return err
}
// Helper functions for different types
func minInt(a, b int) int {
if a < b {
return a
}
return b
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxFloat64(a, b float64) float64 {
if a > b {
return a
}
return b
2025-08-28 23:27:24 -06:00
}