392 lines
10 KiB
Go
392 lines
10 KiB
Go
package handlers
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/patel-mann/poll-system/app/internal/models"
|
|
"github.com/patel-mann/poll-system/app/internal/utils"
|
|
)
|
|
|
|
// AddressMatch represents a potential address match with similarity score
|
|
type AddressMatch struct {
|
|
AddressID int
|
|
Address string
|
|
CurrentStatus bool
|
|
SimilarityScore float64
|
|
}
|
|
|
|
// CSVUploadResult holds the results of CSV processing
|
|
type CSVUploadResult struct {
|
|
TotalRecords int
|
|
ValidatedCount int
|
|
NotFoundCount int
|
|
ErrorCount int
|
|
ValidatedAddresses []string
|
|
NotFoundAddresses []string
|
|
ErrorMessages []string
|
|
FuzzyMatches []string // New field for fuzzy matches
|
|
}
|
|
|
|
// Combined CSV Upload Handler - handles both GET (display form) and POST (process CSV)
|
|
func CSVUploadHandler(w http.ResponseWriter, r *http.Request) {
|
|
username, _ := models.GetCurrentUserName(r)
|
|
|
|
// Base template data
|
|
templateData := map[string]interface{}{
|
|
"Title": "CSV Address Validation",
|
|
"IsAuthenticated": true,
|
|
"ShowAdminNav": true,
|
|
"ActiveSection": "address",
|
|
"UserName": username,
|
|
"Role": "admin",
|
|
}
|
|
|
|
// Handle GET request - show the upload form
|
|
if r.Method == http.MethodGet {
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Handle POST request - process the CSV
|
|
if r.Method == http.MethodPost {
|
|
// Parse multipart form (10MB max)
|
|
err := r.ParseMultipartForm(10 << 20)
|
|
if err != nil {
|
|
http.Error(w, "Error parsing form: "+err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// Get form values
|
|
addressColumnStr := r.FormValue("address_column")
|
|
if addressColumnStr == "" {
|
|
templateData["Error"] = "Address column is required"
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
addressColumn, err := strconv.Atoi(addressColumnStr)
|
|
if err != nil {
|
|
templateData["Error"] = "Invalid address column index"
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Get similarity threshold (optional, default to 0.8)
|
|
similarityThresholdStr := r.FormValue("similarity_threshold")
|
|
similarityThreshold := 0.8 // Default threshold
|
|
if similarityThresholdStr != "" {
|
|
if threshold, err := strconv.ParseFloat(similarityThresholdStr, 64); err == nil {
|
|
if threshold >= 0.0 && threshold <= 1.0 {
|
|
similarityThreshold = threshold
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get uploaded file
|
|
file, header, err := r.FormFile("csv_file")
|
|
if err != nil {
|
|
templateData["Error"] = "Error retrieving file: " + err.Error()
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
defer file.Close()
|
|
|
|
// Validate file type
|
|
if !strings.HasSuffix(strings.ToLower(header.Filename), ".csv") {
|
|
templateData["Error"] = "Please upload a CSV file"
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Read and parse CSV
|
|
reader := csv.NewReader(file)
|
|
reader.FieldsPerRecord = -1 // Allow variable number of fields
|
|
|
|
var allRows [][]string
|
|
for {
|
|
record, err := reader.Read()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
templateData["Error"] = "Error reading CSV: " + err.Error()
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
allRows = append(allRows, record)
|
|
}
|
|
|
|
if len(allRows) <= 1 {
|
|
templateData["Error"] = "CSV file must contain data rows (header + at least 1 data row)"
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Validate address column index
|
|
if addressColumn >= len(allRows[0]) {
|
|
templateData["Error"] = "Invalid address column index"
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Process addresses with fuzzy matching
|
|
result := processAddressValidationWithFuzzyMatching(allRows[1:], addressColumn, similarityThreshold)
|
|
|
|
// Add result to template data
|
|
templateData["Result"] = result
|
|
templateData["FileName"] = header.Filename
|
|
templateData["SimilarityThreshold"] = similarityThreshold
|
|
|
|
// Render the same template with results
|
|
utils.Render(w, "csv-upload.html", templateData)
|
|
return
|
|
}
|
|
|
|
// Method not allowed
|
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
|
}
|
|
|
|
// processAddressValidationWithFuzzyMatching processes CSV data with fuzzy string matching
|
|
func processAddressValidationWithFuzzyMatching(rows [][]string, addressColumn int, threshold float64) CSVUploadResult {
|
|
result := CSVUploadResult{
|
|
TotalRecords: len(rows),
|
|
}
|
|
|
|
// Pre-load all addresses from database for fuzzy matching
|
|
dbAddresses, err := loadAllAddressesFromDB()
|
|
if err != nil {
|
|
result.ErrorCount = len(rows)
|
|
result.ErrorMessages = append(result.ErrorMessages, "Failed to load addresses from database: "+err.Error())
|
|
return result
|
|
}
|
|
|
|
for i, row := range rows {
|
|
// Check if the row has enough columns
|
|
if addressColumn >= len(row) {
|
|
result.ErrorCount++
|
|
result.ErrorMessages = append(result.ErrorMessages,
|
|
fmt.Sprintf("Row %d: Missing address column", i+2))
|
|
continue
|
|
}
|
|
|
|
// Get and normalize address from CSV
|
|
csvAddress := normalizeAddress(row[addressColumn])
|
|
if csvAddress == "" {
|
|
result.ErrorCount++
|
|
result.ErrorMessages = append(result.ErrorMessages,
|
|
fmt.Sprintf("Row %d: Empty address", i+2))
|
|
continue
|
|
}
|
|
|
|
// Find best matches using fuzzy string matching
|
|
matches := findBestMatches(csvAddress, dbAddresses, 5) // Get top 5 matches
|
|
|
|
if len(matches) == 0 {
|
|
result.NotFoundCount++
|
|
result.NotFoundAddresses = append(result.NotFoundAddresses, csvAddress)
|
|
continue
|
|
}
|
|
|
|
// Get the best match
|
|
bestMatch := matches[0]
|
|
|
|
// Check if the best match meets our similarity threshold
|
|
if bestMatch.SimilarityScore < threshold {
|
|
result.ErrorCount++
|
|
result.ErrorMessages = append(result.ErrorMessages,
|
|
fmt.Sprintf("Row %d: No good match found for '%s' (best match: '%s' with score %.2f, threshold: %.2f)",
|
|
i+2, csvAddress, bestMatch.Address, bestMatch.SimilarityScore, threshold))
|
|
continue
|
|
}
|
|
|
|
// Update validation status if not already validated
|
|
if !bestMatch.CurrentStatus {
|
|
err = updateAddressValidation(bestMatch.AddressID)
|
|
if err != nil {
|
|
result.ErrorCount++
|
|
result.ErrorMessages = append(result.ErrorMessages,
|
|
fmt.Sprintf("Row %d: Database update error for address '%s'", i+2, csvAddress))
|
|
log.Printf("Error updating address %d: %v", bestMatch.AddressID, err)
|
|
continue
|
|
}
|
|
|
|
result.ValidatedCount++
|
|
matchInfo := fmt.Sprintf("%s → %s (score: %.2f)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
|
|
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
|
|
} else {
|
|
// Address was already validated
|
|
result.ValidatedCount++
|
|
matchInfo := fmt.Sprintf("%s → %s (score: %.2f, already validated)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
|
|
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
|
|
}
|
|
|
|
// Add fuzzy match info if it's not an exact match
|
|
if bestMatch.SimilarityScore < 1.0 {
|
|
fuzzyInfo := fmt.Sprintf("CSV: '%s' matched to DB: '%s' (similarity: %.2f)",
|
|
csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
|
|
result.FuzzyMatches = append(result.FuzzyMatches, fuzzyInfo)
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// normalizeAddress trims spaces and converts to lowercase
|
|
func normalizeAddress(address string) string {
|
|
return strings.ToLower(strings.TrimSpace(address))
|
|
}
|
|
|
|
// loadAllAddressesFromDB loads all addresses from the database for fuzzy matching
|
|
func loadAllAddressesFromDB() ([]AddressMatch, error) {
|
|
rows, err := models.DB.Query(`
|
|
SELECT address_id, address, visited_validated
|
|
FROM address_database
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var addresses []AddressMatch
|
|
for rows.Next() {
|
|
var match AddressMatch
|
|
var rawAddress string
|
|
|
|
err := rows.Scan(&match.AddressID, &rawAddress, &match.CurrentStatus)
|
|
if err != nil {
|
|
log.Printf("Error scanning address row: %v", err)
|
|
continue
|
|
}
|
|
|
|
// Normalize the address from database
|
|
match.Address = normalizeAddress(rawAddress)
|
|
addresses = append(addresses, match)
|
|
}
|
|
|
|
return addresses, rows.Err()
|
|
}
|
|
|
|
// findBestMatches finds the top N best matches for a given address
|
|
func findBestMatches(csvAddress string, dbAddresses []AddressMatch, topN int) []AddressMatch {
|
|
// Calculate similarity scores for all addresses
|
|
var matches []AddressMatch
|
|
for _, dbAddr := range dbAddresses {
|
|
score := calculateSimilarity(csvAddress, dbAddr.Address)
|
|
match := AddressMatch{
|
|
AddressID: dbAddr.AddressID,
|
|
Address: dbAddr.Address,
|
|
CurrentStatus: dbAddr.CurrentStatus,
|
|
SimilarityScore: score,
|
|
}
|
|
matches = append(matches, match)
|
|
}
|
|
|
|
// Sort by similarity score (descending)
|
|
sort.Slice(matches, func(i, j int) bool {
|
|
return matches[i].SimilarityScore > matches[j].SimilarityScore
|
|
})
|
|
|
|
// Return top N matches
|
|
if len(matches) > topN {
|
|
return matches[:topN]
|
|
}
|
|
return matches
|
|
}
|
|
|
|
// calculateSimilarity calculates Levenshtein distance-based similarity score
|
|
func calculateSimilarity(s1, s2 string) float64 {
|
|
if s1 == s2 {
|
|
return 1.0
|
|
}
|
|
|
|
distance := levenshteinDistance(s1, s2)
|
|
maxLen := max(len(s1), len(s2))
|
|
|
|
if maxLen == 0 {
|
|
return 1.0
|
|
}
|
|
|
|
similarity := 1.0 - float64(distance)/float64(maxLen)
|
|
return max(0.0, similarity)
|
|
}
|
|
|
|
// levenshteinDistance calculates the Levenshtein distance between two strings
|
|
func levenshteinDistance(s1, s2 string) int {
|
|
if len(s1) == 0 {
|
|
return len(s2)
|
|
}
|
|
if len(s2) == 0 {
|
|
return len(s1)
|
|
}
|
|
|
|
// Create a matrix
|
|
matrix := make([][]int, len(s1)+1)
|
|
for i := range matrix {
|
|
matrix[i] = make([]int, len(s2)+1)
|
|
}
|
|
|
|
// Initialize first row and column
|
|
for i := 0; i <= len(s1); i++ {
|
|
matrix[i][0] = i
|
|
}
|
|
for j := 0; j <= len(s2); j++ {
|
|
matrix[0][j] = j
|
|
}
|
|
|
|
// Fill the matrix
|
|
for i := 1; i <= len(s1); i++ {
|
|
for j := 1; j <= len(s2); j++ {
|
|
cost := 0
|
|
if s1[i-1] != s2[j-1] {
|
|
cost = 1
|
|
}
|
|
|
|
matrix[i][j] = min(
|
|
matrix[i-1][j]+1, // deletion
|
|
matrix[i][j-1]+1, // insertion
|
|
matrix[i-1][j-1]+cost, // substitution
|
|
)
|
|
}
|
|
}
|
|
|
|
return matrix[len(s1)][len(s2)]
|
|
}
|
|
|
|
// updateAddressValidation updates an address validation status
|
|
func updateAddressValidation(addressID int) error {
|
|
_, err := models.DB.Exec(`
|
|
UPDATE address_database
|
|
SET visited_validated = true, updated_at = NOW()
|
|
WHERE address_id = $1
|
|
`, addressID)
|
|
return err
|
|
}
|
|
|
|
// Helper functions for different types
|
|
func minInt(a, b int) int {
|
|
if a < b {
|
|
return a
|
|
}
|
|
return b
|
|
}
|
|
|
|
func maxInt(a, b int) int {
|
|
if a > b {
|
|
return a
|
|
}
|
|
return b
|
|
}
|
|
|
|
func maxFloat64(a, b float64) float64 {
|
|
if a > b {
|
|
return a
|
|
}
|
|
return b
|
|
} |