CSV Import is now working

This commit is contained in:
Mann Patel
2025-09-03 14:35:47 -06:00
parent 7f2b7e481a
commit 86d733e80e
20 changed files with 2160 additions and 1626 deletions

View File

@@ -6,23 +6,32 @@ import (
"io"
"log"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/patel-mann/poll-system/app/internal/models"
"github.com/patel-mann/poll-system/app/internal/utils"
)
// AddressMatch represents a potential address match with similarity score.
// One value describes a single address_database row considered as a
// candidate for an address read from an uploaded CSV file.
type AddressMatch struct {
// AddressID is the address_id of the database row; it is the key passed to
// updateAddressValidation when the match is accepted.
AddressID int
// Address is the database address text after normalizeAddress has been
// applied (trimmed and lower-cased), i.e. the string that is compared
// against the CSV value.
Address string
// CurrentStatus mirrors the row's visited_validated column at load time;
// true means the address was already validated.
CurrentStatus bool
// SimilarityScore is the value returned by calculateSimilarity for the CSV
// address and Address. It is left at its zero value until findBestMatches
// fills it in.
SimilarityScore float64
}
// CSVUploadResult holds the results of CSV processing
type CSVUploadResult struct {
TotalRecords int
ValidatedCount int
NotFoundCount int
ErrorCount int
TotalRecords int
ValidatedCount int
NotFoundCount int
ErrorCount int
ValidatedAddresses []string
NotFoundAddresses []string
ErrorMessages []string
ErrorMessages []string
FuzzyMatches []string // New field for fuzzy matches
}
// Combined CSV Upload Handler - handles both GET (display form) and POST (process CSV)
@@ -69,6 +78,17 @@ func CSVUploadHandler(w http.ResponseWriter, r *http.Request) {
return
}
// Get similarity threshold (optional, default to 0.8)
similarityThresholdStr := r.FormValue("similarity_threshold")
similarityThreshold := 0.8 // Default threshold
if similarityThresholdStr != "" {
if threshold, err := strconv.ParseFloat(similarityThresholdStr, 64); err == nil {
if threshold >= 0.0 && threshold <= 1.0 {
similarityThreshold = threshold
}
}
}
// Get uploaded file
file, header, err := r.FormFile("csv_file")
if err != nil {
@@ -116,12 +136,13 @@ func CSVUploadHandler(w http.ResponseWriter, r *http.Request) {
return
}
// Process addresses
result := processAddressValidation(allRows[1:], addressColumn) // Skip header
// Process addresses with fuzzy matching
result := processAddressValidationWithFuzzyMatching(allRows[1:], addressColumn, similarityThreshold)
// Add result to template data
templateData["Result"] = result
templateData["FileName"] = header.Filename
templateData["SimilarityThreshold"] = similarityThreshold
// Render the same template with results
utils.Render(w, "csv-upload.html", templateData)
@@ -132,12 +153,20 @@ func CSVUploadHandler(w http.ResponseWriter, r *http.Request) {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
}
// processAddressValidation processes CSV data and validates addresses
func processAddressValidation(rows [][]string, addressColumn int) CSVUploadResult {
// processAddressValidationWithFuzzyMatching processes CSV data with fuzzy string matching
func processAddressValidationWithFuzzyMatching(rows [][]string, addressColumn int, threshold float64) CSVUploadResult {
result := CSVUploadResult{
TotalRecords: len(rows),
}
// Pre-load all addresses from database for fuzzy matching
dbAddresses, err := loadAllAddressesFromDB()
if err != nil {
result.ErrorCount = len(rows)
result.ErrorMessages = append(result.ErrorMessages, "Failed to load addresses from database: "+err.Error())
return result
}
for i, row := range rows {
// Check if the row has enough columns
if addressColumn >= len(row) {
@@ -147,116 +176,217 @@ func processAddressValidation(rows [][]string, addressColumn int) CSVUploadResul
continue
}
// Get and normalize address
address := strings.ToLower(strings.TrimSpace(row[addressColumn]))
if address == "" {
// Get and normalize address from CSV
csvAddress := normalizeAddress(row[addressColumn])
if csvAddress == "" {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: Empty address", i+2))
continue
}
// Check if address exists in database
var addressID int
var currentStatus bool
err := models.DB.QueryRow(`
SELECT address_id, visited_validated
FROM address_database
WHERE LOWER(TRIM(address)) = $1
`, address).Scan(&addressID, &currentStatus)
// Find best matches using fuzzy string matching
matches := findBestMatches(csvAddress, dbAddresses, 5) // Get top 5 matches
if err != nil {
// Address not found
if len(matches) == 0 {
result.NotFoundCount++
result.NotFoundAddresses = append(result.NotFoundAddresses, address)
result.NotFoundAddresses = append(result.NotFoundAddresses, csvAddress)
continue
}
// Address found - update validation status if not already validated
if !currentStatus {
_, err = models.DB.Exec(`
UPDATE address_database
SET visited_validated = true, updated_at = NOW()
WHERE address_id = $1
`, addressID)
// Get the best match
bestMatch := matches[0]
// Check if the best match meets our similarity threshold
if bestMatch.SimilarityScore < threshold {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: No good match found for '%s' (best match: '%s' with score %.2f, threshold: %.2f)",
i+2, csvAddress, bestMatch.Address, bestMatch.SimilarityScore, threshold))
continue
}
// Update validation status if not already validated
if !bestMatch.CurrentStatus {
err = updateAddressValidation(bestMatch.AddressID)
if err != nil {
result.ErrorCount++
result.ErrorMessages = append(result.ErrorMessages,
fmt.Sprintf("Row %d: Database update error for address '%s'", i+2, address))
log.Printf("Error updating address %d: %v", addressID, err)
fmt.Sprintf("Row %d: Database update error for address '%s'", i+2, csvAddress))
log.Printf("Error updating address %d: %v", bestMatch.AddressID, err)
continue
}
result.ValidatedCount++
result.ValidatedAddresses = append(result.ValidatedAddresses, address)
matchInfo := fmt.Sprintf("%s → %s (score: %.2f)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
} else {
// Address was already validated - still count as validated
// Address was already validated
result.ValidatedCount++
result.ValidatedAddresses = append(result.ValidatedAddresses, address+" (already validated)")
matchInfo := fmt.Sprintf("%s → %s (score: %.2f, already validated)", csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.ValidatedAddresses = append(result.ValidatedAddresses, matchInfo)
}
// Add fuzzy match info if it's not an exact match
if bestMatch.SimilarityScore < 1.0 {
fuzzyInfo := fmt.Sprintf("CSV: '%s' matched to DB: '%s' (similarity: %.2f)",
csvAddress, bestMatch.Address, bestMatch.SimilarityScore)
result.FuzzyMatches = append(result.FuzzyMatches, fuzzyInfo)
}
}
return result
}
// Optional: Keep the export function if you need it
// ExportValidatedAddressesHandler exports validated addresses to CSV
func ExportValidatedAddressesHandler(w http.ResponseWriter, r *http.Request) {
// Query validated addresses
// normalizeAddress trims spaces and converts to lowercase
func normalizeAddress(address string) string {
return strings.ToLower(strings.TrimSpace(address))
}
// loadAllAddressesFromDB loads all addresses from the database for fuzzy matching
func loadAllAddressesFromDB() ([]AddressMatch, error) {
rows, err := models.DB.Query(`
SELECT
a.address_id,
a.address,
a.street_name,
a.street_type,
a.street_quadrant,
a.house_number,
COALESCE(a.house_alpha, '') as house_alpha,
a.longitude,
a.latitude,
a.visited_validated,
a.created_at,
a.updated_at,
CASE
WHEN ap.sched_id IS NOT NULL THEN true
ELSE false
END as assigned,
COALESCE(u.first_name || ' ' || u.last_name, '') as user_name,
COALESCE(u.email, '') as user_email,
COALESCE(ap.appointment_date::text, '') as appointment_date,
COALESCE(ap.appointment_time::text, '') as appointment_time
FROM address_database a
LEFT JOIN appointment ap ON a.address_id = ap.address_id
LEFT JOIN users u ON ap.user_id = u.user_id
WHERE a.visited_validated = true
ORDER BY a.updated_at DESC
SELECT address_id, address, visited_validated
FROM address_database
`)
if err != nil {
log.Println("Export query error:", err)
http.Error(w, "Database error", http.StatusInternalServerError)
return
return nil, err
}
defer rows.Close()
// Set response headers for CSV download
filename := fmt.Sprintf("validated_addresses_%s.csv", time.Now().Format("2006-01-02_15-04-05"))
w.Header().Set("Content-Type", "text/csv")
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename))
// Create CSV writer
writer := csv.NewWriter(w)
defer writer.Flush()
// Write header
header := []string{
"Address ID", "Address", "Street Name", "Street Type", "Street Quadrant",
"House Number", "House Alpha", "Longitude", "Latitude", "Validated",
"Created At", "Updated At", "Assigned", "Assigned User", "User Email",
"Appointment Date", "Appointment Time",
var addresses []AddressMatch
for rows.Next() {
var match AddressMatch
var rawAddress string
err := rows.Scan(&match.AddressID, &rawAddress, &match.CurrentStatus)
if err != nil {
log.Printf("Error scanning address row: %v", err)
continue
}
// Normalize the address from database
match.Address = normalizeAddress(rawAddress)
addresses = append(addresses, match)
}
writer.Write(header)
// Write data rows (you'll need to define AddressWithDetails struct)
// Implementation depends on your existing struct definitions
return addresses, rows.Err()
}
// findBestMatches scores every entry of dbAddresses against csvAddress and
// returns at most topN candidates, best score first.
//
// Parameters:
//   - csvAddress: the (already normalized) address read from the CSV row.
//   - dbAddresses: candidate addresses; the slice and its elements are not
//     modified, the result is built from copies.
//   - topN: maximum number of candidates to return. A value <= 0 yields no
//     candidates instead of panicking on a negative slice bound.
//
// Returns a freshly allocated slice ordered by descending SimilarityScore.
// Candidates with equal scores keep their relative order from dbAddresses, so
// the winner among ties is deterministic. The result is nil when there is
// nothing to return.
func findBestMatches(csvAddress string, dbAddresses []AddressMatch, topN int) []AddressMatch {
	if topN <= 0 || len(dbAddresses) == 0 {
		return nil
	}

	// The result size is known up front, so allocate once.
	matches := make([]AddressMatch, len(dbAddresses))
	for i, dbAddr := range dbAddresses {
		// dbAddr is a copy of the element; setting the score on it leaves the
		// caller's slice untouched.
		dbAddr.SimilarityScore = calculateSimilarity(csvAddress, dbAddr.Address)
		matches[i] = dbAddr
	}

	// A stable sort keeps input order among equal scores; an unstable sort
	// would make the choice of matches[0] arbitrary when scores tie.
	sort.SliceStable(matches, func(i, j int) bool {
		return matches[i].SimilarityScore > matches[j].SimilarityScore
	})

	if len(matches) > topN {
		return matches[:topN]
	}
	return matches
}
// calculateSimilarity returns a Levenshtein-based similarity score for s1 and
// s2 in the range [0, 1]: 1 means the strings are identical, 0 means every
// character position has to be edited.
//
// The score is 1 - distance/maxLen, where both the edit distance and maxLen
// are measured in Unicode code points (runes), not bytes. Measuring in bytes
// would count a single accented or non-Latin character as several edits and
// would skew the score for any non-ASCII address.
func calculateSimilarity(s1, s2 string) float64 {
	if s1 == s2 {
		return 1.0
	}

	// Convert once and reuse the rune slices for both the length and the
	// distance computation.
	r1, r2 := []rune(s1), []rune(s2)

	// s1 != s2, so at least one of them is non-empty and maxLen >= 1.
	maxLen := max(len(r1), len(r2))

	// The edit distance never exceeds the longer length, so the result
	// cannot drop below zero.
	return 1.0 - float64(levenshteinRunes(r1, r2))/float64(maxLen)
}

// levenshteinDistance returns the Levenshtein (edit) distance between s1 and
// s2: the minimum number of single-character insertions, deletions and
// substitutions needed to turn one into the other.
//
// Characters are Unicode code points. Bytes that are not valid UTF-8 are
// treated as U+FFFD by the string-to-rune conversion.
func levenshteinDistance(s1, s2 string) int {
	return levenshteinRunes([]rune(s1), []rune(s2))
}

// levenshteinRunes computes the Levenshtein distance between two rune slices
// using two rolling rows instead of a full (len(a)+1) x (len(b)+1) matrix.
func levenshteinRunes(a, b []rune) int {
	if len(a) == 0 {
		return len(b)
	}
	if len(b) == 0 {
		return len(a)
	}

	// The distance is symmetric; keep the shorter input as the row dimension
	// so the two working rows are as small as possible.
	if len(a) < len(b) {
		a, b = b, a
	}

	prev := make([]int, len(b)+1)
	curr := make([]int, len(b)+1)

	// Row 0: turning the empty prefix of a into b[:j] takes j insertions.
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(a); i++ {
		// Column 0: turning a[:i] into the empty prefix takes i deletions.
		curr[0] = i
		for j := 1; j <= len(b); j++ {
			cost := 0
			if a[i-1] != b[j-1] {
				cost = 1
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution
			)
		}
		prev, curr = curr, prev
	}

	// After the final swap the last computed row lives in prev.
	return prev[len(b)]
}
// updateAddressValidation flags the address_database row with the given
// address_id as validated (visited_validated = true) and sets its updated_at
// column to the database's NOW(). The error reported by models.DB.Exec, if
// any, is returned to the caller unchanged.
func updateAddressValidation(addressID int) error {
	const markValidated = `
UPDATE address_database
SET visited_validated = true, updated_at = NOW()
WHERE address_id = $1
`
	if _, execErr := models.DB.Exec(markValidated, addressID); execErr != nil {
		return execErr
	}
	return nil
}
// Helper functions for different types
// minInt returns the smaller of the two integers a and b.
func minInt(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
// maxInt returns the larger of the two integers a and b.
func maxInt(a, b int) int {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
// maxFloat64 returns a when a > b and b otherwise. Because the comparison is
// a plain ">", b is also the result whenever the comparison is false for any
// other reason (equal values, or a NaN operand).
func maxFloat64(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}