changing some things for reports section

This commit is contained in:
Mann Patel
2025-09-01 17:32:00 -06:00
parent e071ced77f
commit 76744e81cb
16 changed files with 411461 additions and 1587 deletions

319
misc-code/postal_get.py Normal file
View File

@@ -0,0 +1,319 @@
import pandas as pd
import requests
import time
import json
from typing import Optional, Dict, Any
import logging
# Set up logging: configure the root logger at INFO level when this module is
# loaded, then create the module-level logger used by the code below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class PostalCodeFetcher:
    """Fetch postal codes for Canadian addresses using a geocoding service.

    Only the 'nominatim' and 'google' services are implemented. Any other
    service name is accepted for backward compatibility, but every lookup
    then returns None.
    """

    # Services that get_postal_code() can actually dispatch to.
    SUPPORTED_SERVICES = ('nominatim', 'google')

    def __init__(self, api_key: Optional[str] = None, service: str = 'nominatim'):
        """
        Initialize the postal code fetcher

        Args:
            api_key: API key for paid services (required for 'google')
            service: Geocoding service to use ('nominatim' or 'google')
        """
        self.api_key = api_key
        self.service = service.lower()
        self.session = requests.Session()
        self.rate_limit_delay = 1.0  # seconds between requests

        # Warn once here instead of silently returning None for every row.
        if self.service not in self.SUPPORTED_SERVICES:
            logger.warning(
                "Unsupported geocoding service '%s'; supported services: %s",
                self.service,
                ", ".join(self.SUPPORTED_SERVICES),
            )

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return a CSV cell as stripped text, or '' when the cell is missing.

        Missing means None, a float NaN (how pandas represents an empty cell)
        or the literal text 'nan'. Whole-number floats are rendered without
        the trailing '.0' so a house number read as 531.0 becomes '531'.
        """
        if value is None:
            return ''
        if isinstance(value, float):
            if value != value:  # NaN is the only float not equal to itself
                return ''
            if value.is_integer():
                return str(int(value))
        text = str(value).strip()
        return '' if text == 'nan' else text

    @staticmethod
    def _clean_coordinate(value: Any) -> Optional[float]:
        """Convert a raw latitude/longitude cell to a finite float, else None."""
        if value is None:
            return None
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        # Reject NaN and infinities; 0.0 is a valid coordinate and is kept.
        if number != number or number in (float('inf'), float('-inf')):
            return None
        return number

    @staticmethod
    def _extract_nominatim_postcode(data: Any) -> Optional[str]:
        """Pull the postcode out of a decoded Nominatim response.

        The search endpoint returns a list of results and the reverse
        endpoint returns a single object; both shapes are accepted.
        """
        if not data:
            return None
        result = data[0] if isinstance(data, list) else data
        if not isinstance(result, dict):
            return None
        address_details = result.get('address')
        if not isinstance(address_details, dict):
            return None
        return address_details.get('postcode')

    @staticmethod
    def _extract_google_postcode(data: Any) -> Optional[str]:
        """Pull the postal code out of a decoded Google Geocoding response."""
        if not isinstance(data, dict):
            return None
        if data.get('status') != 'OK' or not data.get('results'):
            return None
        result = data['results'][0]
        for component in result.get('address_components', []):
            if 'postal_code' in component.get('types', []):
                return component.get('long_name')
        return None

    def format_address(self, row: Dict[str, Any]) -> str:
        """
        Format address from CSV row data

        Args:
            row: Dictionary containing address components (HOUSE_NUMBER,
                HOUSE_ALPHA, STREET_NAME, STREET_TYPE, STREET_QUAD)

        Returns:
            Formatted address string, always ending in "Calgary, AB, Canada"
        """
        # Extract components; missing cells (None / NaN / 'nan') become ''
        house_num = self._clean_text(row.get('HOUSE_NUMBER'))
        house_alpha = self._clean_text(row.get('HOUSE_ALPHA'))
        street_name = self._clean_text(row.get('STREET_NAME'))
        street_type = self._clean_text(row.get('STREET_TYPE'))
        street_quad = self._clean_text(row.get('STREET_QUAD'))

        address_parts = []

        # House number and alpha
        house_part = house_num + house_alpha
        if house_part:
            address_parts.append(house_part)

        # Street name and type. The type is skipped only when it already
        # appears as a whole word in the name; a substring test would wrongly
        # drop 'DR' for a street named 'ANDREWS'.
        street_part = street_name
        if street_type and street_type.upper() not in street_name.upper().split():
            street_part = f"{street_part} {street_type}".strip()
        if street_part:
            address_parts.append(street_part)

        # Quadrant
        if street_quad:
            address_parts.append(street_quad)

        # Add city and province
        address_parts.extend(["Calgary", "AB", "Canada"])
        return ", ".join(address_parts)

    def get_postal_code_nominatim(
        self,
        address: str,
        lat: Optional[float] = None,
        lon: Optional[float] = None,
    ) -> Optional[str]:
        """
        Get postal code using OpenStreetMap Nominatim (free)

        Args:
            address: Full address string
            lat: Latitude (optional, for reverse geocoding)
            lon: Longitude (optional, for reverse geocoding)

        Returns:
            Postal code if found, None otherwise (including on request errors)
        """
        # Reverse geocode when both coordinates are present. The check is
        # against None so that a legitimate 0.0 coordinate is not discarded.
        if lat is not None and lon is not None:
            url = "https://nominatim.openstreetmap.org/reverse"
            params = {
                'format': 'json',
                'lat': lat,
                'lon': lon,
                'zoom': 18,
                'addressdetails': 1,
            }
        else:
            # Forward geocoding
            url = "https://nominatim.openstreetmap.org/search"
            params = {
                'q': address,
                'format': 'json',
                'addressdetails': 1,
                'limit': 1,
                'countrycodes': 'ca',
            }

        # NOTE(review): the contact address is a placeholder; replace it with
        # a real one so the User-Agent actually identifies this application.
        headers = {
            'User-Agent': 'PostalCodeFetcher/1.0 (your-email@domain.com)'
        }

        try:
            response = self.session.get(url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body.
            logger.error("Nominatim error for %s: %s", address, e)
            return None

        return self._extract_nominatim_postcode(data)

    def get_postal_code_google(self, address: str) -> Optional[str]:
        """
        Get postal code using Google Geocoding API (requires API key)

        Args:
            address: Full address string

        Returns:
            Postal code if found, None otherwise (including on request errors)
        """
        if not self.api_key:
            logger.error("Google API key required")
            return None

        url = "https://maps.googleapis.com/maps/api/geocode/json"
        params = {
            'address': address,
            'key': self.api_key,
            'region': 'ca',
        }

        try:
            response = self.session.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body.
            logger.error("Google API error for %s: %s", address, e)
            return None

        # Surface statuses such as REQUEST_DENIED or OVER_QUERY_LIMIT, which
        # would otherwise look like "address has no postal code".
        if isinstance(data, dict):
            status = data.get('status')
            if status not in ('OK', 'ZERO_RESULTS'):
                logger.warning(
                    "Google API returned status %s for %s: %s",
                    status,
                    address,
                    data.get('error_message', ''),
                )

        return self._extract_google_postcode(data)

    def get_postal_code(self, row: Dict[str, Any]) -> Optional[str]:
        """
        Get postal code for a single address row

        Args:
            row: Dictionary containing address data; optional 'latitude' and
                'longitude' keys are used by the Nominatim service

        Returns:
            Postal code if found, None otherwise
        """
        # Format the address
        address = self.format_address(row)
        logger.info("Fetching postal code for: %s", address)

        if self.service == 'nominatim':
            lat = self._clean_coordinate(row.get('latitude'))
            lon = self._clean_coordinate(row.get('longitude'))
            postal_code = self.get_postal_code_nominatim(address, lat, lon)
        elif self.service == 'google':
            postal_code = self.get_postal_code_google(address)
        else:
            # Unsupported service: no request is made, so there is nothing to
            # rate-limit.
            return None

        # Rate limiting
        time.sleep(self.rate_limit_delay)
        return postal_code

    def process_csv(self, csv_file_path: str, output_file_path: Optional[str] = None) -> pd.DataFrame:
        """
        Process CSV file and add postal codes

        Args:
            csv_file_path: Path to input CSV file
            output_file_path: Path to output CSV file (optional)

        Returns:
            DataFrame with a POSTAL_CODE column added (any existing column of
            that name is overwritten)
        """
        logger.info("Processing CSV file: %s", csv_file_path)

        # Read CSV
        df = pd.read_csv(csv_file_path)
        logger.info("Loaded %d addresses", len(df))

        # Add postal code column
        df['POSTAL_CODE'] = None

        # Process each row; row_number is the 1-based position used in log
        # messages and does not depend on the index being an integer.
        for row_number, (index, row) in enumerate(df.iterrows(), start=1):
            try:
                postal_code = self.get_postal_code(row.to_dict())
            except Exception as e:
                # Best-effort batch: one bad row must not abort the run.
                logger.error("Error processing row %s: %s", row_number, e)
                df.at[index, 'POSTAL_CODE'] = None
                continue

            df.at[index, 'POSTAL_CODE'] = postal_code
            if postal_code:
                logger.info("Row %s: Found postal code %s", row_number, postal_code)
            else:
                logger.warning("Row %s: No postal code found", row_number)

        # Save results
        if output_file_path:
            df.to_csv(output_file_path, index=False)
            logger.info("Results saved to: %s", output_file_path)

        # Summary
        found_count = df['POSTAL_CODE'].notna().sum()
        logger.info("Successfully found postal codes for %s/%s addresses", found_count, len(df))
        return df
# Example usage functions
def fetch_postal_codes_free(csv_file_path: str, output_file_path: str = None) -> pd.DataFrame:
    """
    Look up postal codes for every row of a CSV via the free Nominatim service

    Args:
        csv_file_path: Location of the CSV holding the address data
        output_file_path: Where to write the augmented CSV; omitted means no file is written

    Returns:
        The DataFrame produced by PostalCodeFetcher.process_csv
    """
    return PostalCodeFetcher(service='nominatim').process_csv(
        csv_file_path=csv_file_path,
        output_file_path=output_file_path,
    )
def fetch_postal_codes_google(csv_file_path: str, api_key: str, output_file_path: str = None) -> pd.DataFrame:
    """
    Look up postal codes for every row of a CSV via the Google Geocoding API

    Args:
        csv_file_path: Location of the CSV holding the address data
        api_key: Google API key handed to the fetcher
        output_file_path: Where to write the augmented CSV; omitted means no file is written

    Returns:
        The DataFrame produced by PostalCodeFetcher.process_csv
    """
    google_fetcher = PostalCodeFetcher(service='google', api_key=api_key)
    result_frame = google_fetcher.process_csv(
        csv_file_path=csv_file_path,
        output_file_path=output_file_path,
    )
    return result_frame
# Sample usage
if __name__ == "__main__":
    # Example 1 (active): read ./Address.csv, look every row up through the
    # free Nominatim service and write the augmented CSV next to the script.
    df = fetch_postal_codes_free(
        csv_file_path='./Address.csv',
        output_file_path='addresses_with_postal_codes.csv',
    )

    # Example 2 (disabled): the same run through the Google API; needs a key.
    # df = fetch_postal_codes_google('addresses.csv', 'YOUR_GOOGLE_API_KEY', 'addresses_with_postal_codes.csv')

    # Example 3 (disabled): look up a single hand-built row.
    # fetcher = PostalCodeFetcher(service='nominatim')
    # sample_row = {
    #     'HOUSE_NUMBER': '531',
    #     'STREET_NAME': 'NORTHMOUNT',
    #     'STREET_TYPE': 'DR',
    #     'STREET_QUAD': 'NW',
    #     'latitude': 51.0893695,
    #     'longitude': -114.08514
    # }
    # postal_code = fetcher.get_postal_code(sample_row)
    # print(f"Postal code: {postal_code}")