changing some things for reports section
This commit is contained in:
319
misc-code/postal_get.py
Normal file
319
misc-code/postal_get.py
Normal file
@@ -0,0 +1,319 @@
|
||||
import pandas as pd
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
from typing import Optional, Dict, Any
|
||||
import logging
|
||||
|
||||
# Set up logging: configure the root logger at import time so INFO-level
# progress messages from this module are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
class PostalCodeFetcher:
    """Fetch postal codes for Calgary, AB addresses using a geocoding service.

    Addresses are assembled from the ``HOUSE_NUMBER``, ``HOUSE_ALPHA``,
    ``STREET_NAME``, ``STREET_TYPE`` and ``STREET_QUAD`` keys of a row, and
    "Calgary, AB, Canada" is always appended.

    The instance owns a ``requests.Session``. Call ``close()`` or use the
    instance as a context manager to release it.
    """

    # Services that get_postal_code() knows how to dispatch to.
    SUPPORTED_SERVICES = ('nominatim', 'google')

    def __init__(self, api_key: Optional[str] = None, service: str = 'nominatim', *,
                 rate_limit_delay: float = 1.0,
                 user_agent: str = 'PostalCodeFetcher/1.0 (your-email@domain.com)'):
        """
        Initialize the postal code fetcher

        Args:
            api_key: API key for paid services (required for 'google')
            service: Geocoding service to use ('nominatim' or 'google');
                matched case-insensitively
            rate_limit_delay: Seconds to sleep after each lookup
            user_agent: User-Agent header sent to Nominatim. The default is a
                placeholder; replace it with a value identifying your
                application and contact address.
        """
        self.api_key = api_key
        self.service = service.lower()
        self.session = requests.Session()
        self.rate_limit_delay = rate_limit_delay  # seconds between requests
        self.user_agent = user_agent

        # Do not raise here: constructing with an unknown service used to
        # succeed, so keep that working but make the misconfiguration visible.
        if self.service not in self.SUPPORTED_SERVICES:
            logger.warning(
                "Unsupported geocoding service %r; supported services: %s",
                self.service, ", ".join(self.SUPPORTED_SERVICES))

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "PostalCodeFetcher":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    @staticmethod
    def _clean_component(value: Any) -> str:
        """Return an address component as stripped text, or '' when missing.

        Missing means ``None``, NaN/NA, or the literal string 'nan' (what
        ``str()`` produces for a NaN cell). Integral floats are rendered
        without the trailing ".0" that pandas introduces when an integer
        column contains blanks.
        """
        if value is None:
            return ''
        if isinstance(value, str):
            text = value.strip()
            return '' if text == 'nan' else text
        if pd.isna(value):
            return ''
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value).strip()

    @staticmethod
    def _to_coordinate(value: Any, limit: float) -> Optional[float]:
        """Convert a raw latitude/longitude to float, or None when unusable.

        Args:
            value: Raw cell value (number, numeric string, None, NaN, ...)
            limit: Largest valid absolute value (90 for latitude, 180 for
                longitude)

        Returns:
            The coordinate as a float, or None if it is missing, not numeric,
            NaN, infinite, out of range, or zero.
        """
        try:
            number = float(value)
        except (ValueError, TypeError):
            return None
        # Zero was treated as "no coordinate" by the original truthiness
        # checks; keep that. NaN fails the range comparison, as do +/-inf.
        if number == 0 or not -limit <= number <= limit:
            return None
        return number

    def format_address(self, row: Dict[str, Any]) -> str:
        """
        Format address from CSV row data

        Args:
            row: Dictionary containing address components

        Returns:
            Formatted address string: the available components joined by
            ", " and followed by "Calgary, AB, Canada"
        """
        # Extract components (missing/NaN values become '')
        clean = self._clean_component
        house_num = clean(row.get('HOUSE_NUMBER'))
        house_alpha = clean(row.get('HOUSE_ALPHA'))
        street_name = clean(row.get('STREET_NAME'))
        street_type = clean(row.get('STREET_TYPE'))
        street_quad = clean(row.get('STREET_QUAD'))

        address_parts = []

        # House number and alpha suffix, e.g. "12" + "A" -> "12A"
        house_part = house_num + house_alpha
        if house_part:
            address_parts.append(house_part)

        # Street name and type. Skip the type only when it already appears as
        # a whole word in the name; a substring test would wrongly drop "DR"
        # for a street called "DRAKE".
        street_part = street_name
        if street_type and street_type.upper() not in street_name.upper().split():
            street_part = f"{street_part} {street_type}".strip()
        if street_part:
            address_parts.append(street_part)

        # Quadrant
        if street_quad:
            address_parts.append(street_quad)

        # Add city and province
        address_parts.extend(["Calgary", "AB", "Canada"])

        return ", ".join(address_parts)

    def get_postal_code_nominatim(self, address: str, lat: Optional[float] = None,
                                  lon: Optional[float] = None) -> Optional[str]:
        """
        Get postal code using OpenStreetMap Nominatim (free)

        Args:
            address: Full address string
            lat: Latitude (optional, for reverse geocoding)
            lon: Longitude (optional, for reverse geocoding)

        Returns:
            Postal code if found, None otherwise (including on request errors,
            which are logged)
        """
        # Reverse geocoding is used when both coordinates are given;
        # otherwise the address text is searched.
        if lat is not None and lon is not None:
            url = "https://nominatim.openstreetmap.org/reverse"
            params = {
                'format': 'json',
                'lat': lat,
                'lon': lon,
                'zoom': 18,
                'addressdetails': 1
            }
        else:
            # Forward geocoding
            url = "https://nominatim.openstreetmap.org/search"
            params = {
                'q': address,
                'format': 'json',
                'addressdetails': 1,
                'limit': 1,
                'countrycodes': 'ca'
            }

        headers = {'User-Agent': self.user_agent}

        try:
            response = self.session.get(url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            logger.error("Nominatim error for %s: %s", address, e)
            return None

        # The code accepts either a list of results or a single object.
        result = data[0] if isinstance(data, list) and data else data
        if not isinstance(result, dict):
            return None

        address_details = result.get('address')
        if not isinstance(address_details, dict):
            return None
        return address_details.get('postcode')

    def get_postal_code_google(self, address: str) -> Optional[str]:
        """
        Get postal code using Google Geocoding API (requires API key)

        Args:
            address: Full address string

        Returns:
            Postal code if found, None otherwise (including when no API key
            is configured or the request fails; both are logged)
        """
        if not self.api_key:
            logger.error("Google API key required")
            return None

        url = "https://maps.googleapis.com/maps/api/geocode/json"
        params = {
            'address': address,
            'key': self.api_key,
            'region': 'ca'
        }

        try:
            response = self.session.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # NOTE(review): the exception text may include the request URL,
            # and with it the API key - confirm before shipping these logs.
            logger.error("Google API error for %s: %s", address, e)
            return None

        if not isinstance(data, dict):
            return None

        status = data.get('status')
        if status != 'OK' or not data.get('results'):
            # ZERO_RESULTS just means "not found"; anything else (quota,
            # denied key, ...) is an API problem worth surfacing.
            if status not in ('OK', 'ZERO_RESULTS'):
                logger.warning("Google API returned status %s for %s: %s",
                               status, address, data.get('error_message', ''))
            return None

        result = data['results'][0]

        # Extract postal code from address components
        for component in result.get('address_components', []):
            if 'postal_code' in component.get('types', []):
                return component.get('long_name')

        return None

    def get_postal_code(self, row: Dict[str, Any]) -> Optional[str]:
        """
        Get postal code for a single address row

        Args:
            row: Dictionary containing address data; optional 'latitude' and
                'longitude' keys enable reverse geocoding with Nominatim

        Returns:
            Postal code if found, None otherwise
        """
        # Format the address
        address = self.format_address(row)
        logger.info("Fetching postal code for: %s", address)

        # Extract coordinates if available (None when missing or invalid)
        lat = self._to_coordinate(row.get('latitude'), 90.0)
        lon = self._to_coordinate(row.get('longitude'), 180.0)

        # Dispatch to the configured service
        if self.service == 'nominatim':
            postal_code = self.get_postal_code_nominatim(address, lat, lon)
        elif self.service == 'google':
            postal_code = self.get_postal_code_google(address)
        else:
            # No request was made, so there is nothing to rate-limit.
            logger.error("Unsupported geocoding service: %s", self.service)
            return None

        # Rate limiting
        time.sleep(self.rate_limit_delay)

        return postal_code

    def process_csv(self, csv_file_path: str,
                    output_file_path: Optional[str] = None) -> pd.DataFrame:
        """
        Process CSV file and add postal codes

        Args:
            csv_file_path: Path to input CSV file
            output_file_path: Path to output CSV file (optional)

        Returns:
            DataFrame with a 'POSTAL_CODE' column added (None where no postal
            code was found)
        """
        logger.info("Processing CSV file: %s", csv_file_path)

        # Read CSV
        df = pd.read_csv(csv_file_path)
        logger.info("Loaded %s addresses", len(df))

        # Add postal code column
        df['POSTAL_CODE'] = None

        # Process each row; one bad row must not abort the whole run
        for row_number, (index, row) in enumerate(df.iterrows(), start=1):
            try:
                postal_code = self.get_postal_code(row.to_dict())
            except Exception as e:
                logger.error("Error processing row %s: %s", row_number, e)
                df.at[index, 'POSTAL_CODE'] = None
                continue

            df.at[index, 'POSTAL_CODE'] = postal_code
            if postal_code:
                logger.info("Row %s: Found postal code %s", row_number, postal_code)
            else:
                logger.warning("Row %s: No postal code found", row_number)

        # Save results
        if output_file_path:
            df.to_csv(output_file_path, index=False)
            logger.info("Results saved to: %s", output_file_path)

        # Summary
        found_count = df['POSTAL_CODE'].notna().sum()
        logger.info("Successfully found postal codes for %s/%s addresses",
                    found_count, len(df))

        return df
|
||||
|
||||
# Example usage functions
|
||||
def fetch_postal_codes_free(csv_file_path: str, output_file_path: str = None) -> pd.DataFrame:
    """
    Look up postal codes for a CSV of addresses via the free Nominatim service

    Args:
        csv_file_path: Location of the CSV file holding the address data
        output_file_path: Where to write the results (optional)

    Returns:
        DataFrame produced by PostalCodeFetcher.process_csv
    """
    return PostalCodeFetcher(service='nominatim').process_csv(
        csv_file_path,
        output_file_path,
    )
|
||||
|
||||
def fetch_postal_codes_google(csv_file_path: str, api_key: str, output_file_path: str = None) -> pd.DataFrame:
    """
    Look up postal codes for a CSV of addresses via the Google Geocoding API

    Args:
        csv_file_path: Location of the CSV file holding the address data
        api_key: Google API key passed to the fetcher
        output_file_path: Where to write the results (optional)

    Returns:
        DataFrame produced by PostalCodeFetcher.process_csv
    """
    return PostalCodeFetcher(api_key=api_key, service='google').process_csv(
        csv_file_path,
        output_file_path,
    )
|
||||
|
||||
# Sample usage
|
||||
if __name__ == "__main__":
    # Example 1: run the free Nominatim lookup over ./Address.csv and write
    # the augmented table next to it.
    df = fetch_postal_codes_free(
        csv_file_path='./Address.csv',
        output_file_path='addresses_with_postal_codes.csv',
    )

    # Example 2: Using Google API (requires API key)
    # df = fetch_postal_codes_google('addresses.csv', 'YOUR_GOOGLE_API_KEY', 'addresses_with_postal_codes.csv')

    # Example 3: Manual usage
    #fetcher = PostalCodeFetcher(service='nominatim')

    # Test with sample data
    #sample_row = {
    #    'HOUSE_NUMBER': '531',
    #    'STREET_NAME': 'NORTHMOUNT',
    #    'STREET_TYPE': 'DR',
    #    'STREET_QUAD': 'NW',
    #    'latitude': 51.0893695,
    #    'longitude': -114.08514
    #}

    #postal_code = fetcher.get_postal_code(sample_row)
    #print(f"Postal code: {postal_code}")
|
||||
Reference in New Issue
Block a user