update
This commit is contained in:
221
recommondation-engine/example1.py
Normal file
221
recommondation-engine/example1.py
Normal file
@@ -0,0 +1,221 @@
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
'''
|
||||
Recommender system using content-based filtering
|
||||
'''
|
||||
class Recommender:
    """
    Content-based product recommender driven by user click history.

    Products are turned into TF-IDF vectors built from their category, name
    and description. Each user gets a profile vector (a weighted average of
    the vectors of the products they clicked) and products are ranked by
    cosine similarity to that profile.

    Typical use: call load_products() once, call track_user_click() as events
    arrive, then call recommend_products() or
    recommend_by_category_preference().
    """

    # Categories every new user profile starts with (weight 0). Any other
    # category found in the catalogue is added to a profile on first click.
    DEFAULT_CATEGORIES = ('electronics', 'school supplies', 'rental place', 'furniture')

    # How strongly a category's click total boosts the weight of a clicked product.
    CATEGORY_BOOST = 0.5

    # Product fields concatenated (in this order) into the TF-IDF document.
    TEXT_FIELDS = ('category', 'name', 'description')

    def __init__(self):
        # Catalogue as a DataFrame (one row per product), set by load_products().
        self.products_df = None
        # user_id -> {'clicks': {product_id: count}, 'category_weights': {category: count}}
        self.user_profiles = {}
        # TF-IDF matrix with one row per row of products_df.
        self.tfidf_matrix = None
        self.tfidf_vectorizer = None
        # Series mapping product_id -> row position in products_df / tfidf_matrix.
        self.product_indices = None

    def load_products(self, products_data):
        """
        Load product data into the recommender system

        products_data: list of dictionaries with product info; every product
            needs 'product_id', 'name', 'description' and 'category'.

        Raises KeyError if one of the required fields is absent. State is only
        replaced once the whole catalogue has been vectorized successfully.
        """
        products_df = pd.DataFrame(products_data)

        required = ('product_id',) + self.TEXT_FIELDS
        missing = [field for field in required if field not in products_df.columns]
        if missing:
            raise KeyError(f"products_data is missing required fields: {missing}")

        # Missing text must become '' - a NaN document makes TfidfVectorizer fail.
        text = products_df[list(self.TEXT_FIELDS)].fillna('').astype(str)
        products_df['content'] = (
            text['category'] + ' ' + text['name'] + ' ' + text['description']
        )

        vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,   # Cap the vocabulary size (the matrix stays sparse)
            ngram_range=(1, 2),  # Use both unigrams and bigrams
        )
        tfidf_matrix = vectorizer.fit_transform(products_df['content'])

        # Map product_id -> row position. Duplicated ids keep their first row;
        # de-duplication has to look at the index (the ids), not at the values.
        positions = pd.Series(np.arange(len(products_df)), index=products_df['product_id'])
        product_indices = positions[~positions.index.duplicated(keep='first')]

        self.products_df = products_df
        self.tfidf_vectorizer = vectorizer
        self.tfidf_matrix = tfidf_matrix
        self.product_indices = product_indices

    def _require_products(self):
        """Raise a clear error when the catalogue has not been loaded yet."""
        if self.products_df is None or self.product_indices is None or self.tfidf_matrix is None:
            raise RuntimeError("No products loaded; call load_products() first.")

    def _product_position(self, product_id):
        """Return the row position of product_id, or None if it is not in the catalogue."""
        if product_id not in self.product_indices.index:
            return None
        return int(self.product_indices.loc[product_id])

    def track_user_click(self, user_id, product_id):
        """
        Track user clicks on products to build user profiles

        Creates the user's profile on first use. Clicks on products that are
        not in the catalogue are ignored.
        """
        self._require_products()

        if user_id not in self.user_profiles:
            self.user_profiles[user_id] = {
                'clicks': {},
                'category_weights': dict.fromkeys(self.DEFAULT_CATEGORIES, 0),
            }
        profile = self.user_profiles[user_id]

        position = self._product_position(product_id)
        if position is None:
            return

        category = self.products_df['category'].iloc[position]

        clicks = profile['clicks']
        clicks[product_id] = clicks.get(product_id, 0) + 1

        # .get() so that categories outside DEFAULT_CATEGORIES are counted
        # instead of raising KeyError.
        category_weights = profile['category_weights']
        category_weights[category] = category_weights.get(category, 0) + 1

    def get_user_profile_vector(self, user_id):
        """
        Generate a user profile vector based on their click history

        Returns an array of shape (1, n_features): the weighted average of the
        TF-IDF rows of the clicked products, or all zeros when the user has no
        usable click history.
        """
        self._require_products()
        n_features = self.tfidf_matrix.shape[1]

        profile = self.user_profiles.get(user_id)
        if profile is None or not profile['clicks']:
            return np.zeros((1, n_features))

        category_weights = profile['category_weights']
        categories = self.products_df['category']

        positions = []
        weights = []
        for product_id, click_count in profile['clicks'].items():
            position = self._product_position(product_id)
            if position is None:
                # The product was clicked earlier but is no longer in the catalogue.
                continue
            category_weight = category_weights.get(categories.iloc[position], 0)
            # Weight is based on both click count and category preference
            positions.append(position)
            weights.append(click_count * (1 + self.CATEGORY_BOOST * category_weight))

        total_weight = sum(weights)
        if not positions or total_weight <= 0:
            return np.zeros((1, n_features))

        normalized = np.asarray(weights, dtype=float) / total_weight

        # Weighted sum of the clicked rows in one product: (features x k) . (k,)
        clicked_rows = self.tfidf_matrix[positions]
        weighted_sum = clicked_rows.T.dot(normalized)
        return np.asarray(weighted_sum).reshape(1, n_features)

    def recommend_products(self, user_id, n=5, category_filter=None):
        """
        Recommend products to a user based on their profile

        user_id: ID of the user
        n: Number of recommendations to return (n <= 0 yields an empty list)
        category_filter: Optional filter to limit recommendations to a specific category

        Returns a list of at most n product IDs, best match first. Products
        the user already clicked are never recommended. Users without a usable
        profile get the fallback from _get_popular_products().
        """
        if n <= 0:
            return []

        user_profile = self.get_user_profile_vector(user_id)

        # An all-zero profile carries no preference information.
        if not user_profile.any():
            return self._get_popular_products(n, category_filter)

        scores = cosine_similarity(user_profile, self.tfidf_matrix).ravel()
        candidates = self.products_df[['product_id', 'category']].assign(score=scores)

        # Filter out products that the user has already clicked on
        profile = self.user_profiles.get(user_id)
        clicked = list(profile['clicks']) if profile else []
        if clicked:
            candidates = candidates[~candidates['product_id'].isin(clicked)]

        if category_filter:
            candidates = candidates[candidates['category'] == category_filter]

        # A stable sort keeps catalogue order among equal scores (deterministic output).
        best = candidates.sort_values('score', ascending=False, kind='stable').head(n)
        return best['product_id'].tolist()

    def _get_popular_products(self, n=5, category_filter=None):
        """
        Return popular products when a user has no profile

        There are no popularity metrics yet, so this returns a random sample
        of n products, or every matching product in catalogue order when fewer
        than n match.
        """
        self._require_products()
        if n <= 0:
            return []

        pool = self.products_df
        if category_filter:
            pool = pool[pool['category'] == category_filter]

        if len(pool) >= n:
            pool = pool.sample(n)
        return pool['product_id'].tolist()

    @staticmethod
    def _proportional_share(weight, total_weight, n, remaining):
        """Slots for one category: its share of n, at least 1, at most the slots left."""
        share = max(1, int(n * weight / total_weight))
        return min(share, remaining)

    def recommend_by_category_preference(self, user_id, n=5):
        """
        Recommend products based primarily on the user's category preferences

        The n slots are split across the categories the user clicked, in
        proportion to the clicks per category (most clicked first). Slots that
        stay empty are filled with general recommendations.
        """
        if n <= 0:
            return []

        profile = self.user_profiles.get(user_id)
        if profile is None:
            return self._get_popular_products(n)

        category_weights = profile['category_weights']
        total_weight = sum(category_weights.values())

        # If no category has been clicked, return popular products
        if total_weight == 0:
            return self._get_popular_products(n)

        ranked_categories = sorted(
            category_weights.items(),
            key=lambda item: item[1],
            reverse=True,
        )

        recommendations = []
        seen = set()
        remaining = n

        for category, weight in ranked_categories:
            # Sorted descending, so nothing after the first zero weight matters.
            if weight <= 0 or remaining <= 0:
                break

            share = self._proportional_share(weight, total_weight, n, remaining)
            for product_id in self.recommend_products(user_id, share, category):
                if product_id not in seen:
                    seen.add(product_id)
                    recommendations.append(product_id)
                    remaining -= 1

        # If we still have slots to fill, add general recommendations
        if remaining > 0:
            # Over-fetch by the number already chosen so that dropping the
            # duplicates still leaves enough candidates to fill every slot.
            general = self.recommend_products(user_id, len(recommendations) + remaining)
            for product_id in general:
                if remaining <= 0:
                    break
                if product_id not in seen:
                    seen.add(product_id)
                    recommendations.append(product_id)
                    remaining -= 1

        return recommendations
|
||||
|
||||
|
||||
# Module-level Recommender instance created at import time; it holds no products
# until load_products() is called. Presumably imported by other modules as the
# shared recommender - confirm against callers.
exported = Recommender()
|
||||
Reference in New Issue
Block a user