update
This commit is contained in:
@@ -298,10 +298,7 @@ function App() {
|
||||
// Save to localStorage to persist across refreshes
|
||||
sessionStorage.setItem("isAuthenticated", "true");
|
||||
sessionStorage.setItem("user", JSON.stringify(userObj));
|
||||
|
||||
// After successful signup, send session data to server
|
||||
sendSessionDataToServer(); // Call it after signup
|
||||
|
||||
sessionStorage.getItem("user");
|
||||
|
||||
console.log("Login successful for:", userData.email);
|
||||
@@ -388,7 +385,7 @@ function App() {
|
||||
console.log("Sending user data to the server:", requestData);
|
||||
|
||||
// Send data to Python server (replace with your actual server URL)
|
||||
const response = await fetch("http://localhost:5000/api/user/session", {
|
||||
const response = await fetch("http://0.0.0.0:5000/api/user/session", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
|
||||
BIN
recommondation-engine/__pycache__/server.cpython-313.pyc
Normal file
BIN
recommondation-engine/__pycache__/server.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,95 +1,272 @@
|
||||
# pip install mysql.connector
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import mysql.connector as db_con
|
||||
|
||||
import mysql.connector
|
||||
# Flask app initialization
|
||||
app = Flask(__name__)
|
||||
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)
|
||||
|
||||
# Database connection setup
def get_db_connection():
    """Open and return a new MySQL connection to the Marketplace database.

    NOTE(review): no password is supplied — this assumes a passwordless
    local root account; confirm against the deployment environment.
    """
    return db_con.connect(
        host="localhost",
        port=3306,
        user="root",
        database="Marketplace"
    )


# Backward-compatible alias: older call sites used database().
database = get_db_connection
||||
|
||||
|
||||
# Fetch all products with category names
def get_all_products():
    """Return every product joined with its category name.

    Returns a list of dicts with keys ProductID, Name, Description,
    Category (None when the product has no category row), or an empty
    list on any database error.

    NOTE(review): GROUP BY p.ProductID with non-aggregated c.Name relies
    on MySQL's permissive GROUP BY; an arbitrary category is returned for
    multi-category products — confirm this is intended.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Product p
        LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
        LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
        GROUP BY p.ProductID;
    """
    try:
        connection = get_db_connection()
        # dictionary=True so rows come back as {column: value} dicts,
        # which pandas.DataFrame consumes directly.
        cursor = connection.cursor(dictionary=True)
        cursor.execute(query)
        products = cursor.fetchall()
        cursor.close()
        connection.close()
        return products
    except Exception as e:
        print(f"Database error getting products: {e}")
        return []
|
||||
|
||||
# Fetch user history
def get_user_history(user_id):
    """Return the products in user_id's browsing/purchase history.

    Returns a list of dicts (ProductID, Name, Description, Category),
    or an empty list on any database error.

    NOTE(review): this joins Category via p.CategoryID, while
    get_all_products() uses the Product_Category junction table —
    confirm which schema is current; one of the two is likely stale.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM History h
        JOIN Product p ON h.ProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE h.UserID = %s
    """
    try:
        connection = get_db_connection()
        cursor = connection.cursor(dictionary=True)
        # Parameterized query — never interpolate user_id into SQL.
        cursor.execute(query, (user_id,))
        history = cursor.fetchall()
        cursor.close()
        connection.close()
        return history
    except Exception as e:
        print(f"Error getting user history: {e}")
        return []
|
||||
|
||||
# Store recommendations
def store_user_recommendations(user_id, recommendations):
    """Replace the stored recommendations for user_id.

    Deletes any existing Recommendation rows for the user, then inserts
    one row per product id in `recommendations`, all in one transaction.
    Returns True on success, False on any database error.
    """
    delete_query = "DELETE FROM Recommendation WHERE UserID = %s"
    insert_query = "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)"

    try:
        connection = get_db_connection()
        cursor = connection.cursor()

        # First delete existing recommendations
        cursor.execute(delete_query, (user_id,))

        # Then insert new recommendations in a single batch round-trip
        cursor.executemany(
            insert_query,
            [(user_id, product_id) for product_id in recommendations],
        )

        # Commit so delete + inserts land atomically.
        connection.commit()
        cursor.close()
        connection.close()
        return True
    except Exception as e:
        print(f"Error storing recommendations: {e}")
        return False
|
||||
|
||||
# Fetch stored recommendations
def get_stored_recommendations(user_id):
    """Return the previously stored recommendations for user_id.

    Returns a list of dicts (ProductID, Name, Description, Category),
    or an empty list on any database error.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Recommendation r
        JOIN Product p ON r.RecommendedProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE r.UserID = %s
    """
    try:
        connection = get_db_connection()
        cursor = connection.cursor(dictionary=True)
        cursor.execute(query, (user_id,))
        recommendations = cursor.fetchall()
        cursor.close()
        connection.close()
        return recommendations
    except Exception as e:
        print(f"Error getting stored recommendations: {e}")
        return []
|
||||
|
||||
# Initialize Recommender class
class Recommender:
    """Content-based product recommender (TF-IDF over product text,
    cosine similarity against the mean vector of a user's history)."""

    def __init__(self):
        self.products_df = None       # DataFrame of all products
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # ProductID -> DataFrame row position

    def load_products(self, products_data):
        """Build the TF-IDF model from a list of product dicts.

        products_data: dicts with keys ProductID, Name, Description,
        Category, as returned by get_all_products().
        """
        self.products_df = pd.DataFrame(products_data)

        # Combine relevant features for content-based filtering.
        # Category/Name can be NULL from the LEFT JOIN, which would turn
        # the whole 'content' cell into NaN and crash the vectorizer —
        # fill every component, not just Description.
        self.products_df['content'] = (
            self.products_df['Category'].fillna('') + ' ' +
            self.products_df['Name'].fillna('') + ' ' +
            self.products_df['Description'].fillna('')
        )

        # Create TF-IDF matrix
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])

        # Map product IDs to row positions for quick lookup.
        # (products_df keeps its default RangeIndex, so index labels and
        # positions coincide — iloc below depends on that.)
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['ProductID']
        ).drop_duplicates()

    def recommend_products_for_user(self, user_id, top_n=40):
        """
        Generate product recommendations based on user history using cosine similarity
        """
        # Get user history
        user_history = get_user_history(user_id)

        # If no history, return popular products
        if not user_history:
            return self.recommend_popular_products(top_n)

        # Convert user history to DataFrame
        history_df = pd.DataFrame(user_history)

        # Get indices of products in user history (skip unknown IDs)
        history_indices = [
            self.product_indices[product_id]
            for product_id in history_df['ProductID']
            if product_id in self.product_indices
        ]
        if not history_indices:
            return self.recommend_popular_products(top_n)

        # User profile = mean TF-IDF vector of the history.
        # np.asarray: sparse .mean() returns np.matrix, which sklearn
        # estimators reject/deprecate — convert to a plain ndarray.
        user_profile = np.asarray(
            self.tfidf_matrix[history_indices].mean(axis=0)
        ).reshape(1, -1)

        # Calculate similarity scores against every product
        similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()

        # Series keyed by DataFrame index so we can drop/sort by label
        product_scores = pd.Series(similarity_scores, index=self.products_df.index)

        # Remove products the user has already interacted with
        product_scores = product_scores.drop(history_indices)

        # Sort by similarity score (highest first)
        product_scores = product_scores.sort_values(ascending=False)

        # Get top N product indices
        top_indices = product_scores.iloc[:top_n].index

        # Get product IDs for these indices
        return self.products_df.iloc[top_indices]['ProductID'].tolist()

    def recommend_popular_products(self, n=40):
        """
        Fallback recommendation strategy when user has no history
        In a real system, this would use actual popularity metrics
        """
        # For now, just returning random products
        return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist()
|
||||
|
||||
# Create recommender instance (module-level singleton shared by all routes)
recommender = Recommender()


@app.route('/load_products', methods=['GET'])
def load_products():
    """(Re)load all products from the database into the recommender.

    Returns 500 with an error payload when the product fetch fails,
    otherwise a success message with the product count.
    """
    products = get_all_products()
    if not products:
        return jsonify({'error': 'Failed to load products from database'}), 500

    recommender.load_products(products)
    return jsonify({'message': 'Products loaded successfully', 'count': len(products)})
|
||||
|
||||
@app.route('/recommend/<int:user_id>', methods=['GET'])
def recommend(user_id):
    """Generate, persist, and return recommendations for user_id."""
    # Lazily load the product model on the first request if startup
    # loading failed or hasn't happened yet.
    if recommender.products_df is None:
        products = get_all_products()
        if not products:
            return jsonify({'error': 'No products available'}), 500
        recommender.load_products(products)

    # Generate recommendations using cosine similarity
    recommendations = recommender.recommend_products_for_user(user_id)

    # Store recommendations in database; only report success if the
    # write succeeded, so clients can retry on 500.
    if store_user_recommendations(user_id, recommendations):
        return jsonify({
            'userId': user_id,
            'recommendations': recommendations,
            'count': len(recommendations)
        })
    else:
        return jsonify({'error': 'Failed to store recommendations'}), 500
|
||||
|
||||
@app.route('/api/user/session', methods=['POST'])
def handle_session_data():
    """Receive session data (userId, email, isAuthenticated) from the
    frontend after signup/login and validate database connectivity.

    Returns 400 on missing fields, 500 on database/server errors.
    """
    try:
        data = request.get_json()
        print("Received data:", data)  # Debug print

        user_id = data.get('userId')
        email = data.get('email')
        is_authenticated = data.get('isAuthenticated')

        # is_authenticated is compared to None explicitly so a valid
        # False value is not rejected as "missing".
        if not user_id or not email or is_authenticated is None:
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400

        print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}")

        # Test database connection first
        try:
            conn = get_db_connection()
            conn.close()
            print("Database connection successful")
        except Exception as db_err:
            print(f"Database connection error: {db_err}")
            return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500

        # Continue with the rest of your code...
        # NOTE(review): no success response is returned here — Flask will
        # raise for a None return; confirm the intended success payload.

    except Exception as e:
        import traceback
        print(f"Error in handle_session_data: {e}")
        print(traceback.format_exc())  # Print full stack trace
        return jsonify({'error': f'Server error: {str(e)}'}), 500
|
||||
|
||||
if __name__ == '__main__':
    # Load products on startup so the first /recommend request is fast;
    # routes also lazy-load as a fallback if this fails.
    products = get_all_products()
    if products:
        recommender.load_products(products)
        print(f"Loaded {len(products)} products at startup")
    else:
        print("Warning: No products loaded at startup")

    # NOTE(review): debug=True combined with host 0.0.0.0 exposes the
    # Werkzeug debugger to the whole network — disable debug outside
    # local development.
    app.run(debug=True, host='0.0.0.0', port=5000)
|
||||
|
||||
221
recommondation-engine/example1.py
Normal file
221
recommondation-engine/example1.py
Normal file
@@ -0,0 +1,221 @@
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
'''
|
||||
Recommender system using content-based filtering
|
||||
'''
|
||||
class Recommender:
    """Content-based recommender that builds per-user profiles from click
    history and ranks products by cosine similarity over TF-IDF vectors."""

    def __init__(self):
        # Initialize data structures
        self.products_df = None       # DataFrame of all products
        self.user_profiles = {}       # user_id -> {'clicks': ..., 'category_weights': ...}
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # product_id -> DataFrame row position

    def load_products(self, products_data):
        """
        Load product data into the recommender system

        products_data: list of dictionaries with product info (id, name, description, category, etc.)
        """
        self.products_df = pd.DataFrame(products_data)

        # Create a text representation for each product (combining various features)
        self.products_df['content'] = (
            self.products_df['category'] + ' ' +
            self.products_df['name'] + ' ' +
            self.products_df['description']
        )

        # Initialize TF-IDF vectorizer to convert text to vectors
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,  # Limit features to avoid sparse matrices
            ngram_range=(1, 2)  # Use both unigrams and bigrams
        )

        # Compute TF-IDF matrix
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])

        # Create a mapping from product_id to index
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['product_id']
        ).drop_duplicates()

    def track_user_click(self, user_id, product_id):
        """
        Track user clicks on products to build user profiles
        """
        if user_id not in self.user_profiles:
            self.user_profiles[user_id] = {
                'clicks': {},
                'category_weights': {
                    'electronics': 0,
                    'school supplies': 0,
                    'rental place': 0,
                    'furniture': 0
                }
            }

        # Get the clicked product's index and details
        if product_id in self.product_indices:
            product_idx = self.product_indices[product_id]
            product_category = self.products_df.iloc[product_idx]['category']

            # Update click count
            clicks = self.user_profiles[user_id]['clicks']
            clicks[product_id] = clicks.get(product_id, 0) + 1

            # Update category weight. setdefault guards against products
            # whose category is outside the four seeded ones — the
            # original raised KeyError for any new category.
            weights = self.user_profiles[user_id]['category_weights']
            weights.setdefault(product_category, 0)
            weights[product_category] += 1

    def get_user_profile_vector(self, user_id):
        """
        Generate a user profile vector based on their click history
        """
        if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']:
            # Return a zero vector if no click history
            return np.zeros((1, self.tfidf_matrix.shape[1]))

        # Create a weighted average of all clicked products' TF-IDF vectors
        clicked_product_vectors = []
        weights = []

        for product_id, click_count in self.user_profiles[user_id]['clicks'].items():
            if product_id in self.product_indices:
                product_idx = self.product_indices[product_id]
                product_category = self.products_df.iloc[product_idx]['category']
                # .get guards unseeded categories (see track_user_click)
                category_weight = self.user_profiles[user_id]['category_weights'].get(product_category, 0)

                # Weight is based on both click count and category preference
                weight = click_count * (1 + 0.5 * category_weight)
                weights.append(weight)
                clicked_product_vectors.append(self.tfidf_matrix[product_idx])

        # Normalize weights
        weights = np.array(weights) / np.sum(weights)

        # Compute weighted average
        user_profile = np.zeros((1, self.tfidf_matrix.shape[1]))
        for i, vector in enumerate(clicked_product_vectors):
            user_profile += weights[i] * vector.toarray()

        return user_profile

    def recommend_products(self, user_id, n=5, category_filter=None):
        """
        Recommend products to a user based on their profile

        user_id: ID of the user
        n: Number of recommendations to return
        category_filter: Optional filter to limit recommendations to a specific category
        """
        # Get user profile vector
        user_profile = self.get_user_profile_vector(user_id)

        # If user has no profile, recommend popular products (not implemented)
        if np.sum(user_profile) == 0:
            return self._get_popular_products(n, category_filter)

        # Calculate similarity scores
        sim_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()

        # Create a DataFrame for easier filtering
        recommendations_df = pd.DataFrame({
            'product_id': self.products_df['product_id'],
            'score': sim_scores,
            'category': self.products_df['category']
        })

        # Filter out products that the user has already clicked on
        if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']:
            clicked_products = list(self.user_profiles[user_id]['clicks'].keys())
            recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)]

        # Apply category filter if provided
        if category_filter:
            recommendations_df = recommendations_df[recommendations_df['category'] == category_filter]

        # Sort by similarity score and get top n recommendations
        recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n)

        # Return recommended product IDs
        return recommendations_df['product_id'].tolist()

    def _get_popular_products(self, n=5, category_filter=None):
        """
        Return popular products when a user has no profile
        (This would typically be implemented with actual popularity metrics)
        """
        filtered_df = self.products_df

        if category_filter:
            filtered_df = filtered_df[filtered_df['category'] == category_filter]

        # Just return random products for now (in a real system you'd use popularity metrics)
        if len(filtered_df) >= n:
            return filtered_df.sample(n)['product_id'].tolist()
        else:
            return filtered_df['product_id'].tolist()

    def recommend_by_category_preference(self, user_id, n=5):
        """
        Recommend products based primarily on the user's category preferences
        """
        if user_id not in self.user_profiles:
            return self._get_popular_products(n)

        # Get the user's most clicked category
        category_weights = self.user_profiles[user_id]['category_weights']

        # If no category has been clicked, return popular products
        if sum(category_weights.values()) == 0:
            return self._get_popular_products(n)

        # Sort categories by number of clicks
        sorted_categories = sorted(
            category_weights.items(),
            key=lambda x: x[1],
            reverse=True
        )

        recommendations = []
        remaining = n

        # Allocate recommendations proportionally across categories
        for category, weight in sorted_categories:
            if weight > 0:
                # Allocate recommendations proportionally to category weight
                category_allocation = max(1, int(remaining * (weight / sum(category_weights.values()))))
                if category_allocation > remaining:
                    category_allocation = remaining

                # Get recommendations for this category
                category_recs = self.recommend_products(user_id, category_allocation, category)
                recommendations.extend(category_recs)

                # Update remaining slots
                remaining -= len(category_recs)

                if remaining <= 0:
                    break

        # If we still have slots to fill, add general recommendations
        if remaining > 0:
            general_recs = self.recommend_products(user_id, remaining)
            # Filter out duplicates
            general_recs = [rec for rec in general_recs if rec not in recommendations]
            recommendations.extend(general_recs[:remaining])

        return recommendations


exported = Recommender()
|
||||
Reference in New Issue
Block a user