Cosine similarity calculation now working properly

This commit is contained in:
Mann Patel
2025-04-03 11:59:25 -06:00
parent e7580c36f5
commit 99f12319d5
5 changed files with 152 additions and 887 deletions

View File

@@ -1,5 +1,4 @@
-- Inserting sample data into the Marketplace database
-- Clear existing data (if needed)
SET
FOREIGN_KEY_CHECKS = 0;
@@ -59,33 +58,6 @@ VALUES
'hashedpassword2',
'555-234-5678',
'456 Oak Ave, Calgary, AB'
),
(
3,
'Michael Brown',
'michael.b@example.com',
'U345678',
'hashedpassword3',
'555-345-6789',
'789 Pine Rd, Calgary, AB'
),
(
4,
'Sarah Wilson',
'sarah.w@example.com',
'U456789',
'hashedpassword4',
'555-456-7890',
'101 Elm Blvd, Calgary, AB'
),
(
5,
'David Taylor',
'david.t@example.com',
'U567890',
'hashedpassword5',
'555-567-8901',
'202 Maple Dr, Calgary, AB'
);
-- Insert User Roles
@@ -93,10 +65,7 @@ INSERT INTO
UserRole (UserID, Client, Admin)
VALUES
(1, TRUE, TRUE),
(2, TRUE, FALSE),
(3, TRUE, FALSE),
(4, TRUE, FALSE),
(5, TRUE, FALSE);
(2, TRUE, FALSE);
-- Insert Categories
INSERT INTO
@@ -161,7 +130,7 @@ VALUES
'HP Laptop',
699.99,
1,
2,
1,
'2023 HP Pavilion, 16GB RAM, 512GB SSD',
2,
'2024-10-10 14:30:00'
@@ -171,7 +140,7 @@ VALUES
'Dorm Desk',
120.00,
1,
3,
2,
'Sturdy desk perfect for studying, minor scratches',
3,
'2024-10-12 09:15:00'
@@ -181,7 +150,7 @@ VALUES
'University Hoodie',
35.00,
3,
1,
2,
'Size L, university logo, worn twice',
4,
'2024-10-14 16:45:00'
@@ -191,7 +160,7 @@ VALUES
'Basketball',
25.50,
1,
4,
2,
'Slightly used indoor basketball',
5,
'2024-10-11 11:20:00'
@@ -201,7 +170,7 @@ VALUES
'Acoustic Guitar',
175.00,
1,
2,
1,
'Beginner acoustic guitar with case',
6,
'2024-10-09 13:10:00'
@@ -211,7 +180,7 @@ VALUES
'Physics Textbook',
65.00,
2,
5,
2,
'University Physics 14th Edition, good condition',
1,
'2024-10-08 10:30:00'
@@ -221,7 +190,7 @@ VALUES
'Mini Fridge',
85.00,
1,
3,
1,
'Small dorm fridge, works perfectly',
8,
'2024-10-13 15:00:00'
@@ -231,7 +200,7 @@ VALUES
'PlayStation 5 Controller',
55.00,
1,
4,
2,
'Extra controller, barely used',
9,
'2024-10-07 17:20:00'
@@ -241,7 +210,7 @@ VALUES
'Mountain Bike',
350.00,
1,
5,
1,
'Trek mountain bike, great condition, new tires',
10,
'2024-10-06 14:00:00'
@@ -271,7 +240,7 @@ VALUES
'Graphing Calculator',
75.00,
1,
3,
1,
'TI-84 Plus, perfect working condition',
12,
'2024-10-03 11:15:00'
@@ -281,7 +250,7 @@ VALUES
'Yoga Mat',
20.00,
1,
4,
2,
'Thick yoga mat, barely used',
13,
'2024-10-02 16:00:00'
@@ -291,7 +260,7 @@ VALUES
'Winter Jacket',
120.00,
1,
5,
1,
'Columbia winter jacket, size XL, very warm',
26,
'2024-10-01 10:20:00'
@@ -301,7 +270,7 @@ VALUES
'Computer Science Textbook',
70.00,
1,
1,
2,
'Introduction to Algorithms, like new',
1,
'2024-09-30 14:30:00'
@@ -321,7 +290,7 @@ VALUES
'Scientific Calculator',
25.00,
1,
3,
1,
'Casio scientific calculator',
12,
'2024-09-28 11:30:00'
@@ -331,7 +300,7 @@ VALUES
'Bluetooth Speaker',
45.00,
1,
4,
1,
'JBL Bluetooth speaker, great sound',
23,
'2024-09-27 15:45:00'
@@ -341,7 +310,7 @@ VALUES
'Backpack',
40.00,
1,
5,
2,
'North Face backpack, lots of pockets',
22,
'2024-09-26 09:15:00'
@@ -360,7 +329,17 @@ VALUES
('/image1.avif', 7),
('/image1.avif', 8),
('/image1.avif', 9),
('/image1.avif', 10);
('/image1.avif', 10),
('/image1.avif', 11),
('/image1.avif', 12),
('/image1.avif', 13),
('/image1.avif', 14),
('/image1.avif', 15),
('/image1.avif', 16),
('/image1.avif', 17),
('/image1.avif', 18),
('/image1.avif', 19),
('/image1.avif', 20);
-- Insert Product-Category relationships (products with multiple categories)
INSERT INTO
@@ -420,82 +399,20 @@ VALUES
-- Backpack: Backpacks & Bags, School Supplies, Dorm Essentials
-- Insert History records
--
INSERT INTO
History (HistoryID, UserID, ProductID, Date)
History (HistoryID, UserID, ProductID)
VALUES
(1, 1, 1, '2024-10-15 11:30:00'),
(2, 1, 2, '2024-10-14 13:45:00'),
(3, 1, 5, '2024-10-13 09:20:00'),
(4, 1, 4, '2024-10-12 16:10:00');
--
INSERT INTO
History (HistoryID, UserID, ProductID, Date)
VALUES
(1, 2, 1, '2024-10-15 11:30:00'), -- User 2 viewed Calculus Textbook
(2, 3, 2, '2024-10-14 13:45:00'), -- User 3 viewed HP Laptop
(3, 4, 3, '2024-10-13 09:20:00'), -- User 4 viewed Dorm Desk
(4, 5, 4, '2024-10-12 16:10:00'), -- User 5 viewed University Hoodie
(5, 1, 5, '2024-10-11 14:30:00'), -- User 1 viewed Basketball
(6, 2, 6, '2024-10-10 10:15:00'), -- User 2 viewed Acoustic Guitar
(7, 3, 7, '2024-10-09 15:40:00'), -- User 3 viewed Physics Textbook
(8, 4, 8, '2024-10-08 11:25:00'), -- User 4 viewed Mini Fridge
(9, 5, 9, '2024-10-07 17:50:00'), -- User 5 viewed PS5 Controller
(10, 1, 10, '2024-10-06 14:15:00');
-- User 1 viewed Mountain Bike
-- Insert Reviews
INSERT INTO
Review (
ReviewID,
UserID,
ProductID,
Comment,
Rating,
Date
)
VALUES
(
1,
2,
1,
'Great condition, exactly as described!',
5,
'2024-10-16 09:30:00'
),
(
2,
3,
2,
'Works well, but had a small scratch not mentioned in the listing.',
4,
'2024-10-15 14:20:00'
),
(
3,
4,
6,
'Perfect for beginners, sounds great!',
5,
'2024-10-14 11:10:00'
),
(
4,
5,
8,
'Keeps my drinks cold, but a bit noisy at night.',
3,
'2024-10-13 16:45:00'
),
(
5,
1,
10,
'Excellent bike, well maintained!',
5,
'2024-10-12 13:25:00'
);
(1, 1, 1),
(2, 1, 3),
(3, 1, 5),
(4, 1, 7),
(5, 1, 9),
(6, 1, 11),
(7, 2, 2),
(8, 2, 4),
(9, 2, 5),
(10, 1, 15),
(11, 1, 18);
-- Insert Favorites
INSERT INTO
@@ -505,9 +422,9 @@ VALUES
(1, 7), -- User 1 likes Physics Textbook
(2, 3), -- User 2 likes Dorm Desk
(2, 10), -- User 2 likes Mountain Bike
(3, 6), -- User 3 likes Acoustic Guitar
(4, 5), -- User 4 likes Basketball
(5, 8);
(1, 6), -- User 3 likes Acoustic Guitar
(1, 5), -- User 4 likes Basketball
(2, 8);
-- User 5 likes Mini Fridge
-- Insert Transactions
@@ -520,242 +437,8 @@ INSERT INTO
PaymentStatus
)
VALUES
(1, 2, 1, '2024-10-16 10:30:00', 'Completed'),
(2, 3, 6, '2024-10-15 15:45:00', 'Completed'),
(3, 4, 8, '2024-10-14 12:20:00', 'Pending'),
(4, 5, 10, '2024-10-13 17:10:00', 'Completed'),
(5, 1, 4, '2024-10-12 14:30:00', 'Completed');
-- Insert Recommendations
INSERT INTO
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
VALUES
(1, 1, 7), -- Recommend Physics Textbook to User 1
(2, 1, 13), -- Recommend Graphing Calculator to User 1
(3, 2, 3), -- Recommend Dorm Desk to User 2
(4, 2, 17), -- Recommend Desk Lamp to User 2
(5, 3, 16), -- Recommend CS Textbook to User 3
(6, 4, 14), -- Recommend Yoga Mat to User 4
(7, 5, 15);
INSERT INTO
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
VALUES
(12, 1, 19),
(13, 1, 9),
(14, 1, 11),
(15, 1, 16),
-- Insert Authentication records
INSERT INTO
AuthVerification (Email, VerificationCode, Authenticated, Date)
VALUES
(
'john.doe@example.com',
'123456',
TRUE,
'2024-10-01 09:00:00'
),
(
'jane.smith@example.com',
'234567',
TRUE,
'2024-10-02 10:15:00'
),
(
'michael.b@example.com',
'345678',
TRUE,
'2024-10-03 11:30:00'
),
(
'sarah.w@example.com',
'456789',
TRUE,
'2024-10-04 12:45:00'
),
(
'david.t@example.com',
'567890',
TRUE,
'2024-10-05 14:00:00'
);
INSERT INTO
Product (
ProductID,
Name,
Description,
Price,
StockQuantity,
CategoryID
)
VALUES
(
101,
'Smart Coffee Maker',
'Wi-Fi enabled coffee machine with scheduling feature',
129.99,
50,
11
),
(
102,
'Ergonomic Office Chair',
'Adjustable mesh chair with lumbar support',
199.99,
35,
12
),
(
103,
'Wireless Mechanical Keyboard',
'RGB-backlit wireless keyboard with mechanical switches',
89.99,
60,
13
),
(
104,
'Portable Solar Charger',
'Foldable solar power bank with USB-C support',
59.99,
40,
14
),
(
105,
'Noise-Canceling Headphones',
'Over-ear Bluetooth headphones with ANC',
179.99,
25,
15
),
(
106,
'Smart Water Bottle',
'Tracks water intake and glows as a hydration reminder',
39.99,
75,
11
),
(
107,
'Compact Air Purifier',
'HEPA filter air purifier for small rooms',
149.99,
30,
16
),
(
108,
'Smart LED Desk Lamp',
'Adjustable LED lamp with voice control',
69.99,
45,
12
),
(
109,
'4K Streaming Device',
'HDMI streaming stick with voice remote',
49.99,
80,
17
),
(
110,
'Smart Plant Monitor',
'Bluetooth-enabled sensor for plant health tracking',
34.99,
55,
18
),
(
111,
'Wireless Charging Pad',
'Fast-charging pad for Qi-compatible devices',
29.99,
90,
13
),
(
112,
'Mini Projector',
'Portable projector with built-in speakers',
129.99,
20,
14
),
(
113,
'Foldable Bluetooth Keyboard',
'Ultra-thin keyboard for travel use',
39.99,
70,
19
),
(
114,
'Smart Alarm Clock',
'AI-powered alarm clock with sunrise simulation',
79.99,
40,
15
),
(
115,
'Touchscreen Toaster',
'Customizable toaster with a digital display',
99.99,
30,
11
),
(
116,
'Cordless Vacuum Cleaner',
'Lightweight handheld vacuum with strong suction',
159.99,
25,
16
),
(
117,
'Smart Bike Lock',
'Fingerprint and app-controlled bike security lock',
89.99,
35,
20
),
(
118,
'Bluetooth Sleep Headband',
'Comfortable sleep headband with built-in speakers',
49.99,
60,
18
),
(
119,
'Retro Game Console',
'Plug-and-play console with 500+ classic games',
79.99,
50,
17
),
(
120,
'Automatic Pet Feeder',
'App-controlled food dispenser for pets',
99.99,
40,
20
);
SELECT
p.*,
i.URL AS image_url
FROM
Product p
LEFT JOIN Image_URL i ON p.ProductID = i.ProductID
WHERE
p.ProductID = 1
(1, 1, 1, '2024-10-16 10:30:00', 'Completed'),
(2, 1, 6, '2024-10-15 15:45:00', 'Completed'),
(3, 1, 8, '2024-10-14 12:20:00', 'Pending'),
(4, 2, 10, '2024-10-13 17:10:00', 'Completed'),
(5, 2, 4, '2024-10-12 14:30:00', 'Completed');

View File

@@ -1,272 +0,0 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import mysql.connector as db_con
# Flask app initialization
app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)
# Database connection setup
def get_db_connection():
    """Open and return a new connection to the local Marketplace MySQL database."""
    connection_params = {
        "host": "localhost",
        "port": 3306,
        "user": "root",
        "database": "Marketplace",
    }
    return db_con.connect(**connection_params)
# Fetch all products with category names
def get_all_products():
    """Fetch every product joined with its category name.

    Returns a list of dicts with keys ProductID, Name, Description and
    Category, or an empty list (after logging) on any database error.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM Product p
    JOIN Category c ON p.CategoryID = c.CategoryID
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql)
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Database error getting products: {exc}")
        return []
# Fetch user history
def get_user_history(user_id):
    """Return the products (with category names) the given user has viewed.

    Reads the History table for *user_id*; returns an empty list (after
    logging) on any database error.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM History h
    JOIN Product p ON h.ProductID = p.ProductID
    JOIN Category c ON p.CategoryID = c.CategoryID
    WHERE h.UserID = %s
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql, (user_id,))
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Error getting user history: {exc}")
        return []
# Store recommendations
def store_user_recommendations(user_id, recommendations):
    """Replace the stored recommendation list for *user_id*.

    Deletes the user's existing Recommendation rows, then inserts one row
    per product id in *recommendations*. Returns True on success, False
    (after logging) on any database error.
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        # Wipe the previous recommendation set before writing the new one.
        cur.execute("DELETE FROM Recommendation WHERE UserID = %s", (user_id,))
        cur.executemany(
            "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)",
            [(user_id, product_id) for product_id in recommendations],
        )
        conn.commit()
        cur.close()
        conn.close()
        return True
    except Exception as exc:
        print(f"Error storing recommendations: {exc}")
        return False
# Fetch stored recommendations
def get_stored_recommendations(user_id):
    """Fetch the recommendations previously persisted for *user_id*.

    Joins Recommendation back to Product/Category so the caller gets full
    product rows; returns an empty list (after logging) on database errors.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM Recommendation r
    JOIN Product p ON r.RecommendedProductID = p.ProductID
    JOIN Category c ON p.CategoryID = c.CategoryID
    WHERE r.UserID = %s
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql, (user_id,))
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Error getting stored recommendations: {exc}")
        return []
# Initialize Recommender class
class Recommender:
    """Content-based product recommender.

    Builds a TF-IDF matrix over each product's category, name and
    description, then ranks products for a user by cosine similarity
    between the mean TF-IDF vector of the user's history and every
    product vector.
    """

    def __init__(self):
        self.products_df = None       # DataFrame of all products
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # Series mapping ProductID -> row index

    def load_products(self, products_data):
        """Load products and (re)build the TF-IDF model.

        products_data: list of dicts with at least ProductID, Name,
        Description and Category keys (as returned by get_all_products()).
        """
        self.products_df = pd.DataFrame(products_data)
        # Combine relevant features for content-based filtering.
        self.products_df['content'] = (
            self.products_df['Category'] + ' ' +
            self.products_df['Name'] + ' ' +
            self.products_df['Description'].fillna('')
        )
        # Create TF-IDF matrix over the combined text.
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])
        # Map product IDs to row indices for quick lookup.
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['ProductID']
        ).drop_duplicates()

    def recommend_products_for_user(self, user_id, top_n=40):
        """Return up to *top_n* ProductIDs ranked by cosine similarity
        between the user's history profile and all products.

        Falls back to recommend_popular_products() when the user has no
        usable history.
        """
        user_history = get_user_history(user_id)
        if not user_history:
            return self.recommend_popular_products(top_n)
        history_df = pd.DataFrame(user_history)
        # Translate history ProductIDs to matrix row indices; unknown
        # products (not present when the model was built) are skipped.
        history_indices = []
        for product_id in history_df['ProductID']:
            if product_id in self.product_indices:
                history_indices.append(self.product_indices[product_id])
        if not history_indices:
            return self.recommend_popular_products(top_n)
        # Bug fix: sparse .mean(axis=0) yields an np.matrix, which modern
        # scikit-learn rejects ("np.matrix is not supported") — convert to
        # a plain ndarray before computing similarities.
        user_profile = np.asarray(
            self.tfidf_matrix[history_indices].mean(axis=0)
        ).reshape(1, -1)
        similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()
        product_scores = pd.Series(similarity_scores, index=self.products_df.index)
        # Remove products the user has already interacted with
        # (Series.drop tolerates duplicate labels from repeat views).
        product_scores = product_scores.drop(history_indices)
        product_scores = product_scores.sort_values(ascending=False)
        top_indices = product_scores.iloc[:top_n].index
        # Use label-based lookup: top_indices are index labels of products_df.
        return self.products_df.loc[top_indices, 'ProductID'].tolist()

    def recommend_popular_products(self, n=40):
        """Fallback when a user has no history: return up to *n* random
        ProductIDs. A real deployment would use actual popularity metrics."""
        return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist()
# Create recommender instance
recommender = Recommender()
@app.route('/load_products', methods=['GET'])
def load_products():
    """Endpoint: refresh the recommender's product catalog from the database."""
    catalog = get_all_products()
    if catalog:
        recommender.load_products(catalog)
        return jsonify({'message': 'Products loaded successfully', 'count': len(catalog)})
    return jsonify({'error': 'Failed to load products from database'}), 500
@app.route('/recommend/<int:user_id>', methods=['GET'])
def recommend(user_id):
    """Endpoint: compute, persist and return recommendations for *user_id*."""
    # Lazily load the catalog if the recommender has not been primed yet.
    if recommender.products_df is None:
        catalog = get_all_products()
        if not catalog:
            return jsonify({'error': 'No products available'}), 500
        recommender.load_products(catalog)
    # Generate recommendations using cosine similarity over user history.
    recommendations = recommender.recommend_products_for_user(user_id)
    # Persist them so other services can read the stored list.
    if not store_user_recommendations(user_id, recommendations):
        return jsonify({'error': 'Failed to store recommendations'}), 500
    return jsonify({
        'userId': user_id,
        'recommendations': recommendations,
        'count': len(recommendations),
    })
@app.route('/api/user/session', methods=['POST'])
def handle_session_data():
    """Receive session info (userId, email, isAuthenticated) from the frontend.

    Validates the payload and verifies database connectivity.
    Returns 400 on a malformed payload, 500 on server/database errors,
    200 once the session data has been accepted.
    """
    try:
        data = request.get_json()
        print("Received data:", data)  # Debug print
        user_id = data.get('userId')
        email = data.get('email')
        is_authenticated = data.get('isAuthenticated')
        # Bug fix: use explicit None checks so falsy-but-valid values
        # (e.g. a user id of 0) are not rejected as missing.
        if user_id is None or not email or is_authenticated is None:
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400
        print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}")
        # Test database connection first
        try:
            conn = get_db_connection()
            conn.close()
            print("Database connection successful")
        except Exception as db_err:
            print(f"Database connection error: {db_err}")
            return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500
        # Bug fix: the original fell off the end here and returned None,
        # which makes Flask raise "view function did not return a valid
        # response"; acknowledge the session data explicitly instead.
        return jsonify({'message': 'Session data received'}), 200
    except Exception as e:
        import traceback
        print(f"Error in handle_session_data: {e}")
        print(traceback.format_exc())  # Print full stack trace
        return jsonify({'error': f'Server error: {str(e)}'}), 500
if __name__ == '__main__':
    # Warm the recommender with the full catalog before serving requests.
    startup_products = get_all_products()
    if startup_products:
        recommender.load_products(startup_products)
        print(f"Loaded {len(startup_products)} products at startup")
    else:
        print("Warning: No products loaded at startup")
    app.run(debug=True, host='0.0.0.0', port=5000)

View File

@@ -1,5 +0,0 @@
-- All products that user 1 has previously viewed (per the History table).
SELECT *
FROM Product
WHERE ProductID IN (
    SELECT ProductID
    FROM History
    WHERE UserID = 1
);

View File

@@ -1,221 +1,113 @@
# pip install mysql.connector
import pandas as pd
import numpy as np
import mysql.connector
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import logging
'''
Recommender system using content-based filtering
'''
class Recommender:
def __init__(self):
# Initialize data structures
self.products_df = None
self.user_profiles = {}
self.tfidf_matrix = None
self.tfidf_vectorizer = None
self.product_indices = None
def load_products(self, products_data):
"""
Load product data into the recommender system
products_data: list of dictionaries with product info (id, name, description, category, etc.)
"""
self.products_df = pd.DataFrame(products_data)
# Create a text representation for each product (combining various features)
self.products_df['content'] = (
self.products_df['category'] + ' ' +
self.products_df['name'] + ' ' +
self.products_df['description']
def database():
db_connection = mysql.connector.connect(
host = "localhost",
port = "3306",
user = "root",
database = "Marketplace"
)
return db_connection
# Initialize TF-IDF vectorizer to convert text to vectors
self.tfidf_vectorizer = TfidfVectorizer(
stop_words='english',
max_features=5000, # Limit features to avoid sparse matrices
ngram_range=(1, 2) # Use both unigrams and bigrams
)
# Compute TF-IDF matrix
self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])
def get_all_products():
# Create a mapping from product_id to index
self.product_indices = pd.Series(
self.products_df.index,
index=self.products_df['product_id']
).drop_duplicates()
db_con = database()
cursor = db_con.cursor()
def track_user_click(self, user_id, product_id):
cursor.execute("SELECT CategoryID FROM Category")
categories = cursor.fetchall()
select_clause = "SELECT p.ProductID"
for category in categories:
category_id = category[0]
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
final_query = f"""
{select_clause}
FROM Product p
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
GROUP BY p.ProductID;
"""
Track user clicks on products to build user profiles
cursor.execute(final_query)
results = cursor.fetchall()
final = []
for row in results:
text_list = list(row)
text_list.pop(0)
final.append(text_list)
cursor.close()
db_con.close()
return final
def get_user_history(user_id):
db_con = database()
cursor = db_con.cursor()
cursor.execute("SELECT CategoryID FROM Category")
categories = cursor.fetchall()
select_clause = "SELECT p.ProductID"
for category in categories:
category_id = category[0] # get the uid of the catefory and then append that to the new column
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
final_query = f"""
{select_clause}
FROM Product p
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
where p.ProductID in (select ProductID from History where UserID = {user_id})
GROUP BY p.ProductID;
"""
if user_id not in self.user_profiles:
self.user_profiles[user_id] = {
'clicks': {},
'category_weights': {
'electronics': 0,
'school supplies': 0,
'rental place': 0,
'furniture': 0
}
}
# Get the clicked product's index and details
if product_id in self.product_indices:
product_idx = self.product_indices[product_id]
product_category = self.products_df.iloc[product_idx]['category']
cursor.execute(final_query)
results = cursor.fetchall()
final = []
for row in results:
text_list = list(row)
text_list.pop(0)
final.append(text_list)
# Update click count
if product_id in self.user_profiles[user_id]['clicks']:
self.user_profiles[user_id]['clicks'][product_id] += 1
else:
self.user_profiles[user_id]['clicks'][product_id] = 1
cursor.close()
db_con.close()
return final
# Update category weight
self.user_profiles[user_id]['category_weights'][product_category] += 1
def get_user_profile_vector(self, user_id):
"""
Generate a user profile vector based on their click history
"""
if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']:
# Return a zero vector if no click history
return np.zeros((1, self.tfidf_matrix.shape[1]))
def get_recommendations(user_id, top_n=40):
try:
# Get all products and user history with their category vectors
all_products = get_all_products()
user_history = get_user_history(user_id)
# Create a weighted average of all clicked products' TF-IDF vectors
clicked_product_vectors = []
weights = []
# if not user_history:
# # Cold start: return popular products
# return get_popular_products(top_n)
for product_id, click_count in self.user_profiles[user_id]['clicks'].items():
if product_id in self.product_indices:
product_idx = self.product_indices[product_id]
product_category = self.products_df.iloc[product_idx]['category']
category_weight = self.user_profiles[user_id]['category_weights'][product_category]
# Calculate similarity between all products and user history
user_profile = np.mean(user_history, axis=0) # Average user preferences
similarities = cosine_similarity([user_profile], all_products)
# Weight is based on both click count and category preference
weight = click_count * (1 + 0.5 * category_weight)
weights.append(weight)
clicked_product_vectors.append(self.tfidf_matrix[product_idx])
# Normalize weights
weights = np.array(weights) / np.sum(weights)
# Compute weighted average
user_profile = np.zeros((1, self.tfidf_matrix.shape[1]))
for i, vector in enumerate(clicked_product_vectors):
user_profile += weights[i] * vector.toarray()
return user_profile
def recommend_products(self, user_id, n=5, category_filter=None):
"""
Recommend products to a user based on their profile
user_id: ID of the user
n: Number of recommendations to return
category_filter: Optional filter to limit recommendations to a specific category
"""
# Get user profile vector
user_profile = self.get_user_profile_vector(user_id)
# If user has no profile, recommend popular products (not implemented)
if np.sum(user_profile) == 0:
return self._get_popular_products(n, category_filter)
# Calculate similarity scores
sim_scores = cosine_similarity(user_profile, self.tfidf_matrix)
sim_scores = sim_scores.flatten()
# Create a DataFrame for easier filtering
recommendations_df = pd.DataFrame({
'product_id': self.products_df['product_id'],
'score': sim_scores,
'category': self.products_df['category']
})
# Filter out products that the user has already clicked on
if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']:
clicked_products = list(self.user_profiles[user_id]['clicks'].keys())
recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)]
# Apply category filter if provided
if category_filter:
recommendations_df = recommendations_df[recommendations_df['category'] == category_filter]
# Sort by similarity score and get top n recommendations
recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n)
# finds the indices of the top N products that have the highest
# cosine similarity with the user's profile and sorted from most similar to least similar.
product_indices = similarities[0].argsort()[-top_n:][::-1]
print("product", product_indices)
# Return recommended product IDs
return recommendations_df['product_id'].tolist()
def _get_popular_products(self, n=5, category_filter=None):
"""
Return popular products when a user has no profile
(This would typically be implemented with actual popularity metrics)
"""
filtered_df = self.products_df
if category_filter:
filtered_df = filtered_df[filtered_df['category'] == category_filter]
# Just return random products for now (in a real system you'd use popularity metrics)
if len(filtered_df) >= n:
return filtered_df.sample(n)['product_id'].tolist()
else:
return filtered_df['product_id'].tolist()
def recommend_by_category_preference(self, user_id, n=5):
"""
Recommend products based primarily on the user's category preferences
"""
if user_id not in self.user_profiles:
return self._get_popular_products(n)
# Get the user's most clicked category
category_weights = self.user_profiles[user_id]['category_weights']
# If no category has been clicked, return popular products
if sum(category_weights.values()) == 0:
return self._get_popular_products(n)
# Sort categories by number of clicks
sorted_categories = sorted(
category_weights.items(),
key=lambda x: x[1],
reverse=True
)
recommendations = []
remaining = n
# Allocate recommendations proportionally across categories
for category, weight in sorted_categories:
if weight > 0:
# Allocate recommendations proportionally to category weight
category_allocation = max(1, int(remaining * (weight / sum(category_weights.values()))))
if category_allocation > remaining:
category_allocation = remaining
# Get recommendations for this category
category_recs = self.recommend_products(user_id, category_allocation, category)
recommendations.extend(category_recs)
# Update remaining slots
remaining -= len(category_recs)
if remaining <= 0:
break
# If we still have slots to fill, add general recommendations
if remaining > 0:
general_recs = self.recommend_products(user_id, remaining)
# Filter out duplicates
general_recs = [rec for rec in general_recs if rec not in recommendations]
recommendations.extend(general_recs[:remaining])
return recommendations
return [all_products[i][0] for i in product_indices] # Product IDs
except Exception as e:
logging.error(f"Recommendation error for user {user_id}: {str(e)}")
# return get_popular_products(top_n) # Fallback to popular products
exported = Recommender()
get_recommendations(1)

View File

@@ -1,33 +0,0 @@
import mysql.connector as db_con
#TODO: Specify all the required queries
query_get_all_Prod= ("SELECT * FROM Product ")
#TODO: connect with the db
def database():
    """Connect to the Marketplace DB, fetch all products, print the first row.

    NOTE(review): `rows` starts with a None placeholder, so the first
    fetched product sits at index 1; connection parameters are hard-coded
    for local development — confirm before reuse.
    """
    db = db_con.connect(
        host="localhost",
        port=3306,
        user="root",
        database="Marketplace",
    )
    cursor = db.cursor()
    cursor.execute(query_get_all_Prod)
    rows = [None]  # placeholder keeps the first product at index 1
    for record in cursor:
        rows.append(record)
    print(rows[1])  # show the first product row
    cursor.close()
    db.close()
#TODO: Get All products
# Make it into a dictionary with product id and the list of category it would have
# {Prod1:[1,0,0,0,1]} this could mean its a [elctronics, 0,0,0, kitchen]
database()