Cosine Sim Calc now Working properly
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
-- Inserting sample data into the Marketplace database
|
||||
-- Clear existing data (if needed)
|
||||
SET
|
||||
FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
@@ -59,33 +58,6 @@ VALUES
|
||||
'hashedpassword2',
|
||||
'555-234-5678',
|
||||
'456 Oak Ave, Calgary, AB'
|
||||
),
|
||||
(
|
||||
3,
|
||||
'Michael Brown',
|
||||
'michael.b@example.com',
|
||||
'U345678',
|
||||
'hashedpassword3',
|
||||
'555-345-6789',
|
||||
'789 Pine Rd, Calgary, AB'
|
||||
),
|
||||
(
|
||||
4,
|
||||
'Sarah Wilson',
|
||||
'sarah.w@example.com',
|
||||
'U456789',
|
||||
'hashedpassword4',
|
||||
'555-456-7890',
|
||||
'101 Elm Blvd, Calgary, AB'
|
||||
),
|
||||
(
|
||||
5,
|
||||
'David Taylor',
|
||||
'david.t@example.com',
|
||||
'U567890',
|
||||
'hashedpassword5',
|
||||
'555-567-8901',
|
||||
'202 Maple Dr, Calgary, AB'
|
||||
);
|
||||
|
||||
-- Insert User Roles
|
||||
@@ -93,10 +65,7 @@ INSERT INTO
|
||||
UserRole (UserID, Client, Admin)
|
||||
VALUES
|
||||
(1, TRUE, TRUE),
|
||||
(2, TRUE, FALSE),
|
||||
(3, TRUE, FALSE),
|
||||
(4, TRUE, FALSE),
|
||||
(5, TRUE, FALSE);
|
||||
(2, TRUE, FALSE);
|
||||
|
||||
-- Insert Categories
|
||||
INSERT INTO
|
||||
@@ -161,7 +130,7 @@ VALUES
|
||||
'HP Laptop',
|
||||
699.99,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
'2023 HP Pavilion, 16GB RAM, 512GB SSD',
|
||||
2,
|
||||
'2024-10-10 14:30:00'
|
||||
@@ -171,7 +140,7 @@ VALUES
|
||||
'Dorm Desk',
|
||||
120.00,
|
||||
1,
|
||||
3,
|
||||
2,
|
||||
'Sturdy desk perfect for studying, minor scratches',
|
||||
3,
|
||||
'2024-10-12 09:15:00'
|
||||
@@ -181,7 +150,7 @@ VALUES
|
||||
'University Hoodie',
|
||||
35.00,
|
||||
3,
|
||||
1,
|
||||
2,
|
||||
'Size L, university logo, worn twice',
|
||||
4,
|
||||
'2024-10-14 16:45:00'
|
||||
@@ -191,7 +160,7 @@ VALUES
|
||||
'Basketball',
|
||||
25.50,
|
||||
1,
|
||||
4,
|
||||
2,
|
||||
'Slightly used indoor basketball',
|
||||
5,
|
||||
'2024-10-11 11:20:00'
|
||||
@@ -201,7 +170,7 @@ VALUES
|
||||
'Acoustic Guitar',
|
||||
175.00,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
'Beginner acoustic guitar with case',
|
||||
6,
|
||||
'2024-10-09 13:10:00'
|
||||
@@ -211,7 +180,7 @@ VALUES
|
||||
'Physics Textbook',
|
||||
65.00,
|
||||
2,
|
||||
5,
|
||||
2,
|
||||
'University Physics 14th Edition, good condition',
|
||||
1,
|
||||
'2024-10-08 10:30:00'
|
||||
@@ -221,7 +190,7 @@ VALUES
|
||||
'Mini Fridge',
|
||||
85.00,
|
||||
1,
|
||||
3,
|
||||
1,
|
||||
'Small dorm fridge, works perfectly',
|
||||
8,
|
||||
'2024-10-13 15:00:00'
|
||||
@@ -231,7 +200,7 @@ VALUES
|
||||
'PlayStation 5 Controller',
|
||||
55.00,
|
||||
1,
|
||||
4,
|
||||
2,
|
||||
'Extra controller, barely used',
|
||||
9,
|
||||
'2024-10-07 17:20:00'
|
||||
@@ -241,7 +210,7 @@ VALUES
|
||||
'Mountain Bike',
|
||||
350.00,
|
||||
1,
|
||||
5,
|
||||
1,
|
||||
'Trek mountain bike, great condition, new tires',
|
||||
10,
|
||||
'2024-10-06 14:00:00'
|
||||
@@ -271,7 +240,7 @@ VALUES
|
||||
'Graphing Calculator',
|
||||
75.00,
|
||||
1,
|
||||
3,
|
||||
1,
|
||||
'TI-84 Plus, perfect working condition',
|
||||
12,
|
||||
'2024-10-03 11:15:00'
|
||||
@@ -281,7 +250,7 @@ VALUES
|
||||
'Yoga Mat',
|
||||
20.00,
|
||||
1,
|
||||
4,
|
||||
2,
|
||||
'Thick yoga mat, barely used',
|
||||
13,
|
||||
'2024-10-02 16:00:00'
|
||||
@@ -291,7 +260,7 @@ VALUES
|
||||
'Winter Jacket',
|
||||
120.00,
|
||||
1,
|
||||
5,
|
||||
1,
|
||||
'Columbia winter jacket, size XL, very warm',
|
||||
26,
|
||||
'2024-10-01 10:20:00'
|
||||
@@ -301,7 +270,7 @@ VALUES
|
||||
'Computer Science Textbook',
|
||||
70.00,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
'Introduction to Algorithms, like new',
|
||||
1,
|
||||
'2024-09-30 14:30:00'
|
||||
@@ -321,7 +290,7 @@ VALUES
|
||||
'Scientific Calculator',
|
||||
25.00,
|
||||
1,
|
||||
3,
|
||||
1,
|
||||
'Casio scientific calculator',
|
||||
12,
|
||||
'2024-09-28 11:30:00'
|
||||
@@ -331,7 +300,7 @@ VALUES
|
||||
'Bluetooth Speaker',
|
||||
45.00,
|
||||
1,
|
||||
4,
|
||||
1,
|
||||
'JBL Bluetooth speaker, great sound',
|
||||
23,
|
||||
'2024-09-27 15:45:00'
|
||||
@@ -341,7 +310,7 @@ VALUES
|
||||
'Backpack',
|
||||
40.00,
|
||||
1,
|
||||
5,
|
||||
2,
|
||||
'North Face backpack, lots of pockets',
|
||||
22,
|
||||
'2024-09-26 09:15:00'
|
||||
@@ -360,7 +329,17 @@ VALUES
|
||||
('/image1.avif', 7),
|
||||
('/image1.avif', 8),
|
||||
('/image1.avif', 9),
|
||||
('/image1.avif', 10);
|
||||
('/image1.avif', 10),
|
||||
('/image1.avif', 11),
|
||||
('/image1.avif', 12),
|
||||
('/image1.avif', 13),
|
||||
('/image1.avif', 14),
|
||||
('/image1.avif', 15),
|
||||
('/image1.avif', 16),
|
||||
('/image1.avif', 17),
|
||||
('/image1.avif', 18),
|
||||
('/image1.avif', 19),
|
||||
('/image1.avif', 20);
|
||||
|
||||
-- Insert Product-Category relationships (products with multiple categories)
|
||||
INSERT INTO
|
||||
@@ -420,82 +399,20 @@ VALUES
|
||||
|
||||
-- Backpack: Backpacks & Bags, School Supplies, Dorm Essentials
|
||||
-- Insert History records
|
||||
--
|
||||
INSERT INTO
|
||||
History (HistoryID, UserID, ProductID, Date)
|
||||
History (HistoryID, UserID, ProductID)
|
||||
VALUES
|
||||
(1, 1, 1, '2024-10-15 11:30:00'),
|
||||
(2, 1, 2, '2024-10-14 13:45:00'),
|
||||
(3, 1, 5, '2024-10-13 09:20:00'),
|
||||
(4, 1, 4, '2024-10-12 16:10:00');
|
||||
|
||||
--
|
||||
INSERT INTO
|
||||
History (HistoryID, UserID, ProductID, Date)
|
||||
VALUES
|
||||
(1, 2, 1, '2024-10-15 11:30:00'), -- User 2 viewed Calculus Textbook
|
||||
(2, 3, 2, '2024-10-14 13:45:00'), -- User 3 viewed HP Laptop
|
||||
(3, 4, 3, '2024-10-13 09:20:00'), -- User 4 viewed Dorm Desk
|
||||
(4, 5, 4, '2024-10-12 16:10:00'), -- User 5 viewed University Hoodie
|
||||
(5, 1, 5, '2024-10-11 14:30:00'), -- User 1 viewed Basketball
|
||||
(6, 2, 6, '2024-10-10 10:15:00'), -- User 2 viewed Acoustic Guitar
|
||||
(7, 3, 7, '2024-10-09 15:40:00'), -- User 3 viewed Physics Textbook
|
||||
(8, 4, 8, '2024-10-08 11:25:00'), -- User 4 viewed Mini Fridge
|
||||
(9, 5, 9, '2024-10-07 17:50:00'), -- User 5 viewed PS5 Controller
|
||||
(10, 1, 10, '2024-10-06 14:15:00');
|
||||
|
||||
-- User 1 viewed Mountain Bike
|
||||
-- Insert Reviews
|
||||
INSERT INTO
|
||||
Review (
|
||||
ReviewID,
|
||||
UserID,
|
||||
ProductID,
|
||||
Comment,
|
||||
Rating,
|
||||
Date
|
||||
)
|
||||
VALUES
|
||||
(
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
'Great condition, exactly as described!',
|
||||
5,
|
||||
'2024-10-16 09:30:00'
|
||||
),
|
||||
(
|
||||
2,
|
||||
3,
|
||||
2,
|
||||
'Works well, but had a small scratch not mentioned in the listing.',
|
||||
4,
|
||||
'2024-10-15 14:20:00'
|
||||
),
|
||||
(
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
'Perfect for beginners, sounds great!',
|
||||
5,
|
||||
'2024-10-14 11:10:00'
|
||||
),
|
||||
(
|
||||
4,
|
||||
5,
|
||||
8,
|
||||
'Keeps my drinks cold, but a bit noisy at night.',
|
||||
3,
|
||||
'2024-10-13 16:45:00'
|
||||
),
|
||||
(
|
||||
5,
|
||||
1,
|
||||
10,
|
||||
'Excellent bike, well maintained!',
|
||||
5,
|
||||
'2024-10-12 13:25:00'
|
||||
);
|
||||
(1, 1, 1),
|
||||
(2, 1, 3),
|
||||
(3, 1, 5),
|
||||
(4, 1, 7),
|
||||
(5, 1, 9),
|
||||
(6, 1, 11),
|
||||
(7, 2, 2),
|
||||
(8, 2, 4),
|
||||
(9, 2, 5),
|
||||
(10, 1, 15),
|
||||
(11, 1, 18);
|
||||
|
||||
-- Insert Favorites
|
||||
INSERT INTO
|
||||
@@ -505,9 +422,9 @@ VALUES
|
||||
(1, 7), -- User 1 likes Physics Textbook
|
||||
(2, 3), -- User 2 likes Dorm Desk
|
||||
(2, 10), -- User 2 likes Mountain Bike
|
||||
(3, 6), -- User 3 likes Acoustic Guitar
|
||||
(4, 5), -- User 4 likes Basketball
|
||||
(5, 8);
|
||||
(1, 6), -- User 3 likes Acoustic Guitar
|
||||
(1, 5), -- User 4 likes Basketball
|
||||
(2, 8);
|
||||
|
||||
-- User 5 likes Mini Fridge
|
||||
-- Insert Transactions
|
||||
@@ -520,242 +437,8 @@ INSERT INTO
|
||||
PaymentStatus
|
||||
)
|
||||
VALUES
|
||||
(1, 2, 1, '2024-10-16 10:30:00', 'Completed'),
|
||||
(2, 3, 6, '2024-10-15 15:45:00', 'Completed'),
|
||||
(3, 4, 8, '2024-10-14 12:20:00', 'Pending'),
|
||||
(4, 5, 10, '2024-10-13 17:10:00', 'Completed'),
|
||||
(5, 1, 4, '2024-10-12 14:30:00', 'Completed');
|
||||
|
||||
-- Insert Recommendations
|
||||
INSERT INTO
|
||||
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
|
||||
VALUES
|
||||
(1, 1, 7), -- Recommend Physics Textbook to User 1
|
||||
(2, 1, 13), -- Recommend Graphing Calculator to User 1
|
||||
(3, 2, 3), -- Recommend Dorm Desk to User 2
|
||||
(4, 2, 17), -- Recommend Desk Lamp to User 2
|
||||
(5, 3, 16), -- Recommend CS Textbook to User 3
|
||||
(6, 4, 14), -- Recommend Yoga Mat to User 4
|
||||
(7, 5, 15);
|
||||
|
||||
INSERT INTO
|
||||
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
|
||||
VALUES
|
||||
(12, 1, 19),
|
||||
(13, 1, 9),
|
||||
(14, 1, 11),
|
||||
(15, 1, 16),
|
||||
-- Insert Authentication records
|
||||
INSERT INTO
|
||||
AuthVerification (Email, VerificationCode, Authenticated, Date)
|
||||
VALUES
|
||||
(
|
||||
'john.doe@example.com',
|
||||
'123456',
|
||||
TRUE,
|
||||
'2024-10-01 09:00:00'
|
||||
),
|
||||
(
|
||||
'jane.smith@example.com',
|
||||
'234567',
|
||||
TRUE,
|
||||
'2024-10-02 10:15:00'
|
||||
),
|
||||
(
|
||||
'michael.b@example.com',
|
||||
'345678',
|
||||
TRUE,
|
||||
'2024-10-03 11:30:00'
|
||||
),
|
||||
(
|
||||
'sarah.w@example.com',
|
||||
'456789',
|
||||
TRUE,
|
||||
'2024-10-04 12:45:00'
|
||||
),
|
||||
(
|
||||
'david.t@example.com',
|
||||
'567890',
|
||||
TRUE,
|
||||
'2024-10-05 14:00:00'
|
||||
);
|
||||
|
||||
INSERT INTO
|
||||
Product (
|
||||
ProductID,
|
||||
Name,
|
||||
Description,
|
||||
Price,
|
||||
StockQuantity,
|
||||
CategoryID
|
||||
)
|
||||
VALUES
|
||||
(
|
||||
101,
|
||||
'Smart Coffee Maker',
|
||||
'Wi-Fi enabled coffee machine with scheduling feature',
|
||||
129.99,
|
||||
50,
|
||||
11
|
||||
),
|
||||
(
|
||||
102,
|
||||
'Ergonomic Office Chair',
|
||||
'Adjustable mesh chair with lumbar support',
|
||||
199.99,
|
||||
35,
|
||||
12
|
||||
),
|
||||
(
|
||||
103,
|
||||
'Wireless Mechanical Keyboard',
|
||||
'RGB-backlit wireless keyboard with mechanical switches',
|
||||
89.99,
|
||||
60,
|
||||
13
|
||||
),
|
||||
(
|
||||
104,
|
||||
'Portable Solar Charger',
|
||||
'Foldable solar power bank with USB-C support',
|
||||
59.99,
|
||||
40,
|
||||
14
|
||||
),
|
||||
(
|
||||
105,
|
||||
'Noise-Canceling Headphones',
|
||||
'Over-ear Bluetooth headphones with ANC',
|
||||
179.99,
|
||||
25,
|
||||
15
|
||||
),
|
||||
(
|
||||
106,
|
||||
'Smart Water Bottle',
|
||||
'Tracks water intake and glows as a hydration reminder',
|
||||
39.99,
|
||||
75,
|
||||
11
|
||||
),
|
||||
(
|
||||
107,
|
||||
'Compact Air Purifier',
|
||||
'HEPA filter air purifier for small rooms',
|
||||
149.99,
|
||||
30,
|
||||
16
|
||||
),
|
||||
(
|
||||
108,
|
||||
'Smart LED Desk Lamp',
|
||||
'Adjustable LED lamp with voice control',
|
||||
69.99,
|
||||
45,
|
||||
12
|
||||
),
|
||||
(
|
||||
109,
|
||||
'4K Streaming Device',
|
||||
'HDMI streaming stick with voice remote',
|
||||
49.99,
|
||||
80,
|
||||
17
|
||||
),
|
||||
(
|
||||
110,
|
||||
'Smart Plant Monitor',
|
||||
'Bluetooth-enabled sensor for plant health tracking',
|
||||
34.99,
|
||||
55,
|
||||
18
|
||||
),
|
||||
(
|
||||
111,
|
||||
'Wireless Charging Pad',
|
||||
'Fast-charging pad for Qi-compatible devices',
|
||||
29.99,
|
||||
90,
|
||||
13
|
||||
),
|
||||
(
|
||||
112,
|
||||
'Mini Projector',
|
||||
'Portable projector with built-in speakers',
|
||||
129.99,
|
||||
20,
|
||||
14
|
||||
),
|
||||
(
|
||||
113,
|
||||
'Foldable Bluetooth Keyboard',
|
||||
'Ultra-thin keyboard for travel use',
|
||||
39.99,
|
||||
70,
|
||||
19
|
||||
),
|
||||
(
|
||||
114,
|
||||
'Smart Alarm Clock',
|
||||
'AI-powered alarm clock with sunrise simulation',
|
||||
79.99,
|
||||
40,
|
||||
15
|
||||
),
|
||||
(
|
||||
115,
|
||||
'Touchscreen Toaster',
|
||||
'Customizable toaster with a digital display',
|
||||
99.99,
|
||||
30,
|
||||
11
|
||||
),
|
||||
(
|
||||
116,
|
||||
'Cordless Vacuum Cleaner',
|
||||
'Lightweight handheld vacuum with strong suction',
|
||||
159.99,
|
||||
25,
|
||||
16
|
||||
),
|
||||
(
|
||||
117,
|
||||
'Smart Bike Lock',
|
||||
'Fingerprint and app-controlled bike security lock',
|
||||
89.99,
|
||||
35,
|
||||
20
|
||||
),
|
||||
(
|
||||
118,
|
||||
'Bluetooth Sleep Headband',
|
||||
'Comfortable sleep headband with built-in speakers',
|
||||
49.99,
|
||||
60,
|
||||
18
|
||||
),
|
||||
(
|
||||
119,
|
||||
'Retro Game Console',
|
||||
'Plug-and-play console with 500+ classic games',
|
||||
79.99,
|
||||
50,
|
||||
17
|
||||
),
|
||||
(
|
||||
120,
|
||||
'Automatic Pet Feeder',
|
||||
'App-controlled food dispenser for pets',
|
||||
99.99,
|
||||
40,
|
||||
20
|
||||
);
|
||||
|
||||
SELECT
|
||||
p.*,
|
||||
i.URL AS image_url
|
||||
FROM
|
||||
Product p
|
||||
LEFT JOIN Image_URL i ON p.ProductID = i.ProductID
|
||||
WHERE
|
||||
p.ProductID = 1
|
||||
(1, 1, 1, '2024-10-16 10:30:00', 'Completed'),
|
||||
(2, 1, 6, '2024-10-15 15:45:00', 'Completed'),
|
||||
(3, 1, 8, '2024-10-14 12:20:00', 'Pending'),
|
||||
(4, 2, 10, '2024-10-13 17:10:00', 'Completed'),
|
||||
(5, 2, 4, '2024-10-12 14:30:00', 'Completed');
|
||||
@@ -1,272 +0,0 @@
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import mysql.connector as db_con
|
||||
|
||||
# Flask app initialization
|
||||
app = Flask(__name__)
|
||||
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)
|
||||
|
||||
# Database connection setup
|
||||
def get_db_connection():
    """Open and return a new connection to the ``Marketplace`` MySQL database.

    The connection targets ``localhost:3306`` as user ``root``; no password
    is supplied. The caller owns the returned connection and must close it.
    """
    settings = {
        "host": "localhost",
        "port": 3306,
        "user": "root",
        "database": "Marketplace",
    }
    return db_con.connect(**settings)
|
||||
|
||||
# Fetch all products with category names
|
||||
def get_all_products():
    """Return every product together with its category name.

    Returns:
        A list of dicts with the keys ``ProductID``, ``Name``,
        ``Description`` and ``Category``. An empty list is returned when
        the database cannot be reached or the query fails (the error is
        printed, not raised).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Product p
        JOIN Category c ON p.CategoryID = c.CategoryID
    """

    try:
        # closing() guarantees the cursor and the connection are released
        # even when execute()/fetchall() raises.
        with closing(get_db_connection()) as connection:
            with closing(connection.cursor(dictionary=True)) as cursor:
                cursor.execute(query)
                return cursor.fetchall()
    except Exception as e:
        print(f"Database error getting products: {e}")
        return []
|
||||
|
||||
# Fetch user history
|
||||
def get_user_history(user_id):
    """Return the products recorded in ``History`` for one user.

    Args:
        user_id: Value matched against ``History.UserID`` (passed as a
            bound query parameter).

    Returns:
        A list of dicts with the keys ``ProductID``, ``Name``,
        ``Description`` and ``Category``. An empty list is returned when
        the query fails (the error is printed, not raised).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM History h
        JOIN Product p ON h.ProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE h.UserID = %s
    """
    try:
        # closing() releases the cursor and connection on the error path too.
        with closing(get_db_connection()) as connection:
            with closing(connection.cursor(dictionary=True)) as cursor:
                cursor.execute(query, (user_id,))
                return cursor.fetchall()
    except Exception as e:
        print(f"Error getting user history: {e}")
        return []
|
||||
|
||||
# Store recommendations
|
||||
def store_user_recommendations(user_id, recommendations):
    """Replace the stored recommendations of a user.

    All existing ``Recommendation`` rows for ``user_id`` are deleted and one
    row per entry of ``recommendations`` is inserted, in a single
    transaction.

    Args:
        user_id: Value written to / matched against ``Recommendation.UserID``.
        recommendations: Iterable of product IDs to store.

    Returns:
        ``True`` when the transaction was committed, ``False`` when any step
        failed (the error is printed and the transaction is rolled back).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    delete_query = "DELETE FROM Recommendation WHERE UserID = %s"
    insert_query = "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)"

    try:
        with closing(get_db_connection()) as connection:
            try:
                with closing(connection.cursor()) as cursor:
                    # First delete existing recommendations
                    cursor.execute(delete_query, (user_id,))

                    # Then insert new recommendations
                    for product_id in recommendations:
                        cursor.execute(insert_query, (user_id, product_id))
                connection.commit()
            except Exception:
                # Undo the delete (and any partial inserts) before reporting.
                connection.rollback()
                raise
        return True
    except Exception as e:
        print(f"Error storing recommendations: {e}")
        return False
|
||||
|
||||
# Fetch stored recommendations
|
||||
def get_stored_recommendations(user_id):
    """Return the products currently stored as recommendations for a user.

    Args:
        user_id: Value matched against ``Recommendation.UserID`` (passed as
            a bound query parameter).

    Returns:
        A list of dicts with the keys ``ProductID``, ``Name``,
        ``Description`` and ``Category``. An empty list is returned when
        the query fails (the error is printed, not raised).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Recommendation r
        JOIN Product p ON r.RecommendedProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE r.UserID = %s
    """
    try:
        # closing() releases the cursor and connection on the error path too.
        with closing(get_db_connection()) as connection:
            with closing(connection.cursor(dictionary=True)) as cursor:
                cursor.execute(query, (user_id,))
                return cursor.fetchall()
    except Exception as e:
        print(f"Error getting stored recommendations: {e}")
        return []
|
||||
|
||||
# Initialize Recommender class
|
||||
class Recommender:
    """Content-based product recommender built on TF-IDF and cosine similarity.

    Products are vectorised from their category, name and description; a
    user is represented by the mean vector of the products in their history.
    """

    def __init__(self):
        # All state is populated by load_products().
        self.products_df = None        # DataFrame of products plus a 'content' column
        self.tfidf_matrix = None       # TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None   # fitted TfidfVectorizer
        self.product_indices = None    # Series: ProductID -> row position

    def load_products(self, products_data):
        """Build the TF-IDF model from product records.

        Args:
            products_data: Records convertible to a DataFrame that provide
                the columns ``ProductID``, ``Name``, ``Description`` and
                ``Category``.
        """
        self.products_df = pd.DataFrame(products_data)

        # Combine relevant features for content-based filtering. Every text
        # column is NaN-filled: a single missing Category or Name would
        # otherwise turn the whole concatenation into NaN and break the
        # vectorizer.
        self.products_df['content'] = (
            self.products_df['Category'].fillna('') + ' ' +
            self.products_df['Name'].fillna('') + ' ' +
            self.products_df['Description'].fillna('')
        )

        # Create TF-IDF matrix
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])

        # Map product IDs to row positions for quick lookup
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['ProductID']
        ).drop_duplicates()

    def recommend_products_for_user(self, user_id, top_n=40):
        """Recommend products similar to those in the user's history.

        Args:
            user_id: Passed to ``get_user_history`` to fetch the history.
            top_n: Maximum number of product IDs to return.

        Returns:
            A list of product IDs ordered by decreasing cosine similarity to
            the user's profile, excluding products already in the history.
            Falls back to ``recommend_popular_products`` when the user has
            no usable history.
        """
        # Get user history
        user_history = get_user_history(user_id)

        # If no history, fall back to the popular-products strategy
        if not user_history:
            return self.recommend_popular_products(top_n)

        # Convert user history to DataFrame
        history_df = pd.DataFrame(user_history)

        # Row positions of the history products known to the model
        history_indices = [
            self.product_indices[product_id]
            for product_id in history_df['ProductID']
            if product_id in self.product_indices
        ]

        if not history_indices:
            return self.recommend_popular_products(top_n)

        # Mean TF-IDF vector of the user's history. The mean of a scipy
        # sparse matrix is an np.matrix, which scikit-learn refuses as
        # input, so convert it to a plain ndarray first.
        user_profile = np.asarray(
            self.tfidf_matrix[history_indices].mean(axis=0)
        ).reshape(1, -1)

        # Calculate similarity scores
        similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()

        # Series of similarity scores keyed by product row index
        product_scores = pd.Series(similarity_scores, index=self.products_df.index)

        # Remove products the user has already interacted with
        product_scores = product_scores.drop(history_indices)

        # Sort by similarity score (highest first)
        product_scores = product_scores.sort_values(ascending=False)

        # Get top N product indices
        top_indices = product_scores.iloc[:top_n].index

        # Get product IDs for these indices
        return self.products_df.iloc[top_indices]['ProductID'].tolist()

    def recommend_popular_products(self, n=40):
        """Fallback recommendation strategy when the user has no history.

        No popularity metric is used yet: up to ``n`` products are sampled
        at random. Returns an empty list when no products are loaded.
        """
        if self.products_df is None or self.products_df.empty:
            return []
        # For now, just returning random products
        return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist()
|
||||
|
||||
# Create recommender instance
|
||||
recommender = Recommender()
|
||||
|
||||
@app.route('/load_products', methods=['GET'])
def load_products():
    """Reload the recommender's product model from the database.

    Responds with a success message and the product count, or with a 500
    error when ``get_all_products`` returns nothing.
    """
    products = get_all_products()
    if products:
        recommender.load_products(products)
        return jsonify({'message': 'Products loaded successfully', 'count': len(products)})

    return jsonify({'error': 'Failed to load products from database'}), 500
|
||||
|
||||
@app.route('/recommend/<int:user_id>', methods=['GET'])
def recommend(user_id):
    """Compute, persist and return recommendations for ``user_id``.

    The product model is loaded on demand if it has not been built yet.
    Responds with 500 when no products are available or when the
    recommendations cannot be stored.
    """
    # Lazily build the model the first time it is needed
    if recommender.products_df is None:
        products = get_all_products()
        if not products:
            return jsonify({'error': 'No products available'}), 500
        recommender.load_products(products)

    # Generate recommendations using cosine similarity
    recommendations = recommender.recommend_products_for_user(user_id)

    # Persist them; report failure before building the success payload
    stored = store_user_recommendations(user_id, recommendations)
    if not stored:
        return jsonify({'error': 'Failed to store recommendations'}), 500

    payload = {
        'userId': user_id,
        'recommendations': recommendations,
        'count': len(recommendations)
    }
    return jsonify(payload)
|
||||
|
||||
@app.route('/api/user/session', methods=['POST'])
def handle_session_data():
    """Validate posted session data and check database connectivity.

    Expects a JSON object with ``userId``, ``email`` and
    ``isAuthenticated``. Responds with 400 when the body is not a JSON
    object or a field is missing, 500 when the database cannot be reached
    or an unexpected error occurs, and 200 once validation and the
    connectivity check have passed.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so bad input is answered with 400, not 500.
        data = request.get_json(silent=True)
        print("Received data:", data)  # Debug print

        if not isinstance(data, dict):
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400

        user_id = data.get('userId')
        email = data.get('email')
        is_authenticated = data.get('isAuthenticated')

        if not user_id or not email or is_authenticated is None:
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400

        print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}")

        # Test database connection first
        try:
            conn = get_db_connection()
            conn.close()
            print("Database connection successful")
        except Exception as db_err:
            print(f"Database connection error: {db_err}")
            return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500

        # A Flask view must return a response; previously the success path
        # fell through and returned None, which Flask rejects at runtime.
        return jsonify({'message': 'Session data received', 'userId': user_id}), 200

    except Exception as e:
        import traceback
        print(f"Error in handle_session_data: {e}")
        print(traceback.format_exc())  # Print full stack trace
        return jsonify({'error': f'Server error: {str(e)}'}), 500
|
||||
|
||||
if __name__ == '__main__':
    # Warm the recommender before serving; a failed load is only a warning
    # because the /recommend route loads products on demand.
    products = get_all_products()
    if not products:
        print("Warning: No products loaded at startup")
    else:
        recommender.load_products(products)
        print(f"Loaded {len(products)} products at startup")

    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # debugger on every interface -- confirm this is development-only.
    app.run(debug=True, host='0.0.0.0', port=5000)
|
||||
@@ -1,5 +0,0 @@
|
||||
-- All products that appear in the History rows of user 1.
SELECT *
FROM Product
WHERE ProductID IN (
    SELECT ProductID
    FROM History
    WHERE UserID = 1
);
|
||||
@@ -1,221 +1,113 @@
|
||||
# pip install mysql.connector
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import mysql.connector
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import numpy as np
|
||||
import logging
|
||||
|
||||
'''
|
||||
Recommender system using content-based filtering
|
||||
'''
|
||||
class Recommender:
|
||||
def __init__(self):
|
||||
# Initialize data structures
|
||||
self.products_df = None
|
||||
self.user_profiles = {}
|
||||
self.tfidf_matrix = None
|
||||
self.tfidf_vectorizer = None
|
||||
self.product_indices = None
|
||||
|
||||
def load_products(self, products_data):
|
||||
def database():
    """Open and return a new connection to the ``Marketplace`` MySQL database.

    Connects to ``localhost`` as user ``root`` without a password; the
    caller is responsible for closing the returned connection.
    """
    settings = {
        "host": "localhost",
        "port": "3306",
        "user": "root",
        "database": "Marketplace",
    }
    return mysql.connector.connect(**settings)
|
||||
|
||||
|
||||
def get_all_products():
|
||||
|
||||
db_con = database()
|
||||
cursor = db_con.cursor()
|
||||
|
||||
cursor.execute("SELECT CategoryID FROM Category")
|
||||
categories = cursor.fetchall()
|
||||
|
||||
select_clause = "SELECT p.ProductID"
|
||||
for category in categories:
|
||||
category_id = category[0]
|
||||
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
|
||||
|
||||
final_query = f"""
|
||||
{select_clause}
|
||||
FROM Product p
|
||||
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
|
||||
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
|
||||
GROUP BY p.ProductID;
|
||||
"""
|
||||
Load product data into the recommender system
|
||||
|
||||
products_data: list of dictionaries with product info (id, name, description, category, etc.)
|
||||
|
||||
cursor.execute(final_query)
|
||||
results = cursor.fetchall()
|
||||
|
||||
final = []
|
||||
for row in results:
|
||||
text_list = list(row)
|
||||
text_list.pop(0)
|
||||
final.append(text_list)
|
||||
|
||||
cursor.close()
|
||||
db_con.close()
|
||||
return final
|
||||
|
||||
def get_user_history(user_id):
|
||||
db_con = database()
|
||||
cursor = db_con.cursor()
|
||||
|
||||
cursor.execute("SELECT CategoryID FROM Category")
|
||||
categories = cursor.fetchall()
|
||||
|
||||
select_clause = "SELECT p.ProductID"
|
||||
for category in categories:
|
||||
category_id = category[0] # get the uid of the catefory and then append that to the new column
|
||||
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
|
||||
|
||||
final_query = f"""
|
||||
{select_clause}
|
||||
FROM Product p
|
||||
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
|
||||
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
|
||||
where p.ProductID in (select ProductID from History where UserID = {user_id})
|
||||
GROUP BY p.ProductID;
|
||||
"""
|
||||
self.products_df = pd.DataFrame(products_data)
|
||||
|
||||
# Create a text representation for each product (combining various features)
|
||||
self.products_df['content'] = (
|
||||
self.products_df['category'] + ' ' +
|
||||
self.products_df['name'] + ' ' +
|
||||
self.products_df['description']
|
||||
)
|
||||
|
||||
# Initialize TF-IDF vectorizer to convert text to vectors
|
||||
self.tfidf_vectorizer = TfidfVectorizer(
|
||||
stop_words='english',
|
||||
max_features=5000, # Limit features to avoid sparse matrices
|
||||
ngram_range=(1, 2) # Use both unigrams and bigrams
|
||||
)
|
||||
|
||||
# Compute TF-IDF matrix
|
||||
self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])
|
||||
|
||||
# Create a mapping from product_id to index
|
||||
self.product_indices = pd.Series(
|
||||
self.products_df.index,
|
||||
index=self.products_df['product_id']
|
||||
).drop_duplicates()
|
||||
|
||||
def track_user_click(self, user_id, product_id):
|
||||
"""
|
||||
Track user clicks on products to build user profiles
|
||||
"""
|
||||
if user_id not in self.user_profiles:
|
||||
self.user_profiles[user_id] = {
|
||||
'clicks': {},
|
||||
'category_weights': {
|
||||
'electronics': 0,
|
||||
'school supplies': 0,
|
||||
'rental place': 0,
|
||||
'furniture': 0
|
||||
}
|
||||
}
|
||||
|
||||
# Get the clicked product's index and details
|
||||
if product_id in self.product_indices:
|
||||
product_idx = self.product_indices[product_id]
|
||||
product_category = self.products_df.iloc[product_idx]['category']
|
||||
|
||||
# Update click count
|
||||
if product_id in self.user_profiles[user_id]['clicks']:
|
||||
self.user_profiles[user_id]['clicks'][product_id] += 1
|
||||
else:
|
||||
self.user_profiles[user_id]['clicks'][product_id] = 1
|
||||
|
||||
# Update category weight
|
||||
self.user_profiles[user_id]['category_weights'][product_category] += 1
|
||||
|
||||
def get_user_profile_vector(self, user_id):
|
||||
"""
|
||||
Generate a user profile vector based on their click history
|
||||
"""
|
||||
if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']:
|
||||
# Return a zero vector if no click history
|
||||
return np.zeros((1, self.tfidf_matrix.shape[1]))
|
||||
|
||||
# Create a weighted average of all clicked products' TF-IDF vectors
|
||||
clicked_product_vectors = []
|
||||
weights = []
|
||||
|
||||
for product_id, click_count in self.user_profiles[user_id]['clicks'].items():
|
||||
if product_id in self.product_indices:
|
||||
product_idx = self.product_indices[product_id]
|
||||
product_category = self.products_df.iloc[product_idx]['category']
|
||||
category_weight = self.user_profiles[user_id]['category_weights'][product_category]
|
||||
|
||||
# Weight is based on both click count and category preference
|
||||
weight = click_count * (1 + 0.5 * category_weight)
|
||||
weights.append(weight)
|
||||
clicked_product_vectors.append(self.tfidf_matrix[product_idx])
|
||||
|
||||
# Normalize weights
|
||||
weights = np.array(weights) / np.sum(weights)
|
||||
|
||||
# Compute weighted average
|
||||
user_profile = np.zeros((1, self.tfidf_matrix.shape[1]))
|
||||
for i, vector in enumerate(clicked_product_vectors):
|
||||
user_profile += weights[i] * vector.toarray()
|
||||
|
||||
return user_profile
|
||||
|
||||
def recommend_products(self, user_id, n=5, category_filter=None):
|
||||
"""
|
||||
Recommend products to a user based on their profile
|
||||
|
||||
user_id: ID of the user
|
||||
n: Number of recommendations to return
|
||||
category_filter: Optional filter to limit recommendations to a specific category
|
||||
"""
|
||||
# Get user profile vector
|
||||
user_profile = self.get_user_profile_vector(user_id)
|
||||
|
||||
# If user has no profile, recommend popular products (not implemented)
|
||||
if np.sum(user_profile) == 0:
|
||||
return self._get_popular_products(n, category_filter)
|
||||
|
||||
# Calculate similarity scores
|
||||
sim_scores = cosine_similarity(user_profile, self.tfidf_matrix)
|
||||
sim_scores = sim_scores.flatten()
|
||||
|
||||
# Create a DataFrame for easier filtering
|
||||
recommendations_df = pd.DataFrame({
|
||||
'product_id': self.products_df['product_id'],
|
||||
'score': sim_scores,
|
||||
'category': self.products_df['category']
|
||||
})
|
||||
|
||||
# Filter out products that the user has already clicked on
|
||||
if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']:
|
||||
clicked_products = list(self.user_profiles[user_id]['clicks'].keys())
|
||||
recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)]
|
||||
|
||||
# Apply category filter if provided
|
||||
if category_filter:
|
||||
recommendations_df = recommendations_df[recommendations_df['category'] == category_filter]
|
||||
|
||||
# Sort by similarity score and get top n recommendations
|
||||
recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n)
|
||||
|
||||
|
||||
cursor.execute(final_query)
|
||||
results = cursor.fetchall()
|
||||
final = []
|
||||
for row in results:
|
||||
text_list = list(row)
|
||||
text_list.pop(0)
|
||||
final.append(text_list)
|
||||
|
||||
cursor.close()
|
||||
db_con.close()
|
||||
return final
|
||||
|
||||
|
||||
def get_recommendations(user_id, top_n=40):
|
||||
try:
|
||||
# Get all products and user history with their category vectors
|
||||
all_products = get_all_products()
|
||||
user_history = get_user_history(user_id)
|
||||
|
||||
# if not user_history:
|
||||
# # Cold start: return popular products
|
||||
# return get_popular_products(top_n)
|
||||
|
||||
# Calculate similarity between all products and user history
|
||||
user_profile = np.mean(user_history, axis=0) # Average user preferences
|
||||
similarities = cosine_similarity([user_profile], all_products)
|
||||
|
||||
# finds the indices of the top N products that have the highest
|
||||
# cosine similarity with the user's profile and sorted from most similar to least similar.
|
||||
product_indices = similarities[0].argsort()[-top_n:][::-1]
|
||||
print("product", product_indices)
|
||||
|
||||
# Return recommended product IDs
|
||||
return recommendations_df['product_id'].tolist()
|
||||
|
||||
def _get_popular_products(self, n=5, category_filter=None):
|
||||
"""
|
||||
Return popular products when a user has no profile
|
||||
(This would typically be implemented with actual popularity metrics)
|
||||
"""
|
||||
filtered_df = self.products_df
|
||||
|
||||
if category_filter:
|
||||
filtered_df = filtered_df[filtered_df['category'] == category_filter]
|
||||
|
||||
# Just return random products for now (in a real system you'd use popularity metrics)
|
||||
if len(filtered_df) >= n:
|
||||
return filtered_df.sample(n)['product_id'].tolist()
|
||||
else:
|
||||
return filtered_df['product_id'].tolist()
|
||||
|
||||
def recommend_by_category_preference(self, user_id, n=5):
|
||||
"""
|
||||
Recommend products based primarily on the user's category preferences
|
||||
"""
|
||||
if user_id not in self.user_profiles:
|
||||
return self._get_popular_products(n)
|
||||
|
||||
# Get the user's most clicked category
|
||||
category_weights = self.user_profiles[user_id]['category_weights']
|
||||
|
||||
# If no category has been clicked, return popular products
|
||||
if sum(category_weights.values()) == 0:
|
||||
return self._get_popular_products(n)
|
||||
|
||||
# Sort categories by number of clicks
|
||||
sorted_categories = sorted(
|
||||
category_weights.items(),
|
||||
key=lambda x: x[1],
|
||||
reverse=True
|
||||
)
|
||||
|
||||
recommendations = []
|
||||
remaining = n
|
||||
|
||||
# Allocate recommendations proportionally across categories
|
||||
for category, weight in sorted_categories:
|
||||
if weight > 0:
|
||||
# Allocate recommendations proportionally to category weight
|
||||
category_allocation = max(1, int(remaining * (weight / sum(category_weights.values()))))
|
||||
if category_allocation > remaining:
|
||||
category_allocation = remaining
|
||||
|
||||
# Get recommendations for this category
|
||||
category_recs = self.recommend_products(user_id, category_allocation, category)
|
||||
recommendations.extend(category_recs)
|
||||
|
||||
# Update remaining slots
|
||||
remaining -= len(category_recs)
|
||||
|
||||
if remaining <= 0:
|
||||
break
|
||||
|
||||
# If we still have slots to fill, add general recommendations
|
||||
if remaining > 0:
|
||||
general_recs = self.recommend_products(user_id, remaining)
|
||||
# Filter out duplicates
|
||||
general_recs = [rec for rec in general_recs if rec not in recommendations]
|
||||
recommendations.extend(general_recs[:remaining])
|
||||
|
||||
return recommendations
|
||||
return [all_products[i][0] for i in product_indices] # Product IDs
|
||||
except Exception as e:
|
||||
logging.error(f"Recommendation error for user {user_id}: {str(e)}")
|
||||
# return get_popular_products(top_n) # Fallback to popular products
|
||||
|
||||
|
||||
exported = Recommender()
|
||||
get_recommendations(1)
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
import mysql.connector as db_con
|
||||
|
||||
#TODO: Specify all the required queries
|
||||
query_get_all_Prod= ("SELECT * FROM Product ")
|
||||
|
||||
|
||||
#TODO: connect with the db
|
||||
def database():
    """Run ``query_get_all_Prod`` and print the first returned row.

    Connects to the ``Marketplace`` database on ``localhost:3306`` as
    ``root``. Prints a notice instead of failing when the query returns no
    rows. The cursor and connection are always closed, even on error.
    """
    db = db_con.connect(
        host="localhost",
        port=3306,
        user="root",
        database="Marketplace"
    )
    try:
        cursor = db.cursor()
        try:
            cursor.execute(query_get_all_Prod)
            data = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        db.close()

    # Previously data[1] was indexed unconditionally, which raised
    # IndexError on an empty Product table.
    if data:
        print(data[0])
    else:
        print("No products found")
|
||||
|
||||
|
||||
|
||||
#TODO: Get All products
|
||||
# Make it into a dictionary with product id and the list of category it would have
|
||||
# e.g. {Prod1: [1, 0, 0, 0, 1]} could mean the product belongs to [electronics, 0, 0, 0, kitchen]
|
||||
database()
|
||||
Reference in New Issue
Block a user