diff --git a/mysql-code/example-data.sql b/mysql-code/Init-Data.sql similarity index 52% rename from mysql-code/example-data.sql rename to mysql-code/Init-Data.sql index a2b7242..b75516e 100644 --- a/mysql-code/example-data.sql +++ b/mysql-code/Init-Data.sql @@ -1,5 +1,4 @@ -- Inserting sample data into the Marketplace database --- Clear existing data (if needed) SET FOREIGN_KEY_CHECKS = 0; @@ -59,33 +58,6 @@ VALUES 'hashedpassword2', '555-234-5678', '456 Oak Ave, Calgary, AB' - ), - ( - 3, - 'Michael Brown', - 'michael.b@example.com', - 'U345678', - 'hashedpassword3', - '555-345-6789', - '789 Pine Rd, Calgary, AB' - ), - ( - 4, - 'Sarah Wilson', - 'sarah.w@example.com', - 'U456789', - 'hashedpassword4', - '555-456-7890', - '101 Elm Blvd, Calgary, AB' - ), - ( - 5, - 'David Taylor', - 'david.t@example.com', - 'U567890', - 'hashedpassword5', - '555-567-8901', - '202 Maple Dr, Calgary, AB' ); -- Insert User Roles @@ -93,10 +65,7 @@ INSERT INTO UserRole (UserID, Client, Admin) VALUES (1, TRUE, TRUE), - (2, TRUE, FALSE), - (3, TRUE, FALSE), - (4, TRUE, FALSE), - (5, TRUE, FALSE); + (2, TRUE, FALSE); -- Insert Categories INSERT INTO @@ -161,7 +130,7 @@ VALUES 'HP Laptop', 699.99, 1, - 2, + 1, '2023 HP Pavilion, 16GB RAM, 512GB SSD', 2, '2024-10-10 14:30:00' @@ -171,7 +140,7 @@ VALUES 'Dorm Desk', 120.00, 1, - 3, + 2, 'Sturdy desk perfect for studying, minor scratches', 3, '2024-10-12 09:15:00' @@ -181,7 +150,7 @@ VALUES 'University Hoodie', 35.00, 3, - 1, + 2, 'Size L, university logo, worn twice', 4, '2024-10-14 16:45:00' @@ -191,7 +160,7 @@ VALUES 'Basketball', 25.50, 1, - 4, + 2, 'Slightly used indoor basketball', 5, '2024-10-11 11:20:00' @@ -201,7 +170,7 @@ VALUES 'Acoustic Guitar', 175.00, 1, - 2, + 1, 'Beginner acoustic guitar with case', 6, '2024-10-09 13:10:00' @@ -211,7 +180,7 @@ VALUES 'Physics Textbook', 65.00, 2, - 5, + 2, 'University Physics 14th Edition, good condition', 1, '2024-10-08 10:30:00' @@ -221,7 +190,7 @@ VALUES 'Mini Fridge', 85.00, 1, - 3, + 1, 'Small dorm fridge, works perfectly', 8, '2024-10-13 15:00:00' @@ -231,7 +200,7 @@ VALUES 'PlayStation 5 Controller', 55.00, 1, - 4, + 2, 'Extra controller, barely used', 9, '2024-10-07 17:20:00' @@ -241,7 +210,7 @@ VALUES 'Mountain Bike', 350.00, 1, - 5, + 1, 'Trek mountain bike, great condition, new tires', 10, '2024-10-06 14:00:00' @@ -271,7 +240,7 @@ VALUES 'Graphing Calculator', 75.00, 1, - 3, + 1, 'TI-84 Plus, perfect working condition', 12, '2024-10-03 11:15:00' @@ -281,7 +250,7 @@ VALUES 'Yoga Mat', 20.00, 1, - 4, + 2, 'Thick yoga mat, barely used', 13, '2024-10-02 16:00:00' @@ -291,7 +260,7 @@ VALUES 'Winter Jacket', 120.00, 1, - 5, + 1, 'Columbia winter jacket, size XL, very warm', 26, '2024-10-01 10:20:00' @@ -301,7 +270,7 @@ VALUES 'Computer Science Textbook', 70.00, 1, - 1, + 2, 'Introduction to Algorithms, like new', 1, '2024-09-30 14:30:00' @@ -321,7 +290,7 @@ VALUES 'Scientific Calculator', 25.00, 1, - 3, + 1, 'Casio scientific calculator', 12, '2024-09-28 11:30:00' @@ -331,7 +300,7 @@ VALUES 'Bluetooth Speaker', 45.00, 1, - 4, + 1, 'JBL Bluetooth speaker, great sound', 23, '2024-09-27 15:45:00' @@ -341,7 +310,7 @@ VALUES 'Backpack', 40.00, 1, - 5, + 2, 'North Face backpack, lots of pockets', 22, '2024-09-26 09:15:00' @@ -360,7 +329,17 @@ VALUES ('/image1.avif', 7), ('/image1.avif', 8), ('/image1.avif', 9), - ('/image1.avif', 10); + ('/image1.avif', 10), + ('/image1.avif', 11), + ('/image1.avif', 12), + ('/image1.avif', 13), + ('/image1.avif', 14), + ('/image1.avif', 15), + ('/image1.avif', 16), + ('/image1.avif', 17), + ('/image1.avif', 18), + ('/image1.avif', 19), + ('/image1.avif', 20); -- Insert Product-Category relationships (products with multiple categories) INSERT INTO @@ -420,82 +399,20 @@ VALUES -- Backpack: Backpacks & Bags, School Supplies, Dorm Essentials -- Insert History records --- INSERT INTO - History (HistoryID, UserID, ProductID, Date) + History (HistoryID, UserID, ProductID) VALUES - (1, 1, 1, '2024-10-15 11:30:00'), - (2, 1, 2, '2024-10-14 13:45:00'), - (3, 1, 5, '2024-10-13 09:20:00'), - (4, 1, 4, '2024-10-12 16:10:00'); - --- -INSERT INTO - History (HistoryID, UserID, ProductID, Date) -VALUES - (1, 2, 1, '2024-10-15 11:30:00'), -- User 2 viewed Calculus Textbook - (2, 3, 2, '2024-10-14 13:45:00'), -- User 3 viewed HP Laptop - (3, 4, 3, '2024-10-13 09:20:00'), -- User 4 viewed Dorm Desk - (4, 5, 4, '2024-10-12 16:10:00'), -- User 5 viewed University Hoodie - (5, 1, 5, '2024-10-11 14:30:00'), -- User 1 viewed Basketball - (6, 2, 6, '2024-10-10 10:15:00'), -- User 2 viewed Acoustic Guitar - (7, 3, 7, '2024-10-09 15:40:00'), -- User 3 viewed Physics Textbook - (8, 4, 8, '2024-10-08 11:25:00'), -- User 4 viewed Mini Fridge - (9, 5, 9, '2024-10-07 17:50:00'), -- User 5 viewed PS5 Controller - (10, 1, 10, '2024-10-06 14:15:00'); - --- User 1 viewed Mountain Bike --- Insert Reviews -INSERT INTO - Review ( - ReviewID, - UserID, - ProductID, - Comment, - Rating, - Date - ) -VALUES - ( - 1, - 2, - 1, - 'Great condition, exactly as described!', - 5, - '2024-10-16 09:30:00' - ), - ( - 2, - 3, - 2, - 'Works well, but had a small scratch not mentioned in the listing.', - 4, - '2024-10-15 14:20:00' - ), - ( - 3, - 4, - 6, - 'Perfect for beginners, sounds great!', - 5, - '2024-10-14 11:10:00' - ), - ( - 4, - 5, - 8, - 'Keeps my drinks cold, but a bit noisy at night.', - 3, - '2024-10-13 16:45:00' - ), - ( - 5, - 1, - 10, - 'Excellent bike, well maintained!', - 5, - '2024-10-12 13:25:00' - ); + (1, 1, 1), + (2, 1, 3), + (3, 1, 5), + (4, 1, 7), + (5, 1, 9), + (6, 1, 11), + (7, 2, 2), + (8, 2, 4), + (9, 2, 5), + (10, 1, 15), + (11, 1, 18); -- Insert Favorites INSERT INTO @@ -505,9 +422,9 @@ VALUES (1, 7), -- User 1 likes Physics Textbook (2, 3), -- User 2 likes Dorm Desk (2, 10), -- User 2 likes Mountain Bike - (3, 6), -- User 3 likes Acoustic Guitar - (4, 5), -- User 4 likes Basketball - (5, 8); + (1, 6), -- User 3 likes Acoustic Guitar + (1, 5), -- User 4 likes Basketball + (2, 8); -- User 5 likes Mini Fridge -- Insert Transactions @@ -520,242 +437,8 @@ INSERT INTO PaymentStatus ) VALUES - (1, 2, 1, '2024-10-16 10:30:00', 'Completed'), - (2, 3, 6, '2024-10-15 15:45:00', 'Completed'), - (3, 4, 8, '2024-10-14 12:20:00', 'Pending'), - (4, 5, 10, '2024-10-13 17:10:00', 'Completed'), - (5, 1, 4, '2024-10-12 14:30:00', 'Completed'); - --- Insert Recommendations -INSERT INTO - Recommendation (RecommendationID_PK, UserID, RecommendedProductID) -VALUES - (1, 1, 7), -- Recommend Physics Textbook to User 1 - (2, 1, 13), -- Recommend Graphing Calculator to User 1 - (3, 2, 3), -- Recommend Dorm Desk to User 2 - (4, 2, 17), -- Recommend Desk Lamp to User 2 - (5, 3, 16), -- Recommend CS Textbook to User 3 - (6, 4, 14), -- Recommend Yoga Mat to User 4 - (7, 5, 15); - -INSERT INTO - Recommendation (RecommendationID_PK, UserID, RecommendedProductID) -VALUES - (12, 1, 19), - (13, 1, 9), - (14, 1, 11), - (15, 1, 16), - -- Insert Authentication records -INSERT INTO - AuthVerification (Email, VerificationCode, Authenticated, Date) -VALUES - ( - 'john.doe@example.com', - '123456', - TRUE, - '2024-10-01 09:00:00' - ), - ( - 'jane.smith@example.com', - '234567', - TRUE, - '2024-10-02 10:15:00' - ), - ( - 'michael.b@example.com', - '345678', - TRUE, - '2024-10-03 11:30:00' - ), - ( - 'sarah.w@example.com', - '456789', - TRUE, - '2024-10-04 12:45:00' - ), - ( - 'david.t@example.com', - '567890', - TRUE, - '2024-10-05 14:00:00' - ); - -INSERT INTO - Product ( - ProductID, - Name, - Description, - Price, - StockQuantity, - CategoryID - ) -VALUES - ( - 101, - 'Smart Coffee Maker', - 'Wi-Fi enabled coffee machine with scheduling feature', - 129.99, - 50, - 11 - ), - ( - 102, - 'Ergonomic Office Chair', - 'Adjustable mesh chair with lumbar support', - 199.99, - 35, - 12 - ), - ( - 103, - 'Wireless Mechanical Keyboard', - 'RGB-backlit wireless keyboard with mechanical switches', - 89.99, - 60, - 13 - ), - ( - 104, - 'Portable Solar Charger', - 'Foldable solar power bank with USB-C support', - 59.99, - 40, - 14 - ), - ( - 105, - 'Noise-Canceling Headphones', - 'Over-ear Bluetooth headphones with ANC', - 179.99, - 25, - 15 - ), - ( - 106, - 'Smart Water Bottle', - 'Tracks water intake and glows as a hydration reminder', - 39.99, - 75, - 11 - ), - ( - 107, - 'Compact Air Purifier', - 'HEPA filter air purifier for small rooms', - 149.99, - 30, - 16 - ), - ( - 108, - 'Smart LED Desk Lamp', - 'Adjustable LED lamp with voice control', - 69.99, - 45, - 12 - ), - ( - 109, - '4K Streaming Device', - 'HDMI streaming stick with voice remote', - 49.99, - 80, - 17 - ), - ( - 110, - 'Smart Plant Monitor', - 'Bluetooth-enabled sensor for plant health tracking', - 34.99, - 55, - 18 - ), - ( - 111, - 'Wireless Charging Pad', - 'Fast-charging pad for Qi-compatible devices', - 29.99, - 90, - 13 - ), - ( - 112, - 'Mini Projector', - 'Portable projector with built-in speakers', - 129.99, - 20, - 14 - ), - ( - 113, - 'Foldable Bluetooth Keyboard', - 'Ultra-thin keyboard for travel use', - 39.99, - 70, - 19 - ), - ( - 114, - 'Smart Alarm Clock', - 'AI-powered alarm clock with sunrise simulation', - 79.99, - 40, - 15 - ), - ( - 115, - 'Touchscreen Toaster', - 'Customizable toaster with a digital display', - 99.99, - 30, - 11 - ), - ( - 116, - 'Cordless Vacuum Cleaner', - 'Lightweight handheld vacuum with strong suction', - 159.99, - 25, - 16 - ), - ( - 117, - 'Smart Bike Lock', - 'Fingerprint and app-controlled bike security lock', - 89.99, - 35, - 20 - ), - ( - 118, - 'Bluetooth Sleep Headband', - 'Comfortable sleep headband with built-in speakers', - 49.99, - 60, - 18 - ), - ( - 119, - 'Retro Game Console', - 'Plug-and-play console with 500+ classic games', - 79.99, - 50, - 17 - ), - ( - 120, - 'Automatic Pet Feeder', - 'App-controlled food dispenser for pets', - 99.99, - 40, - 20 - ); - -SELECT - p.*, - i.URL AS image_url -FROM - Product p - LEFT JOIN Image_URL i ON p.ProductID = i.ProductID -WHERE - p.ProductID = 1 + (1, 1, 1, '2024-10-16 10:30:00', 'Completed'), + (2, 1, 6, '2024-10-15 15:45:00', 'Completed'), + (3, 1, 8, '2024-10-14 12:20:00', 'Pending'), + (4, 2, 10, '2024-10-13 17:10:00', 'Completed'), + (5, 2, 4, '2024-10-12 14:30:00', 'Completed'); diff --git a/recommondation-engine/example.py b/recommondation-engine/example.py deleted file mode 100644 index 4210bed..0000000 --- a/recommondation-engine/example.py +++ /dev/null @@ -1,272 +0,0 @@ -from flask import Flask, request, jsonify -from flask_cors import CORS -import pandas as pd -import numpy as np -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.metrics.pairwise import cosine_similarity -import mysql.connector as db_con - -# Flask app initialization -app = Flask(__name__) -CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True) - -# Database connection setup -def get_db_connection(): - return db_con.connect( - host="localhost", - port=3306, - user="root", - database="Marketplace" - ) - -# Fetch all products with category names -def get_all_products(): - query = """ - SELECT p.ProductID, p.Name, p.Description, c.Name AS Category - FROM Product p - JOIN Category c ON p.CategoryID = c.CategoryID - """ - - try: - connection = get_db_connection() - cursor = connection.cursor(dictionary=True) - cursor.execute(query) - products = cursor.fetchall() - cursor.close() - connection.close() - return products - except Exception as e: - print(f"Database error getting products: {e}") - return [] - -# Fetch user history -def get_user_history(user_id): - query = """ - SELECT p.ProductID, p.Name, p.Description, c.Name AS Category - FROM History h - JOIN Product p ON h.ProductID = p.ProductID - JOIN Category c ON p.CategoryID = c.CategoryID - WHERE h.UserID = %s - """ - try: - connection = get_db_connection() - cursor = connection.cursor(dictionary=True) - cursor.execute(query, (user_id,)) - history = cursor.fetchall() - cursor.close() - connection.close() - return history - except Exception as e: - print(f"Error getting user history: {e}") - return [] - -# Store recommendations -def store_user_recommendations(user_id, recommendations): - delete_query = "DELETE FROM Recommendation WHERE UserID = %s" - insert_query = "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)" - - try: - connection = get_db_connection() - cursor = connection.cursor() - - # First delete existing recommendations - cursor.execute(delete_query, (user_id,)) - - # Then insert new recommendations - for product_id in recommendations: - cursor.execute(insert_query, (user_id, product_id)) - - connection.commit() - cursor.close() - connection.close() - return True - except Exception as e: - print(f"Error storing recommendations: {e}") - return False - -# Fetch stored recommendations -def get_stored_recommendations(user_id): - query = """ - SELECT p.ProductID, p.Name, p.Description, c.Name AS Category - FROM Recommendation r - JOIN Product p ON r.RecommendedProductID = p.ProductID - JOIN Category c ON p.CategoryID = c.CategoryID - WHERE r.UserID = %s - """ - try: - connection = get_db_connection() - cursor = connection.cursor(dictionary=True) - cursor.execute(query, (user_id,)) - recommendations = cursor.fetchall() - cursor.close() - connection.close() - return recommendations - except Exception as e: - print(f"Error getting stored recommendations: {e}") - return [] - -# Initialize Recommender class -class Recommender: - def __init__(self): - self.products_df = None - self.tfidf_matrix = None - self.tfidf_vectorizer = None - self.product_indices = None - - def load_products(self, products_data): - self.products_df = pd.DataFrame(products_data) - - # Combine relevant features for content-based filtering - self.products_df['content'] = ( - self.products_df['Category'] + ' ' + - self.products_df['Name'] + ' ' + - self.products_df['Description'].fillna('') - ) - - # Create TF-IDF matrix - self.tfidf_vectorizer = TfidfVectorizer( - stop_words='english', - max_features=5000, - ngram_range=(1, 2) - ) - self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content']) - - # Map product IDs to indices for quick lookup - self.product_indices = pd.Series( - self.products_df.index, - index=self.products_df['ProductID'] - ).drop_duplicates() - - def recommend_products_for_user(self, user_id, top_n=40): - """ - Generate product recommendations based on user history using cosine similarity - """ - # Get user history - user_history = get_user_history(user_id) - - # If no history, return popular products - if not user_history: - # In a real system, you might return popular products here - return self.recommend_popular_products(top_n) - - # Convert user history to DataFrame - history_df = pd.DataFrame(user_history) - - # Get indices of products in user history - history_indices = [] - for product_id in history_df['ProductID']: - if product_id in self.product_indices: - history_indices.append(self.product_indices[product_id]) - - if not history_indices: - return self.recommend_popular_products(top_n) - - # Get TF-IDF vectors for user's history - user_profile = self.tfidf_matrix[history_indices].mean(axis=0).reshape(1, -1) - - # Calculate similarity scores - similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix) - similarity_scores = similarity_scores.flatten() - - # Create a Series with product indices and similarity scores - product_scores = pd.Series(similarity_scores, index=self.products_df.index) - - # Remove products the user has already interacted with - product_scores = product_scores.drop(history_indices) - - # Sort by similarity score (highest first) - product_scores = product_scores.sort_values(ascending=False) - - # Get top N product indices - top_indices = product_scores.iloc[:top_n].index - - # Get product IDs for these indices - recommended_product_ids = self.products_df.iloc[top_indices]['ProductID'].tolist() - - return recommended_product_ids - - def recommend_popular_products(self, n=40): - """ - Fallback recommendation strategy when user has no history - In a real system, this would use actual popularity metrics - """ - # For now, just returning random products - return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist() - -# Create recommender instance -recommender = Recommender() - -@app.route('/load_products', methods=['GET']) -def load_products(): - products = get_all_products() - if not products: - return jsonify({'error': 'Failed to load products from database'}), 500 - - recommender.load_products(products) - return jsonify({'message': 'Products loaded successfully', 'count': len(products)}) - -@app.route('/recommend/', methods=['GET']) -def recommend(user_id): - # Check if products are loaded - if recommender.products_df is None: - products = get_all_products() - if not products: - return jsonify({'error': 'No products available'}), 500 - recommender.load_products(products) - - # Generate recommendations using cosine similarity - recommendations = recommender.recommend_products_for_user(user_id) - - # Store recommendations in database - if store_user_recommendations(user_id, recommendations): - return jsonify({ - 'userId': user_id, - 'recommendations': recommendations, - 'count': len(recommendations) - }) - else: - return jsonify({'error': 'Failed to store recommendations'}), 500 - -@app.route('/api/user/session', methods=['POST']) -def handle_session_data(): - try: - data = request.get_json() - print("Received data:", data) # Debug print - - user_id = data.get('userId') - email = data.get('email') - is_authenticated = data.get('isAuthenticated') - - if not user_id or not email or is_authenticated is None: - print("Missing required fields") # Debug print - return jsonify({'error': 'Invalid data'}), 400 - - print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}") - - # Test database connection first - try: - conn = get_db_connection() - conn.close() - print("Database connection successful") - except Exception as db_err: - print(f"Database connection error: {db_err}") - return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500 - - # Continue with the rest of your code... - - except Exception as e: - import traceback - print(f"Error in handle_session_data: {e}") - print(traceback.format_exc()) # Print full stack trace - return jsonify({'error': f'Server error: {str(e)}'}), 500 - -if __name__ == '__main__': - # Load products on startup - products = get_all_products() - if products: - recommender.load_products(products) - print(f"Loaded {len(products)} products at startup") - else: - print("Warning: No products loaded at startup") - - app.run(debug=True, host='0.0.0.0', port=5000) diff --git a/recommondation-engine/example.sql b/recommondation-engine/example.sql deleted file mode 100644 index f7fccb7..0000000 --- a/recommondation-engine/example.sql +++ /dev/null @@ -1,5 +0,0 @@ -select * -from Product -Where ProductID in (select ProductID - from History - where UserID=1); diff --git a/recommondation-engine/example1.py b/recommondation-engine/example1.py index 5686e9a..9a8ad35 100644 --- a/recommondation-engine/example1.py +++ b/recommondation-engine/example1.py @@ -1,221 +1,113 @@ +# pip install mysql.connector -import pandas as pd -import numpy as np +import mysql.connector from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity +import numpy as np +import logging -''' -Recommender system using content-based filtering -''' -class Recommender: - def __init__(self): - # Initialize data structures - self.products_df = None - self.user_profiles = {} - self.tfidf_matrix = None - self.tfidf_vectorizer = None - self.product_indices = None - - def load_products(self, products_data): +def database(): + db_connection = mysql.connector.connect( + host = "localhost", + port = "3306", + user = "root", + database = "Marketplace" + ) + return db_connection + + +def get_all_products(): + + db_con = database() + cursor = db_con.cursor() + + cursor.execute("SELECT CategoryID FROM Category") + categories = cursor.fetchall() + + select_clause = "SELECT p.ProductID" + for category in categories: + category_id = category[0] + select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`" + + final_query = f""" + {select_clause} + FROM Product p + LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID + LEFT JOIN Category c ON pc.CategoryID = c.CategoryID + GROUP BY p.ProductID; """ - Load product data into the recommender system - - products_data: list of dictionaries with product info (id, name, description, category, etc.) + + cursor.execute(final_query) + results = cursor.fetchall() + + final = [] + for row in results: + text_list = list(row) + text_list.pop(0) + final.append(text_list) + + cursor.close() + db_con.close() + return final + +def get_user_history(user_id): + db_con = database() + cursor = db_con.cursor() + + cursor.execute("SELECT CategoryID FROM Category") + categories = cursor.fetchall() + + select_clause = "SELECT p.ProductID" + for category in categories: + category_id = category[0] # get the uid of the catefory and then append that to the new column + select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`" + + final_query = f""" + {select_clause} + FROM Product p + LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID + LEFT JOIN Category c ON pc.CategoryID = c.CategoryID + where p.ProductID in (select ProductID from History where UserID = {user_id}) + GROUP BY p.ProductID; """ - self.products_df = pd.DataFrame(products_data) - - # Create a text representation for each product (combining various features) - self.products_df['content'] = ( - self.products_df['category'] + ' ' + - self.products_df['name'] + ' ' + - self.products_df['description'] - ) - - # Initialize TF-IDF vectorizer to convert text to vectors - self.tfidf_vectorizer = TfidfVectorizer( - stop_words='english', - max_features=5000, # Limit features to avoid sparse matrices - ngram_range=(1, 2) # Use both unigrams and bigrams - ) - - # Compute TF-IDF matrix - self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content']) - - # Create a mapping from product_id to index - self.product_indices = pd.Series( - self.products_df.index, - index=self.products_df['product_id'] - ).drop_duplicates() - - def track_user_click(self, user_id, product_id): - """ - Track user clicks on products to build user profiles - """ - if user_id not in self.user_profiles: - self.user_profiles[user_id] = { - 'clicks': {}, - 'category_weights': { - 'electronics': 0, - 'school supplies': 0, - 'rental place': 0, - 'furniture': 0 - } - } - - # Get the clicked product's index and details - if product_id in self.product_indices: - product_idx = self.product_indices[product_id] - product_category = self.products_df.iloc[product_idx]['category'] - - # Update click count - if product_id in self.user_profiles[user_id]['clicks']: - self.user_profiles[user_id]['clicks'][product_id] += 1 - else: - self.user_profiles[user_id]['clicks'][product_id] = 1 - - # Update category weight - self.user_profiles[user_id]['category_weights'][product_category] += 1 - - def get_user_profile_vector(self, user_id): - """ - Generate a user profile vector based on their click history - """ - if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']: - # Return a zero vector if no click history - return np.zeros((1, self.tfidf_matrix.shape[1])) - - # Create a weighted average of all clicked products' TF-IDF vectors - clicked_product_vectors = [] - weights = [] - - for product_id, click_count in self.user_profiles[user_id]['clicks'].items(): - if product_id in self.product_indices: - product_idx = self.product_indices[product_id] - product_category = self.products_df.iloc[product_idx]['category'] - category_weight = self.user_profiles[user_id]['category_weights'][product_category] - - # Weight is based on both click count and category preference - weight = click_count * (1 + 0.5 * category_weight) - weights.append(weight) - clicked_product_vectors.append(self.tfidf_matrix[product_idx]) - - # Normalize weights - weights = np.array(weights) / np.sum(weights) - - # Compute weighted average - user_profile = np.zeros((1, self.tfidf_matrix.shape[1])) - for i, vector in enumerate(clicked_product_vectors): - user_profile += weights[i] * vector.toarray() - - return user_profile - - def recommend_products(self, user_id, n=5, category_filter=None): - """ - Recommend products to a user based on their profile - - user_id: ID of the user - n: Number of recommendations to return - category_filter: Optional filter to limit recommendations to a specific category - """ - # Get user profile vector - user_profile = self.get_user_profile_vector(user_id) - - # If user has no profile, recommend popular products (not implemented) - if np.sum(user_profile) == 0: - return self._get_popular_products(n, category_filter) - - # Calculate similarity scores - sim_scores = cosine_similarity(user_profile, self.tfidf_matrix) - sim_scores = sim_scores.flatten() - - # Create a DataFrame for easier filtering - recommendations_df = pd.DataFrame({ - 'product_id': self.products_df['product_id'], - 'score': sim_scores, - 'category': self.products_df['category'] - }) - - # Filter out products that the user has already clicked on - if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']: - clicked_products = list(self.user_profiles[user_id]['clicks'].keys()) - recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)] - - # Apply category filter if provided - if category_filter: - recommendations_df = recommendations_df[recommendations_df['category'] == category_filter] - - # Sort by similarity score and get top n recommendations - recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n) - + + cursor.execute(final_query) + results = cursor.fetchall() + final = [] + for row in results: + text_list = list(row) + text_list.pop(0) + final.append(text_list) + + cursor.close() + db_con.close() + return final + + +def get_recommendations(user_id, top_n=40): + try: + # Get all products and user history with their category vectors + all_products = get_all_products() + user_history = get_user_history(user_id) + + # if not user_history: + # # Cold start: return popular products + # return get_popular_products(top_n) + + # Calculate similarity between all products and user history + user_profile = np.mean(user_history, axis=0) # Average user preferences + similarities = cosine_similarity([user_profile], all_products) + + # finds the indices of the top N products that have the highest + # cosine similarity with the user's profile and sorted from most similar to least similar. + product_indices = similarities[0].argsort()[-top_n:][::-1] + print("product", product_indices) + # Return recommended product IDs - return recommendations_df['product_id'].tolist() - - def _get_popular_products(self, n=5, category_filter=None): - """ - Return popular products when a user has no profile - (This would typically be implemented with actual popularity metrics) - """ - filtered_df = self.products_df - - if category_filter: - filtered_df = filtered_df[filtered_df['category'] == category_filter] - - # Just return random products for now (in a real system you'd use popularity metrics) - if len(filtered_df) >= n: - return filtered_df.sample(n)['product_id'].tolist() - else: - return filtered_df['product_id'].tolist() - - def recommend_by_category_preference(self, user_id, n=5): - """ - Recommend products based primarily on the user's category preferences - """ - if user_id not in self.user_profiles: - return self._get_popular_products(n) - - # Get the user's most clicked category - category_weights = self.user_profiles[user_id]['category_weights'] - - # If no category has been clicked, return popular products - if sum(category_weights.values()) == 0: - return self._get_popular_products(n) - - # Sort categories by number of clicks - sorted_categories = sorted( - category_weights.items(), - key=lambda x: x[1], - reverse=True - ) - - recommendations = [] - remaining = n - - # Allocate recommendations proportionally across categories - for category, weight in sorted_categories: - if weight > 0: - # Allocate recommendations proportionally to category weight - category_allocation = max(1, int(remaining * (weight / sum(category_weights.values())))) - if category_allocation > remaining: - category_allocation = remaining - - # Get recommendations for this category - category_recs = self.recommend_products(user_id, category_allocation, category) - recommendations.extend(category_recs) - - # Update remaining slots - remaining -= len(category_recs) - - if remaining <= 0: - break - - # If we still have slots to fill, add general recommendations - if remaining > 0: - general_recs = self.recommend_products(user_id, remaining) - # Filter out duplicates - general_recs = [rec for rec in general_recs if rec not in recommendations] - recommendations.extend(general_recs[:remaining]) - - return recommendations + return [all_products[i][0] for i in product_indices] # Product IDs + except Exception as e: + logging.error(f"Recommendation error for user {user_id}: {str(e)}") + # return get_popular_products(top_n) # Fallback to popular products -exported = Recommender() +get_recommendations(1) diff --git a/recommondation-engine/test.py b/recommondation-engine/test.py deleted file mode 100644 index 3107da7..0000000 --- a/recommondation-engine/test.py +++ /dev/null @@ -1,33 +0,0 @@ -import mysql.connector as db_con - -#TODO: Specify all the required queries -query_get_all_Prod= ("SELECT * FROM Product ") - - -#TODO: connect with the db -def database(): - db = db_con.connect( - host = "localhost", - port = 3306, - user = "root", - database = "Marketplace" - ) - - cursor = db.cursor() - cursor.execute(query_get_all_Prod) - - data = [None] - for item in cursor: - data.append(item) - # print(item) - - print(data[1]) - cursor.close() - db.close() - - - -#TODO: Get All products -# Make it into a dictionary with product id and the list of category it would have -# {Prod1:[1,0,0,0,1]} this could mean its a [elctronics, 0,0,0, kitchen] -database()