Cosine similarity calculation now working properly

This commit is contained in:
Mann Patel
2025-04-03 11:59:25 -06:00
parent e7580c36f5
commit 99f12319d5
5 changed files with 152 additions and 887 deletions

View File

@@ -1,5 +1,4 @@
-- Inserting sample data into the Marketplace database
-- Clear existing data (if needed)
SET
FOREIGN_KEY_CHECKS = 0;
@@ -59,33 +58,6 @@ VALUES
'hashedpassword2',
'555-234-5678',
'456 Oak Ave, Calgary, AB'
),
(
3,
'Michael Brown',
'michael.b@example.com',
'U345678',
'hashedpassword3',
'555-345-6789',
'789 Pine Rd, Calgary, AB'
),
(
4,
'Sarah Wilson',
'sarah.w@example.com',
'U456789',
'hashedpassword4',
'555-456-7890',
'101 Elm Blvd, Calgary, AB'
),
(
5,
'David Taylor',
'david.t@example.com',
'U567890',
'hashedpassword5',
'555-567-8901',
'202 Maple Dr, Calgary, AB'
);
-- Insert User Roles
@@ -93,10 +65,7 @@ INSERT INTO
UserRole (UserID, Client, Admin)
VALUES
(1, TRUE, TRUE),
(2, TRUE, FALSE),
(3, TRUE, FALSE),
(4, TRUE, FALSE),
(5, TRUE, FALSE);
(2, TRUE, FALSE);
-- Insert Categories
INSERT INTO
@@ -161,7 +130,7 @@ VALUES
'HP Laptop',
699.99,
1,
2,
1,
'2023 HP Pavilion, 16GB RAM, 512GB SSD',
2,
'2024-10-10 14:30:00'
@@ -171,7 +140,7 @@ VALUES
'Dorm Desk',
120.00,
1,
3,
2,
'Sturdy desk perfect for studying, minor scratches',
3,
'2024-10-12 09:15:00'
@@ -181,7 +150,7 @@ VALUES
'University Hoodie',
35.00,
3,
1,
2,
'Size L, university logo, worn twice',
4,
'2024-10-14 16:45:00'
@@ -191,7 +160,7 @@ VALUES
'Basketball',
25.50,
1,
4,
2,
'Slightly used indoor basketball',
5,
'2024-10-11 11:20:00'
@@ -201,7 +170,7 @@ VALUES
'Acoustic Guitar',
175.00,
1,
2,
1,
'Beginner acoustic guitar with case',
6,
'2024-10-09 13:10:00'
@@ -211,7 +180,7 @@ VALUES
'Physics Textbook',
65.00,
2,
5,
2,
'University Physics 14th Edition, good condition',
1,
'2024-10-08 10:30:00'
@@ -221,7 +190,7 @@ VALUES
'Mini Fridge',
85.00,
1,
3,
1,
'Small dorm fridge, works perfectly',
8,
'2024-10-13 15:00:00'
@@ -231,7 +200,7 @@ VALUES
'PlayStation 5 Controller',
55.00,
1,
4,
2,
'Extra controller, barely used',
9,
'2024-10-07 17:20:00'
@@ -241,7 +210,7 @@ VALUES
'Mountain Bike',
350.00,
1,
5,
1,
'Trek mountain bike, great condition, new tires',
10,
'2024-10-06 14:00:00'
@@ -271,7 +240,7 @@ VALUES
'Graphing Calculator',
75.00,
1,
3,
1,
'TI-84 Plus, perfect working condition',
12,
'2024-10-03 11:15:00'
@@ -281,7 +250,7 @@ VALUES
'Yoga Mat',
20.00,
1,
4,
2,
'Thick yoga mat, barely used',
13,
'2024-10-02 16:00:00'
@@ -291,7 +260,7 @@ VALUES
'Winter Jacket',
120.00,
1,
5,
1,
'Columbia winter jacket, size XL, very warm',
26,
'2024-10-01 10:20:00'
@@ -301,7 +270,7 @@ VALUES
'Computer Science Textbook',
70.00,
1,
1,
2,
'Introduction to Algorithms, like new',
1,
'2024-09-30 14:30:00'
@@ -321,7 +290,7 @@ VALUES
'Scientific Calculator',
25.00,
1,
3,
1,
'Casio scientific calculator',
12,
'2024-09-28 11:30:00'
@@ -331,7 +300,7 @@ VALUES
'Bluetooth Speaker',
45.00,
1,
4,
1,
'JBL Bluetooth speaker, great sound',
23,
'2024-09-27 15:45:00'
@@ -341,7 +310,7 @@ VALUES
'Backpack',
40.00,
1,
5,
2,
'North Face backpack, lots of pockets',
22,
'2024-09-26 09:15:00'
@@ -360,7 +329,17 @@ VALUES
('/image1.avif', 7),
('/image1.avif', 8),
('/image1.avif', 9),
('/image1.avif', 10);
('/image1.avif', 10),
('/image1.avif', 11),
('/image1.avif', 12),
('/image1.avif', 13),
('/image1.avif', 14),
('/image1.avif', 15),
('/image1.avif', 16),
('/image1.avif', 17),
('/image1.avif', 18),
('/image1.avif', 19),
('/image1.avif', 20);
-- Insert Product-Category relationships (products with multiple categories)
INSERT INTO
@@ -420,82 +399,20 @@ VALUES
-- Backpack: Backpacks & Bags, School Supplies, Dorm Essentials
-- Insert History records
--
INSERT INTO
History (HistoryID, UserID, ProductID, Date)
History (HistoryID, UserID, ProductID)
VALUES
(1, 1, 1, '2024-10-15 11:30:00'),
(2, 1, 2, '2024-10-14 13:45:00'),
(3, 1, 5, '2024-10-13 09:20:00'),
(4, 1, 4, '2024-10-12 16:10:00');
--
INSERT INTO
History (HistoryID, UserID, ProductID, Date)
VALUES
(1, 2, 1, '2024-10-15 11:30:00'), -- User 2 viewed Calculus Textbook
(2, 3, 2, '2024-10-14 13:45:00'), -- User 3 viewed HP Laptop
(3, 4, 3, '2024-10-13 09:20:00'), -- User 4 viewed Dorm Desk
(4, 5, 4, '2024-10-12 16:10:00'), -- User 5 viewed University Hoodie
(5, 1, 5, '2024-10-11 14:30:00'), -- User 1 viewed Basketball
(6, 2, 6, '2024-10-10 10:15:00'), -- User 2 viewed Acoustic Guitar
(7, 3, 7, '2024-10-09 15:40:00'), -- User 3 viewed Physics Textbook
(8, 4, 8, '2024-10-08 11:25:00'), -- User 4 viewed Mini Fridge
(9, 5, 9, '2024-10-07 17:50:00'), -- User 5 viewed PS5 Controller
(10, 1, 10, '2024-10-06 14:15:00');
-- User 1 viewed Mountain Bike
-- Insert Reviews
INSERT INTO
Review (
ReviewID,
UserID,
ProductID,
Comment,
Rating,
Date
)
VALUES
(
1,
2,
1,
'Great condition, exactly as described!',
5,
'2024-10-16 09:30:00'
),
(
2,
3,
2,
'Works well, but had a small scratch not mentioned in the listing.',
4,
'2024-10-15 14:20:00'
),
(
3,
4,
6,
'Perfect for beginners, sounds great!',
5,
'2024-10-14 11:10:00'
),
(
4,
5,
8,
'Keeps my drinks cold, but a bit noisy at night.',
3,
'2024-10-13 16:45:00'
),
(
5,
1,
10,
'Excellent bike, well maintained!',
5,
'2024-10-12 13:25:00'
);
(1, 1, 1),
(2, 1, 3),
(3, 1, 5),
(4, 1, 7),
(5, 1, 9),
(6, 1, 11),
(7, 2, 2),
(8, 2, 4),
(9, 2, 5),
(10, 1, 15),
(11, 1, 18);
-- Insert Favorites
INSERT INTO
@@ -505,9 +422,9 @@ VALUES
(1, 7), -- User 1 likes Physics Textbook
(2, 3), -- User 2 likes Dorm Desk
(2, 10), -- User 2 likes Mountain Bike
(3, 6), -- User 3 likes Acoustic Guitar
(4, 5), -- User 4 likes Basketball
(5, 8);
(1, 6), -- User 3 likes Acoustic Guitar
(1, 5), -- User 4 likes Basketball
(2, 8);
-- User 5 likes Mini Fridge
-- Insert Transactions
@@ -520,242 +437,8 @@ INSERT INTO
PaymentStatus
)
VALUES
(1, 2, 1, '2024-10-16 10:30:00', 'Completed'),
(2, 3, 6, '2024-10-15 15:45:00', 'Completed'),
(3, 4, 8, '2024-10-14 12:20:00', 'Pending'),
(4, 5, 10, '2024-10-13 17:10:00', 'Completed'),
(5, 1, 4, '2024-10-12 14:30:00', 'Completed');
-- Insert Recommendations
INSERT INTO
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
VALUES
(1, 1, 7), -- Recommend Physics Textbook to User 1
(2, 1, 13), -- Recommend Graphing Calculator to User 1
(3, 2, 3), -- Recommend Dorm Desk to User 2
(4, 2, 17), -- Recommend Desk Lamp to User 2
(5, 3, 16), -- Recommend CS Textbook to User 3
(6, 4, 14), -- Recommend Yoga Mat to User 4
(7, 5, 15);
INSERT INTO
Recommendation (RecommendationID_PK, UserID, RecommendedProductID)
VALUES
(12, 1, 19),
(13, 1, 9),
(14, 1, 11),
(15, 1, 16),
-- Insert Authentication records
INSERT INTO
AuthVerification (Email, VerificationCode, Authenticated, Date)
VALUES
(
'john.doe@example.com',
'123456',
TRUE,
'2024-10-01 09:00:00'
),
(
'jane.smith@example.com',
'234567',
TRUE,
'2024-10-02 10:15:00'
),
(
'michael.b@example.com',
'345678',
TRUE,
'2024-10-03 11:30:00'
),
(
'sarah.w@example.com',
'456789',
TRUE,
'2024-10-04 12:45:00'
),
(
'david.t@example.com',
'567890',
TRUE,
'2024-10-05 14:00:00'
);
INSERT INTO
Product (
ProductID,
Name,
Description,
Price,
StockQuantity,
CategoryID
)
VALUES
(
101,
'Smart Coffee Maker',
'Wi-Fi enabled coffee machine with scheduling feature',
129.99,
50,
11
),
(
102,
'Ergonomic Office Chair',
'Adjustable mesh chair with lumbar support',
199.99,
35,
12
),
(
103,
'Wireless Mechanical Keyboard',
'RGB-backlit wireless keyboard with mechanical switches',
89.99,
60,
13
),
(
104,
'Portable Solar Charger',
'Foldable solar power bank with USB-C support',
59.99,
40,
14
),
(
105,
'Noise-Canceling Headphones',
'Over-ear Bluetooth headphones with ANC',
179.99,
25,
15
),
(
106,
'Smart Water Bottle',
'Tracks water intake and glows as a hydration reminder',
39.99,
75,
11
),
(
107,
'Compact Air Purifier',
'HEPA filter air purifier for small rooms',
149.99,
30,
16
),
(
108,
'Smart LED Desk Lamp',
'Adjustable LED lamp with voice control',
69.99,
45,
12
),
(
109,
'4K Streaming Device',
'HDMI streaming stick with voice remote',
49.99,
80,
17
),
(
110,
'Smart Plant Monitor',
'Bluetooth-enabled sensor for plant health tracking',
34.99,
55,
18
),
(
111,
'Wireless Charging Pad',
'Fast-charging pad for Qi-compatible devices',
29.99,
90,
13
),
(
112,
'Mini Projector',
'Portable projector with built-in speakers',
129.99,
20,
14
),
(
113,
'Foldable Bluetooth Keyboard',
'Ultra-thin keyboard for travel use',
39.99,
70,
19
),
(
114,
'Smart Alarm Clock',
'AI-powered alarm clock with sunrise simulation',
79.99,
40,
15
),
(
115,
'Touchscreen Toaster',
'Customizable toaster with a digital display',
99.99,
30,
11
),
(
116,
'Cordless Vacuum Cleaner',
'Lightweight handheld vacuum with strong suction',
159.99,
25,
16
),
(
117,
'Smart Bike Lock',
'Fingerprint and app-controlled bike security lock',
89.99,
35,
20
),
(
118,
'Bluetooth Sleep Headband',
'Comfortable sleep headband with built-in speakers',
49.99,
60,
18
),
(
119,
'Retro Game Console',
'Plug-and-play console with 500+ classic games',
79.99,
50,
17
),
(
120,
'Automatic Pet Feeder',
'App-controlled food dispenser for pets',
99.99,
40,
20
);
SELECT
p.*,
i.URL AS image_url
FROM
Product p
LEFT JOIN Image_URL i ON p.ProductID = i.ProductID
WHERE
p.ProductID = 1
(1, 1, 1, '2024-10-16 10:30:00', 'Completed'),
(2, 1, 6, '2024-10-15 15:45:00', 'Completed'),
(3, 1, 8, '2024-10-14 12:20:00', 'Pending'),
(4, 2, 10, '2024-10-13 17:10:00', 'Completed'),
(5, 2, 4, '2024-10-12 14:30:00', 'Completed');

View File

@@ -1,272 +0,0 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import mysql.connector as db_con
# Flask app initialization
app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)
# Database connection setup
def get_db_connection():
    """Open and return a new connection to the local Marketplace MySQL database."""
    connection_params = {
        "host": "localhost",
        "port": 3306,
        "user": "root",
        "database": "Marketplace",
    }
    return db_con.connect(**connection_params)
# Fetch all products with category names
def get_all_products():
    """Fetch every product joined with its category name.

    Returns a list of dicts with keys ProductID, Name, Description and
    Category, or an empty list (after logging) on any database error.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM Product p
    JOIN Category c ON p.CategoryID = c.CategoryID
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql)
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Database error getting products: {exc}")
        return []
# Fetch user history
def get_user_history(user_id):
    """Return the products (with category names) the given user has viewed.

    Reads the History table for *user_id*; returns an empty list (after
    logging) on any database error.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM History h
    JOIN Product p ON h.ProductID = p.ProductID
    JOIN Category c ON p.CategoryID = c.CategoryID
    WHERE h.UserID = %s
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql, (user_id,))
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Error getting user history: {exc}")
        return []
# Store recommendations
def store_user_recommendations(user_id, recommendations):
    """Replace the stored recommendation list for *user_id*.

    Deletes the user's existing Recommendation rows, then inserts one row
    per product id in *recommendations*. Returns True on success, False
    (after logging) on any database error.
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        # Wipe the previous recommendation set before writing the new one.
        cur.execute("DELETE FROM Recommendation WHERE UserID = %s", (user_id,))
        cur.executemany(
            "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)",
            [(user_id, product_id) for product_id in recommendations],
        )
        conn.commit()
        cur.close()
        conn.close()
        return True
    except Exception as exc:
        print(f"Error storing recommendations: {exc}")
        return False
# Fetch stored recommendations
def get_stored_recommendations(user_id):
    """Fetch the recommendations previously persisted for *user_id*.

    Joins Recommendation back to Product/Category so the caller gets full
    product rows; returns an empty list (after logging) on database errors.
    """
    sql = """
    SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
    FROM Recommendation r
    JOIN Product p ON r.RecommendedProductID = p.ProductID
    JOIN Category c ON p.CategoryID = c.CategoryID
    WHERE r.UserID = %s
    """
    try:
        conn = get_db_connection()
        cur = conn.cursor(dictionary=True)
        cur.execute(sql, (user_id,))
        rows = cur.fetchall()
        cur.close()
        conn.close()
        return rows
    except Exception as exc:
        print(f"Error getting stored recommendations: {exc}")
        return []
# Initialize Recommender class
class Recommender:
    """Content-based product recommender.

    Builds a TF-IDF matrix over each product's category, name and
    description, then ranks products for a user by cosine similarity
    between the mean TF-IDF vector of the user's history and every
    product vector.
    """

    def __init__(self):
        self.products_df = None       # DataFrame of all products
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # Series mapping ProductID -> row index

    def load_products(self, products_data):
        """Load products and (re)build the TF-IDF model.

        products_data: list of dicts with at least ProductID, Name,
        Description and Category keys (as returned by get_all_products()).
        """
        self.products_df = pd.DataFrame(products_data)
        # Combine relevant features for content-based filtering.
        self.products_df['content'] = (
            self.products_df['Category'] + ' ' +
            self.products_df['Name'] + ' ' +
            self.products_df['Description'].fillna('')
        )
        # Create TF-IDF matrix over the combined text.
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])
        # Map product IDs to row indices for quick lookup.
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['ProductID']
        ).drop_duplicates()

    def recommend_products_for_user(self, user_id, top_n=40):
        """Return up to *top_n* ProductIDs ranked by cosine similarity
        between the user's history profile and all products.

        Falls back to recommend_popular_products() when the user has no
        usable history.
        """
        user_history = get_user_history(user_id)
        if not user_history:
            return self.recommend_popular_products(top_n)
        history_df = pd.DataFrame(user_history)
        # Translate history ProductIDs to matrix row indices; unknown
        # products (not present when the model was built) are skipped.
        history_indices = []
        for product_id in history_df['ProductID']:
            if product_id in self.product_indices:
                history_indices.append(self.product_indices[product_id])
        if not history_indices:
            return self.recommend_popular_products(top_n)
        # Bug fix: sparse .mean(axis=0) yields an np.matrix, which modern
        # scikit-learn rejects ("np.matrix is not supported") — convert to
        # a plain ndarray before computing similarities.
        user_profile = np.asarray(
            self.tfidf_matrix[history_indices].mean(axis=0)
        ).reshape(1, -1)
        similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()
        product_scores = pd.Series(similarity_scores, index=self.products_df.index)
        # Remove products the user has already interacted with
        # (Series.drop tolerates duplicate labels from repeat views).
        product_scores = product_scores.drop(history_indices)
        product_scores = product_scores.sort_values(ascending=False)
        top_indices = product_scores.iloc[:top_n].index
        # Use label-based lookup: top_indices are index labels of products_df.
        return self.products_df.loc[top_indices, 'ProductID'].tolist()

    def recommend_popular_products(self, n=40):
        """Fallback when a user has no history: return up to *n* random
        ProductIDs. A real deployment would use actual popularity metrics."""
        return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist()
# Create recommender instance
recommender = Recommender()
@app.route('/load_products', methods=['GET'])
def load_products():
    """Endpoint: refresh the recommender's product catalog from the database."""
    catalog = get_all_products()
    if catalog:
        recommender.load_products(catalog)
        return jsonify({'message': 'Products loaded successfully', 'count': len(catalog)})
    return jsonify({'error': 'Failed to load products from database'}), 500
@app.route('/recommend/<int:user_id>', methods=['GET'])
def recommend(user_id):
    """Endpoint: compute, persist and return recommendations for *user_id*."""
    # Lazily load the catalog if the recommender has not been primed yet.
    if recommender.products_df is None:
        catalog = get_all_products()
        if not catalog:
            return jsonify({'error': 'No products available'}), 500
        recommender.load_products(catalog)
    # Generate recommendations using cosine similarity over user history.
    recommendations = recommender.recommend_products_for_user(user_id)
    # Persist them so other services can read the stored list.
    if not store_user_recommendations(user_id, recommendations):
        return jsonify({'error': 'Failed to store recommendations'}), 500
    return jsonify({
        'userId': user_id,
        'recommendations': recommendations,
        'count': len(recommendations),
    })
@app.route('/api/user/session', methods=['POST'])
def handle_session_data():
    """Receive session info (userId, email, isAuthenticated) from the frontend.

    Validates the payload and verifies database connectivity.
    Returns 400 on a malformed payload, 500 on server/database errors,
    200 once the session data has been accepted.
    """
    try:
        data = request.get_json()
        print("Received data:", data)  # Debug print
        user_id = data.get('userId')
        email = data.get('email')
        is_authenticated = data.get('isAuthenticated')
        # Bug fix: use explicit None checks so falsy-but-valid values
        # (e.g. a user id of 0) are not rejected as missing.
        if user_id is None or not email or is_authenticated is None:
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400
        print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}")
        # Test database connection first
        try:
            conn = get_db_connection()
            conn.close()
            print("Database connection successful")
        except Exception as db_err:
            print(f"Database connection error: {db_err}")
            return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500
        # Bug fix: the original fell off the end here and returned None,
        # which makes Flask raise "view function did not return a valid
        # response"; acknowledge the session data explicitly instead.
        return jsonify({'message': 'Session data received'}), 200
    except Exception as e:
        import traceback
        print(f"Error in handle_session_data: {e}")
        print(traceback.format_exc())  # Print full stack trace
        return jsonify({'error': f'Server error: {str(e)}'}), 500
if __name__ == '__main__':
    # Warm the recommender with the full catalog before serving requests.
    startup_products = get_all_products()
    if startup_products:
        recommender.load_products(startup_products)
        print(f"Loaded {len(startup_products)} products at startup")
    else:
        print("Warning: No products loaded at startup")
    app.run(debug=True, host='0.0.0.0', port=5000)

View File

@@ -1,5 +0,0 @@
-- All products that user 1 has previously viewed (per the History table).
SELECT *
FROM Product
WHERE ProductID IN (
    SELECT ProductID
    FROM History
    WHERE UserID = 1
);

View File

@@ -1,221 +1,113 @@
# pip install mysql.connector
import pandas as pd
import numpy as np
import mysql.connector
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import logging
'''
Recommender system using content-based filtering
'''
class Recommender:
def __init__(self):
# Initialize data structures
self.products_df = None
self.user_profiles = {}
self.tfidf_matrix = None
self.tfidf_vectorizer = None
self.product_indices = None
def load_products(self, products_data):
"""
Load product data into the recommender system
products_data: list of dictionaries with product info (id, name, description, category, etc.)
"""
self.products_df = pd.DataFrame(products_data)
# Create a text representation for each product (combining various features)
self.products_df['content'] = (
self.products_df['category'] + ' ' +
self.products_df['name'] + ' ' +
self.products_df['description']
def database():
db_connection = mysql.connector.connect(
host = "localhost",
port = "3306",
user = "root",
database = "Marketplace"
)
return db_connection
# Initialize TF-IDF vectorizer to convert text to vectors
self.tfidf_vectorizer = TfidfVectorizer(
stop_words='english',
max_features=5000, # Limit features to avoid sparse matrices
ngram_range=(1, 2) # Use both unigrams and bigrams
)
# Compute TF-IDF matrix
self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])
def get_all_products():
# Create a mapping from product_id to index
self.product_indices = pd.Series(
self.products_df.index,
index=self.products_df['product_id']
).drop_duplicates()
db_con = database()
cursor = db_con.cursor()
def track_user_click(self, user_id, product_id):
cursor.execute("SELECT CategoryID FROM Category")
categories = cursor.fetchall()
select_clause = "SELECT p.ProductID"
for category in categories:
category_id = category[0]
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
final_query = f"""
{select_clause}
FROM Product p
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
GROUP BY p.ProductID;
"""
Track user clicks on products to build user profiles
cursor.execute(final_query)
results = cursor.fetchall()
final = []
for row in results:
text_list = list(row)
text_list.pop(0)
final.append(text_list)
cursor.close()
db_con.close()
return final
def get_user_history(user_id):
db_con = database()
cursor = db_con.cursor()
cursor.execute("SELECT CategoryID FROM Category")
categories = cursor.fetchall()
select_clause = "SELECT p.ProductID"
for category in categories:
category_id = category[0] # get the uid of the catefory and then append that to the new column
select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
final_query = f"""
{select_clause}
FROM Product p
LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
where p.ProductID in (select ProductID from History where UserID = {user_id})
GROUP BY p.ProductID;
"""
if user_id not in self.user_profiles:
self.user_profiles[user_id] = {
'clicks': {},
'category_weights': {
'electronics': 0,
'school supplies': 0,
'rental place': 0,
'furniture': 0
}
}
# Get the clicked product's index and details
if product_id in self.product_indices:
product_idx = self.product_indices[product_id]
product_category = self.products_df.iloc[product_idx]['category']
cursor.execute(final_query)
results = cursor.fetchall()
final = []
for row in results:
text_list = list(row)
text_list.pop(0)
final.append(text_list)
# Update click count
if product_id in self.user_profiles[user_id]['clicks']:
self.user_profiles[user_id]['clicks'][product_id] += 1
else:
self.user_profiles[user_id]['clicks'][product_id] = 1
cursor.close()
db_con.close()
return final
# Update category weight
self.user_profiles[user_id]['category_weights'][product_category] += 1
def get_user_profile_vector(self, user_id):
"""
Generate a user profile vector based on their click history
"""
if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']:
# Return a zero vector if no click history
return np.zeros((1, self.tfidf_matrix.shape[1]))
def get_recommendations(user_id, top_n=40):
try:
# Get all products and user history with their category vectors
all_products = get_all_products()
user_history = get_user_history(user_id)
# Create a weighted average of all clicked products' TF-IDF vectors
clicked_product_vectors = []
weights = []
# if not user_history:
# # Cold start: return popular products
# return get_popular_products(top_n)
for product_id, click_count in self.user_profiles[user_id]['clicks'].items():
if product_id in self.product_indices:
product_idx = self.product_indices[product_id]
product_category = self.products_df.iloc[product_idx]['category']
category_weight = self.user_profiles[user_id]['category_weights'][product_category]
# Calculate similarity between all products and user history
user_profile = np.mean(user_history, axis=0) # Average user preferences
similarities = cosine_similarity([user_profile], all_products)
# Weight is based on both click count and category preference
weight = click_count * (1 + 0.5 * category_weight)
weights.append(weight)
clicked_product_vectors.append(self.tfidf_matrix[product_idx])
# Normalize weights
weights = np.array(weights) / np.sum(weights)
# Compute weighted average
user_profile = np.zeros((1, self.tfidf_matrix.shape[1]))
for i, vector in enumerate(clicked_product_vectors):
user_profile += weights[i] * vector.toarray()
return user_profile
def recommend_products(self, user_id, n=5, category_filter=None):
"""
Recommend products to a user based on their profile
user_id: ID of the user
n: Number of recommendations to return
category_filter: Optional filter to limit recommendations to a specific category
"""
# Get user profile vector
user_profile = self.get_user_profile_vector(user_id)
# If user has no profile, recommend popular products (not implemented)
if np.sum(user_profile) == 0:
return self._get_popular_products(n, category_filter)
# Calculate similarity scores
sim_scores = cosine_similarity(user_profile, self.tfidf_matrix)
sim_scores = sim_scores.flatten()
# Create a DataFrame for easier filtering
recommendations_df = pd.DataFrame({
'product_id': self.products_df['product_id'],
'score': sim_scores,
'category': self.products_df['category']
})
# Filter out products that the user has already clicked on
if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']:
clicked_products = list(self.user_profiles[user_id]['clicks'].keys())
recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)]
# Apply category filter if provided
if category_filter:
recommendations_df = recommendations_df[recommendations_df['category'] == category_filter]
# Sort by similarity score and get top n recommendations
recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n)
# finds the indices of the top N products that have the highest
# cosine similarity with the user's profile and sorted from most similar to least similar.
product_indices = similarities[0].argsort()[-top_n:][::-1]
print("product", product_indices)
# Return recommended product IDs
return recommendations_df['product_id'].tolist()
def _get_popular_products(self, n=5, category_filter=None):
"""
Return popular products when a user has no profile
(This would typically be implemented with actual popularity metrics)
"""
filtered_df = self.products_df
if category_filter:
filtered_df = filtered_df[filtered_df['category'] == category_filter]
# Just return random products for now (in a real system you'd use popularity metrics)
if len(filtered_df) >= n:
return filtered_df.sample(n)['product_id'].tolist()
else:
return filtered_df['product_id'].tolist()
def recommend_by_category_preference(self, user_id, n=5):
"""
Recommend products based primarily on the user's category preferences
"""
if user_id not in self.user_profiles:
return self._get_popular_products(n)
# Get the user's most clicked category
category_weights = self.user_profiles[user_id]['category_weights']
# If no category has been clicked, return popular products
if sum(category_weights.values()) == 0:
return self._get_popular_products(n)
# Sort categories by number of clicks
sorted_categories = sorted(
category_weights.items(),
key=lambda x: x[1],
reverse=True
)
recommendations = []
remaining = n
# Allocate recommendations proportionally across categories
for category, weight in sorted_categories:
if weight > 0:
# Allocate recommendations proportionally to category weight
category_allocation = max(1, int(remaining * (weight / sum(category_weights.values()))))
if category_allocation > remaining:
category_allocation = remaining
# Get recommendations for this category
category_recs = self.recommend_products(user_id, category_allocation, category)
recommendations.extend(category_recs)
# Update remaining slots
remaining -= len(category_recs)
if remaining <= 0:
break
# If we still have slots to fill, add general recommendations
if remaining > 0:
general_recs = self.recommend_products(user_id, remaining)
# Filter out duplicates
general_recs = [rec for rec in general_recs if rec not in recommendations]
recommendations.extend(general_recs[:remaining])
return recommendations
return [all_products[i][0] for i in product_indices] # Product IDs
except Exception as e:
logging.error(f"Recommendation error for user {user_id}: {str(e)}")
# return get_popular_products(top_n) # Fallback to popular products
exported = Recommender()
get_recommendations(1)

View File

@@ -1,33 +0,0 @@
import mysql.connector as db_con
#TODO: Specify all the required queries
query_get_all_Prod= ("SELECT * FROM Product ")
#TODO: connect with the db
def database():
    """Connect to the Marketplace DB, fetch all products, print the first row.

    NOTE(review): `rows` starts with a None placeholder, so the first
    fetched product sits at index 1; connection parameters are hard-coded
    for local development — confirm before reuse.
    """
    db = db_con.connect(
        host="localhost",
        port=3306,
        user="root",
        database="Marketplace",
    )
    cursor = db.cursor()
    cursor.execute(query_get_all_Prod)
    rows = [None]  # placeholder keeps the first product at index 1
    for record in cursor:
        rows.append(record)
    print(rows[1])  # show the first product row
    cursor.close()
    db.close()
#TODO: Get All products
# Make it into a dictionary with product id and the list of category it would have
# {Prod1:[1,0,0,0,1]} this could mean its a [elctronics, 0,0,0, kitchen]
database()