update
This commit is contained in:
@@ -298,10 +298,7 @@ function App() {
|
||||
// Save to localStorage to persist across refreshes
|
||||
sessionStorage.setItem("isAuthenticated", "true");
|
||||
sessionStorage.setItem("user", JSON.stringify(userObj));
|
||||
|
||||
// After successful signup, send session data to server
|
||||
sendSessionDataToServer(); // Call it after signup
|
||||
|
||||
sessionStorage.getItem("user");
|
||||
|
||||
console.log("Login successful for:", userData.email);
|
||||
@@ -388,7 +385,7 @@ function App() {
|
||||
console.log("Sending user data to the server:", requestData);
|
||||
|
||||
// Send data to Python server (replace with your actual server URL)
|
||||
const response = await fetch("http://localhost:5000/api/user/session", {
|
||||
const response = await fetch("http://0.0.0.0:5000/api/user/session", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
|
||||
BIN
recommondation-engine/__pycache__/server.cpython-313.pyc
Normal file
BIN
recommondation-engine/__pycache__/server.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,95 +1,272 @@
|
||||
# pip install mysql.connector
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import mysql.connector as db_con
|
||||
|
||||
import mysql.connector
|
||||
# Flask app initialization
|
||||
app = Flask(__name__)
|
||||
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)
|
||||
|
||||
# Database connection setup
def get_db_connection():
    """Open and return a new MySQL connection to the Marketplace database.

    NOTE(review): no password is supplied — this assumes a passwordless
    local root account; confirm against the deployment environment.
    """
    return db_con.connect(
        host="localhost",
        port=3306,
        user="root",
        database="Marketplace"
    )


# Backward-compatible alias: older call sites used database().
database = get_db_connection
||||
|
||||
|
||||
# Fetch all products with category names
def get_all_products():
    """Return every product joined with its category name.

    Returns a list of dicts with keys ProductID, Name, Description,
    Category (None when the product has no category row), or an empty
    list on any database error.

    NOTE(review): GROUP BY p.ProductID with non-aggregated c.Name relies
    on MySQL's permissive GROUP BY; an arbitrary category is returned for
    multi-category products — confirm this is intended.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Product p
        LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
        LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
        GROUP BY p.ProductID;
    """
    try:
        connection = get_db_connection()
        # dictionary=True so rows come back as {column: value} dicts,
        # which pandas.DataFrame consumes directly.
        cursor = connection.cursor(dictionary=True)
        cursor.execute(query)
        products = cursor.fetchall()
        cursor.close()
        connection.close()
        return products
    except Exception as e:
        print(f"Database error getting products: {e}")
        return []
|
||||
|
||||
# Fetch user history
def get_user_history(user_id):
    """Return the products in user_id's browsing/purchase history.

    Returns a list of dicts (ProductID, Name, Description, Category),
    or an empty list on any database error.

    NOTE(review): this joins Category via p.CategoryID, while
    get_all_products() uses the Product_Category junction table —
    confirm which schema is current; one of the two is likely stale.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM History h
        JOIN Product p ON h.ProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE h.UserID = %s
    """
    try:
        connection = get_db_connection()
        cursor = connection.cursor(dictionary=True)
        # Parameterized query — never interpolate user_id into SQL.
        cursor.execute(query, (user_id,))
        history = cursor.fetchall()
        cursor.close()
        connection.close()
        return history
    except Exception as e:
        print(f"Error getting user history: {e}")
        return []
|
||||
|
||||
# Store recommendations
def store_user_recommendations(user_id, recommendations):
    """Replace the stored recommendations for user_id.

    Deletes any existing Recommendation rows for the user, then inserts
    one row per product id in `recommendations`, all in one transaction.
    Returns True on success, False on any database error.
    """
    delete_query = "DELETE FROM Recommendation WHERE UserID = %s"
    insert_query = "INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)"

    try:
        connection = get_db_connection()
        cursor = connection.cursor()

        # First delete existing recommendations
        cursor.execute(delete_query, (user_id,))

        # Then insert new recommendations in a single batch round-trip
        cursor.executemany(
            insert_query,
            [(user_id, product_id) for product_id in recommendations],
        )

        # Commit so delete + inserts land atomically.
        connection.commit()
        cursor.close()
        connection.close()
        return True
    except Exception as e:
        print(f"Error storing recommendations: {e}")
        return False
|
||||
|
||||
# Fetch stored recommendations
def get_stored_recommendations(user_id):
    """Return the previously stored recommendations for user_id.

    Returns a list of dicts (ProductID, Name, Description, Category),
    or an empty list on any database error.
    """
    query = """
        SELECT p.ProductID, p.Name, p.Description, c.Name AS Category
        FROM Recommendation r
        JOIN Product p ON r.RecommendedProductID = p.ProductID
        JOIN Category c ON p.CategoryID = c.CategoryID
        WHERE r.UserID = %s
    """
    try:
        connection = get_db_connection()
        cursor = connection.cursor(dictionary=True)
        cursor.execute(query, (user_id,))
        recommendations = cursor.fetchall()
        cursor.close()
        connection.close()
        return recommendations
    except Exception as e:
        print(f"Error getting stored recommendations: {e}")
        return []
|
||||
|
||||
# Initialize Recommender class
class Recommender:
    """Content-based product recommender (TF-IDF over product text,
    cosine similarity against the mean vector of a user's history)."""

    def __init__(self):
        self.products_df = None       # DataFrame of all products
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # ProductID -> DataFrame row position

    def load_products(self, products_data):
        """Build the TF-IDF model from a list of product dicts.

        products_data: dicts with keys ProductID, Name, Description,
        Category, as returned by get_all_products().
        """
        self.products_df = pd.DataFrame(products_data)

        # Combine relevant features for content-based filtering.
        # Category/Name can be NULL from the LEFT JOIN, which would turn
        # the whole 'content' cell into NaN and crash the vectorizer —
        # fill every component, not just Description.
        self.products_df['content'] = (
            self.products_df['Category'].fillna('') + ' ' +
            self.products_df['Name'].fillna('') + ' ' +
            self.products_df['Description'].fillna('')
        )

        # Create TF-IDF matrix
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])

        # Map product IDs to row positions for quick lookup.
        # (products_df keeps its default RangeIndex, so index labels and
        # positions coincide — iloc below depends on that.)
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['ProductID']
        ).drop_duplicates()

    def recommend_products_for_user(self, user_id, top_n=40):
        """
        Generate product recommendations based on user history using cosine similarity
        """
        # Get user history
        user_history = get_user_history(user_id)

        # If no history, return popular products
        if not user_history:
            return self.recommend_popular_products(top_n)

        # Convert user history to DataFrame
        history_df = pd.DataFrame(user_history)

        # Get indices of products in user history (skip unknown IDs)
        history_indices = [
            self.product_indices[product_id]
            for product_id in history_df['ProductID']
            if product_id in self.product_indices
        ]
        if not history_indices:
            return self.recommend_popular_products(top_n)

        # User profile = mean TF-IDF vector of the history.
        # np.asarray: sparse .mean() returns np.matrix, which sklearn
        # estimators reject/deprecate — convert to a plain ndarray.
        user_profile = np.asarray(
            self.tfidf_matrix[history_indices].mean(axis=0)
        ).reshape(1, -1)

        # Calculate similarity scores against every product
        similarity_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()

        # Series keyed by DataFrame index so we can drop/sort by label
        product_scores = pd.Series(similarity_scores, index=self.products_df.index)

        # Remove products the user has already interacted with
        product_scores = product_scores.drop(history_indices)

        # Sort by similarity score (highest first)
        product_scores = product_scores.sort_values(ascending=False)

        # Get top N product indices
        top_indices = product_scores.iloc[:top_n].index

        # Get product IDs for these indices
        return self.products_df.iloc[top_indices]['ProductID'].tolist()

    def recommend_popular_products(self, n=40):
        """
        Fallback recommendation strategy when user has no history
        In a real system, this would use actual popularity metrics
        """
        # For now, just returning random products
        return self.products_df.sample(min(n, len(self.products_df)))['ProductID'].tolist()
|
||||
|
||||
# Create recommender instance (module-level singleton shared by all routes)
recommender = Recommender()


@app.route('/load_products', methods=['GET'])
def load_products():
    """(Re)load all products from the database into the recommender.

    Returns 500 with an error payload when the product fetch fails,
    otherwise a success message with the product count.
    """
    products = get_all_products()
    if not products:
        return jsonify({'error': 'Failed to load products from database'}), 500

    recommender.load_products(products)
    return jsonify({'message': 'Products loaded successfully', 'count': len(products)})
|
||||
|
||||
@app.route('/recommend/<int:user_id>', methods=['GET'])
def recommend(user_id):
    """Generate, persist, and return recommendations for user_id."""
    # Lazily load the product model on the first request if startup
    # loading failed or hasn't happened yet.
    if recommender.products_df is None:
        products = get_all_products()
        if not products:
            return jsonify({'error': 'No products available'}), 500
        recommender.load_products(products)

    # Generate recommendations using cosine similarity
    recommendations = recommender.recommend_products_for_user(user_id)

    # Store recommendations in database; only report success if the
    # write succeeded, so clients can retry on 500.
    if store_user_recommendations(user_id, recommendations):
        return jsonify({
            'userId': user_id,
            'recommendations': recommendations,
            'count': len(recommendations)
        })
    else:
        return jsonify({'error': 'Failed to store recommendations'}), 500
|
||||
|
||||
@app.route('/api/user/session', methods=['POST'])
def handle_session_data():
    """Receive session data (userId, email, isAuthenticated) from the
    frontend after signup/login and validate database connectivity.

    Returns 400 on missing fields, 500 on database/server errors.
    """
    try:
        data = request.get_json()
        print("Received data:", data)  # Debug print

        user_id = data.get('userId')
        email = data.get('email')
        is_authenticated = data.get('isAuthenticated')

        # is_authenticated is compared to None explicitly so a valid
        # False value is not rejected as "missing".
        if not user_id or not email or is_authenticated is None:
            print("Missing required fields")  # Debug print
            return jsonify({'error': 'Invalid data'}), 400

        print(f"Processing session data: User ID: {user_id}, Email: {email}, Authenticated: {is_authenticated}")

        # Test database connection first
        try:
            conn = get_db_connection()
            conn.close()
            print("Database connection successful")
        except Exception as db_err:
            print(f"Database connection error: {db_err}")
            return jsonify({'error': f'Database connection error: {str(db_err)}'}), 500

        # Continue with the rest of your code...
        # NOTE(review): no success response is returned here — Flask will
        # raise for a None return; confirm the intended success payload.

    except Exception as e:
        import traceback
        print(f"Error in handle_session_data: {e}")
        print(traceback.format_exc())  # Print full stack trace
        return jsonify({'error': f'Server error: {str(e)}'}), 500
|
||||
|
||||
if __name__ == '__main__':
    # Load products on startup so the first /recommend request is fast;
    # routes also lazy-load as a fallback if this fails.
    products = get_all_products()
    if products:
        recommender.load_products(products)
        print(f"Loaded {len(products)} products at startup")
    else:
        print("Warning: No products loaded at startup")

    # NOTE(review): debug=True combined with host 0.0.0.0 exposes the
    # Werkzeug debugger to the whole network — disable debug outside
    # local development.
    app.run(debug=True, host='0.0.0.0', port=5000)
|
||||
|
||||
221
recommondation-engine/example1.py
Normal file
221
recommondation-engine/example1.py
Normal file
@@ -0,0 +1,221 @@
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
'''
|
||||
Recommender system using content-based filtering
|
||||
'''
|
||||
class Recommender:
    """Content-based recommender that builds per-user profiles from click
    history and ranks products by cosine similarity over TF-IDF vectors."""

    def __init__(self):
        # Initialize data structures
        self.products_df = None       # DataFrame of all products
        self.user_profiles = {}       # user_id -> {'clicks': ..., 'category_weights': ...}
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per product
        self.tfidf_vectorizer = None  # fitted TfidfVectorizer
        self.product_indices = None   # product_id -> DataFrame row position

    def load_products(self, products_data):
        """
        Load product data into the recommender system

        products_data: list of dictionaries with product info (id, name, description, category, etc.)
        """
        self.products_df = pd.DataFrame(products_data)

        # Create a text representation for each product (combining various features)
        self.products_df['content'] = (
            self.products_df['category'] + ' ' +
            self.products_df['name'] + ' ' +
            self.products_df['description']
        )

        # Initialize TF-IDF vectorizer to convert text to vectors
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,  # Limit features to avoid sparse matrices
            ngram_range=(1, 2)  # Use both unigrams and bigrams
        )

        # Compute TF-IDF matrix
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.products_df['content'])

        # Create a mapping from product_id to index
        self.product_indices = pd.Series(
            self.products_df.index,
            index=self.products_df['product_id']
        ).drop_duplicates()

    def track_user_click(self, user_id, product_id):
        """
        Track user clicks on products to build user profiles
        """
        if user_id not in self.user_profiles:
            self.user_profiles[user_id] = {
                'clicks': {},
                'category_weights': {
                    'electronics': 0,
                    'school supplies': 0,
                    'rental place': 0,
                    'furniture': 0
                }
            }

        # Get the clicked product's index and details
        if product_id in self.product_indices:
            product_idx = self.product_indices[product_id]
            product_category = self.products_df.iloc[product_idx]['category']

            # Update click count
            clicks = self.user_profiles[user_id]['clicks']
            clicks[product_id] = clicks.get(product_id, 0) + 1

            # Update category weight. setdefault guards against products
            # whose category is outside the four seeded ones — the
            # original raised KeyError for any new category.
            weights = self.user_profiles[user_id]['category_weights']
            weights.setdefault(product_category, 0)
            weights[product_category] += 1

    def get_user_profile_vector(self, user_id):
        """
        Generate a user profile vector based on their click history
        """
        if user_id not in self.user_profiles or not self.user_profiles[user_id]['clicks']:
            # Return a zero vector if no click history
            return np.zeros((1, self.tfidf_matrix.shape[1]))

        # Create a weighted average of all clicked products' TF-IDF vectors
        clicked_product_vectors = []
        weights = []

        for product_id, click_count in self.user_profiles[user_id]['clicks'].items():
            if product_id in self.product_indices:
                product_idx = self.product_indices[product_id]
                product_category = self.products_df.iloc[product_idx]['category']
                # .get guards unseeded categories (see track_user_click)
                category_weight = self.user_profiles[user_id]['category_weights'].get(product_category, 0)

                # Weight is based on both click count and category preference
                weight = click_count * (1 + 0.5 * category_weight)
                weights.append(weight)
                clicked_product_vectors.append(self.tfidf_matrix[product_idx])

        # Normalize weights
        weights = np.array(weights) / np.sum(weights)

        # Compute weighted average
        user_profile = np.zeros((1, self.tfidf_matrix.shape[1]))
        for i, vector in enumerate(clicked_product_vectors):
            user_profile += weights[i] * vector.toarray()

        return user_profile

    def recommend_products(self, user_id, n=5, category_filter=None):
        """
        Recommend products to a user based on their profile

        user_id: ID of the user
        n: Number of recommendations to return
        category_filter: Optional filter to limit recommendations to a specific category
        """
        # Get user profile vector
        user_profile = self.get_user_profile_vector(user_id)

        # If user has no profile, recommend popular products (not implemented)
        if np.sum(user_profile) == 0:
            return self._get_popular_products(n, category_filter)

        # Calculate similarity scores
        sim_scores = cosine_similarity(user_profile, self.tfidf_matrix).flatten()

        # Create a DataFrame for easier filtering
        recommendations_df = pd.DataFrame({
            'product_id': self.products_df['product_id'],
            'score': sim_scores,
            'category': self.products_df['category']
        })

        # Filter out products that the user has already clicked on
        if user_id in self.user_profiles and self.user_profiles[user_id]['clicks']:
            clicked_products = list(self.user_profiles[user_id]['clicks'].keys())
            recommendations_df = recommendations_df[~recommendations_df['product_id'].isin(clicked_products)]

        # Apply category filter if provided
        if category_filter:
            recommendations_df = recommendations_df[recommendations_df['category'] == category_filter]

        # Sort by similarity score and get top n recommendations
        recommendations_df = recommendations_df.sort_values('score', ascending=False).head(n)

        # Return recommended product IDs
        return recommendations_df['product_id'].tolist()

    def _get_popular_products(self, n=5, category_filter=None):
        """
        Return popular products when a user has no profile
        (This would typically be implemented with actual popularity metrics)
        """
        filtered_df = self.products_df

        if category_filter:
            filtered_df = filtered_df[filtered_df['category'] == category_filter]

        # Just return random products for now (in a real system you'd use popularity metrics)
        if len(filtered_df) >= n:
            return filtered_df.sample(n)['product_id'].tolist()
        else:
            return filtered_df['product_id'].tolist()

    def recommend_by_category_preference(self, user_id, n=5):
        """
        Recommend products based primarily on the user's category preferences
        """
        if user_id not in self.user_profiles:
            return self._get_popular_products(n)

        # Get the user's most clicked category
        category_weights = self.user_profiles[user_id]['category_weights']

        # If no category has been clicked, return popular products
        if sum(category_weights.values()) == 0:
            return self._get_popular_products(n)

        # Sort categories by number of clicks
        sorted_categories = sorted(
            category_weights.items(),
            key=lambda x: x[1],
            reverse=True
        )

        recommendations = []
        remaining = n

        # Allocate recommendations proportionally across categories
        for category, weight in sorted_categories:
            if weight > 0:
                # Allocate recommendations proportionally to category weight
                category_allocation = max(1, int(remaining * (weight / sum(category_weights.values()))))
                if category_allocation > remaining:
                    category_allocation = remaining

                # Get recommendations for this category
                category_recs = self.recommend_products(user_id, category_allocation, category)
                recommendations.extend(category_recs)

                # Update remaining slots
                remaining -= len(category_recs)

                if remaining <= 0:
                    break

        # If we still have slots to fill, add general recommendations
        if remaining > 0:
            general_recs = self.recommend_products(user_id, remaining)
            # Filter out duplicates
            general_recs = [rec for rec in general_recs if rec not in recommendations]
            recommendations.extend(general_recs[:remaining])

        return recommendations


exported = Recommender()
|
||||
Reference in New Issue
Block a user