Remove the unwanted Product_Category table (queries now read CategoryID directly from Product)

2025-04-22 14:27:32 -06:00
parent bbddc8566a
commit 9d05adacfb
5 changed files with 20 additions and 123 deletions
--- a/recommondation-engine/app.py
+++ b/recommondation-engine/app.py
@@ -19,7 +19,7 @@ def delete_user_recommendations(user_id):
    cursor = db_con.cursor()

    try:
-        cursor.execute("DELETE FROM Recommendation WHERE UserID = %s", (user_id,))
+        cursor.execute("DELETE FROM Recommendation WHERE UserID = %s", (user_id))
        db_con.commit()
        print(f"Deleted existing recommendations for user {user_id}")
        logging.info(f"Deleted existing recommendations for user {user_id}")
@@ -32,14 +32,12 @@ def delete_user_recommendations(user_id):
        cursor.close()
        db_con.close()

-def get_random_products(count=10, exclude_list=None):
-    """Get random products from the database, excluding any in the exclude_list"""
+def get_random_products(count=0, exclude_list=None):
    db_con = database()
    cursor = db_con.cursor()

    try:
        if exclude_list and len(exclude_list) > 0:
-            # Convert exclude_list to string for SQL IN clause
            exclude_str = ', '.join(map(str, exclude_list))
            cursor.execute(f"SELECT ProductID FROM Product WHERE ProductID NOT IN ({exclude_str}) ORDER BY RAND() LIMIT {count}")
        else:
@@ -55,13 +53,11 @@ def get_random_products(count=10, exclude_list=None):
        cursor.close()
        db_con.close()

-def get_popular_products(count=10):
-    """Get popular products based on history table frequency"""
+def get_popular_products(count=5):
    db_con = database()
    cursor = db_con.cursor()

    try:
-        # Get products that appear most frequently in history
        cursor.execute("""
            SELECT ProductID, COUNT(*) as count
            FROM History
@@ -72,7 +68,6 @@ def get_popular_products(count=10):

        popular_products = [row[0] for row in cursor.fetchall()]

-        # If not enough popular products, supplement with random ones
        if len(popular_products) < count:
            random_products = get_random_products(count - len(popular_products), popular_products)
            popular_products.extend(random_products)
@@ -81,22 +76,19 @@ def get_popular_products(count=10):

    except Exception as e:
        logging.error(f"Error getting popular products: {str(e)}")
-        return get_random_products(count)  # Fallback to random products
+        return get_random_products(count)
    finally:
        cursor.close()
        db_con.close()

 def has_user_history_or_recommendations(user_id):
-    """Check if user exists in History or Recommendation table"""
    db_con = database()
    cursor = db_con.cursor()

    try:
-        # Check if user has history
        cursor.execute("SELECT COUNT(*) FROM History WHERE UserID = %s", (user_id,))
        history_count = cursor.fetchone()[0]

-        # Check if user has recommendations
        cursor.execute("SELECT COUNT(*) FROM Recommendation WHERE UserID = %s", (user_id,))
        recommendation_count = cursor.fetchone()[0]

@@ -120,13 +112,11 @@ def get_all_products():
        select_clause = "SELECT p.ProductID"
        for category in categories:
            category_id = category[0]
-            select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
+            select_clause += f", MAX(CASE WHEN p.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"

        final_query = f"""
            {select_clause}
            FROM Product p
-            LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
-            LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
            GROUP BY p.ProductID;
            """

@@ -137,13 +127,13 @@ def get_all_products():
        product_ids = []
        for row in results:
            text_list = list(row)
-            product_id = text_list.pop(0)  # Save the product ID before removing it
+            product_id = text_list.pop(0)
            final.append(text_list)
            product_ids.append(product_id)

        cursor.close()
        db_con.close()
-        return final, product_ids  # Return both feature vectors and product IDs
+        return final, product_ids
    except Exception as e:
        logging.error(f"Error getting all products: {str(e)}")
        cursor.close()
@@ -160,15 +150,13 @@ def get_user_history(user_id):

        select_clause = "SELECT p.ProductID"
        for category in categories:
-            category_id = category[0] # get the uid of the category and then append that to the new column
-            select_clause += f", MAX(CASE WHEN pc.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"
+            category_id = category[0]
+            select_clause += f", MAX(CASE WHEN p.CategoryID = {category_id} THEN 1 ELSE 0 END) AS `Cat_{category_id}`"

        final_query = f"""
            {select_clause}
            FROM Product p
-            LEFT JOIN Product_Category pc ON p.ProductID = pc.ProductID
-            LEFT JOIN Category c ON pc.CategoryID = c.CategoryID
-            where p.ProductID in (select ProductID from History where UserID = {user_id})
+            WHERE p.ProductID IN (SELECT ProductID FROM History WHERE UserID = {user_id})
            GROUP BY p.ProductID;
            """

@@ -191,82 +179,63 @@ def get_user_history(user_id):

 def get_recommendations(user_id, top_n=5):
    try:
-        # Always delete existing recommendations first
        delete_user_recommendations(user_id)

-        # Check if user has history or recommendations
        if not has_user_history_or_recommendations(user_id):
-            # Cold start: return random products
            random_recs = get_random_products(top_n)
-            # Store these random recommendations
-            history_upload(user_id, random_recs)
+            recommendation_upload(user_id, random_recs)

-            # Add 5 more unique random products
            additional_random = get_random_products(5, random_recs)
-            history_upload(user_id, additional_random)
+            recommendation_upload(user_id, additional_random)

            return random_recs + additional_random

-        # Get all products and user history with their category vectors
        all_product_features, all_product_ids = get_all_products()
        user_history = get_user_history(user_id)

        if not user_history:
-            # User exists but has no history yet
            popular_recs = get_popular_products(top_n)
-            history_upload(user_id, popular_recs)
+            recommendation_upload(user_id, popular_recs)

-            # Add 5 more unique random products
            additional_random = get_random_products(5, popular_recs)
-            history_upload(user_id, additional_random)
+            recommendation_upload(user_id, additional_random)

            return popular_recs + additional_random

-        # Calculate similarity between all products and user history
-        user_profile = np.mean(user_history, axis=0)  # Average user preferences
+        user_profile = np.mean(user_history, axis=0)
        similarities = cosine_similarity([user_profile], all_product_features)
-        print(similarities)

-        # Get indices of the top N products sorted by similarity
        product_indices = similarities[0].argsort()[-top_n:][::-1]

-        # Get the actual product IDs using the indices
        recommended_product_ids = [all_product_ids[i] for i in product_indices]
+        print(recommended_product_ids)

-        # Upload the core recommendations to the database
-        history_upload(user_id, recommended_product_ids)
+        recommendation_upload(user_id, recommended_product_ids)

-        # Add 5 more unique random products that aren't in the recommendations
        additional_random = get_random_products(5, recommended_product_ids)
-        history_upload(user_id, additional_random)
+        recommendation_upload(user_id, additional_random)

-        # Return both the similarity-based recommendations and the random ones
        return recommended_product_ids + additional_random

    except Exception as e:
        logging.error(f"Recommendation error for user {user_id}: {str(e)}")
-        # Fallback to random products
        random_products = get_random_products(top_n + 5)
        return random_products

-def history_upload(userID, products):
-    """Upload product recommendations to the database"""
+def recommendation_upload(userID, products):
    db_con = database()
    cursor = db_con.cursor()

    try:
        for product_id in products:
-            # Use parameterized queries to prevent SQL injection
            cursor.execute("INSERT INTO Recommendation (UserID, RecommendedProductID) VALUES (%s, %s)",
                          (userID, product_id))

-        # Commit the changes
        db_con.commit()

    except Exception as e:
        logging.error(f"Error uploading recommendations: {str(e)}")
        db_con.rollback()
    finally:
-        # Close the cursor and connection
        cursor.close()
        db_con.close()