Machine-Learning-Collection/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part2-associative_recsys.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "25aa1c78",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "107e909b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the transactions data\n",
    "transactions = pd.read_csv(\"grocery_dataset.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "289a9751",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "itemDescription\n",
       "whole milk          515.0\n",
       "other vegetables    361.0\n",
       "rolls/buns          344.0\n",
       "soda                271.0\n",
       "yogurt              242.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "member_purchases = transactions.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().fillna(0)\n",
    "item_descriptions = member_purchases.columns\n",
    "\n",
    "def simple_association(item_name):\n",
    "    item_basket = member_purchases[member_purchases[item_name] > 0]\n",
    "    co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)\n",
    "    return co_purchase_counts.head(5)\n",
    "\n",
    "ex_item = item_descriptions[20]\n",
    "simple_association(ex_item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "190a1485",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Top 10 recommendations for soda:\n",
      "\n",
      "itemDescription\n",
      "oil                    1.246844\n",
      "beverages              1.162678\n",
      "sausage                1.014975\n",
      "grapes                 1.001195\n",
      "shopping bags           0.95459\n",
      "frozen meals           0.943642\n",
      "specialty bar          0.936182\n",
      "butter                 0.918418\n",
      "candy                  0.910056\n",
      "specialty chocolate    0.904846\n",
      "Name: soda, dtype: object \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Function to create a transaction matrix\n",
    "def create_transaction_matrix(transactions):\n",
    "    # Group the transactions by member number, date and item description\n",
    "    # Count the number of each item bought by each member on each date\n",
    "    # Unstack the item descriptions to create a matrix where rows are transactions and columns are items\n",
    "    # Fill any missing values with 0\n",
    "    # Set the index to be the member number and date\n",
    "    basket = (transactions.groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']\n",
    "              .count().unstack().reset_index().fillna(0)\n",
    "              .set_index(['Member_number', 'Date']))\n",
    "    \n",
    "    # Convert the counts to True or False\n",
    "    # True if the item was bought in the transaction, False otherwise\n",
    "    return basket.applymap(lambda x: True if x > 0 else False)\n",
    "\n",
    "# Function to calculate a lift matrix\n",
    "def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):\n",
    "    # Calculate the joint probability of each pair of items\n",
    "    probability_pair = pd.DataFrame(index=basket_sets.columns, columns=basket_sets.columns)\n",
    "    for item1 in basket_sets.columns:\n",
    "        for item2 in basket_sets.columns:\n",
    "            joint_probability = (basket_sets[item1] & basket_sets[item2]).sum() / len(basket_sets)\n",
    "            if joint_probability > min_joint_probability:\n",
    "                probability_pair.loc[item1, item2] = joint_probability\n",
    "            else:\n",
    "                probability_pair.loc[item1, item2] = 0\n",
    "\n",
    "    # Set the diagonal of the joint probability matrix to 0\n",
    "    np.fill_diagonal(probability_pair.values, 0)\n",
    "\n",
    "    # Calculate the individual probability of each item\n",
    "    probability_item = basket_sets.mean()\n",
    "\n",
    "    # Calculate the product of the individual probabilities\n",
    "    probability_product = np.outer(probability_item, probability_item)\n",
    "\n",
    "    # Calculate the lift of each pair of items\n",
    "    lift_matrix = probability_pair.divide(probability_product, fill_value=0)\n",
    "    \n",
    "    return lift_matrix\n",
    "\n",
    "# Function to recommend items\n",
    "def recommend_items(lift_matrix, item, num_recommendations=10):\n",
    "    # Sort the items in the lift matrix for the given item in descending order\n",
    "    # Take the top num_recommendations items\n",
    "    recommended_for_item = lift_matrix[item].sort_values(ascending=False).head(num_recommendations)\n",
    "    \n",
    "    # Print the recommended items\n",
    "    print(f\"Top {num_recommendations} recommendations for {item}:\\n\")\n",
    "    print(recommended_for_item, \"\\n\\n\")\n",
    "\n",
    "# Create transaction matrix\n",
    "basket_sets = create_transaction_matrix(transactions)\n",
    "\n",
    "# Calculate the lift matrix\n",
    "lift_matrix = calculate_lift_matrix(basket_sets)\n",
    "\n",
    "# Recommend items for 'meat'\n",
    "recommend_items(lift_matrix, 'soda')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}