Files
Machine-Learning-Collection/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part2-associative_recsys.ipynb
2023-10-27 23:45:20 +02:00

177 lines
5.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "25aa1c78",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "107e909b",
"metadata": {},
"outputs": [],
"source": [
"# Load the transactions data from a CSV given by a relative path.\n",
"# Later cells read the columns 'Member_number', 'Date' and 'itemDescription'.\n",
"transactions = pd.read_csv(\"grocery_dataset.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "289a9751",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"itemDescription\n",
"whole milk 515.0\n",
"other vegetables 361.0\n",
"rolls/buns 344.0\n",
"soda 271.0\n",
"yogurt 242.0\n",
"dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pivot the raw rows into a member x item table of purchase counts;\n",
"# member/item pairs that never occur are filled with 0.\n",
"member_purchases = (\n",
"    transactions\n",
"    .groupby(['Member_number', 'itemDescription'])['itemDescription']\n",
"    .count()\n",
"    .unstack()\n",
"    .fillna(0)\n",
")\n",
"# Column labels of the pivot, i.e. the distinct item names\n",
"item_descriptions = member_purchases.columns\n",
"\n",
"def simple_association(item_name, top_n=5, purchases=None):\n",
"    \"\"\"Rank the items bought most by members who also bought `item_name`.\n",
"\n",
"    Args:\n",
"        item_name: Column label of the anchor item in the purchase matrix.\n",
"        top_n: Number of co-purchased items to return. Defaults to 5, the\n",
"            value that was previously hard-coded.\n",
"        purchases: Member x item count matrix. Defaults to the notebook-level\n",
"            `member_purchases`, so existing one-argument calls are unchanged.\n",
"\n",
"    Returns:\n",
"        Series of summed purchase counts indexed by item, sorted descending,\n",
"        with `item_name` itself removed, truncated to `top_n` entries.\n",
"\n",
"    Raises:\n",
"        KeyError: If `item_name` is not a column of the purchase matrix.\n",
"    \"\"\"\n",
"    if purchases is None:\n",
"        # Fall back to the global pivot built earlier in this cell\n",
"        purchases = member_purchases\n",
"\n",
"    # Keep only the members who bought the anchor item at least once\n",
"    item_basket = purchases[purchases[item_name] > 0]\n",
"    co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)\n",
"    return co_purchase_counts.head(top_n)\n",
"\n",
"# Demo: use the item at column position 20 of the purchase matrix as an arbitrary\n",
"# example and show its top co-purchased items as the cell output.\n",
"ex_item = item_descriptions[20]\n",
"simple_association(ex_item)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "190a1485",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 10 recommendations for soda:\n",
"\n",
"itemDescription\n",
"oil 1.246844\n",
"beverages 1.162678\n",
"sausage 1.014975\n",
"grapes 1.001195\n",
"shopping bags 0.95459\n",
"frozen meals 0.943642\n",
"specialty bar 0.936182\n",
"butter 0.918418\n",
"candy 0.910056\n",
"specialty chocolate 0.904846\n",
"Name: soda, dtype: object \n",
"\n",
"\n"
]
}
],
"source": [
"# Function to create a transaction matrix\n",
"def create_transaction_matrix(transactions):\n",
"    \"\"\"Build a boolean basket-by-item matrix from raw transaction rows.\n",
"\n",
"    A basket is one (Member_number, Date) pair. Rows are grouped by member,\n",
"    date and item, counted, and the item level is pivoted into columns.\n",
"\n",
"    Args:\n",
"        transactions: DataFrame with 'Member_number', 'Date' and\n",
"            'itemDescription' columns.\n",
"\n",
"    Returns:\n",
"        Boolean DataFrame indexed by (Member_number, Date) with one column\n",
"        per item; True where the item occurs in that basket.\n",
"    \"\"\"\n",
"    # fill_value=0 fills absent basket/item pairs during the pivot itself, so the\n",
"    # reset_index / fillna / set_index round trip is not needed.\n",
"    item_counts = (\n",
"        transactions\n",
"        .groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']\n",
"        .count()\n",
"        .unstack(fill_value=0)\n",
"    )\n",
"\n",
"    # Vectorised comparison replaces the element-wise applymap lambda\n",
"    # (DataFrame.applymap is deprecated in recent pandas releases).\n",
"    return item_counts > 0\n",
"\n",
"# Function to calculate a lift matrix\n",
"def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):\n",
"    \"\"\"Compute the pairwise lift P(A and B) / (P(A) * P(B)) for all items.\n",
"\n",
"    Args:\n",
"        basket_sets: Basket x item DataFrame of booleans (or 0/1 values).\n",
"        min_joint_probability: Pairs whose joint probability is not strictly\n",
"            greater than this threshold get a lift of 0.\n",
"\n",
"    Returns:\n",
"        Square float DataFrame indexed and labelled by the item columns.\n",
"        The diagonal is 0, as is any pair involving an item that never occurs.\n",
"    \"\"\"\n",
"    items = basket_sets.columns\n",
"    n_baskets = len(basket_sets)\n",
"    if n_baskets == 0:\n",
"        # No baskets: every probability is undefined, so report zero lift\n",
"        return pd.DataFrame(0.0, index=items, columns=items)\n",
"\n",
"    # 0/1 occurrence matrix; X.T @ X counts the baskets containing both items,\n",
"    # replacing the nested Python loop over every item pair.\n",
"    occurrences = basket_sets.to_numpy(dtype=float)\n",
"    joint_probability = (occurrences.T @ occurrences) / n_baskets\n",
"\n",
"    # Drop rare pairs, then zero the diagonal so an item never recommends itself\n",
"    joint_probability[joint_probability <= min_joint_probability] = 0.0\n",
"    np.fill_diagonal(joint_probability, 0.0)\n",
"\n",
"    # Calculate the individual probability of each item and their pairwise product\n",
"    probability_item = occurrences.mean(axis=0)\n",
"    probability_product = np.outer(probability_item, probability_item)\n",
"\n",
"    # Divide only where the denominator is non-zero; other cells stay 0\n",
"    lift = np.divide(\n",
"        joint_probability,\n",
"        probability_product,\n",
"        out=np.zeros_like(joint_probability),\n",
"        where=probability_product > 0,\n",
"    )\n",
"\n",
"    return pd.DataFrame(lift, index=items, columns=items)\n",
"\n",
"# Function to recommend items\n",
"def recommend_items(lift_matrix, item, num_recommendations=10):\n",
"    \"\"\"Print the entries of `lift_matrix[item]` with the highest values.\n",
"\n",
"    Args:\n",
"        lift_matrix: DataFrame of pairwise lift values with item column labels.\n",
"        item: Column label to look up.\n",
"        num_recommendations: How many of the highest-lift entries to print.\n",
"\n",
"    Returns:\n",
"        None; the ranking is written to stdout.\n",
"    \"\"\"\n",
"    # Rank every entry of the item's column by lift, highest first\n",
"    lifts_for_item = lift_matrix[item]\n",
"    ranked = lifts_for_item.sort_values(ascending=False)\n",
"    top_matches = ranked.head(num_recommendations)\n",
"\n",
"    # Header line followed by the ranked Series\n",
"    print(f\"Top {num_recommendations} recommendations for {item}:\\n\")\n",
"    print(top_matches, \"\\n\\n\")\n",
"\n",
"# Create transaction matrix (one boolean row per member/date basket)\n",
"basket_sets = create_transaction_matrix(transactions)\n",
"\n",
"# Calculate the lift matrix with the default joint-probability threshold\n",
"lift_matrix = calculate_lift_matrix(basket_sets)\n",
"\n",
"# Recommend items for 'soda'\n",
"recommend_items(lift_matrix, 'soda')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}