added trending, some comments in popularity

2026-02-21 11:18:01 +00:00 · 2023-11-02 18:48:58 -07:00
parent 0e22471b42
commit 5280b33a5c
5 changed files with 164 additions and 671 deletions
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb
@@ -1,458 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 207,
   "id": "937dd4ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "def load_data():\n",
    "    # Load the MovieLens data\n",
    "    movies_df = pd.read_csv(\"movielens_small/movies.csv\")\n",
    "    ratings_df = pd.read_csv(\"movielens_small/ratings.csv\")\n",
    "    return movies_df, ratings_df\n",
    "\n",
    "def calculate_popularity(movies_df, ratings_df, damping_factor=5):\n",
    "    # Calculate the number of ratings and the mean rating for each movie, plus the global mean rating\n",
    "    num_ratings = ratings_df.groupby(\"movieId\")[\"rating\"].count()\n",
    "    mean_rating = ratings_df.groupby(\"movieId\")[\"rating\"].mean()\n",
    "    global_mean = ratings_df[\"rating\"].mean()\n",
    "    \n",
    "    # Calculate the damped mean rating for each movie\n",
    "    damped_numerator = num_ratings * mean_rating + damping_factor * global_mean\n",
    "    damped_denominator = num_ratings + damping_factor\n",
    "    damped_mean_rating = damped_numerator / damped_denominator\n",
    "    \n",
    "    # Add the popularity data to the movie data\n",
    "    movies_df['num_ratings'] = movies_df['movieId'].map(num_ratings)\n",
    "    movies_df['mean_rating'] = movies_df['movieId'].map(mean_rating)\n",
    "    movies_df['damped_mean_rating'] = movies_df['movieId'].map(damped_mean_rating)\n",
    "    return movies_df\n",
    "\n",
    "movies_df, ratings_df = load_data()\n",
    "movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "7e649c6f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>num_ratings</th>\n",
       "      <th>mean_rating</th>\n",
       "      <th>damped_mean_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <td>356</td>\n",
       "      <td>Forrest Gump (1994)</td>\n",
       "      <td>Comedy|Drama|Romance|War</td>\n",
       "      <td>329.0</td>\n",
       "      <td>4.164134</td>\n",
       "      <td>4.144589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>318</td>\n",
       "      <td>Shawshank Redemption, The (1994)</td>\n",
       "      <td>Crime|Drama</td>\n",
       "      <td>317.0</td>\n",
       "      <td>4.429022</td>\n",
       "      <td>4.400659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>257</th>\n",
       "      <td>296</td>\n",
       "      <td>Pulp Fiction (1994)</td>\n",
       "      <td>Comedy|Crime|Drama|Thriller</td>\n",
       "      <td>307.0</td>\n",
       "      <td>4.197068</td>\n",
       "      <td>4.175128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>510</th>\n",
       "      <td>593</td>\n",
       "      <td>Silence of the Lambs, The (1991)</td>\n",
       "      <td>Crime|Horror|Thriller</td>\n",
       "      <td>279.0</td>\n",
       "      <td>4.161290</td>\n",
       "      <td>4.138462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1939</th>\n",
       "      <td>2571</td>\n",
       "      <td>Matrix, The (1999)</td>\n",
       "      <td>Action|Sci-Fi|Thriller</td>\n",
       "      <td>278.0</td>\n",
       "      <td>4.192446</td>\n",
       "      <td>4.168457</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                             title                       genres   \n",
       "314       356               Forrest Gump (1994)     Comedy|Drama|Romance|War  \\\n",
       "277       318  Shawshank Redemption, The (1994)                  Crime|Drama   \n",
       "257       296               Pulp Fiction (1994)  Comedy|Crime|Drama|Thriller   \n",
       "510       593  Silence of the Lambs, The (1991)        Crime|Horror|Thriller   \n",
       "1939     2571                Matrix, The (1999)       Action|Sci-Fi|Thriller   \n",
       "\n",
       "      num_ratings  mean_rating  damped_mean_rating  \n",
       "314         329.0     4.164134            4.144589  \n",
       "277         317.0     4.429022            4.400659  \n",
       "257         307.0     4.197068            4.175128  \n",
       "510         279.0     4.161290            4.138462  \n",
       "1939        278.0     4.192446            4.168457  "
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_df.sort_values(by=\"num_ratings\", ascending=False).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "id": "c6ef332e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>num_ratings</th>\n",
       "      <th>mean_rating</th>\n",
       "      <th>damped_mean_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7656</th>\n",
       "      <td>88448</td>\n",
       "      <td>Paper Birds (Pájaros de papel) (2010)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.637779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8107</th>\n",
       "      <td>100556</td>\n",
       "      <td>Act of Killing, The (2012)</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.637779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9083</th>\n",
       "      <td>143031</td>\n",
       "      <td>Jump In! (2007)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.637779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9094</th>\n",
       "      <td>143511</td>\n",
       "      <td>Human (2015)</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.637779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9096</th>\n",
       "      <td>143559</td>\n",
       "      <td>L.A. Slasher (2015)</td>\n",
       "      <td>Comedy|Crime|Fantasy</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.637779</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                                  title                genres   \n",
       "7656    88448  Paper Birds (Pájaros de papel) (2010)          Comedy|Drama  \\\n",
       "8107   100556             Act of Killing, The (2012)           Documentary   \n",
       "9083   143031                        Jump In! (2007)  Comedy|Drama|Romance   \n",
       "9094   143511                           Human (2015)           Documentary   \n",
       "9096   143559                    L.A. Slasher (2015)  Comedy|Crime|Fantasy   \n",
       "\n",
       "      num_ratings  mean_rating  damped_mean_rating  \n",
       "7656          1.0          5.0            3.637779  \n",
       "8107          1.0          5.0            3.637779  \n",
       "9083          1.0          5.0            3.637779  \n",
       "9094          1.0          5.0            3.637779  \n",
       "9096          1.0          5.0            3.637779  "
      ]
     },
     "execution_count": 204,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_df.sort_values(by=\"mean_rating\", ascending=False).head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "id": "f669fb09",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>num_ratings</th>\n",
       "      <th>mean_rating</th>\n",
       "      <th>damped_mean_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>318</td>\n",
       "      <td>Shawshank Redemption, The (1994)</td>\n",
       "      <td>Crime|Drama</td>\n",
       "      <td>317.0</td>\n",
       "      <td>4.429022</td>\n",
       "      <td>4.400659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>659</th>\n",
       "      <td>858</td>\n",
       "      <td>Godfather, The (1972)</td>\n",
       "      <td>Crime|Drama</td>\n",
       "      <td>192.0</td>\n",
       "      <td>4.289062</td>\n",
       "      <td>4.250077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2226</th>\n",
       "      <td>2959</td>\n",
       "      <td>Fight Club (1999)</td>\n",
       "      <td>Action|Crime|Drama|Thriller</td>\n",
       "      <td>218.0</td>\n",
       "      <td>4.272936</td>\n",
       "      <td>4.239103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>922</th>\n",
       "      <td>1221</td>\n",
       "      <td>Godfather: Part II, The (1974)</td>\n",
       "      <td>Crime|Drama</td>\n",
       "      <td>129.0</td>\n",
       "      <td>4.259690</td>\n",
       "      <td>4.205148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>50</td>\n",
       "      <td>Usual Suspects, The (1995)</td>\n",
       "      <td>Crime|Mystery|Thriller</td>\n",
       "      <td>204.0</td>\n",
       "      <td>4.237745</td>\n",
       "      <td>4.203344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>260</td>\n",
       "      <td>Star Wars: Episode IV - A New Hope (1977)</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>251.0</td>\n",
       "      <td>4.231076</td>\n",
       "      <td>4.203125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>602</th>\n",
       "      <td>750</td>\n",
       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
       "      <td>Comedy|War</td>\n",
       "      <td>97.0</td>\n",
       "      <td>4.268041</td>\n",
       "      <td>4.196407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>914</th>\n",
       "      <td>1213</td>\n",
       "      <td>Goodfellas (1990)</td>\n",
       "      <td>Crime|Drama</td>\n",
       "      <td>126.0</td>\n",
       "      <td>4.250000</td>\n",
       "      <td>4.194967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>461</th>\n",
       "      <td>527</td>\n",
       "      <td>Schindler's List (1993)</td>\n",
       "      <td>Drama|War</td>\n",
       "      <td>220.0</td>\n",
       "      <td>4.225000</td>\n",
       "      <td>4.193546</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6710</th>\n",
       "      <td>58559</td>\n",
       "      <td>Dark Knight, The (2008)</td>\n",
       "      <td>Action|Crime|Drama|IMAX</td>\n",
       "      <td>149.0</td>\n",
       "      <td>4.238255</td>\n",
       "      <td>4.191922</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                                              title   \n",
       "277       318                   Shawshank Redemption, The (1994)  \\\n",
       "659       858                              Godfather, The (1972)   \n",
       "2226     2959                                  Fight Club (1999)   \n",
       "922      1221                     Godfather: Part II, The (1974)   \n",
       "46         50                         Usual Suspects, The (1995)   \n",
       "224       260          Star Wars: Episode IV - A New Hope (1977)   \n",
       "602       750  Dr. Strangelove or: How I Learned to Stop Worr...   \n",
       "914      1213                                  Goodfellas (1990)   \n",
       "461       527                            Schindler's List (1993)   \n",
       "6710    58559                            Dark Knight, The (2008)   \n",
       "\n",
       "                           genres  num_ratings  mean_rating   \n",
       "277                   Crime|Drama        317.0     4.429022  \\\n",
       "659                   Crime|Drama        192.0     4.289062   \n",
       "2226  Action|Crime|Drama|Thriller        218.0     4.272936   \n",
       "922                   Crime|Drama        129.0     4.259690   \n",
       "46         Crime|Mystery|Thriller        204.0     4.237745   \n",
       "224       Action|Adventure|Sci-Fi        251.0     4.231076   \n",
       "602                    Comedy|War         97.0     4.268041   \n",
       "914                   Crime|Drama        126.0     4.250000   \n",
       "461                     Drama|War        220.0     4.225000   \n",
       "6710      Action|Crime|Drama|IMAX        149.0     4.238255   \n",
       "\n",
       "      damped_mean_rating  \n",
       "277             4.400659  \n",
       "659             4.250077  \n",
       "2226            4.239103  \n",
       "922             4.205148  \n",
       "46              4.203344  \n",
       "224             4.203125  \n",
       "602             4.196407  \n",
       "914             4.194967  \n",
       "461             4.193546  \n",
       "6710            4.191922  "
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_df.sort_values(by=\"damped_mean_rating\", ascending=False).head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb
@@ -1,176 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "25aa1c78",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "107e909b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the transactions data\n",
    "transactions = pd.read_csv(\"grocery_dataset.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "289a9751",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "itemDescription\n",
       "whole milk          515.0\n",
       "other vegetables    361.0\n",
       "rolls/buns          344.0\n",
       "soda                271.0\n",
       "yogurt              242.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "member_purchases = transactions.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().fillna(0)\n",
    "item_descriptions = member_purchases.columns\n",
    "\n",
    "def simple_association(item_name):\n",
    "    item_basket = member_purchases[member_purchases[item_name] > 0]\n",
    "    co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)\n",
    "    return co_purchase_counts.head(5)\n",
    "\n",
    "ex_item = item_descriptions[20]\n",
    "simple_association(ex_item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "190a1485",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Top 10 recommendations for soda:\n",
      "\n",
      "itemDescription\n",
      "oil                    1.246844\n",
      "beverages              1.162678\n",
      "sausage                1.014975\n",
      "grapes                 1.001195\n",
      "shopping bags           0.95459\n",
      "frozen meals           0.943642\n",
      "specialty bar          0.936182\n",
      "butter                 0.918418\n",
      "candy                  0.910056\n",
      "specialty chocolate    0.904846\n",
      "Name: soda, dtype: object \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Function to create a transaction matrix\n",
    "def create_transaction_matrix(transactions):\n",
    "    # Group the transactions by member number, date and item description\n",
    "    # Count the number of each item bought by each member on each date\n",
    "    # Unstack the item descriptions to create a matrix where rows are transactions and columns are items\n",
    "    # Fill any missing values with 0\n",
    "    # Set the index to be the member number and date\n",
    "    basket = (transactions.groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']\n",
    "              .count().unstack().reset_index().fillna(0)\n",
    "              .set_index(['Member_number', 'Date']))\n",
    "    \n",
    "    # Convert the counts to True or False\n",
    "    # True if the item was bought in the transaction, False otherwise\n",
    "    return basket.applymap(lambda x: True if x > 0 else False)\n",
    "\n",
    "# Function to calculate a lift matrix\n",
    "def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):\n",
    "    # Calculate the joint probability of each pair of items\n",
    "    probability_pair = pd.DataFrame(index=basket_sets.columns, columns=basket_sets.columns)\n",
    "    for item1 in basket_sets.columns:\n",
    "        for item2 in basket_sets.columns:\n",
    "            joint_probability = (basket_sets[item1] & basket_sets[item2]).sum() / len(basket_sets)\n",
    "            if joint_probability > min_joint_probability:\n",
    "                probability_pair.loc[item1, item2] = joint_probability\n",
    "            else:\n",
    "                probability_pair.loc[item1, item2] = 0\n",
    "\n",
    "    # Set the diagonal of the joint probability matrix to 0\n",
    "    np.fill_diagonal(probability_pair.values, 0)\n",
    "\n",
    "    # Calculate the individual probability of each item\n",
    "    probability_item = basket_sets.mean()\n",
    "\n",
    "    # Calculate the product of the individual probabilities\n",
    "    probability_product = np.outer(probability_item, probability_item)\n",
    "\n",
    "    # Calculate the lift of each pair of items\n",
    "    lift_matrix = probability_pair.divide(probability_product, fill_value=0)\n",
    "    \n",
    "    return lift_matrix\n",
    "\n",
    "# Function to recommend items\n",
    "def recommend_items(lift_matrix, item, num_recommendations=10):\n",
    "    # Sort the items in the lift matrix for the given item in descending order\n",
    "    # Take the top num_recommendations items\n",
    "    recommended_for_item = lift_matrix[item].sort_values(ascending=False).head(num_recommendations)\n",
    "    \n",
    "    # Print the recommended items\n",
    "    print(f\"Top {num_recommendations} recommendations for {item}:\\n\")\n",
    "    print(recommended_for_item, \"\\n\\n\")\n",
    "\n",
    "# Create transaction matrix\n",
    "basket_sets = create_transaction_matrix(transactions)\n",
    "\n",
    "# Calculate the lift matrix\n",
    "lift_matrix = calculate_lift_matrix(basket_sets)\n",
    "\n",
    "# Recommend items for 'soda'\n",
    "recommend_items(lift_matrix, 'soda')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb
@@ -1,33 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0c33033",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb
@@ -1,8 +1,46 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Non-Personalized Recommender Systems: Popularity Based"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 207,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "\n",
    "if not os.path.exists('movielens_small'):\n",
    "    !wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip \n",
    "    !unzip ml-latest-small.zip\n",
    "    !rm ml-latest-small.zip\n",
    "    !mv ml-latest-small movielens_small"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Damped Mean\n",
    "\n",
    "$$ s(i) = \\frac{\\sum_{u \\in U_i} r_{ui} + a \\times \\mu}{|U_i| + a} $$\n",
    "\n",
    "Where:\n",
    "- $s(i)$: The damped mean rating for item $i$.\n",
    "- $U_i$: The set of users who rated item $i$.\n",
    "- $\\sum_{u \\in U_i} r_{ui}$: Sum of the ratings $r_{ui}$ that the users $u \\in U_i$ gave to item $i$.\n",
    "- $a$: Damping factor, a value that determines the extent of smoothing; it acts like $a$ extra ratings at the global mean, so items with few ratings are pulled toward $\\mu$.\n",
    "- $\\mu$: Global mean rating across all ratings in the dataset.\n",
    "- $|U_i|$: Total number of ratings for item $i$.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "937dd4ed",
   "metadata": {},
   "outputs": [],
@@ -36,6 +74,13 @@
    "movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's compare three ways of ranking the movies: by number of ratings (`num_ratings`), by mean rating (`mean_rating`), and by damped mean rating (`damped_mean_rating`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
@@ -450,7 +495,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.11.3"
  }
 },
 "nbformat": 4,
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
@@ -2,9 +2,124 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "id": "b0c33033",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Posts sorted by Reddit's 'Hot' score:\n",
      "   post_id                                         post_title  upvotes   \n",
      "9       10                    Difference between CNN and RNN?      350  \\\n",
      "8        9              The future of quantum computing in AI      600   \n",
      "7        8              Experience with multi-modal learning?      450   \n",
      "6        7  Looking for resources on probabilistic program...      700   \n",
      "4        5             Tips for handling imbalanced datasets?     1100   \n",
      "2        3  Has anyone tried the new reinforcement learnin...      900   \n",
      "3        4  Discussion: Evolutionary algorithms vs gradien...      800   \n",
      "5        6     Which GPU is best for neural network training?      300   \n",
      "0        1              How do I start with machine learning?      600   \n",
      "1        2     Best practices for deep learning optimization?      400   \n",
      "\n",
      "   downvotes  age_in_seconds  age_in_hours  reddit_hot  hacker_news  \n",
      "9         50          256000     71.111111    8.166010     0.042205  \n",
      "8         50          128000     35.555556    5.584807     0.227638  \n",
      "7         50           64000     17.777778    4.024282     0.559318  \n",
      "6         50           32000      8.888889    3.524024     2.416714  \n",
      "4        100            8000      2.222222    3.177778    18.779258  \n",
      "2        100            2000      0.555556    2.947534    38.776074  \n",
      "3        100            4000      1.111111    2.933987    24.453093  \n",
      "5         50           16000      4.444444    2.753496     2.886859  \n",
      "0        100             500      0.138889    2.710081    36.655710  \n",
      "1         50            1000      0.277778    2.566290    24.588946  \n",
      "\n",
      "Posts sorted by Hacker News score:\n",
      "   post_id                                         post_title  upvotes   \n",
      "2        3  Has anyone tried the new reinforcement learnin...      900  \\\n",
      "0        1              How do I start with machine learning?      600   \n",
      "1        2     Best practices for deep learning optimization?      400   \n",
      "3        4  Discussion: Evolutionary algorithms vs gradien...      800   \n",
      "4        5             Tips for handling imbalanced datasets?     1100   \n",
      "5        6     Which GPU is best for neural network training?      300   \n",
      "6        7  Looking for resources on probabilistic program...      700   \n",
      "7        8              Experience with multi-modal learning?      450   \n",
      "8        9              The future of quantum computing in AI      600   \n",
      "9       10                    Difference between CNN and RNN?      350   \n",
      "\n",
      "   downvotes  age_in_seconds  age_in_hours  reddit_hot  hacker_news  \n",
      "2        100            2000      0.555556    2.947534    38.776074  \n",
      "0        100             500      0.138889    2.710081    36.655710  \n",
      "1         50            1000      0.277778    2.566290    24.588946  \n",
      "3        100            4000      1.111111    2.933987    24.453093  \n",
      "4        100            8000      2.222222    3.177778    18.779258  \n",
      "5         50           16000      4.444444    2.753496     2.886859  \n",
      "6         50           32000      8.888889    3.524024     2.416714  \n",
      "7         50           64000     17.777778    4.024282     0.559318  \n",
      "8         50          128000     35.555556    5.584807     0.227638  \n",
      "9         50          256000     71.111111    8.166010     0.042205  \n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = {\n",
    "    'post_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
    "    'post_title': [\n",
    "        \"How do I start with machine learning?\",\n",
    "        \"Best practices for deep learning optimization?\",\n",
    "        \"Has anyone tried the new reinforcement learning library?\",\n",
    "        \"Discussion: Evolutionary algorithms vs gradient descent\",\n",
    "        \"Tips for handling imbalanced datasets?\",\n",
    "        \"Which GPU is best for neural network training?\",\n",
    "        \"Looking for resources on probabilistic programming\",\n",
    "        \"Experience with multi-modal learning?\",\n",
    "        \"The future of quantum computing in AI\",\n",
    "        \"Difference between CNN and RNN?\"\n",
    "    ],\n",
    "    'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n",
    "    'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n",
    "    'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000]\n",
    "}\n",
    "\n",
    "\n",
    "# Convert to DataFrame\n",
    "reddit_df = pd.DataFrame(data)\n",
    "\n",
    "# Calculate age in hours from age in seconds\n",
    "reddit_df['age_in_hours'] = reddit_df['age_in_seconds'] / 3600\n",
    "\n",
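    "# Reddit's \"Hot\" formula\n",
    "# NOTE(review): t is passed the post's age in seconds below, so older posts get a higher score; Reddit's own formula uses the submission timestamp (newer posts rank higher) -- confirm this is intended.\n",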
    "def reddit_hot(U, D, t):\n",
    "    return math.log10(max(abs(U-D), 1)) + np.sign(U-D) * t / 45000\n",
    "\n",
    "# Modified Hacker News formula\n",
    "def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n",
    "    return P * pow((U - D - 1), alpha) / pow((T + 2), gamma)\n",
    "\n",
    "# Apply the formulas\n",
    "reddit_df['reddit_hot'] = reddit_df.apply(lambda row: reddit_hot(row['upvotes'], row['downvotes'], row['age_in_seconds']), axis=1)\n",
    "reddit_df['hacker_news'] = reddit_df.apply(lambda row: hacker_news(row['upvotes'], row['downvotes'], row['age_in_hours']), axis=1)\n",
    "\n",
    "# Sort by Reddit's \"Hot\" score and print\n",
    "reddit_df_sorted = reddit_df.sort_values(by='reddit_hot', ascending=False)\n",
    "print(\"Posts sorted by Reddit's 'Hot' score:\")\n",
    "print(reddit_df_sorted)\n",
    "\n",
    "# Sort by Hacker News score and print\n",
    "hacker_news_df_sorted = reddit_df.sort_values(by='hacker_news', ascending=False)\n",
    "print(\"\\nPosts sorted by Hacker News score:\")\n",
    "print(hacker_news_df_sorted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
@@ -25,7 +140,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.11.3"
  }
 },
 "nbformat": 4,