diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb
deleted file mode 100644
index 3f4c9af..0000000
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb
+++ /dev/null
@@ -1,458 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 207,
- "id": "937dd4ed",
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "\n",
- "def load_data():\n",
- " # Load the MovieLens data\n",
- " movies_df = pd.read_csv(\"movielens_small/movies.csv\")\n",
- " ratings_df = pd.read_csv(\"movielens_small/ratings.csv\")\n",
- " return movies_df, ratings_df\n",
- "\n",
- "def calculate_popularity(movies_df, ratings_df, damping_factor=5):\n",
- " # Calculate the number of ratings, mean rating, and sum of ratings for each movie\n",
- " num_ratings = ratings_df.groupby(\"movieId\")[\"rating\"].count()\n",
- " mean_rating = ratings_df.groupby(\"movieId\")[\"rating\"].mean()\n",
- " global_mean = ratings_df[\"rating\"].mean()\n",
- " \n",
- " # Calculate the damped mean rating for each movie\n",
- " damped_numerator = num_ratings * mean_rating + damping_factor * global_mean\n",
- " damped_denominator = num_ratings + damping_factor\n",
- " damped_mean_rating = damped_numerator / damped_denominator\n",
- " \n",
- " # Add the popularity data to the movie data\n",
- " movies_df['num_ratings'] = movies_df['movieId'].map(num_ratings)\n",
- " movies_df['mean_rating'] = movies_df['movieId'].map(mean_rating)\n",
- " movies_df['damped_mean_rating'] = movies_df['movieId'].map(damped_mean_rating)\n",
- " return movies_df\n",
- "\n",
- "movies_df, ratings_df = load_data()\n",
- "movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 198,
- "id": "7e649c6f",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " movieId | \n",
- " title | \n",
- " genres | \n",
- " num_ratings | \n",
- " mean_rating | \n",
- " damped_mean_rating | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 314 | \n",
- " 356 | \n",
- " Forrest Gump (1994) | \n",
- " Comedy|Drama|Romance|War | \n",
- " 329.0 | \n",
- " 4.164134 | \n",
- " 4.144589 | \n",
- "
\n",
- " \n",
- " | 277 | \n",
- " 318 | \n",
- " Shawshank Redemption, The (1994) | \n",
- " Crime|Drama | \n",
- " 317.0 | \n",
- " 4.429022 | \n",
- " 4.400659 | \n",
- "
\n",
- " \n",
- " | 257 | \n",
- " 296 | \n",
- " Pulp Fiction (1994) | \n",
- " Comedy|Crime|Drama|Thriller | \n",
- " 307.0 | \n",
- " 4.197068 | \n",
- " 4.175128 | \n",
- "
\n",
- " \n",
- " | 510 | \n",
- " 593 | \n",
- " Silence of the Lambs, The (1991) | \n",
- " Crime|Horror|Thriller | \n",
- " 279.0 | \n",
- " 4.161290 | \n",
- " 4.138462 | \n",
- "
\n",
- " \n",
- " | 1939 | \n",
- " 2571 | \n",
- " Matrix, The (1999) | \n",
- " Action|Sci-Fi|Thriller | \n",
- " 278.0 | \n",
- " 4.192446 | \n",
- " 4.168457 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " movieId title genres \n",
- "314 356 Forrest Gump (1994) Comedy|Drama|Romance|War \\\n",
- "277 318 Shawshank Redemption, The (1994) Crime|Drama \n",
- "257 296 Pulp Fiction (1994) Comedy|Crime|Drama|Thriller \n",
- "510 593 Silence of the Lambs, The (1991) Crime|Horror|Thriller \n",
- "1939 2571 Matrix, The (1999) Action|Sci-Fi|Thriller \n",
- "\n",
- " num_ratings mean_rating damped_mean_rating \n",
- "314 329.0 4.164134 4.144589 \n",
- "277 317.0 4.429022 4.400659 \n",
- "257 307.0 4.197068 4.175128 \n",
- "510 279.0 4.161290 4.138462 \n",
- "1939 278.0 4.192446 4.168457 "
- ]
- },
- "execution_count": 198,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "movies_df.sort_values(by=\"num_ratings\", ascending=False).head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 204,
- "id": "c6ef332e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " movieId | \n",
- " title | \n",
- " genres | \n",
- " num_ratings | \n",
- " mean_rating | \n",
- " damped_mean_rating | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 7656 | \n",
- " 88448 | \n",
- " Paper Birds (Pájaros de papel) (2010) | \n",
- " Comedy|Drama | \n",
- " 1.0 | \n",
- " 5.0 | \n",
- " 3.637779 | \n",
- "
\n",
- " \n",
- " | 8107 | \n",
- " 100556 | \n",
- " Act of Killing, The (2012) | \n",
- " Documentary | \n",
- " 1.0 | \n",
- " 5.0 | \n",
- " 3.637779 | \n",
- "
\n",
- " \n",
- " | 9083 | \n",
- " 143031 | \n",
- " Jump In! (2007) | \n",
- " Comedy|Drama|Romance | \n",
- " 1.0 | \n",
- " 5.0 | \n",
- " 3.637779 | \n",
- "
\n",
- " \n",
- " | 9094 | \n",
- " 143511 | \n",
- " Human (2015) | \n",
- " Documentary | \n",
- " 1.0 | \n",
- " 5.0 | \n",
- " 3.637779 | \n",
- "
\n",
- " \n",
- " | 9096 | \n",
- " 143559 | \n",
- " L.A. Slasher (2015) | \n",
- " Comedy|Crime|Fantasy | \n",
- " 1.0 | \n",
- " 5.0 | \n",
- " 3.637779 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " movieId title genres \n",
- "7656 88448 Paper Birds (Pájaros de papel) (2010) Comedy|Drama \\\n",
- "8107 100556 Act of Killing, The (2012) Documentary \n",
- "9083 143031 Jump In! (2007) Comedy|Drama|Romance \n",
- "9094 143511 Human (2015) Documentary \n",
- "9096 143559 L.A. Slasher (2015) Comedy|Crime|Fantasy \n",
- "\n",
- " num_ratings mean_rating damped_mean_rating \n",
- "7656 1.0 5.0 3.637779 \n",
- "8107 1.0 5.0 3.637779 \n",
- "9083 1.0 5.0 3.637779 \n",
- "9094 1.0 5.0 3.637779 \n",
- "9096 1.0 5.0 3.637779 "
- ]
- },
- "execution_count": 204,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "movies_df.sort_values(by=\"mean_rating\", ascending=False).head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 201,
- "id": "f669fb09",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " movieId | \n",
- " title | \n",
- " genres | \n",
- " num_ratings | \n",
- " mean_rating | \n",
- " damped_mean_rating | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 277 | \n",
- " 318 | \n",
- " Shawshank Redemption, The (1994) | \n",
- " Crime|Drama | \n",
- " 317.0 | \n",
- " 4.429022 | \n",
- " 4.400659 | \n",
- "
\n",
- " \n",
- " | 659 | \n",
- " 858 | \n",
- " Godfather, The (1972) | \n",
- " Crime|Drama | \n",
- " 192.0 | \n",
- " 4.289062 | \n",
- " 4.250077 | \n",
- "
\n",
- " \n",
- " | 2226 | \n",
- " 2959 | \n",
- " Fight Club (1999) | \n",
- " Action|Crime|Drama|Thriller | \n",
- " 218.0 | \n",
- " 4.272936 | \n",
- " 4.239103 | \n",
- "
\n",
- " \n",
- " | 922 | \n",
- " 1221 | \n",
- " Godfather: Part II, The (1974) | \n",
- " Crime|Drama | \n",
- " 129.0 | \n",
- " 4.259690 | \n",
- " 4.205148 | \n",
- "
\n",
- " \n",
- " | 46 | \n",
- " 50 | \n",
- " Usual Suspects, The (1995) | \n",
- " Crime|Mystery|Thriller | \n",
- " 204.0 | \n",
- " 4.237745 | \n",
- " 4.203344 | \n",
- "
\n",
- " \n",
- " | 224 | \n",
- " 260 | \n",
- " Star Wars: Episode IV - A New Hope (1977) | \n",
- " Action|Adventure|Sci-Fi | \n",
- " 251.0 | \n",
- " 4.231076 | \n",
- " 4.203125 | \n",
- "
\n",
- " \n",
- " | 602 | \n",
- " 750 | \n",
- " Dr. Strangelove or: How I Learned to Stop Worr... | \n",
- " Comedy|War | \n",
- " 97.0 | \n",
- " 4.268041 | \n",
- " 4.196407 | \n",
- "
\n",
- " \n",
- " | 914 | \n",
- " 1213 | \n",
- " Goodfellas (1990) | \n",
- " Crime|Drama | \n",
- " 126.0 | \n",
- " 4.250000 | \n",
- " 4.194967 | \n",
- "
\n",
- " \n",
- " | 461 | \n",
- " 527 | \n",
- " Schindler's List (1993) | \n",
- " Drama|War | \n",
- " 220.0 | \n",
- " 4.225000 | \n",
- " 4.193546 | \n",
- "
\n",
- " \n",
- " | 6710 | \n",
- " 58559 | \n",
- " Dark Knight, The (2008) | \n",
- " Action|Crime|Drama|IMAX | \n",
- " 149.0 | \n",
- " 4.238255 | \n",
- " 4.191922 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " movieId title \n",
- "277 318 Shawshank Redemption, The (1994) \\\n",
- "659 858 Godfather, The (1972) \n",
- "2226 2959 Fight Club (1999) \n",
- "922 1221 Godfather: Part II, The (1974) \n",
- "46 50 Usual Suspects, The (1995) \n",
- "224 260 Star Wars: Episode IV - A New Hope (1977) \n",
- "602 750 Dr. Strangelove or: How I Learned to Stop Worr... \n",
- "914 1213 Goodfellas (1990) \n",
- "461 527 Schindler's List (1993) \n",
- "6710 58559 Dark Knight, The (2008) \n",
- "\n",
- " genres num_ratings mean_rating \n",
- "277 Crime|Drama 317.0 4.429022 \\\n",
- "659 Crime|Drama 192.0 4.289062 \n",
- "2226 Action|Crime|Drama|Thriller 218.0 4.272936 \n",
- "922 Crime|Drama 129.0 4.259690 \n",
- "46 Crime|Mystery|Thriller 204.0 4.237745 \n",
- "224 Action|Adventure|Sci-Fi 251.0 4.231076 \n",
- "602 Comedy|War 97.0 4.268041 \n",
- "914 Crime|Drama 126.0 4.250000 \n",
- "461 Drama|War 220.0 4.225000 \n",
- "6710 Action|Crime|Drama|IMAX 149.0 4.238255 \n",
- "\n",
- " damped_mean_rating \n",
- "277 4.400659 \n",
- "659 4.250077 \n",
- "2226 4.239103 \n",
- "922 4.205148 \n",
- "46 4.203344 \n",
- "224 4.203125 \n",
- "602 4.196407 \n",
- "914 4.194967 \n",
- "461 4.193546 \n",
- "6710 4.191922 "
- ]
- },
- "execution_count": 201,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "movies_df.sort_values(by=\"damped_mean_rating\", ascending=False).head(10)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.16"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb
deleted file mode 100644
index 1de8855..0000000
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb
+++ /dev/null
@@ -1,176 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "25aa1c78",
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "107e909b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load the transactions data\n",
- "transactions = pd.read_csv(\"grocery_dataset.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "289a9751",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "itemDescription\n",
- "whole milk 515.0\n",
- "other vegetables 361.0\n",
- "rolls/buns 344.0\n",
- "soda 271.0\n",
- "yogurt 242.0\n",
- "dtype: float64"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "member_purchases = transactions.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().fillna(0)\n",
- "item_descriptions = member_purchases.columns\n",
- "\n",
- "def simple_association(item_name):\n",
- " item_basket = member_purchases[member_purchases[item_name] > 0]\n",
- " co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)\n",
- " return co_purchase_counts.head(5)\n",
- "\n",
- "ex_item = item_descriptions[20]\n",
- "simple_association(ex_item)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "190a1485",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Top 10 recommendations for soda:\n",
- "\n",
- "itemDescription\n",
- "oil 1.246844\n",
- "beverages 1.162678\n",
- "sausage 1.014975\n",
- "grapes 1.001195\n",
- "shopping bags 0.95459\n",
- "frozen meals 0.943642\n",
- "specialty bar 0.936182\n",
- "butter 0.918418\n",
- "candy 0.910056\n",
- "specialty chocolate 0.904846\n",
- "Name: soda, dtype: object \n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Function to create a transaction matrix\n",
- "def create_transaction_matrix(transactions):\n",
- " # Group the transactions by member number, date and item description\n",
- " # Count the number of each item bought by each member on each date\n",
- " # Unstack the item descriptions to create a matrix where rows are transactions and columns are items\n",
- " # Fill any missing values with 0\n",
- " # Set the index to be the member number and date\n",
- " basket = (transactions.groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']\n",
- " .count().unstack().reset_index().fillna(0)\n",
- " .set_index(['Member_number', 'Date']))\n",
- " \n",
- " # Convert the counts to True or False\n",
- " # True if the item was bought in the transaction, False otherwise\n",
- " return basket.applymap(lambda x: True if x > 0 else False)\n",
- "\n",
- "# Function to calculate a lift matrix\n",
- "def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):\n",
- " # Calculate the joint probability of each pair of items\n",
- " probability_pair = pd.DataFrame(index=basket_sets.columns, columns=basket_sets.columns)\n",
- " for item1 in basket_sets.columns:\n",
- " for item2 in basket_sets.columns:\n",
- " joint_probability = (basket_sets[item1] & basket_sets[item2]).sum() / len(basket_sets)\n",
- " if joint_probability > min_joint_probability:\n",
- " probability_pair.loc[item1, item2] = joint_probability\n",
- " else:\n",
- " probability_pair.loc[item1, item2] = 0\n",
- "\n",
- " # Set the diagonal of the joint probability matrix to 0\n",
- " np.fill_diagonal(probability_pair.values, 0)\n",
- "\n",
- " # Calculate the individual probability of each item\n",
- " probability_item = basket_sets.mean()\n",
- "\n",
- " # Calculate the product of the individual probabilities\n",
- " probability_product = np.outer(probability_item, probability_item)\n",
- "\n",
- " # Calculate the lift of each pair of items\n",
- " lift_matrix = probability_pair.divide(probability_product, fill_value=0)\n",
- " \n",
- " return lift_matrix\n",
- "\n",
- "# Function to recommend items\n",
- "def recommend_items(lift_matrix, item, num_recommendations=10):\n",
- " # Sort the items in the lift matrix for the given item in descending order\n",
- " # Take the top num_recommendations items\n",
- " recommended_for_item = lift_matrix[item].sort_values(ascending=False).head(num_recommendations)\n",
- " \n",
- " # Print the recommended items\n",
- " print(f\"Top {num_recommendations} recommendations for {item}:\\n\")\n",
- " print(recommended_for_item, \"\\n\\n\")\n",
- "\n",
- "# Create transaction matrix\n",
- "basket_sets = create_transaction_matrix(transactions)\n",
- "\n",
- "# Calculate the lift matrix\n",
- "lift_matrix = calculate_lift_matrix(basket_sets)\n",
- "\n",
- "# Recommend items for 'meat'\n",
- "recommend_items(lift_matrix, 'soda')"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.16"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb
deleted file mode 100644
index 4ff272c..0000000
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b0c33033",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.16"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb
index 3f4c9af..a3b5040 100644
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb
@@ -1,8 +1,46 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Non-Personalized Recommender Systems: Popularity Based"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 207,
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os \n",
+ "\n",
+ "if not os.path.exists('movielens_small'):\n",
+ " !wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip \n",
+ " !unzip ml-latest-small.zip\n",
+ " !rm ml-latest-small.zip\n",
+ " !mv ml-latest-small movielens_small"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Damped Mean\n",
+ "\n",
+ "$$ s(i) = \\frac{\\sum_{u \\in U_i} r_{ui} + a \\times \\mu}{|U_i| + a} $$\n",
+ "\n",
+ "Where:\n",
+ "- $ s(i) $: The damped mean rating for item $ i $.\n",
+ "- $ \\sum_{u \\in U_i} r_{ui} $: Sum of the ratings for item $ i $.\n",
+ "- $ a $: Damping factor, a value that determines the extent of smoothing.\n",
+ "- $ \\mu $: Global mean rating across all items.\n",
+ "- $ |U_i| $: Total number of ratings for item $ i $.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
"id": "937dd4ed",
"metadata": {},
"outputs": [],
@@ -36,6 +74,13 @@
"movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's see how using num_ratings compares to mean rating & damped mean rating."
+ ]
+ },
{
"cell_type": "code",
"execution_count": 198,
@@ -450,7 +495,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.16"
+ "version": "3.11.3"
}
},
"nbformat": 4,
diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
index 4ff272c..7dd3b1c 100644
--- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
+++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
@@ -2,9 +2,124 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "b0c33033",
"metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Posts sorted by Reddit's 'Hot' score:\n",
+ " post_id post_title upvotes \n",
+ "9 10 Difference between CNN and RNN? 350 \\\n",
+ "8 9 The future of quantum computing in AI 600 \n",
+ "7 8 Experience with multi-modal learning? 450 \n",
+ "6 7 Looking for resources on probabilistic program... 700 \n",
+ "4 5 Tips for handling imbalanced datasets? 1100 \n",
+ "2 3 Has anyone tried the new reinforcement learnin... 900 \n",
+ "3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
+ "5 6 Which GPU is best for neural network training? 300 \n",
+ "0 1 How do I start with machine learning? 600 \n",
+ "1 2 Best practices for deep learning optimization? 400 \n",
+ "\n",
+ " downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
+ "9 50 256000 71.111111 8.166010 0.042205 \n",
+ "8 50 128000 35.555556 5.584807 0.227638 \n",
+ "7 50 64000 17.777778 4.024282 0.559318 \n",
+ "6 50 32000 8.888889 3.524024 2.416714 \n",
+ "4 100 8000 2.222222 3.177778 18.779258 \n",
+ "2 100 2000 0.555556 2.947534 38.776074 \n",
+ "3 100 4000 1.111111 2.933987 24.453093 \n",
+ "5 50 16000 4.444444 2.753496 2.886859 \n",
+ "0 100 500 0.138889 2.710081 36.655710 \n",
+ "1 50 1000 0.277778 2.566290 24.588946 \n",
+ "\n",
+ "Posts sorted by Hacker News score:\n",
+ " post_id post_title upvotes \n",
+ "2 3 Has anyone tried the new reinforcement learnin... 900 \\\n",
+ "0 1 How do I start with machine learning? 600 \n",
+ "1 2 Best practices for deep learning optimization? 400 \n",
+ "3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
+ "4 5 Tips for handling imbalanced datasets? 1100 \n",
+ "5 6 Which GPU is best for neural network training? 300 \n",
+ "6 7 Looking for resources on probabilistic program... 700 \n",
+ "7 8 Experience with multi-modal learning? 450 \n",
+ "8 9 The future of quantum computing in AI 600 \n",
+ "9 10 Difference between CNN and RNN? 350 \n",
+ "\n",
+ " downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
+ "2 100 2000 0.555556 2.947534 38.776074 \n",
+ "0 100 500 0.138889 2.710081 36.655710 \n",
+ "1 50 1000 0.277778 2.566290 24.588946 \n",
+ "3 100 4000 1.111111 2.933987 24.453093 \n",
+ "4 100 8000 2.222222 3.177778 18.779258 \n",
+ "5 50 16000 4.444444 2.753496 2.886859 \n",
+ "6 50 32000 8.888889 3.524024 2.416714 \n",
+ "7 50 64000 17.777778 4.024282 0.559318 \n",
+ "8 50 128000 35.555556 5.584807 0.227638 \n",
+ "9 50 256000 71.111111 8.166010 0.042205 \n"
+ ]
+ }
+ ],
+ "source": [
+ "import math\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "data = {\n",
+ " 'post_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
+ " 'post_title': [\n",
+ " \"How do I start with machine learning?\",\n",
+ " \"Best practices for deep learning optimization?\",\n",
+ " \"Has anyone tried the new reinforcement learning library?\",\n",
+ " \"Discussion: Evolutionary algorithms vs gradient descent\",\n",
+ " \"Tips for handling imbalanced datasets?\",\n",
+ " \"Which GPU is best for neural network training?\",\n",
+ " \"Looking for resources on probabilistic programming\",\n",
+ " \"Experience with multi-modal learning?\",\n",
+ " \"The future of quantum computing in AI\",\n",
+ " \"Difference between CNN and RNN?\"\n",
+ " ],\n",
+ " 'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n",
+ " 'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n",
+ " 'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000]\n",
+ "}\n",
+ "\n",
+ "\n",
+ "# Convert to DataFrame\n",
+ "reddit_df = pd.DataFrame(data)\n",
+ "\n",
+ "# Calculate age in hours from age in seconds\n",
+ "reddit_df['age_in_hours'] = reddit_df['age_in_seconds'] / 3600\n",
+ "\n",
+ "# Reddit's \"Hot\" formula\n",
+ "def reddit_hot(U, D, t):\n",
+ " return math.log10(max(abs(U-D), 1)) - np.sign(U-D) * t / 45000\n",
+ "\n",
+ "# Modified Hacker News formula\n",
+ "def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n",
+ " return P * pow((U - D - 1), alpha) / pow((T + 2), gamma)\n",
+ "\n",
+ "# Apply the formulas\n",
+ "reddit_df['reddit_hot'] = reddit_df.apply(lambda row: reddit_hot(row['upvotes'], row['downvotes'], row['age_in_seconds']), axis=1)\n",
+ "reddit_df['hacker_news'] = reddit_df.apply(lambda row: hacker_news(row['upvotes'], row['downvotes'], row['age_in_hours']), axis=1)\n",
+ "\n",
+ "# Sort by Reddit's \"Hot\" score and print\n",
+ "reddit_df_sorted = reddit_df.sort_values(by='reddit_hot', ascending=False)\n",
+ "print(\"Posts sorted by Reddit's 'Hot' score:\")\n",
+ "print(reddit_df_sorted)\n",
+ "\n",
+ "# Sort by Hacker News score and print\n",
+ "hacker_news_df_sorted = reddit_df.sort_values(by='hacker_news', ascending=False)\n",
+ "print(\"\\nPosts sorted by Hacker News score:\")\n",
+ "print(hacker_news_df_sorted)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": []
}
@@ -25,7 +140,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.16"
+ "version": "3.11.3"
}
},
"nbformat": 4,