diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb deleted file mode 100644 index 3f4c9af..0000000 --- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part1-popularity_recsys-checkpoint.ipynb +++ /dev/null @@ -1,458 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 207, - "id": "937dd4ed", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "def load_data():\n", - " # Load the MovieLens data\n", - " movies_df = pd.read_csv(\"movielens_small/movies.csv\")\n", - " ratings_df = pd.read_csv(\"movielens_small/ratings.csv\")\n", - " return movies_df, ratings_df\n", - "\n", - "def calculate_popularity(movies_df, ratings_df, damping_factor=5):\n", - " # Calculate the number of ratings, mean rating, and sum of ratings for each movie\n", - " num_ratings = ratings_df.groupby(\"movieId\")[\"rating\"].count()\n", - " mean_rating = ratings_df.groupby(\"movieId\")[\"rating\"].mean()\n", - " global_mean = ratings_df[\"rating\"].mean()\n", - " \n", - " # Calculate the damped mean rating for each movie\n", - " damped_numerator = num_ratings * mean_rating + damping_factor * global_mean\n", - " damped_denominator = num_ratings + damping_factor\n", - " damped_mean_rating = damped_numerator / damped_denominator\n", - " \n", - " # Add the popularity data to the movie data\n", - " movies_df['num_ratings'] = movies_df['movieId'].map(num_ratings)\n", - " movies_df['mean_rating'] = movies_df['movieId'].map(mean_rating)\n", - " movies_df['damped_mean_rating'] = movies_df['movieId'].map(damped_mean_rating)\n", - " return movies_df\n", - "\n", - "movies_df, ratings_df = load_data()\n", - "movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 198, - "id": "7e649c6f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenresnum_ratingsmean_ratingdamped_mean_rating
314356Forrest Gump (1994)Comedy|Drama|Romance|War329.04.1641344.144589
277318Shawshank Redemption, The (1994)Crime|Drama317.04.4290224.400659
257296Pulp Fiction (1994)Comedy|Crime|Drama|Thriller307.04.1970684.175128
510593Silence of the Lambs, The (1991)Crime|Horror|Thriller279.04.1612904.138462
19392571Matrix, The (1999)Action|Sci-Fi|Thriller278.04.1924464.168457
\n", - "
" - ], - "text/plain": [ - " movieId title genres \n", - "314 356 Forrest Gump (1994) Comedy|Drama|Romance|War \\\n", - "277 318 Shawshank Redemption, The (1994) Crime|Drama \n", - "257 296 Pulp Fiction (1994) Comedy|Crime|Drama|Thriller \n", - "510 593 Silence of the Lambs, The (1991) Crime|Horror|Thriller \n", - "1939 2571 Matrix, The (1999) Action|Sci-Fi|Thriller \n", - "\n", - " num_ratings mean_rating damped_mean_rating \n", - "314 329.0 4.164134 4.144589 \n", - "277 317.0 4.429022 4.400659 \n", - "257 307.0 4.197068 4.175128 \n", - "510 279.0 4.161290 4.138462 \n", - "1939 278.0 4.192446 4.168457 " - ] - }, - "execution_count": 198, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies_df.sort_values(by=\"num_ratings\", ascending=False).head()" - ] - }, - { - "cell_type": "code", - "execution_count": 204, - "id": "c6ef332e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenresnum_ratingsmean_ratingdamped_mean_rating
765688448Paper Birds (Pájaros de papel) (2010)Comedy|Drama1.05.03.637779
8107100556Act of Killing, The (2012)Documentary1.05.03.637779
9083143031Jump In! (2007)Comedy|Drama|Romance1.05.03.637779
9094143511Human (2015)Documentary1.05.03.637779
9096143559L.A. Slasher (2015)Comedy|Crime|Fantasy1.05.03.637779
\n", - "
" - ], - "text/plain": [ - " movieId title genres \n", - "7656 88448 Paper Birds (Pájaros de papel) (2010) Comedy|Drama \\\n", - "8107 100556 Act of Killing, The (2012) Documentary \n", - "9083 143031 Jump In! (2007) Comedy|Drama|Romance \n", - "9094 143511 Human (2015) Documentary \n", - "9096 143559 L.A. Slasher (2015) Comedy|Crime|Fantasy \n", - "\n", - " num_ratings mean_rating damped_mean_rating \n", - "7656 1.0 5.0 3.637779 \n", - "8107 1.0 5.0 3.637779 \n", - "9083 1.0 5.0 3.637779 \n", - "9094 1.0 5.0 3.637779 \n", - "9096 1.0 5.0 3.637779 " - ] - }, - "execution_count": 204, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies_df.sort_values(by=\"mean_rating\", ascending=False).head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 201, - "id": "f669fb09", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenresnum_ratingsmean_ratingdamped_mean_rating
277318Shawshank Redemption, The (1994)Crime|Drama317.04.4290224.400659
659858Godfather, The (1972)Crime|Drama192.04.2890624.250077
22262959Fight Club (1999)Action|Crime|Drama|Thriller218.04.2729364.239103
9221221Godfather: Part II, The (1974)Crime|Drama129.04.2596904.205148
4650Usual Suspects, The (1995)Crime|Mystery|Thriller204.04.2377454.203344
224260Star Wars: Episode IV - A New Hope (1977)Action|Adventure|Sci-Fi251.04.2310764.203125
602750Dr. Strangelove or: How I Learned to Stop Worr...Comedy|War97.04.2680414.196407
9141213Goodfellas (1990)Crime|Drama126.04.2500004.194967
461527Schindler's List (1993)Drama|War220.04.2250004.193546
671058559Dark Knight, The (2008)Action|Crime|Drama|IMAX149.04.2382554.191922
\n", - "
" - ], - "text/plain": [ - " movieId title \n", - "277 318 Shawshank Redemption, The (1994) \\\n", - "659 858 Godfather, The (1972) \n", - "2226 2959 Fight Club (1999) \n", - "922 1221 Godfather: Part II, The (1974) \n", - "46 50 Usual Suspects, The (1995) \n", - "224 260 Star Wars: Episode IV - A New Hope (1977) \n", - "602 750 Dr. Strangelove or: How I Learned to Stop Worr... \n", - "914 1213 Goodfellas (1990) \n", - "461 527 Schindler's List (1993) \n", - "6710 58559 Dark Knight, The (2008) \n", - "\n", - " genres num_ratings mean_rating \n", - "277 Crime|Drama 317.0 4.429022 \\\n", - "659 Crime|Drama 192.0 4.289062 \n", - "2226 Action|Crime|Drama|Thriller 218.0 4.272936 \n", - "922 Crime|Drama 129.0 4.259690 \n", - "46 Crime|Mystery|Thriller 204.0 4.237745 \n", - "224 Action|Adventure|Sci-Fi 251.0 4.231076 \n", - "602 Comedy|War 97.0 4.268041 \n", - "914 Crime|Drama 126.0 4.250000 \n", - "461 Drama|War 220.0 4.225000 \n", - "6710 Action|Crime|Drama|IMAX 149.0 4.238255 \n", - "\n", - " damped_mean_rating \n", - "277 4.400659 \n", - "659 4.250077 \n", - "2226 4.239103 \n", - "922 4.205148 \n", - "46 4.203344 \n", - "224 4.203125 \n", - "602 4.196407 \n", - "914 4.194967 \n", - "461 4.193546 \n", - "6710 4.191922 " - ] - }, - "execution_count": 201, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies_df.sort_values(by=\"damped_mean_rating\", ascending=False).head(10)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb deleted file mode 100644 index 1de8855..0000000 --- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part2-associative_recsys-checkpoint.ipynb +++ /dev/null @@ -1,176 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "id": "25aa1c78", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "107e909b", - "metadata": {}, - "outputs": [], - "source": [ - "# Load the transactions data\n", - "transactions = pd.read_csv(\"grocery_dataset.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "289a9751", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "itemDescription\n", - "whole milk 515.0\n", - "other vegetables 361.0\n", - "rolls/buns 344.0\n", - "soda 271.0\n", - "yogurt 242.0\n", - "dtype: float64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "member_purchases = transactions.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().fillna(0)\n", - "item_descriptions = member_purchases.columns\n", - "\n", - "def simple_association(item_name):\n", - " item_basket = member_purchases[member_purchases[item_name] > 0]\n", - " co_purchase_counts = item_basket.sum().sort_values(ascending=False).drop(item_name)\n", - " return 
co_purchase_counts.head(5)\n", - "\n", - "ex_item = item_descriptions[20]\n", - "simple_association(ex_item)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "190a1485", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Top 10 recommendations for soda:\n", - "\n", - "itemDescription\n", - "oil 1.246844\n", - "beverages 1.162678\n", - "sausage 1.014975\n", - "grapes 1.001195\n", - "shopping bags 0.95459\n", - "frozen meals 0.943642\n", - "specialty bar 0.936182\n", - "butter 0.918418\n", - "candy 0.910056\n", - "specialty chocolate 0.904846\n", - "Name: soda, dtype: object \n", - "\n", - "\n" - ] - } - ], - "source": [ - "# Function to create a transaction matrix\n", - "def create_transaction_matrix(transactions):\n", - " # Group the transactions by member number, date and item description\n", - " # Count the number of each item bought by each member on each date\n", - " # Unstack the item descriptions to create a matrix where rows are transactions and columns are items\n", - " # Fill any missing values with 0\n", - " # Set the index to be the member number and date\n", - " basket = (transactions.groupby(['Member_number', 'Date', 'itemDescription'])['itemDescription']\n", - " .count().unstack().reset_index().fillna(0)\n", - " .set_index(['Member_number', 'Date']))\n", - " \n", - " # Convert the counts to True or False\n", - " # True if the item was bought in the transaction, False otherwise\n", - " return basket.applymap(lambda x: True if x > 0 else False)\n", - "\n", - "# Function to calculate a lift matrix\n", - "def calculate_lift_matrix(basket_sets, min_joint_probability=0.001):\n", - " # Calculate the joint probability of each pair of items\n", - " probability_pair = pd.DataFrame(index=basket_sets.columns, columns=basket_sets.columns)\n", - " for item1 in basket_sets.columns:\n", - " for item2 in basket_sets.columns:\n", - " joint_probability = (basket_sets[item1] & basket_sets[item2]).sum() / len(basket_sets)\n", - " if joint_probability > min_joint_probability:\n", - " probability_pair.loc[item1, item2] = joint_probability\n", - " else:\n", - " probability_pair.loc[item1, item2] = 0\n", - "\n", - " # Set the diagonal of the joint probability matrix to 0\n", - " np.fill_diagonal(probability_pair.values, 0)\n", - "\n", - " # Calculate the individual probability of each item\n", - " probability_item = basket_sets.mean()\n", - "\n", - " # Calculate the product of the individual probabilities\n", - " probability_product = np.outer(probability_item, probability_item)\n", - "\n", - " # Calculate the lift of each pair of items\n", - " lift_matrix = probability_pair.divide(probability_product, fill_value=0)\n", - " \n", - " return lift_matrix\n", - "\n", - "# Function to recommend items\n", - "def recommend_items(lift_matrix, item, num_recommendations=10):\n", - " # Sort the items in the lift matrix for the given item in descending order\n", - " # Take the top num_recommendations items\n", - " recommended_for_item = lift_matrix[item].sort_values(ascending=False).head(num_recommendations)\n", - " \n", - " # Print the recommended items\n", - " print(f\"Top {num_recommendations} recommendations for {item}:\\n\")\n", - " print(recommended_for_item, \"\\n\\n\")\n", - "\n", - "# Create transaction matrix\n", - "basket_sets = create_transaction_matrix(transactions)\n", - "\n", - "# Calculate the lift matrix\n", - "lift_matrix = calculate_lift_matrix(basket_sets)\n", - "\n", - "# Recommend items for 'meat'\n", 
- "recommend_items(lift_matrix, 'soda')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb deleted file mode 100644 index 4ff272c..0000000 --- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/.ipynb_checkpoints/part3-trending-recsys-checkpoint.ipynb +++ /dev/null @@ -1,33 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "b0c33033", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb index 3f4c9af..a3b5040 100644 --- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb +++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part1-popularity_recsys.ipynb @@ -1,8 +1,46 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Non-Personalized Recommender Systems: Popularity Based" + ] + }, { "cell_type": "code", - "execution_count": 207, + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os \n", + "\n", + "if os.path.exists('movielens_small.zip'):\n", + " !wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip \n", + " !unzip ml-latest-small.zip\n", + " !rm ml-latest-small.zip\n", + " !mv ml-latest-small movielens_small" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Damped Mean\n", + "\n", + "$$ s(i) = \\frac{\\Sigma_{u \\in U_i} r_i + a \\times \\mu}{|U_i| + a} $$\n", + "\n", + "Where:\n", + "- $ s(i) $: The damped mean rating for item $ i $.\n", + "- $ \\Sigma_{u \\in U_i} r_i $: Sum of the ratings for item $ i $.\n", + "- $ a $: Damping factor, a value that determines the extent of smoothing.\n", + "- $ \\mu $: Global mean rating across all items.\n", + "- $ |U_i| $: Total number of ratings for item $ i $.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "937dd4ed", "metadata": {}, "outputs": [], @@ -36,6 +74,13 @@ "movies_df = calculate_popularity(movies_df, ratings_df, damping_factor=10)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see how using num_ratings compares to mean rating & damped mean rating." 
+ ] + }, { "cell_type": "code", "execution_count": 198, @@ -450,7 +495,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb index 4ff272c..7dd3b1c 100644 --- a/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb +++ b/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb @@ -2,9 +2,124 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b0c33033", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Posts sorted by Reddit's 'Hot' score:\n", + " post_id post_title upvotes \n", + "9 10 Difference between CNN and RNN? 350 \\\n", + "8 9 The future of quantum computing in AI 600 \n", + "7 8 Experience with multi-modal learning? 450 \n", + "6 7 Looking for resources on probabilistic program... 700 \n", + "4 5 Tips for handling imbalanced datasets? 1100 \n", + "2 3 Has anyone tried the new reinforcement learnin... 900 \n", + "3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n", + "5 6 Which GPU is best for neural network training? 300 \n", + "0 1 How do I start with machine learning? 600 \n", + "1 2 Best practices for deep learning optimization? 400 \n", + "\n", + " downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n", + "9 50 256000 71.111111 8.166010 0.042205 \n", + "8 50 128000 35.555556 5.584807 0.227638 \n", + "7 50 64000 17.777778 4.024282 0.559318 \n", + "6 50 32000 8.888889 3.524024 2.416714 \n", + "4 100 8000 2.222222 3.177778 18.779258 \n", + "2 100 2000 0.555556 2.947534 38.776074 \n", + "3 100 4000 1.111111 2.933987 24.453093 \n", + "5 50 16000 4.444444 2.753496 2.886859 \n", + "0 100 500 0.138889 2.710081 36.655710 \n", + "1 50 1000 0.277778 2.566290 24.588946 \n", + "\n", + "Posts sorted by Hacker News score:\n", + " post_id post_title upvotes \n", + "2 3 Has anyone tried the new reinforcement learnin... 900 \\\n", + "0 1 How do I start with machine learning? 600 \n", + "1 2 Best practices for deep learning optimization? 400 \n", + "3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n", + "4 5 Tips for handling imbalanced datasets? 1100 \n", + "5 6 Which GPU is best for neural network training? 300 \n", + "6 7 Looking for resources on probabilistic program... 700 \n", + "7 8 Experience with multi-modal learning? 450 \n", + "8 9 The future of quantum computing in AI 600 \n", + "9 10 Difference between CNN and RNN? 
350 \n", + "\n", + " downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n", + "2 100 2000 0.555556 2.947534 38.776074 \n", + "0 100 500 0.138889 2.710081 36.655710 \n", + "1 50 1000 0.277778 2.566290 24.588946 \n", + "3 100 4000 1.111111 2.933987 24.453093 \n", + "4 100 8000 2.222222 3.177778 18.779258 \n", + "5 50 16000 4.444444 2.753496 2.886859 \n", + "6 50 32000 8.888889 3.524024 2.416714 \n", + "7 50 64000 17.777778 4.024282 0.559318 \n", + "8 50 128000 35.555556 5.584807 0.227638 \n", + "9 50 256000 71.111111 8.166010 0.042205 \n" + ] + } + ], + "source": [ + "import math\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "data = {\n", + " 'post_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n", + " 'post_title': [\n", + " \"How do I start with machine learning?\",\n", + " \"Best practices for deep learning optimization?\",\n", + " \"Has anyone tried the new reinforcement learning library?\",\n", + " \"Discussion: Evolutionary algorithms vs gradient descent\",\n", + " \"Tips for handling imbalanced datasets?\",\n", + " \"Which GPU is best for neural network training?\",\n", + " \"Looking for resources on probabilistic programming\",\n", + " \"Experience with multi-modal learning?\",\n", + " \"The future of quantum computing in AI\",\n", + " \"Difference between CNN and RNN?\"\n", + " ],\n", + " 'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n", + " 'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n", + " 'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000]\n", + "}\n", + "\n", + "\n", + "# Convert to DataFrame\n", + "reddit_df = pd.DataFrame(data)\n", + "\n", + "# Calculate age in hours from age in seconds\n", + "reddit_df['age_in_hours'] = reddit_df['age_in_seconds'] / 3600\n", + "\n", + "# Reddit's \"Hot\" formula\n", + "def reddit_hot(U, D, t):\n", + " return math.log10(max(abs(U-D), 1)) + np.sign(U-D) * t / 45000\n", + "\n", + "# Modified Hacker News formula\n", + "def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n", + " return P * pow((U - D - 1), alpha) / pow((T + 2), gamma)\n", + "\n", + "# Apply the formulas\n", + "reddit_df['reddit_hot'] = reddit_df.apply(lambda row: reddit_hot(row['upvotes'], row['downvotes'], row['age_in_seconds']), axis=1)\n", + "reddit_df['hacker_news'] = reddit_df.apply(lambda row: hacker_news(row['upvotes'], row['downvotes'], row['age_in_hours']), axis=1)\n", + "\n", + "# Sort by Reddit's \"Hot\" score and print\n", + "reddit_df_sorted = reddit_df.sort_values(by='reddit_hot', ascending=False)\n", + "print(\"Posts sorted by Reddit's 'Hot' score:\")\n", + "print(reddit_df_sorted)\n", + "\n", + "# Sort by Hacker News score and print\n", + "hacker_news_df_sorted = reddit_df.sort_values(by='hacker_news', ascending=False)\n", + "print(\"\\nPosts sorted by Hacker News score:\")\n", + "print(hacker_news_df_sorted)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -25,7 +140,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.11.3" } }, "nbformat": 4,