Machine-Learning-Collection/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b0c33033",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Posts sorted by Reddit's 'Hot' score:\n",
      "   post_id                                         post_title  upvotes   \n",
      "9       10                    Difference between CNN and RNN?      350  \\\n",
      "8        9              The future of quantum computing in AI      600   \n",
      "7        8              Experience with multi-modal learning?      450   \n",
      "6        7  Looking for resources on probabilistic program...      700   \n",
      "4        5             Tips for handling imbalanced datasets?     1100   \n",
      "2        3  Has anyone tried the new reinforcement learnin...      900   \n",
      "3        4  Discussion: Evolutionary algorithms vs gradien...      800   \n",
      "5        6     Which GPU is best for neural network training?      300   \n",
      "0        1              How do I start with machine learning?      600   \n",
      "1        2     Best practices for deep learning optimization?      400   \n",
      "\n",
      "   downvotes  age_in_seconds  age_in_hours  reddit_hot  hacker_news  \n",
      "9         50          256000     71.111111    8.166010     0.042205  \n",
      "8         50          128000     35.555556    5.584807     0.227638  \n",
      "7         50           64000     17.777778    4.024282     0.559318  \n",
      "6         50           32000      8.888889    3.524024     2.416714  \n",
      "4        100            8000      2.222222    3.177778    18.779258  \n",
      "2        100            2000      0.555556    2.947534    38.776074  \n",
      "3        100            4000      1.111111    2.933987    24.453093  \n",
      "5         50           16000      4.444444    2.753496     2.886859  \n",
      "0        100             500      0.138889    2.710081    36.655710  \n",
      "1         50            1000      0.277778    2.566290    24.588946  \n",
      "\n",
      "Posts sorted by Hacker News score:\n",
      "   post_id                                         post_title  upvotes   \n",
      "2        3  Has anyone tried the new reinforcement learnin...      900  \\\n",
      "0        1              How do I start with machine learning?      600   \n",
      "1        2     Best practices for deep learning optimization?      400   \n",
      "3        4  Discussion: Evolutionary algorithms vs gradien...      800   \n",
      "4        5             Tips for handling imbalanced datasets?     1100   \n",
      "5        6     Which GPU is best for neural network training?      300   \n",
      "6        7  Looking for resources on probabilistic program...      700   \n",
      "7        8              Experience with multi-modal learning?      450   \n",
      "8        9              The future of quantum computing in AI      600   \n",
      "9       10                    Difference between CNN and RNN?      350   \n",
      "\n",
      "   downvotes  age_in_seconds  age_in_hours  reddit_hot  hacker_news  \n",
      "2        100            2000      0.555556    2.947534    38.776074  \n",
      "0        100             500      0.138889    2.710081    36.655710  \n",
      "1         50            1000      0.277778    2.566290    24.588946  \n",
      "3        100            4000      1.111111    2.933987    24.453093  \n",
      "4        100            8000      2.222222    3.177778    18.779258  \n",
      "5         50           16000      4.444444    2.753496     2.886859  \n",
      "6         50           32000      8.888889    3.524024     2.416714  \n",
      "7         50           64000     17.777778    4.024282     0.559318  \n",
      "8         50          128000     35.555556    5.584807     0.227638  \n",
      "9         50          256000     71.111111    8.166010     0.042205  \n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Toy dataset: ten posts with their vote counts and age since submission.\n",
    "data = {\n",
    "    'post_id': list(range(1, 11)),\n",
    "    'post_title': [\n",
    "        \"How do I start with machine learning?\",\n",
    "        \"Best practices for deep learning optimization?\",\n",
    "        \"Has anyone tried the new reinforcement learning library?\",\n",
    "        \"Discussion: Evolutionary algorithms vs gradient descent\",\n",
    "        \"Tips for handling imbalanced datasets?\",\n",
    "        \"Which GPU is best for neural network training?\",\n",
    "        \"Looking for resources on probabilistic programming\",\n",
    "        \"Experience with multi-modal learning?\",\n",
    "        \"The future of quantum computing in AI\",\n",
    "        \"Difference between CNN and RNN?\",\n",
    "    ],\n",
    "    'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n",
    "    'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n",
    "    'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000],\n",
    "}\n",
    "\n",
    "# Build the frame and derive the age in hours (3600 seconds per hour) in one chain.\n",
    "reddit_df = pd.DataFrame(data).assign(\n",
    "    age_in_hours=lambda frame: frame['age_in_seconds'] / 3600\n",
    ")\n",
    "\n",
    "# Reddit's \"Hot\" formula\n",
    "def reddit_hot(U, D, t):\n",
    "    \"\"\"Score a post with Reddit's 'Hot' formula.\n",
    "\n",
    "    U and D are the upvote and downvote counts; t is a time value in seconds\n",
    "    that is divided by 45000 and signed by the direction of the net vote.\n",
    "    Returns log10(max(|U - D|, 1)) + sign(U - D) * t / 45000.\n",
    "    \"\"\"\n",
    "    net_votes = U - D\n",
    "    # Floor the magnitude at 1 so log10 never receives zero.\n",
    "    magnitude = math.log10(max(abs(net_votes), 1))\n",
    "    direction = np.sign(net_votes)\n",
    "    return magnitude + direction * t / 45000\n",
    "\n",
    "# Modified Hacker News formula\n",
    "def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n",
    "    \"\"\"Score a post with the modified Hacker News formula.\n",
    "\n",
    "    Computes P * (U - D - 1) ** alpha / (T + 2) ** gamma, where U and D are\n",
    "    the upvote and downvote counts, T is the value the caller supplies as the\n",
    "    post age, P is a multiplier, and alpha / gamma are the exponents applied\n",
    "    to the vote term and the age term.\n",
    "\n",
    "    The vote term is floored at 0: a negative base raised to a fractional\n",
    "    power is a complex number for built-in numbers (NaN for NumPy scalars),\n",
    "    neither of which can be ranked. Posts with a non-positive vote term\n",
    "    therefore score 0.\n",
    "    \"\"\"\n",
    "    net_points = max(U - D - 1, 0)\n",
    "    return P * pow(net_points, alpha) / pow(T + 2, gamma)\n",
    "\n",
    "# Apply the formulas.\n",
    "# Reddit's formula expects t to grow for newer submissions (seconds since a fixed\n",
    "# epoch), so the age is negated: this measures submission time relative to the\n",
    "# present and makes older posts decay instead of gaining score with age.\n",
    "reddit_df['reddit_hot'] = reddit_df.apply(lambda row: reddit_hot(row['upvotes'], row['downvotes'], -row['age_in_seconds']), axis=1)\n",
    "# The Hacker News formula takes the age directly, expressed in hours.\n",
    "reddit_df['hacker_news'] = reddit_df.apply(lambda row: hacker_news(row['upvotes'], row['downvotes'], row['age_in_hours']), axis=1)\n",
    "\n",
    "# Rank the posts under each scoring scheme, highest score first.\n",
    "reddit_df_sorted = reddit_df.sort_values('reddit_hot', ascending=False)\n",
    "hacker_news_df_sorted = reddit_df.sort_values('hacker_news', ascending=False)\n",
    "\n",
    "# Print each ranking under its heading (heading and frame on separate lines).\n",
    "print(\"Posts sorted by Reddit's 'Hot' score:\", reddit_df_sorted, sep=\"\\n\")\n",
    "print(\"\\nPosts sorted by Hacker News score:\", hacker_news_df_sorted, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}