Files
Machine-Learning-Collection/ML/Pytorch/recommender_systems/2.non-personalized-recsys/part3-trending-recsys.ipynb
2023-11-02 18:48:58 -07:00

149 lines
6.9 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b0c33033",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Posts sorted by Reddit's 'Hot' score:\n",
" post_id post_title upvotes \n",
"9 10 Difference between CNN and RNN? 350 \\\n",
"8 9 The future of quantum computing in AI 600 \n",
"7 8 Experience with multi-modal learning? 450 \n",
"6 7 Looking for resources on probabilistic program... 700 \n",
"4 5 Tips for handling imbalanced datasets? 1100 \n",
"2 3 Has anyone tried the new reinforcement learnin... 900 \n",
"3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
"5 6 Which GPU is best for neural network training? 300 \n",
"0 1 How do I start with machine learning? 600 \n",
"1 2 Best practices for deep learning optimization? 400 \n",
"\n",
" downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
"9 50 256000 71.111111 8.166010 0.042205 \n",
"8 50 128000 35.555556 5.584807 0.227638 \n",
"7 50 64000 17.777778 4.024282 0.559318 \n",
"6 50 32000 8.888889 3.524024 2.416714 \n",
"4 100 8000 2.222222 3.177778 18.779258 \n",
"2 100 2000 0.555556 2.947534 38.776074 \n",
"3 100 4000 1.111111 2.933987 24.453093 \n",
"5 50 16000 4.444444 2.753496 2.886859 \n",
"0 100 500 0.138889 2.710081 36.655710 \n",
"1 50 1000 0.277778 2.566290 24.588946 \n",
"\n",
"Posts sorted by Hacker News score:\n",
" post_id post_title upvotes \n",
"2 3 Has anyone tried the new reinforcement learnin... 900 \\\n",
"0 1 How do I start with machine learning? 600 \n",
"1 2 Best practices for deep learning optimization? 400 \n",
"3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
"4 5 Tips for handling imbalanced datasets? 1100 \n",
"5 6 Which GPU is best for neural network training? 300 \n",
"6 7 Looking for resources on probabilistic program... 700 \n",
"7 8 Experience with multi-modal learning? 450 \n",
"8 9 The future of quantum computing in AI 600 \n",
"9 10 Difference between CNN and RNN? 350 \n",
"\n",
" downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
"2 100 2000 0.555556 2.947534 38.776074 \n",
"0 100 500 0.138889 2.710081 36.655710 \n",
"1 50 1000 0.277778 2.566290 24.588946 \n",
"3 100 4000 1.111111 2.933987 24.453093 \n",
"4 100 8000 2.222222 3.177778 18.779258 \n",
"5 50 16000 4.444444 2.753496 2.886859 \n",
"6 50 32000 8.888889 3.524024 2.416714 \n",
"7 50 64000 17.777778 4.024282 0.559318 \n",
"8 50 128000 35.555556 5.584807 0.227638 \n",
"9 50 256000 71.111111 8.166010 0.042205 \n"
]
}
],
"source": [
"import math\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"data = {\n",
" 'post_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
" 'post_title': [\n",
" \"How do I start with machine learning?\",\n",
" \"Best practices for deep learning optimization?\",\n",
" \"Has anyone tried the new reinforcement learning library?\",\n",
" \"Discussion: Evolutionary algorithms vs gradient descent\",\n",
" \"Tips for handling imbalanced datasets?\",\n",
" \"Which GPU is best for neural network training?\",\n",
" \"Looking for resources on probabilistic programming\",\n",
" \"Experience with multi-modal learning?\",\n",
" \"The future of quantum computing in AI\",\n",
" \"Difference between CNN and RNN?\"\n",
" ],\n",
" 'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n",
" 'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n",
" 'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000]\n",
"}\n",
"\n",
"\n",
"# Convert to DataFrame\n",
"reddit_df = pd.DataFrame(data)\n",
"\n",
"# Calculate age in hours from age in seconds\n",
"reddit_df['age_in_hours'] = reddit_df['age_in_seconds'] / 3600\n",
"\n",
"# Reddit's \"Hot\" formula\n",
"def reddit_hot(U, D, t):\n",
" return math.log10(max(abs(U-D), 1)) + np.sign(U-D) * t / 45000\n",
"\n",
"# Modified Hacker News formula\n",
"def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n",
" return P * pow((U - D - 1), alpha) / pow((T + 2), gamma)\n",
"\n",
"# Apply the formulas\n",
"reddit_df['reddit_hot'] = reddit_df.apply(lambda row: reddit_hot(row['upvotes'], row['downvotes'], row['age_in_seconds']), axis=1)\n",
"reddit_df['hacker_news'] = reddit_df.apply(lambda row: hacker_news(row['upvotes'], row['downvotes'], row['age_in_hours']), axis=1)\n",
"\n",
"# Sort by Reddit's \"Hot\" score and print\n",
"reddit_df_sorted = reddit_df.sort_values(by='reddit_hot', ascending=False)\n",
"print(\"Posts sorted by Reddit's 'Hot' score:\")\n",
"print(reddit_df_sorted)\n",
"\n",
"# Sort by Hacker News score and print\n",
"hacker_news_df_sorted = reddit_df.sort_values(by='hacker_news', ascending=False)\n",
"print(\"\\nPosts sorted by Hacker News score:\")\n",
"print(hacker_news_df_sorted)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}