mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-04-10 12:33:44 +00:00
149 lines
6.9 KiB
Plaintext
149 lines
6.9 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "b0c33033",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Posts sorted by Reddit's 'Hot' score:\n",
|
|
" post_id post_title upvotes \n",
|
|
"9 10 Difference between CNN and RNN? 350 \\\n",
|
|
"8 9 The future of quantum computing in AI 600 \n",
|
|
"7 8 Experience with multi-modal learning? 450 \n",
|
|
"6 7 Looking for resources on probabilistic program... 700 \n",
|
|
"4 5 Tips for handling imbalanced datasets? 1100 \n",
|
|
"2 3 Has anyone tried the new reinforcement learnin... 900 \n",
|
|
"3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
|
|
"5 6 Which GPU is best for neural network training? 300 \n",
|
|
"0 1 How do I start with machine learning? 600 \n",
|
|
"1 2 Best practices for deep learning optimization? 400 \n",
|
|
"\n",
|
|
" downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
|
|
"9 50 256000 71.111111 8.166010 0.042205 \n",
|
|
"8 50 128000 35.555556 5.584807 0.227638 \n",
|
|
"7 50 64000 17.777778 4.024282 0.559318 \n",
|
|
"6 50 32000 8.888889 3.524024 2.416714 \n",
|
|
"4 100 8000 2.222222 3.177778 18.779258 \n",
|
|
"2 100 2000 0.555556 2.947534 38.776074 \n",
|
|
"3 100 4000 1.111111 2.933987 24.453093 \n",
|
|
"5 50 16000 4.444444 2.753496 2.886859 \n",
|
|
"0 100 500 0.138889 2.710081 36.655710 \n",
|
|
"1 50 1000 0.277778 2.566290 24.588946 \n",
|
|
"\n",
|
|
"Posts sorted by Hacker News score:\n",
|
|
" post_id post_title upvotes \n",
|
|
"2 3 Has anyone tried the new reinforcement learnin... 900 \\\n",
|
|
"0 1 How do I start with machine learning? 600 \n",
|
|
"1 2 Best practices for deep learning optimization? 400 \n",
|
|
"3 4 Discussion: Evolutionary algorithms vs gradien... 800 \n",
|
|
"4 5 Tips for handling imbalanced datasets? 1100 \n",
|
|
"5 6 Which GPU is best for neural network training? 300 \n",
|
|
"6 7 Looking for resources on probabilistic program... 700 \n",
|
|
"7 8 Experience with multi-modal learning? 450 \n",
|
|
"8 9 The future of quantum computing in AI 600 \n",
|
|
"9 10 Difference between CNN and RNN? 350 \n",
|
|
"\n",
|
|
" downvotes age_in_seconds age_in_hours reddit_hot hacker_news \n",
|
|
"2 100 2000 0.555556 2.947534 38.776074 \n",
|
|
"0 100 500 0.138889 2.710081 36.655710 \n",
|
|
"1 50 1000 0.277778 2.566290 24.588946 \n",
|
|
"3 100 4000 1.111111 2.933987 24.453093 \n",
|
|
"4 100 8000 2.222222 3.177778 18.779258 \n",
|
|
"5 50 16000 4.444444 2.753496 2.886859 \n",
|
|
"6 50 32000 8.888889 3.524024 2.416714 \n",
|
|
"7 50 64000 17.777778 4.024282 0.559318 \n",
|
|
"8 50 128000 35.555556 5.584807 0.227638 \n",
|
|
"9 50 256000 71.111111 8.166010 0.042205 \n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import math\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
"# Toy dataset: ten posts with their vote counts and age since submission\n",
"data = {\n",
"    'post_id': list(range(1, 11)),\n",
"    'post_title': [\n",
"        \"How do I start with machine learning?\",\n",
"        \"Best practices for deep learning optimization?\",\n",
"        \"Has anyone tried the new reinforcement learning library?\",\n",
"        \"Discussion: Evolutionary algorithms vs gradient descent\",\n",
"        \"Tips for handling imbalanced datasets?\",\n",
"        \"Which GPU is best for neural network training?\",\n",
"        \"Looking for resources on probabilistic programming\",\n",
"        \"Experience with multi-modal learning?\",\n",
"        \"The future of quantum computing in AI\",\n",
"        \"Difference between CNN and RNN?\",\n",
"    ],\n",
"    'upvotes': [600, 400, 900, 800, 1100, 300, 700, 450, 600, 350],\n",
"    'downvotes': [100, 50, 100, 100, 100, 50, 50, 50, 50, 50],\n",
"    'age_in_seconds': [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000],\n",
"}\n",
"\n",
"# Build the frame and derive the age in hours (3600 seconds per hour) in one chain\n",
"reddit_df = pd.DataFrame(data).assign(\n",
"    age_in_hours=lambda frame: frame['age_in_seconds'] / 3600\n",
")\n",
|
|
"\n",
|
|
"# Reddit's \"Hot\" formula\n",
|
"def reddit_hot(U, D, t):\n",
"    \"\"\"Return the Reddit 'Hot' score for a single post.\n",
"\n",
"    The score is log10 of the absolute net votes (floored at 1) plus a time\n",
"    term, t / 45000, signed by the net votes: added when upvotes lead,\n",
"    subtracted when downvotes lead, and dropped when they tie.\n",
"\n",
"    Args:\n",
"        U: upvote count.\n",
"        D: downvote count.\n",
"        t: time value in seconds; every 45000 s shifts the score by 1.\n",
"\n",
"    Returns:\n",
"        The score as a float.\n",
"    \"\"\"\n",
"    net_votes = U - D\n",
"    vote_term = math.log10(max(abs(net_votes), 1))\n",
"    return vote_term + np.sign(net_votes) * t / 45000\n",
|
|
"\n",
|
|
"# Modified Hacker News formula\n",
|
"def hacker_news(U, D, T, P=1, alpha=0.8, gamma=1.8):\n",
"    \"\"\"Score a post with a Hacker News style gravity formula.\n",
"\n",
"    score = P * max(U - D - 1, 0) ** alpha / (T + 2) ** gamma\n",
"\n",
"    The net-vote base is clamped at zero: raising a negative base to a\n",
"    fractional power yields a complex number (or NaN for NumPy scalars),\n",
"    which cannot be sorted alongside ordinary scores.\n",
"\n",
"    Args:\n",
"        U: upvote count.\n",
"        D: downvote count.\n",
"        T: age of the post; the caller in this notebook passes hours.\n",
"        P: multiplicative weight applied to the score.\n",
"        alpha: exponent applied to the net votes.\n",
"        gamma: exponent applied to the age term (T + 2).\n",
"\n",
"    Returns:\n",
"        The score; 0.0 when U - D - 1 is not positive (for positive alpha).\n",
"    \"\"\"\n",
"    net_votes = max(U - D - 1, 0)\n",
"    return P * pow(net_votes, alpha) / pow(T + 2, gamma)\n",
|
|
"\n",
|
"# Score every post with both formulas; apply(axis=1) hands each row to the\n",
"# scalar scoring helpers.\n",
"# NOTE(review): reddit_hot receives the post's age as its time term, so older\n",
"# posts rank higher here -- confirm that this is the intended behaviour.\n",
"reddit_df['reddit_hot'] = reddit_df.apply(\n",
"    lambda post: reddit_hot(post['upvotes'], post['downvotes'], post['age_in_seconds']),\n",
"    axis=1,\n",
")\n",
"reddit_df['hacker_news'] = reddit_df.apply(\n",
"    lambda post: hacker_news(post['upvotes'], post['downvotes'], post['age_in_hours']),\n",
"    axis=1,\n",
")\n",
"\n",
"# Rank the posts under each score, highest first\n",
"reddit_df_sorted = reddit_df.sort_values('reddit_hot', ascending=False)\n",
"hacker_news_df_sorted = reddit_df.sort_values('hacker_news', ascending=False)\n",
"\n",
"# Show both rankings, each heading followed by its table\n",
"print(\"Posts sorted by Reddit's 'Hot' score:\", reddit_df_sorted, sep='\\n')\n",
"print(\"\\nPosts sorted by Hacker News score:\", hacker_news_df_sorted, sep='\\n')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|