mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
minor updates
This commit is contained in:
@@ -65,6 +65,28 @@
|
||||
"print(torch.cuda.is_available())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "397ba1ab-3306-4965-8618-1ed5f24fb939",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/1.webp\" width=\"400px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e3c0555-88f6-4515-8c99-aa56b0769d54",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/2.webp\" width=\"300px\">\n",
|
||||
"\n",
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/3.webp\" width=\"300px\">\n",
|
||||
"\n",
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/4.webp\" width=\"500px\">\n",
|
||||
"\n",
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/5.webp\" width=\"500px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
|
||||
@@ -73,6 +95,14 @@
|
||||
"## A.2 Understanding tensors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3c484e87-bfc9-4105-b0a7-1e23b2a72a30",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/6.webp\" width=\"400px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
|
||||
@@ -392,6 +422,14 @@
|
||||
"## A.3 Seeing models as computation graphs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f3e16c3-07df-44b6-9106-a42fb24452a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/7.webp\" width=\"600px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
@@ -429,6 +467,14 @@
|
||||
"## A.4 Automatic differentiation made easy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33aa2ee4-6f1d-448d-8707-67cd5278233c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/8.webp\" width=\"600px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
@@ -495,6 +541,14 @@
|
||||
"## A.5 Implementing multilayer neural networks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d6cb9787-2bc8-4379-9e8c-a3401ac63c51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/9.webp\" width=\"500px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
@@ -723,6 +777,14 @@
|
||||
"## A.6 Setting up efficient data loaders"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f98d8fc-5618-47a2-bc72-153818972a24",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/10.webp\" width=\"600px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
@@ -901,6 +963,14 @@
|
||||
" print(f\"Batch {idx+1}:\", x, y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb03ed57-df38-4ee0-a553-0863450df39b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/11.webp\" width=\"600px\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
|
||||
@@ -1239,14 +1309,6 @@
|
||||
"source": [
|
||||
"See [DDP-script.py](DDP-script.py)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b32db94f-f159-4aa3-91cc-5b937eef7fc7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -175,7 +175,7 @@
|
||||
"id": "c8j1cWDcWAMf"
|
||||
},
|
||||
"source": [
|
||||
"## A.9.2 Single-GPU training"
|
||||
"### A.9.2 Single-GPU training"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -431,6 +431,28 @@
|
||||
"source": [
|
||||
"compute_accuracy(model, test_loader, device=device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A.9.3 Training with multiple GPUs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See [DDP-script.py](DDP-script.py)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -454,7 +476,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user