mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
committed by
GitHub
parent
bce3a708f9
commit
6dd8666d9c
@@ -2,7 +2,9 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "AAAnDw04iAm4"
|
||||
},
|
||||
"source": [
|
||||
"<table style=\"width:100%\">\n",
|
||||
"<tr>\n",
|
||||
@@ -54,14 +56,14 @@
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "RM7kGhwMF_nO",
|
||||
"outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
|
||||
"outputId": "b1872617-aacd-46fa-e5f3-f130fd81b246"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2.0.1+cu118\n"
|
||||
"2.4.0+cu121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -79,7 +81,7 @@
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "OXLCKXhiUkZt",
|
||||
"outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
|
||||
"outputId": "e9ca3c58-d92c-4c8b-a9c9-cd7fcc1fedb4"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -102,18 +104,15 @@
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "MTTlfh53Va-T",
|
||||
"outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
|
||||
"outputId": "bae76cb5-d1d3-441f-a7c5-93a161e2e86a"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tensor([5., 7., 9.])"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([5., 7., 9.])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -125,13 +124,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Z4LwTNw7Vmmb",
|
||||
"outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
|
||||
"outputId": "9ad97923-bc8e-4c49-88bf-48dc1de56804"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -151,24 +150,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 184
|
||||
"height": 158
|
||||
},
|
||||
"id": "tKT6URN1Vuft",
|
||||
"outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
|
||||
"outputId": "8396eb18-47c8-47a1-c1b6-8bcb9480fb52"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "RuntimeError",
|
||||
"evalue": "ignored",
|
||||
"evalue": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m/tmp/ipykernel_2321/2079609735.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
|
||||
]
|
||||
}
|
||||
@@ -189,7 +188,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"id": "GyY59cjieitv"
|
||||
},
|
||||
@@ -215,7 +214,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"id": "v41gKqEJempa"
|
||||
},
|
||||
@@ -243,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"id": "UPGVRuylep8Y"
|
||||
},
|
||||
@@ -271,7 +270,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"id": "drhg6IXofAXh"
|
||||
},
|
||||
@@ -302,13 +301,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "7jaS5sqPWCY0",
|
||||
"outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
|
||||
"outputId": "8a5cd93d-671c-4abf-d5cd-97845f300ffd"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -362,7 +361,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"id": "4qrlmnPPe7FO"
|
||||
},
|
||||
@@ -391,13 +390,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "1_-BfkfEf4HX",
|
||||
"outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
|
||||
"outputId": "9453154f-0a5b-4a44-a3c9-f010e08d5a2c"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -406,7 +405,7 @@
|
||||
"1.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -417,13 +416,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "iYtXKBGEgKss",
|
||||
"outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
|
||||
"outputId": "d6cc870a-34de-490e-e5d3-23e6956744bd"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -432,7 +431,7 @@
|
||||
"1.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -443,21 +442,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "nc2LGFVbiAnB"
|
||||
},
|
||||
"source": [
|
||||
"### A.9.3 Training with multiple GPUs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "cOUza9iQiAnC"
|
||||
},
|
||||
"source": [
|
||||
"See [DDP-script.py](DDP-script.py)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "YOYk5Fh7iAnC"
|
||||
},
|
||||
"source": [
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",
|
||||
"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"
|
||||
@@ -485,7 +490,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user