From 01cb137bfd134307c19c093a40fc157999846050 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Mon, 19 Aug 2024 20:58:45 -0500 Subject: [PATCH] Note about MPS devices (#329) --- .../01_main-chapter-code/appendix-D.ipynb | 27 ++++++++++++++++++- .../01_main-chapter-code/appendix-E.ipynb | 15 +++++++++++ ch05/01_main-chapter-code/ch05.ipynb | 2 ++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/appendix-D/01_main-chapter-code/appendix-D.ipynb b/appendix-D/01_main-chapter-code/appendix-D.ipynb index 6b21133..a157b71 100644 --- a/appendix-D/01_main-chapter-code/appendix-D.ipynb +++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb @@ -81,6 +81,20 @@ "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", + "# Note:\n", + "# Uncommenting the following lines will allow the code to run on Apple Silicon chips, if applicable,\n", + "# which is approximately 2x faster than on an Apple CPU (as measured on an M3 MacBook Air).\n", + "# However, the resulting loss values may be slightly different.\n", + "\n", + "#if torch.cuda.is_available():\n", + "# device = torch.device(\"cuda\")\n", + "#elif torch.backends.mps.is_available():\n", + "# device = torch.device(\"mps\")\n", + "#else:\n", + "# device = torch.device(\"cpu\")\n", + "#\n", + "# print(f\"Using {device} device.\")\n", + "\n", "torch.manual_seed(123)\n", "model = GPTModel(GPT_CONFIG_124M)\n", "model.eval(); # Disable dropout during inference" @@ -660,6 +674,11 @@ "source": [ "import tiktoken\n", "\n", + "# Note:\n", + "# Uncomment the following code to calculate the execution time\n", + "# import time\n", + "# start_time = time.time()\n", + "\n", "torch.manual_seed(123)\n", "model = GPTModel(GPT_CONFIG_124M)\n", "model.to(device)\n", @@ -674,7 +693,13 @@ " eval_freq=5, eval_iter=1, start_context=\"Every effort moves you\",\n", " tokenizer=tokenizer, warmup_steps=warmup_steps, \n", " initial_lr=1e-5, min_lr=1e-5\n", - ")" + ")\n", + "\n", + "# Note:\n", + "# Uncomment the following code to show the execution time\n", + "# end_time = time.time()\n", + "# execution_time_minutes = (end_time - start_time) / 60\n", + "# print(f\"Training completed in {execution_time_minutes:.2f} minutes.\")" ] }, { diff --git a/appendix-E/01_main-chapter-code/appendix-E.ipynb b/appendix-E/01_main-chapter-code/appendix-E.ipynb index c73052c..c0ab95f 100644 --- a/appendix-E/01_main-chapter-code/appendix-E.ipynb +++ b/appendix-E/01_main-chapter-code/appendix-E.ipynb @@ -511,6 +511,21 @@ "outputs": [], "source": [ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", + "# Note:\n", + "# Uncommenting the following lines will allow the code to run on Apple Silicon chips, if applicable,\n", + "# which is approximately 1.2x faster than on an Apple CPU (as measured on an M3 MacBook Air).\n", + "# However, the resulting loss values may be slightly different.\n", + "\n", + "#if torch.cuda.is_available():\n", + "# device = torch.device(\"cuda\")\n", + "#elif torch.backends.mps.is_available():\n", + "# device = torch.device(\"mps\")\n", + "#else:\n", + "# device = torch.device(\"cpu\")\n", + "#\n", + "# print(f\"Using {device} device.\")\n", + "\n", "model.to(device); # no assignment model = model.to(device) necessary for nn.Module classes" ] }, diff --git a/ch05/01_main-chapter-code/ch05.ipynb b/ch05/01_main-chapter-code/ch05.ipynb index 5cec9eb..bb624e6 100644 --- a/ch05/01_main-chapter-code/ch05.ipynb +++ b/ch05/01_main-chapter-code/ch05.ipynb @@ -1154,6 +1154,8 @@ "# device = torch.device(\"mps\")\n", "#else:\n", "# device = torch.device(\"cpu\")\n", + "#\n", + "# print(f\"Using {device} device.\")\n", "\n", "\n", "model.to(device) # no assignment model = model.to(device) necessary for nn.Module classes\n",