diff --git a/ch02/03_bonus_embedding-vs-matmul/embeddings-and-linear-layers.ipynb b/ch02/03_bonus_embedding-vs-matmul/embeddings-and-linear-layers.ipynb index 4ebf732..97d061e 100644 --- a/ch02/03_bonus_embedding-vs-matmul/embeddings-and-linear-layers.ipynb +++ b/ch02/03_bonus_embedding-vs-matmul/embeddings-and-linear-layers.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "061720f4-f025-4640-82a0-15098fa94cf9", "metadata": {}, "outputs": [ @@ -27,7 +27,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "PyTorch version: 2.1.0.dev20230825\n" + "PyTorch version: 2.1.0\n" ] } ], @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "id": "cc489ea5-73db-40b9-959e-0d70cae25f40", "metadata": {}, "outputs": [], @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "id": "60a7c104-36e1-4b28-bd02-a24a1099dc66", "metadata": {}, "outputs": [], @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 4, "id": "595f603e-8d2a-4171-8f94-eac8106b2e57", "metadata": {}, "outputs": [ @@ -113,7 +113,7 @@ " [-2.8400, -0.7849, -1.4096, -0.4076, 0.7953]], requires_grad=True)" ] }, - "execution_count": 18, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "id": "8bbc0255-4805-4be9-9f4c-1d0d967ef9d5", "metadata": {}, "outputs": [ @@ -143,7 +143,7 @@ " grad_fn=)" ] }, - "execution_count": 17, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -178,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 6, "id": "c309266a-c601-4633-9404-2e10b1cdde8c", "metadata": {}, "outputs": [ @@ -189,7 +189,7 @@ " grad_fn=)" ] }, - "execution_count": 19, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "id": "0191aa4b-f6a8-4b0d-9c36-65e82b81d071", "metadata": {}, "outputs": [ @@ -229,7 +229,7 @@ " grad_fn=)" ] }, - "execution_count": 22, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -274,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "id": "b5bb56cf-bc73-41ab-b107-91a43f77bdba", "metadata": {}, "outputs": [ @@ -286,7 +286,7 @@ " [0, 1, 0, 0]])" ] }, - "execution_count": 23, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -306,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "id": "ae04c1ed-242e-4dd7-b8f7-4b7e4caae383", "metadata": {}, "outputs": [ @@ -321,14 +321,15 @@ " [-0.3814, 0.3274, -0.1179, 0.1605]], requires_grad=True)" ] }, - "execution_count": 28, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.manual_seed(123)\n", - "linear = torch.nn.Linear(num_idx, out_dim, bias=False)" + "linear = torch.nn.Linear(num_idx, out_dim, bias=False)\n", + "linear.weight" ] }, { @@ -341,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 10, "id": "a3b90d69-761c-486e-bd19-b38a2988fe62", "metadata": {}, "outputs": [], @@ -359,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 11, "id": "90d2b0dd-9f1d-4c0f-bb16-1f6ce6b8ac2c", "metadata": {}, "outputs": [ @@ -371,7 +372,7 @@ " [ 1.3010, 1.2753, -0.2010, -0.1606, -0.4015]], grad_fn=)" ] }, - "execution_count": 31, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -390,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 12, "id": "2b057649-3176-4a54-b58c-fd8fbf818c61", "metadata": {}, "outputs": [ @@ -403,7 +404,7 @@ " grad_fn=)" ] }, - "execution_count": 32, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -452,14 +453,6 @@ "- Since all but one index in each one-hot encoded row are 0 (by design), this matrix multiplication is essentially the same as a look-up of the one-hot elements\n", "- This use of the matrix multiplication on one-hot encodings is equivalent to the embedding layer look-up but can be inefficient if we work with large embedding matrices, because there are a lot of wasteful multiplications by zero" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5eacc005-86fc-490c-8f6a-dc37d8a0df7c", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -478,7 +471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.6" } }, "nbformat": 4,