mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Use correct input in layernorm example (#960)
* Update CI * Use correct example in layernorm section * update
This commit is contained in:
committed by
GitHub
parent
be5e2a3331
commit
2d600ccb5b
@@ -38,9 +38,9 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"matplotlib version: 3.10.0\n",
|
"matplotlib version: 3.10.8\n",
|
||||||
"torch version: 2.6.0\n",
|
"torch version: 2.10.0\n",
|
||||||
"tiktoken version: 0.9.0\n"
|
"tiktoken version: 0.12.0\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -492,8 +492,8 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Mean:\n",
|
"Mean:\n",
|
||||||
" tensor([[ -0.0000],\n",
|
" tensor([[-0.0000],\n",
|
||||||
" [ 0.0000]], grad_fn=<MeanBackward1>)\n",
|
" [ 0.0000]], grad_fn=<MeanBackward1>)\n",
|
||||||
"Variance:\n",
|
"Variance:\n",
|
||||||
" tensor([[1.0000],\n",
|
" tensor([[1.0000],\n",
|
||||||
" [1.0000]], grad_fn=<VarBackward0>)\n"
|
" [1.0000]], grad_fn=<VarBackward0>)\n"
|
||||||
@@ -564,8 +564,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ln = LayerNorm(emb_dim=5)\n",
|
"ln = LayerNorm(emb_dim=6)\n",
|
||||||
"out_ln = ln(batch_example)"
|
"out_ln = ln(out)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -579,11 +579,11 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Mean:\n",
|
"Mean:\n",
|
||||||
" tensor([[ -0.0000],\n",
|
" tensor([[-0.0000],\n",
|
||||||
" [ 0.0000]], grad_fn=<MeanBackward1>)\n",
|
" [-0.0000]], grad_fn=<MeanBackward1>)\n",
|
||||||
"Variance:\n",
|
"Variance:\n",
|
||||||
" tensor([[1.0000],\n",
|
" tensor([[0.9995],\n",
|
||||||
" [1.0000]], grad_fn=<VarBackward0>)\n"
|
" [0.9997]], grad_fn=<VarBackward0>)\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -595,6 +595,14 @@
|
|||||||
"print(\"Variance:\\n\", var)"
|
"print(\"Variance:\\n\", var)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "07220af7-cf53-48c8-8831-813a7088edea",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"- Variance is not exactly 1 because we use `eps`"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "e136cfc4-7c89-492e-b120-758c272bca8c",
|
"id": "e136cfc4-7c89-492e-b120-758c272bca8c",
|
||||||
|
|||||||
Submodule reasoning-from-scratch updated: edcae1d894...7b6a9d0f90
Reference in New Issue
Block a user