diff --git a/ch04/01_main-chapter-code/ch04.ipynb b/ch04/01_main-chapter-code/ch04.ipynb index 9df7822..28ac9ac 100644 --- a/ch04/01_main-chapter-code/ch04.ipynb +++ b/ch04/01_main-chapter-code/ch04.ipynb @@ -1038,7 +1038,7 @@ "id": "54d2d375-87bd-4153-9040-63a1e6a2b7cb", "metadata": {}, "source": [ - "- Suppose we have 2 input samples with 6 tokens each, where each token is a 768-dimensional embedding vector; then this transformer block applies self-attention, followed by linear layers, to produce an output of similar size\n", + "- Suppose we have 2 input samples with 4 tokens each, where each token is a 768-dimensional embedding vector; then this transformer block applies self-attention, followed by linear layers, to produce an output of the same size\n", "- You can think of the output as an augmented version of the context vectors we discussed in the previous chapter" ] },