From cc2383c4de59a486730f7a065701045796c36981 Mon Sep 17 00:00:00 2001 From: rasbt Date: Sat, 2 Mar 2024 16:44:36 -0600 Subject: [PATCH] remove duplicated exercise code --- ch02/01_main-chapter-code/ch02.ipynb | 72 +-------- .../exercise-solutions.ipynb | 143 ++++++++++++------ 2 files changed, 94 insertions(+), 121 deletions(-) diff --git a/ch02/01_main-chapter-code/ch02.ipynb b/ch02/01_main-chapter-code/ch02.ipynb index 5d24c15..005a9e2 100644 --- a/ch02/01_main-chapter-code/ch02.ipynb +++ b/ch02/01_main-chapter-code/ch02.ipynb @@ -873,76 +873,6 @@ "print(strings)" ] }, - { - "cell_type": "markdown", - "id": "f63d62ab-4b80-489c-8041-e4052fe29969", - "metadata": {}, - "source": [ - "- Experiments with unknown words:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ce25cf25-a2bb-44d2-bac1-cb566f433f98", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[33901, 86, 343, 86, 220, 959]\n" - ] - } - ], - "source": [ - "integers = tokenizer.encode(\"Akwirw ier\")\n", - "print(integers)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "3e224f96-41d0-4074-ac6e-f7db2490f806", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "33901 -> Ak\n", - "86 -> w\n", - "343 -> ir\n", - "86 -> w\n", - "220 -> \n", - "959 -> ier\n" - ] - } - ], - "source": [ - "for i in integers:\n", - " print(f\"{i} -> {tokenizer.decode([i])}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "766bcf29-64bf-47ca-9b65-4ae8e607d580", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Akwirw ier\n" - ] - } - ], - "source": [ - "strings = tokenizer.decode(integers)\n", - "print(strings)" - ] - }, { "cell_type": "markdown", "id": "abbd7c0d-70f8-4386-a114-907e96c950b0", @@ -1640,7 +1570,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/ch02/01_main-chapter-code/exercise-solutions.ipynb b/ch02/01_main-chapter-code/exercise-solutions.ipynb index b189648..bc479d0 100644 --- a/ch02/01_main-chapter-code/exercise-solutions.ipynb +++ b/ch02/01_main-chapter-code/exercise-solutions.ipynb @@ -31,6 +31,49 @@ { "cell_type": "code", "execution_count": 2, + "id": "4f235d87-be85-4ddf-95a6-af59fca13d82", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[33901, 86, 343, 86, 220, 959]\n" + ] + } + ], + "source": [ + "integers = tokenizer.encode(\"Akwirw ier\")\n", + "print(integers)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "45e4e8f0-3272-48bb-96f6-cced5584ceea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "33901 -> Ak\n", + "86 -> w\n", + "343 -> ir\n", + "86 -> w\n", + "220 -> \n", + "959 -> ier\n" + ] + } + ], + "source": [ + "for i in integers:\n", + " print(f\"{i} -> {tokenizer.decode([i])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846", "metadata": {}, "outputs": [ @@ -40,7 +83,7 @@ "[33901]" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -49,52 +92,10 @@ "tokenizer.encode(\"Ak\")" ] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d3664332-e6bb-447e-8b96-203aafde8b24", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[86]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenizer.encode(\"w\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2773c09d-c136-4372-a2be-04b58d292842", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[343]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenizer.encode(\"ir\")" - ] - }, { "cell_type": "code", "execution_count": 5, - "id": "8a6abd32-1e0a-4038-9dd2-673f47bcdeb5", + "id": "d3664332-e6bb-447e-8b96-203aafde8b24", "metadata": {}, "outputs": [ { @@ -115,6 +116,48 @@ { "cell_type": "code", "execution_count": 6, + "id": "2773c09d-c136-4372-a2be-04b58d292842", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[343]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer.encode(\"ir\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8a6abd32-1e0a-4038-9dd2-673f47bcdeb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[86]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer.encode(\"w\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "26ae940a-9841-4e27-a1df-b83fc8a488b3", "metadata": {}, "outputs": [ @@ -124,7 +167,7 @@ "[220]" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -135,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "a606c39a-6747-4cd8-bb38-e3183f80908d", "metadata": {}, "outputs": [ @@ -145,7 +188,7 @@ "[959]" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -156,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "47c7268d-8fdc-4957-bc68-5be6113f45a7", "metadata": {}, "outputs": [ @@ -166,7 +209,7 @@ "'Akwirw ier'" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -323,7 +366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4,