mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
fix size of positional embedding layer
This commit is contained in:
@@ -505,7 +505,7 @@
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[14], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m SimpleTokenizerV1(vocab)\n\u001b[1;32m 3\u001b[0m text \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHello, do you like tea. Is this-- a test?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"Cell \u001b[0;32mIn[11], line 9\u001b[0m, in \u001b[0;36mSimpleTokenizerV1.encode\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m 7\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m([,.?_!\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m()\u001b[39m\u001b[38;5;130;01m\\'\u001b[39;00m\u001b[38;5;124m]|--|\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms)\u001b[39m\u001b[38;5;124m'\u001b[39m, text)\n\u001b[1;32m 8\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m [item\u001b[38;5;241m.\u001b[39mstrip() \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m preprocessed \u001b[38;5;28;01mif\u001b[39;00m item\u001b[38;5;241m.\u001b[39mstrip()]\n\u001b[0;32m----> 9\u001b[0m ids \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstr_to_int[s] \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m preprocessed]\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ids\n",
|
||||
"Cell \u001b[0;32mIn[11], line 9\u001b[0m, in \u001b[0;36mSimpleTokenizerV1.encode\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m 7\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m([,.?_!\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m()\u001b[39m\u001b[38;5;130;01m\\'\u001b[39;00m\u001b[38;5;124m]|--|\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms)\u001b[39m\u001b[38;5;124m'\u001b[39m, text)\n\u001b[1;32m 8\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m [item\u001b[38;5;241m.\u001b[39mstrip() \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m preprocessed \u001b[38;5;28;01mif\u001b[39;00m item\u001b[38;5;241m.\u001b[39mstrip()]\n\u001b[0;32m----> 9\u001b[0m ids \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstr_to_int\u001b[49m\u001b[43m[\u001b[49m\u001b[43ms\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ms\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpreprocessed\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ids\n",
|
||||
"Cell \u001b[0;32mIn[11], line 9\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 7\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m([,.?_!\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m()\u001b[39m\u001b[38;5;130;01m\\'\u001b[39;00m\u001b[38;5;124m]|--|\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms)\u001b[39m\u001b[38;5;124m'\u001b[39m, text)\n\u001b[1;32m 8\u001b[0m preprocessed \u001b[38;5;241m=\u001b[39m [item\u001b[38;5;241m.\u001b[39mstrip() \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m preprocessed \u001b[38;5;28;01mif\u001b[39;00m item\u001b[38;5;241m.\u001b[39mstrip()]\n\u001b[0;32m----> 9\u001b[0m ids \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstr_to_int\u001b[49m\u001b[43m[\u001b[49m\u001b[43ms\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m preprocessed]\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ids\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 'Hello'"
|
||||
]
|
||||
@@ -1049,7 +1049,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"PyTorch version: 2.1.0\n"
|
||||
"PyTorch version: 2.0.1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1403,7 +1403,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 47,
|
||||
"id": "0b9e344d-03a6-4f2c-b723-67b6a20c5041",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1425,7 +1425,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 48,
|
||||
"id": "ad56a263-3d2e-4d91-98bf-d0b68d3c7fc3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1438,7 +1438,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 49,
|
||||
"id": "84416b60-3707-4370-bcbc-da0b62f2b64d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1468,7 +1468,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 50,
|
||||
"id": "7766ec38-30d0-4128-8c31-f49f063c43d1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1495,17 +1495,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 51,
|
||||
"id": "cc048e20-7ac8-417e-81f5-8fe6f9a4fe07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pos_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)"
|
||||
"block_size = max_length\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(block_size, output_dim)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 52,
|
||||
"id": "c369a1e7-d566-4b53-b398-d6adafb44105",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1532,7 +1533,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 53,
|
||||
"id": "b22fab89-526e-43c8-9035-5b7018e34288",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1548,22 +1549,6 @@
|
||||
"input_embeddings = token_embeddings + pos_embeddings\n",
|
||||
"print(input_embeddings.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a6b71f61-57f4-496b-bf48-9097c591f54c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2894bbd-6cf5-4bfa-80ad-a23b5d1a45f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -1582,7 +1567,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "98efe79e-daa3-40d0-ab4d-f667d4d6ba9d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/Author/miniforge3/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n",
|
||||
"Downloading (…)olve/main/vocab.json: 100%|█| 1.04M/1.04M [00:00<00:00, 1.66MB/s]\n",
|
||||
"Downloading (…)olve/main/merges.txt: 100%|███| 456k/456k [00:00<00:00, 2.44MB/s]\n",
|
||||
"Downloading (…)/main/tokenizer.json: 100%|█| 1.36M/1.36M [00:00<00:00, 1.97MB/s]\n",
|
||||
"Downloading (…)lve/main/config.json: 100%|██████| 718/718 [00:00<00:00, 621kB/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Decoded Inputs:\n",
|
||||
"I HAD always\n",
|
||||
" Jack Gisburn\n",
|
||||
" a cheap genius--\n",
|
||||
" a good fellow enough\n",
|
||||
"so it was no\n",
|
||||
" surprise to me to\n",
|
||||
" that, in the\n",
|
||||
" of his glory,\n",
|
||||
"\n",
|
||||
"Decoded Targets:\n",
|
||||
" HAD always thought\n",
|
||||
" Gisburn rather\n",
|
||||
" cheap genius--though\n",
|
||||
" good fellow enough--\n",
|
||||
" it was no great\n",
|
||||
" to me to hear\n",
|
||||
", in the height\n",
|
||||
" his glory, he\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from transformers import GPT2Tokenizer\n",
|
||||
"\n",
|
||||
"tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')\n",
|
||||
"\n",
|
||||
"inputs = torch.tensor([\n",
|
||||
" [40, 367, 2885, 1464],\n",
|
||||
" [3619, 402, 271, 10899],\n",
|
||||
" [257, 7026, 15632, 438],\n",
|
||||
" [257, 922, 5891, 1576],\n",
|
||||
" [568, 340, 373, 645],\n",
|
||||
" [5975, 284, 502, 284],\n",
|
||||
" [326, 11, 287, 262],\n",
|
||||
" [286, 465, 13476, 11]\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"targets = torch.tensor([\n",
|
||||
" [367, 2885, 1464, 1807],\n",
|
||||
" [402, 271, 10899, 2138],\n",
|
||||
" [7026, 15632, 438, 2016],\n",
|
||||
" [922, 5891, 1576, 438],\n",
|
||||
" [340, 373, 645, 1049],\n",
|
||||
" [284, 502, 284, 3285],\n",
|
||||
" [11, 287, 262, 6001],\n",
|
||||
" [465, 13476, 11, 339]\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"decoded_inputs = [tokenizer.decode(i) for i in inputs]\n",
|
||||
"decoded_targets = [tokenizer.decode(t) for t in targets]\n",
|
||||
"\n",
|
||||
"print(\"Decoded Inputs:\")\n",
|
||||
"for di in decoded_inputs:\n",
|
||||
" print(di)\n",
|
||||
"\n",
|
||||
"print(\"\\nDecoded Targets:\")\n",
|
||||
"for dt in decoded_targets:\n",
|
||||
" print(dt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "defc6b2f-9ac2-49e0-a4e1-03247cacffce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user