diff --git a/.github/workflows/basic-tests-linux-uv.yml b/.github/workflows/basic-tests-linux-uv.yml index 82c7e56..bef697b 100644 --- a/.github/workflows/basic-tests-linux-uv.yml +++ b/.github/workflows/basic-tests-linux-uv.yml @@ -33,7 +33,7 @@ jobs: - name: Set up Python (uv) uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Install uv and dependencies shell: bash diff --git a/.github/workflows/basic-tests-macos-uv.yml b/.github/workflows/basic-tests-macos-uv.yml index 138467b..3dec430 100644 --- a/.github/workflows/basic-tests-macos-uv.yml +++ b/.github/workflows/basic-tests-macos-uv.yml @@ -33,13 +33,14 @@ jobs: - name: Set up Python (uv) uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Install uv and dependencies shell: bash run: | curl -LsSf https://astral.sh/uv/install.sh | sh uv sync --dev --python 3.11 + uv python install 3.11 uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt uv add pytest-ruff nbval diff --git a/.github/workflows/basic-tests-old-pytorch.yml b/.github/workflows/basic-tests-old-pytorch.yml index 210d297..116db3c 100644 --- a/.github/workflows/basic-tests-old-pytorch.yml +++ b/.github/workflows/basic-tests-old-pytorch.yml @@ -31,12 +31,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Install dependencies run: | curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.10 + uv python install 3.11 uv add . 
--dev uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt uv add torch==${{ matrix.pytorch-version }} diff --git a/.github/workflows/basic-tests-pip.yml b/.github/workflows/basic-tests-pip.yml index 0c0c4b9..504690f 100644 --- a/.github/workflows/basic-tests-pip.yml +++ b/.github/workflows/basic-tests-pip.yml @@ -33,7 +33,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Create Virtual Environment and Install Dependencies run: | diff --git a/.github/workflows/basic-tests-pytorch-rc.yml b/.github/workflows/basic-tests-pytorch-rc.yml index e91d67d..742bb23 100644 --- a/.github/workflows/basic-tests-pytorch-rc.yml +++ b/.github/workflows/basic-tests-pytorch-rc.yml @@ -27,12 +27,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Install dependencies run: | curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.10 + uv python install 3.11 uv add . 
--dev uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt uv add pytest-ruff nbval diff --git a/.github/workflows/basic-tests-windows-uv-pip.yml b/.github/workflows/basic-tests-windows-uv-pip.yml index b7533b0..bf7aeb1 100644 --- a/.github/workflows/basic-tests-windows-uv-pip.yml +++ b/.github/workflows/basic-tests-windows-uv-pip.yml @@ -29,7 +29,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies shell: bash @@ -37,7 +37,7 @@ jobs: export PATH="$HOME/.local/bin:$PATH" pip install --upgrade pip pip install uv - uv venv --python=python3.10 + uv venv --python=python3.11 source .venv/Scripts/activate pip install -r requirements.txt # because of dependency issue on Windows when using `uv pip` pip install tensorflow-io-gcs-filesystem==0.31.0 # Explicit for Windows diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index b998558..52e41d6 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -18,12 +18,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.10 + uv python install 3.11 uv add . --dev uv add pytest-ruff pytest-check-links # Current version of retry doesn't work well if there are broken non-URL links diff --git a/.github/workflows/check-spelling-errors.yml b/.github/workflows/check-spelling-errors.yml index 3edd99c..d347a2e 100644 --- a/.github/workflows/check-spelling-errors.yml +++ b/.github/workflows/check-spelling-errors.yml @@ -18,12 +18,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install codespell run: | curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.10 + uv python install 3.11 uv add . 
--dev uv add codespell diff --git a/.github/workflows/pep8-linter.yml b/.github/workflows/pep8-linter.yml index 2b4723c..aa9de7f 100644 --- a/.github/workflows/pep8-linter.yml +++ b/.github/workflows/pep8-linter.yml @@ -14,11 +14,11 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install ruff (a faster flake 8 equivalent) run: | curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.10 + uv python install 3.11 uv add . --dev uv add ruff diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb index a750af4..859cc78 100644 --- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb +++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb @@ -382,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "3e4a15ec-2667-4f56-b7c1-34e8071b621d", "metadata": {}, "outputs": [], @@ -499,7 +499,7 @@ " if lines and lines[0].startswith(\"#\"):\n", " lines = lines[1:]\n", "\n", - " for rank, line in enumerate(lines):\n", + " for line in lines:\n", " pair = tuple(line.strip().split())\n", " if len(pair) == 2:\n", " token1, token2 = pair\n", @@ -511,10 +511,10 @@ " merged_token_id = self.inverse_vocab[merged_token]\n", " self.bpe_merges[(token_id1, token_id2)] = merged_token_id\n", " # print(f\"Loaded merge: '{token1}' + '{token2}' -> '{merged_token}' (ID: {merged_token_id})\")\n", + " else:\n", + " print(f\"Merged token '{merged_token}' not found in vocab. Skipping.\")\n", " else:\n", - " print(f\"Merged token '{merged_token}' not found in vocab. 
Skipping.\")\n", - " else:\n", - " print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n", + " print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n", "\n", " def encode(self, text):\n", " \"\"\"\n", @@ -629,7 +629,7 @@ " \"\"\"\n", " # Save vocabulary\n", " with open(vocab_path, \"w\", encoding=\"utf-8\") as file:\n", - " json.dump({k: v for k, v in self.vocab.items()}, file, ensure_ascii=False, indent=2)\n", + " json.dump(self.vocab, file, ensure_ascii=False, indent=2)\n", "\n", " # Save BPE merges as a list of dictionaries\n", " with open(bpe_merges_path, \"w\", encoding=\"utf-8\") as file:\n", @@ -667,6 +667,9 @@ " def find_freq_pair(token_ids, mode=\"most\"):\n", " pairs = Counter(zip(token_ids, token_ids[1:]))\n", "\n", + " if not pairs:\n", + " return None\n", + "\n", " if mode == \"most\":\n", " return max(pairs.items(), key=lambda x: x[1])[0]\n", " elif mode == \"least\":\n", diff --git a/setup/01_optional-python-setup-preferences/README.md b/setup/01_optional-python-setup-preferences/README.md index 2249dfb..0128a09 100644 --- a/setup/01_optional-python-setup-preferences/README.md +++ b/setup/01_optional-python-setup-preferences/README.md @@ -50,7 +50,7 @@ If it returns 3.10 or newer, no further action is required.   > [!NOTE] -> I recommend installing a Python version that is at least 2 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10 or 3.11. +> I recommend installing a Python version that is at least 1-3 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10, 3.11, or 3.12. Otherwise, if Python is not installed or is an older version, you can install it for your operating system as described below. 
@@ -62,7 +62,7 @@ Otherwise, if Python is not installed or is an older version, you can install it ```bash sudo apt update -sudo apt install python3.10 python3.10-venv python3.10-dev +sudo apt install python3.11 python3.11-venv python3.11-dev ```
@@ -246,7 +246,7 @@ conda create -n LLMs python=3.10 new-env -> Many scientific computing libraries do not immediately support the newest version of Python. Therefore, when installing PyTorch, it's advisable to use a version of Python that is one or two releases older. For instance, if the latest version of Python is 3.13, using Python 3.10 or 3.11 is recommended. +> Many scientific computing libraries do not immediately support the newest version of Python. Therefore, when installing PyTorch, it's advisable to use a version of Python that is one to three releases older. For instance, if the latest version of Python is 3.13, using Python 3.10, 3.11, or 3.12 is recommended. Next, activate your new virtual environment (you have to do it every time you open a new terminal window or tab): diff --git a/setup/01_optional-python-setup-preferences/native-uv.md b/setup/01_optional-python-setup-preferences/native-uv.md index abe3314..ea2acd0 100644 --- a/setup/01_optional-python-setup-preferences/native-uv.md +++ b/setup/01_optional-python-setup-preferences/native-uv.md @@ -53,36 +53,28 @@ powershell -c "irm https://astral.sh/uv/install.ps1 | more" > For more installation options, please refer to the official [uv documentation](https://docs.astral.sh/uv/getting-started/installation/#standalone-installer).   -## 2. Install Python - -You can install Python using uv: - -```bash -uv python install 3.10 -``` - -  -> [!NOTE] -> I recommend installing a Python version that is at least 2 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10 or 3.11. You can find out the most recent Python version by visiting [python.org](https://www.python.org/downloads/). - -  -## 3. Install Python packages and dependencies +## 2. 
Install Python packages and dependencies To install all required packages from a `pyproject.toml` file (such as the one located at the top level of this GitHub repository), run the following command, assuming the file is in the same directory as your terminal session: ```bash -uv add . --dev +uv sync --dev --python 3.11 ``` +> [!NOTE] +> If you do not have Python 3.11 available on your system, uv will download and install it for you. +> +> I recommend using a Python version that is one to three versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend using version 3.10, 3.11, or 3.12. You can find out the most recent Python version by visiting [python.org](https://www.python.org/downloads/). + > [!NOTE] > If you have problems with the following commands above due to certain dependencies (for example, if you are using Windows), you can always fall back to regular pip: > `uv add pip` > `uv run python -m pip install -U -r requirements.txt` -Uv install -Note that the `uv add` command above will create a separate virtual environment via the `.venv` subfolder. (In case you want to delete your virtual environment to start from scratch, you can simply delete the `.venv` folder.) + +Note that the `uv sync` command above will create a separate virtual environment via the `.venv` subfolder. (In case you want to delete your virtual environment to start from scratch, you can simply delete the `.venv` folder.) 
You can install new packages, that are not specified in the `pyproject.toml` via `uv add`, for example: diff --git a/setup/03_optional-docker-environment/.devcontainer/Dockerfile b/setup/03_optional-docker-environment/.devcontainer/Dockerfile index 7919433..3e386d7 100644 --- a/setup/03_optional-docker-environment/.devcontainer/Dockerfile +++ b/setup/03_optional-docker-environment/.devcontainer/Dockerfile @@ -4,12 +4,14 @@ FROM pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime # Install Ubuntu packages RUN apt-get update && \ apt-get upgrade -y && \ - apt-get install -y rsync && \ - apt-get install -y git && \ - apt-get install -y curl && \ + apt-get install -y rsync git curl ca-certificates && \ rm -rf /var/lib/apt/lists/* +# Install uv +ADD https://astral.sh/uv/install.sh /uv-installer.sh +RUN sh /uv-installer.sh && rm /uv-installer.sh +ENV PATH="/root/.local/bin/:$PATH" + # Install Python packages COPY requirements.txt requirements.txt -RUN pip install --upgrade pip -RUN pip install --no-cache-dir -r requirements.txt +RUN uv pip install --system --no-cache -r requirements.txt