mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Merge branch 'main' into uv-improvements
This commit is contained in:
2
.github/workflows/basic-tests-linux-uv.yml
vendored
2
.github/workflows/basic-tests-linux-uv.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Set up Python (uv)
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install uv and dependencies
|
||||
shell: bash
|
||||
|
||||
3
.github/workflows/basic-tests-macos-uv.yml
vendored
3
.github/workflows/basic-tests-macos-uv.yml
vendored
@@ -33,13 +33,14 @@ jobs:
|
||||
- name: Set up Python (uv)
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install uv and dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv sync --dev --python 3.11
|
||||
uv python install 3.11
|
||||
uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
|
||||
uv add pytest-ruff nbval
|
||||
|
||||
|
||||
@@ -31,12 +31,12 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv python install 3.10
|
||||
uv python install 3.11
|
||||
uv add . --dev
|
||||
uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
|
||||
uv add torch==${{ matrix.pytorch-version }}
|
||||
|
||||
2
.github/workflows/basic-tests-pip.yml
vendored
2
.github/workflows/basic-tests-pip.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Create Virtual Environment and Install Dependencies
|
||||
run: |
|
||||
|
||||
4
.github/workflows/basic-tests-pytorch-rc.yml
vendored
4
.github/workflows/basic-tests-pytorch-rc.yml
vendored
@@ -27,12 +27,12 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv python install 3.10
|
||||
uv python install 3.11
|
||||
uv add . --dev
|
||||
uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
|
||||
uv add pytest-ruff nbval
|
||||
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
export PATH="$HOME/.local/bin:$PATH"
|
||||
pip install --upgrade pip
|
||||
pip install uv
|
||||
uv venv --python=python3.10
|
||||
uv venv --python=python3.11
|
||||
source .venv/Scripts/activate
|
||||
pip install -r requirements.txt # because of dependency issue on Windows when using `uv pip`
|
||||
pip install tensorflow-io-gcs-filesystem==0.31.0 # Explicit for Windows
|
||||
|
||||
4
.github/workflows/check-links.yml
vendored
4
.github/workflows/check-links.yml
vendored
@@ -18,12 +18,12 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv python install 3.10
|
||||
uv python install 3.11
|
||||
uv add . --dev
|
||||
uv add pytest-ruff pytest-check-links
|
||||
# Current version of retry doesn't work well if there are broken non-URL links
|
||||
|
||||
4
.github/workflows/check-spelling-errors.yml
vendored
4
.github/workflows/check-spelling-errors.yml
vendored
@@ -18,12 +18,12 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install codespell
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv python install 3.10
|
||||
uv python install 3.11
|
||||
uv add . --dev
|
||||
uv add codespell
|
||||
|
||||
|
||||
4
.github/workflows/pep8-linter.yml
vendored
4
.github/workflows/pep8-linter.yml
vendored
@@ -14,11 +14,11 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.11'
|
||||
- name: Install ruff (a faster flake 8 equivalent)
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv python install 3.10
|
||||
uv python install 3.11
|
||||
uv add . --dev
|
||||
uv add ruff
|
||||
|
||||
|
||||
@@ -382,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "3e4a15ec-2667-4f56-b7c1-34e8071b621d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -499,7 +499,7 @@
|
||||
" if lines and lines[0].startswith(\"#\"):\n",
|
||||
" lines = lines[1:]\n",
|
||||
"\n",
|
||||
" for rank, line in enumerate(lines):\n",
|
||||
" for line in lines:\n",
|
||||
" pair = tuple(line.strip().split())\n",
|
||||
" if len(pair) == 2:\n",
|
||||
" token1, token2 = pair\n",
|
||||
@@ -511,10 +511,10 @@
|
||||
" merged_token_id = self.inverse_vocab[merged_token]\n",
|
||||
" self.bpe_merges[(token_id1, token_id2)] = merged_token_id\n",
|
||||
" # print(f\"Loaded merge: '{token1}' + '{token2}' -> '{merged_token}' (ID: {merged_token_id})\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Merged token '{merged_token}' not found in vocab. Skipping.\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Merged token '{merged_token}' not found in vocab. Skipping.\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n",
|
||||
" print(f\"Skipping pair {pair} as one of the tokens is not in the vocabulary.\")\n",
|
||||
"\n",
|
||||
" def encode(self, text):\n",
|
||||
" \"\"\"\n",
|
||||
@@ -629,7 +629,7 @@
|
||||
" \"\"\"\n",
|
||||
" # Save vocabulary\n",
|
||||
" with open(vocab_path, \"w\", encoding=\"utf-8\") as file:\n",
|
||||
" json.dump({k: v for k, v in self.vocab.items()}, file, ensure_ascii=False, indent=2)\n",
|
||||
" json.dump(self.vocab, file, ensure_ascii=False, indent=2)\n",
|
||||
"\n",
|
||||
" # Save BPE merges as a list of dictionaries\n",
|
||||
" with open(bpe_merges_path, \"w\", encoding=\"utf-8\") as file:\n",
|
||||
@@ -667,6 +667,9 @@
|
||||
" def find_freq_pair(token_ids, mode=\"most\"):\n",
|
||||
" pairs = Counter(zip(token_ids, token_ids[1:]))\n",
|
||||
"\n",
|
||||
" if not pairs:\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" if mode == \"most\":\n",
|
||||
" return max(pairs.items(), key=lambda x: x[1])[0]\n",
|
||||
" elif mode == \"least\":\n",
|
||||
|
||||
@@ -50,7 +50,7 @@ If it returns 3.10 or newer, no further action is required.
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
> I recommend installing a Python version that is at least 2 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10 or 3.11.
|
||||
> I recommend installing a Python version that is one to three versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10, 3.11, or 3.12.
|
||||
|
||||
Otherwise, if Python is not installed or is an older version, you can install it for your operating system as described below.
|
||||
|
||||
@@ -62,7 +62,7 @@ Otherwise, if Python is not installed or is an older version, you can install it
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install python3.10 python3.10-venv python3.10-dev
|
||||
sudo apt install python3.11 python3.11-venv python3.11-dev
|
||||
```
|
||||
|
||||
<br>
|
||||
@@ -246,7 +246,7 @@ conda create -n LLMs python=3.10
|
||||
|
||||
<img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/01_optional-python-setup-preferences/new-env.png" alt="new-env" width="600px">
|
||||
|
||||
> Many scientific computing libraries do not immediately support the newest version of Python. Therefore, when installing PyTorch, it's advisable to use a version of Python that is one or two releases older. For instance, if the latest version of Python is 3.13, using Python 3.10 or 3.11 is recommended.
|
||||
> Many scientific computing libraries do not immediately support the newest version of Python. Therefore, when installing PyTorch, it's advisable to use a version of Python that is one to three releases older. For instance, if the latest version of Python is 3.13, using Python 3.10, 3.11, or 3.12 is recommended.
|
||||
|
||||
Next, activate your new virtual environment (you have to do it every time you open a new terminal window or tab):
|
||||
|
||||
|
||||
@@ -53,36 +53,28 @@ powershell -c "irm https://astral.sh/uv/install.ps1 | more"
|
||||
> For more installation options, please refer to the official [uv documentation](https://docs.astral.sh/uv/getting-started/installation/#standalone-installer).
|
||||
|
||||
|
||||
## 2. Install Python
|
||||
|
||||
You can install Python using uv:
|
||||
|
||||
```bash
|
||||
uv python install 3.10
|
||||
```
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
> I recommend installing a Python version that is at least 2 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10 or 3.11. You can find out the most recent Python version by visiting [python.org](https://www.python.org/downloads/).
|
||||
|
||||
|
||||
## 3. Install Python packages and dependencies
|
||||
## 2. Install Python packages and dependencies
|
||||
|
||||
To install all required packages from a `pyproject.toml` file (such as the one located at the top level of this GitHub repository), run the following command, assuming the file is in the same directory as your terminal session:
|
||||
|
||||
```bash
|
||||
uv add . --dev
|
||||
uv sync --dev --python 3.11
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> If you do not have Python 3.11 available on your system, uv will download and install it for you.
|
||||
>
|
||||
> I recommend using a Python version that is one to three versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend using version 3.10, 3.11, or 3.12. You can find out the most recent Python version by visiting [python.org](https://www.python.org/downloads/).
|
||||
|
||||
> [!NOTE]
|
||||
> If you have problems with the command above due to certain dependencies (for example, if you are using Windows), you can always fall back to regular pip:
|
||||
> `uv add pip`
|
||||
> `uv run python -m pip install -U -r requirements.txt`
|
||||
|
||||
|
||||
<img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/uv-setup/uv-add.png?1" width="700" height="auto" alt="Uv install">
|
||||
|
||||
Note that the `uv add` command above will create a separate virtual environment via the `.venv` subfolder. (In case you want to delete your virtual environment to start from scratch, you can simply delete the `.venv` folder.)
|
||||
|
||||
Note that the `uv sync` command above will create a separate virtual environment via the `.venv` subfolder. (In case you want to delete your virtual environment to start from scratch, you can simply delete the `.venv` folder.)
|
||||
|
||||
You can install new packages that are not specified in `pyproject.toml` via `uv add`, for example:
|
||||
|
||||
|
||||
@@ -4,12 +4,14 @@ FROM pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
# Install Ubuntu packages
|
||||
RUN apt-get update && \
|
||||
apt-get upgrade -y && \
|
||||
apt-get install -y rsync && \
|
||||
apt-get install -y git && \
|
||||
apt-get install -y curl && \
|
||||
apt-get install -y rsync git curl ca-certificates && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv
|
||||
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
||||
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
||||
ENV PATH="/root/.local/bin/:$PATH"
|
||||
|
||||
# Install Python packages
|
||||
COPY requirements.txt requirements.txt
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN uv pip install --system --no-cache -r requirements.txt
|
||||
|
||||
Reference in New Issue
Block a user