From 61b6e35ddffb8b0a51fd787a01202b7f21dd74dc Mon Sep 17 00:00:00 2001 From: Daniel Kleine <53251018+d-kleine@users.noreply.github.com> Date: Tue, 9 Apr 2024 02:37:55 +0200 Subject: [PATCH] Added PDF display support to Docker image and VS Code and updated first step for gutenberg project (#111) * added VS Code extensions recommendations * Added PDF display support to Docker image and VS Code * fixed steps to download the dataset --- .devcontainer/devcontainer.json | 3 ++- .vscode/extensions.json | 1 + ch05/03_bonus_pretraining_on_gutenberg/README.md | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index decf415..1627f29 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -11,7 +11,8 @@ "ms-python.python", "ms-azuretools.vscode-docker", "ms-toolsai.jupyter", - "yahyabatulu.vscode-markdown-alert" + "yahyabatulu.vscode-markdown-alert", + "tomoki1207.pdf" ] } } diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 60c5b2b..daccfa3 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -5,5 +5,6 @@ "ms-azuretools.vscode-docker", "ms-vscode-remote.vscode-remote-extensionpack", "yahyabatulu.vscode-markdown-alert", + "tomoki1207.pdf", ] } \ No newline at end of file diff --git a/ch05/03_bonus_pretraining_on_gutenberg/README.md b/ch05/03_bonus_pretraining_on_gutenberg/README.md index bef41d1..dcb3245 100644 --- a/ch05/03_bonus_pretraining_on_gutenberg/README.md +++ b/ch05/03_bonus_pretraining_on_gutenberg/README.md @@ -23,7 +23,7 @@ As of this writing, this will require approximately 50 GB of disk space, but it Linux and macOS users can follow these steps to download the dataset (if you are a Windows user, please see the note below): -Set the `03_bonus_pretraining_on_gutenberg` folder as working directory to clone the `gutenberg` repository locally in this folder (this is necessary to run the provided scripts 
`prepare_dataset.py` and `pretraining_simple.py`). For instance, when being in the `LLMs-from-scratch` repository's folder, navigate into the *03_bonus_pretraining_on_gutenberg* folder via: +1. Set the `03_bonus_pretraining_on_gutenberg` folder as the working directory to clone the `gutenberg` repository locally in this folder (this is necessary to run the provided scripts `prepare_dataset.py` and `pretraining_simple.py`). For instance, when you are in the `LLMs-from-scratch` repository's folder, navigate into the *03_bonus_pretraining_on_gutenberg* folder via: ```bash cd ch05/03_bonus_pretraining_on_gutenberg ```