Make quote style consistent (#891)

2026-04-10 12:33:42 +00:00 · 2025-10-21 19:42:33 -05:00
parent 9276edbc37
commit 7ca7c47e4a
24 changed files with 239 additions and 81 deletions
--- a/ch07/01_main-chapter-code/previous_chapters.py
+++ b/ch07/01_main-chapter-code/previous_chapters.py
@@ -77,7 +77,7 @@ class MultiHeadAttention(nn.Module):
        self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
        self.out_proj = nn.Linear(d_out, d_out)  # Linear layer to combine head outputs
        self.dropout = nn.Dropout(dropout)
-        self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))
+        self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1))

    def forward(self, x):
        b, num_tokens, d_in = x.shape
@@ -261,7 +261,7 @@ def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=No
            # Keep only top_k values
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, -1]
-            logits = torch.where(logits < min_val, torch.tensor(float('-inf')).to(logits.device), logits)
+            logits = torch.where(logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits)

        # New: Apply temperature scaling
        if temperature > 0.0:
@@ -356,8 +356,8 @@ def assign(left, right):


 def load_weights_into_gpt(gpt, params):
-    gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params['wpe'])
-    gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params['wte'])
+    gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params["wpe"])
+    gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params["wte"])

    for b in range(len(params["blocks"])):
        q_w, k_w, v_w = np.split(
--- a/ch07/02_dataset-utilities/find-near-duplicates.py
+++ b/ch07/02_dataset-utilities/find-near-duplicates.py
@@ -34,7 +34,7 @@ def preprocess_text(text):
    # Lowercase the text
    text = text.lower()
    # Remove punctuation
-    text = re.sub(r'[^\w\s]', '', text)
+    text = re.sub(r"[^\w\s]", "", text)
    return text


@@ -50,7 +50,7 @@ def find_near_duplicates(json_data, threshold=0.75, key="instruction"):
        return {}, near_duplicates

    # Vectorize the text data
-    vectorizer = TfidfVectorizer(stop_words=None, analyzer='char', ngram_range=(1, 3))
+    vectorizer = TfidfVectorizer(stop_words=None, analyzer="char", ngram_range=(1, 3))
    tfidf_matrix = vectorizer.fit_transform(text)

    # Compute cosine similarity between each pair of entries
@@ -84,7 +84,7 @@ def find_print_and_remove_near_duplicates(json_data, remove_duplicates=False, th
            json_data, near_duplicates = find_near_duplicates(json_data, key=key, threshold=threshold)
        else:
            _, near_duplicates = find_near_duplicates(json_data, key=key, threshold=threshold)
-        separator = 50 * '='
+        separator = 50 * "="
        print(f"\n\n{separator}\nSearching '{key}' for duplicates ...\n{separator}")
        if not near_duplicates:
            print("No duplicates found")
@@ -114,7 +114,7 @@ if __name__ == "__main__":
    )
    parser.add_argument(
        "--remove_duplicates",
-        action='store_true',
+        action="store_true",
        default=False,
        help=(
            "Removes duplicates based on the 'input' or 'output' keys "
--- a/ch07/04_preference-tuning-with-dpo/previous_chapters.py
+++ b/ch07/04_preference-tuning-with-dpo/previous_chapters.py
@@ -77,7 +77,7 @@ class MultiHeadAttention(nn.Module):
        self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
        self.out_proj = nn.Linear(d_out, d_out)  # Linear layer to combine head outputs
        self.dropout = nn.Dropout(dropout)
-        self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))
+        self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1))

    def forward(self, x):
        b, num_tokens, d_in = x.shape
@@ -261,7 +261,7 @@ def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=No
            # Keep only top_k values
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, -1]
-            logits = torch.where(logits < min_val, torch.tensor(float('-inf')).to(logits.device), logits)
+            logits = torch.where(logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits)

        # New: Apply temperature scaling
        if temperature > 0.0:
@@ -357,8 +357,8 @@ def assign(left, right):


 def load_weights_into_gpt(gpt, params):
-    gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params['wpe'])
-    gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params['wte'])
+    gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params["wpe"])
+    gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params["wte"])

    for b in range(len(params["blocks"])):
        q_w, k_w, v_w = np.split(