mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Make quote style consistent (#891)
This commit is contained in:
committed by
GitHub
parent
9276edbc37
commit
7ca7c47e4a
@@ -77,7 +77,7 @@ class MultiHeadAttention(nn.Module):
|
||||
self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
|
||||
self.out_proj = nn.Linear(d_out, d_out) # Linear layer to combine head outputs
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
- self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))
|
||||
+ self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1))
|
||||
|
||||
def forward(self, x):
|
||||
b, num_tokens, d_in = x.shape
|
||||
@@ -261,7 +261,7 @@ def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=No
|
||||
# Keep only top_k values
|
||||
top_logits, _ = torch.topk(logits, top_k)
|
||||
min_val = top_logits[:, -1]
|
||||
- logits = torch.where(logits < min_val, torch.tensor(float('-inf')).to(logits.device), logits)
|
||||
+ logits = torch.where(logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits)
|
||||
|
||||
# New: Apply temperature scaling
|
||||
if temperature > 0.0:
|
||||
@@ -356,8 +356,8 @@ def assign(left, right):
|
||||
|
||||
|
||||
def load_weights_into_gpt(gpt, params):
|
||||
- gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params['wpe'])
|
||||
- gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params['wte'])
|
||||
+ gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params["wpe"])
|
||||
+ gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params["wte"])
|
||||
|
||||
for b in range(len(params["blocks"])):
|
||||
q_w, k_w, v_w = np.split(
|
||||
|
||||
@@ -34,7 +34,7 @@ def preprocess_text(text):
|
||||
# Lowercase the text
|
||||
text = text.lower()
|
||||
# Remove punctuation
|
||||
- text = re.sub(r'[^\w\s]', '', text)
|
||||
+ text = re.sub(r"[^\w\s]", "", text)
|
||||
return text
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ def find_near_duplicates(json_data, threshold=0.75, key="instruction"):
|
||||
return {}, near_duplicates
|
||||
|
||||
# Vectorize the text data
|
||||
- vectorizer = TfidfVectorizer(stop_words=None, analyzer='char', ngram_range=(1, 3))
|
||||
+ vectorizer = TfidfVectorizer(stop_words=None, analyzer="char", ngram_range=(1, 3))
|
||||
tfidf_matrix = vectorizer.fit_transform(text)
|
||||
|
||||
# Compute cosine similarity between each pair of entries
|
||||
@@ -84,7 +84,7 @@ def find_print_and_remove_near_duplicates(json_data, remove_duplicates=False, th
|
||||
json_data, near_duplicates = find_near_duplicates(json_data, key=key, threshold=threshold)
|
||||
else:
|
||||
_, near_duplicates = find_near_duplicates(json_data, key=key, threshold=threshold)
|
||||
- separator = 50 * '='
|
||||
+ separator = 50 * "="
|
||||
print(f"\n\n{separator}\nSearching '{key}' for duplicates ...\n{separator}")
|
||||
if not near_duplicates:
|
||||
print("No duplicates found")
|
||||
@@ -114,7 +114,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
parser.add_argument(
|
||||
"--remove_duplicates",
|
||||
- action='store_true',
|
||||
+ action="store_true",
|
||||
default=False,
|
||||
help=(
|
||||
"Removes duplicates based on the 'input' or 'output' keys "
|
||||
|
||||
@@ -77,7 +77,7 @@ class MultiHeadAttention(nn.Module):
|
||||
self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
|
||||
self.out_proj = nn.Linear(d_out, d_out) # Linear layer to combine head outputs
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
- self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))
|
||||
+ self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1))
|
||||
|
||||
def forward(self, x):
|
||||
b, num_tokens, d_in = x.shape
|
||||
@@ -261,7 +261,7 @@ def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=No
|
||||
# Keep only top_k values
|
||||
top_logits, _ = torch.topk(logits, top_k)
|
||||
min_val = top_logits[:, -1]
|
||||
- logits = torch.where(logits < min_val, torch.tensor(float('-inf')).to(logits.device), logits)
|
||||
+ logits = torch.where(logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits)
|
||||
|
||||
# New: Apply temperature scaling
|
||||
if temperature > 0.0:
|
||||
@@ -357,8 +357,8 @@ def assign(left, right):
|
||||
|
||||
|
||||
def load_weights_into_gpt(gpt, params):
|
||||
- gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params['wpe'])
|
||||
- gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params['wte'])
|
||||
+ gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params["wpe"])
|
||||
+ gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params["wte"])
|
||||
|
||||
for b in range(len(params["blocks"])):
|
||||
q_w, k_w, v_w = np.split(
|
||||
|
||||
Reference in New Issue
Block a user