mha variants

This commit is contained in:
rasbt
2024-03-06 08:30:32 -06:00
parent d4754f1bdd
commit 87fcfd9245
10 changed files with 431 additions and 10 deletions

View File

@@ -56,7 +56,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
class MultiHeadAttention(nn.Module):
def __init__(self, d_in, d_out, block_size, dropout, num_heads, qkv_bias=False):
super().__init__()
-assert d_out % num_heads == 0, "d_out must be divisible by n_heads"
+assert d_out % num_heads == 0, "d_out must be divisible by num_heads"
self.d_out = d_out
self.num_heads = num_heads

View File

@@ -45,7 +45,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
class MultiHeadAttention(nn.Module):
def __init__(self, d_in, d_out, block_size, dropout, num_heads, qkv_bias=False):
super().__init__()
-assert d_out % num_heads == 0, "d_out must be divisible by n_heads"
+assert d_out % num_heads == 0, "d_out must be divisible by num_heads"
self.d_out = d_out
self.num_heads = num_heads