mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
new experiment w/o causal mask
This commit is contained in:
@@ -153,7 +153,7 @@ def instantiate_model(choose_model, load_weights):
|
||||
|
||||
if not load_weights:
|
||||
torch.manual_seed(123)
|
||||
model = GPTModel(BASE_CONFIG)
|
||||
model = GPTModel(BASE_CONFIG, disable_causal_mask=args.disable_causal_mask)
|
||||
|
||||
if load_weights:
|
||||
model_size = choose_model.split(" ")[-1].lstrip("(").rstrip(")")
|
||||
@@ -386,6 +386,15 @@ if __name__ == "__main__":
|
||||
)
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--disable_causal_mask",
|
||||
action='store_true',
|
||||
default=False,
|
||||
help=(
|
||||
"Disables the causal attention mask."
|
||||
)
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.trainable_token == "first":
|
||||
|
||||
Reference in New Issue
Block a user