diff --git a/ch06/03_bonus_imdb-classification/README.md b/ch06/03_bonus_imdb-classification/README.md index d504b34..46460ff 100644 --- a/ch06/03_bonus_imdb-classification/README.md +++ b/ch06/03_bonus_imdb-classification/README.md @@ -181,6 +181,12 @@ Test accuracy: 92.95% [ModernBERT (2024)](https://arxiv.org/abs/2412.13663) is an optimized reimplementation of BERT that incorporates architectural improvements like parallel residual connections and gated linear units (GLUs) to boost efficiency and performance. It maintains BERT’s original pretraining objectives while achieving faster inference and better scalability on modern hardware. +```bash +python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-base" +``` + + + ``` Ep 1 (Step 000000): Train loss 0.699, Val loss 0.698 Ep 1 (Step 000050): Train loss 0.564, Val loss 0.606 @@ -209,6 +215,10 @@ Test accuracy: 93.79% Same as above but using the larger ModernBERT variant. +```bash +python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-large" +``` + ``` diff --git a/ch06/03_bonus_imdb-classification/train_bert_hf.py b/ch06/03_bonus_imdb-classification/train_bert_hf.py index 11b0c5e..6b0f2ea 100644 --- a/ch06/03_bonus_imdb-classification/train_bert_hf.py +++ b/ch06/03_bonus_imdb-classification/train_bert_hf.py @@ -197,7 +197,7 @@ if __name__ == "__main__": type=str, default="distilbert", help=( - "Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modern-bert-base', 'modern-bert-large." + "Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modernbert-base/-large'." ) ) parser.add_argument( @@ -296,9 +296,9 @@ if __name__ == "__main__": tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large") - elif args.model in ("modern-bert-base", "modern-bert-large"): + elif args.model in ("modernbert-base", "modernbert-large"): - if args.model == "modern-bert-base": + if args.model == "modernbert-base": model = AutoModelForSequenceClassification.from_pretrained( "answerdotai/ModernBERT-base", num_labels=2 ) @@ -330,7 +330,7 @@ if __name__ == "__main__": tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base") - elif args.model == "modern-bert-base": + elif args.model == "modernbert-base": model = AutoModelForSequenceClassification.from_pretrained( "answerdotai/ModernBERT-base", num_labels=2 )