From 9df572fdf4606d728db201d4f4abf5b364bb21ec Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Sun, 6 Apr 2025 18:29:22 -0500
Subject: [PATCH] Improve ModernBERT comments (#606)

* Improve modernbert comments

* bash code formatting
---
 ch06/03_bonus_imdb-classification/README.md        | 10 ++++++++++
 ch06/03_bonus_imdb-classification/train_bert_hf.py |  8 ++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/ch06/03_bonus_imdb-classification/README.md b/ch06/03_bonus_imdb-classification/README.md
index d504b34..46460ff 100644
--- a/ch06/03_bonus_imdb-classification/README.md
+++ b/ch06/03_bonus_imdb-classification/README.md
@@ -181,6 +181,12 @@ Test accuracy: 92.95%
 
 [ModernBERT (2024)](https://arxiv.org/abs/2412.13663) is an optimized reimplementation of BERT that incorporates architectural improvements like parallel residual connections and gated linear units (GLUs) to boost efficiency and performance. It maintains BERT’s original pretraining objectives while achieving faster inference and better scalability on modern hardware.
 
+```bash
+python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-base"
+```
+
+
+
 ```
 Ep 1 (Step 000000): Train loss 0.699, Val loss 0.698
 Ep 1 (Step 000050): Train loss 0.564, Val loss 0.606
@@ -209,6 +215,10 @@ Test accuracy: 93.79%
 
 
 Same as above but using the larger ModernBERT variant.
 
+```bash
+python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "modernbert-large"
+```
+
 
 ```
diff --git a/ch06/03_bonus_imdb-classification/train_bert_hf.py b/ch06/03_bonus_imdb-classification/train_bert_hf.py
index 11b0c5e..6b0f2ea 100644
--- a/ch06/03_bonus_imdb-classification/train_bert_hf.py
+++ b/ch06/03_bonus_imdb-classification/train_bert_hf.py
@@ -197,7 +197,7 @@ if __name__ == "__main__":
         type=str,
         default="distilbert",
         help=(
-            "Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modern-bert-base', 'modern-bert-large."
+            "Which model to train. Options: 'distilbert', 'bert', 'roberta', 'modernbert-base/-large'."
        )
    )
    parser.add_argument(
@@ -296,9 +296,9 @@ if __name__ == "__main__":
         tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
 
-    elif args.model in ("modern-bert-base", "modern-bert-large"):
+    elif args.model in ("modernbert-base", "modernbert-large"):
 
-        if args.model == "modern-bert-base":
+        if args.model == "modernbert-base":
             model = AutoModelForSequenceClassification.from_pretrained(
                 "answerdotai/ModernBERT-base", num_labels=2
             )
 
@@ -330,7 +330,7 @@ if __name__ == "__main__":
         tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 
-    elif args.model == "modern-bert-base":
+    elif args.model == "modernbert-base":
         model = AutoModelForSequenceClassification.from_pretrained(
             "answerdotai/ModernBERT-base", num_labels=2
         )
 