Use argparse utils to show default args on command line

This commit is contained in:
rasbt
2026-03-01 20:15:21 -06:00
parent c079904491
commit 4612d20fa8
27 changed files with 30 additions and 30 deletions

View File

@@ -290,7 +290,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with grouped-query attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with grouped-query attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")

View File

@@ -278,7 +278,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with standard multi-head attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with standard multi-head attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")

View File

@@ -31,7 +31,7 @@ def calc_kv_bytes_total(batch, context_length, emb_dim, n_heads,
def main():
p = argparse.ArgumentParser(description="Estimate KV-cache memory for MHA vs GQA")
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Estimate KV-cache memory for MHA vs GQA")
p.add_argument("--context_length", default=1024, type=int)
p.add_argument("--emb_dim", required=True, type=int)
p.add_argument("--n_heads", required=True, type=int)

View File

@@ -278,7 +278,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with standard multi-head attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with standard multi-head attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")

View File

@@ -286,13 +286,13 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with standard multi-head attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with standard multi-head attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")
parser.add_argument("--max_new_tokens", type=int, default=200, help="Number of tokens to generate.")
parser.add_argument("--latent_dim", type=int, default=None,
help="Latent dim for MLA (default: d_out//8)")
help="Latent dim for MLA")
args = parser.parse_args()

View File

@@ -37,7 +37,7 @@ def calc_mla_bytes_total(batch, context_length, n_layers, latent_dim, bytes_per_
def main():
p = argparse.ArgumentParser(description="Estimate KV-cache memory for MHA vs GQA vs MLA")
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Estimate KV-cache memory for MHA vs GQA vs MLA")
p.add_argument("--context_length", default=1024, type=int)
p.add_argument("--emb_dim", required=True, type=int)
p.add_argument("--n_heads", required=True, type=int)

View File

@@ -278,7 +278,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with standard multi-head attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with standard multi-head attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")

View File

@@ -311,7 +311,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser(description="Run GPT with standard multi-head attention.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Run GPT with standard multi-head attention.")
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")
parser.add_argument("--n_layers", type=int, default=12, help="Number of transformer blocks.")

View File

@@ -90,7 +90,7 @@ def estimate_totals(context_length, sliding_window_size, emb_dim, n_heads, n_lay
def main():
p = argparse.ArgumentParser(description="Estimate KV-cache memory for MHA/GQA with SWA layer ratio")
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Estimate KV-cache memory for MHA/GQA with SWA layer ratio")
p.add_argument("--context_length", default=1024, type=int)
p.add_argument("--sliding_window_size", required=True, type=int,
help="SWA window size W per SWA layer.")

View File

@@ -102,7 +102,7 @@ def calc_kv_bytes_total_gqa_swa(
def main():
p = argparse.ArgumentParser(
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="KV-cache vs Context Length — MHA vs GQA with SWA overlays"
)
p.add_argument("--emb_dim", type=int, required=True)

View File

@@ -341,7 +341,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--hidden_dim", type=int, default=768*4, help="Intermediate FFN size.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")

View File

@@ -401,7 +401,7 @@ def generate_text_simple_cached(model, idx, max_new_tokens,
def main():
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--emb_dim", type=int, default=768, help="Model embedding dimension.")
parser.add_argument("--hidden_dim", type=int, default=768*4, help="Intermediate FFN or MoE size.")
parser.add_argument("--n_heads", type=int, default=12, help="Number of attention heads.")

View File

@@ -65,7 +65,7 @@ def estimate_params_and_hidden(
def main():
p = argparse.ArgumentParser(
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Estimate FFN vs MoE parameter memory"
)
p.add_argument("--emb_dim", type=int, required=True,

View File

@@ -91,7 +91,7 @@ def plot_active_params_vs_experts(
def main():
p = argparse.ArgumentParser(description="Plot Dense vs MoE active parameters.")
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Plot Dense vs MoE active parameters.")
p.add_argument("--emb_dim", type=int, required=True, help="Embedding dimension")
p.add_argument("--hidden_dim", type=int, required=True, help="Dense FFN hidden size")
p.add_argument("--ffn_type", choices=["gelu", "swiglu"], default="swiglu")

View File

@@ -36,7 +36,7 @@ def convert_to_gb(x):
def main():
p = argparse.ArgumentParser(description="Memory vs. Context Length: MHA vs. DeltaNet (3:1 mix)")
p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Memory vs. Context Length: MHA vs. DeltaNet (3:1 mix)")
p.add_argument("--batch", type=int, default=1)
p.add_argument("--emb_dim", type=int, default=2048)
p.add_argument("--n_heads", type=int, default=16)

View File

@@ -253,16 +253,16 @@ def main(gpt_config, input_prompt, model_size, device):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate text with a pretrained GPT-2 model.")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Generate text with a pretrained GPT-2 model.")
parser.add_argument(
"--prompt",
default="Every effort moves you",
help="Prompt text used to seed the generation (default matches the script's built-in prompt)."
help="Prompt text used to seed the generation."
)
parser.add_argument(
"--device",
default="cpu",
help="Device for running inference, e.g., cpu, cuda, mps, or auto. Defaults to cpu."
help="Device for running inference, e.g., cpu, cuda, mps, or auto."
)
args = parser.parse_args()

View File

@@ -66,7 +66,7 @@ def combine_files(file_paths, target_dir, max_size_mb=500, separator="<|endoftex
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Preprocess and combine text files for pretraining")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Preprocess and combine text files for pretraining")
parser.add_argument("--data_dir", type=str, default="gutenberg/data/raw",
help="Directory containing the downloaded raw training data")

View File

@@ -148,7 +148,7 @@ def train_model_simple(model, optimizer, device, n_epochs,
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GPT Model Training Configuration")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="GPT Model Training Configuration")
parser.add_argument("--data_dir", type=str, default="gutenberg/data",
help="Directory containing the training data")

View File

@@ -239,7 +239,7 @@ if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Finetune a GPT model for classification"
)
parser.add_argument(

View File

@@ -410,7 +410,7 @@ def replace_linear_with_lora(model, rank, alpha, alternative=False):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--model_size",
type=str,

View File

@@ -175,7 +175,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--trainable_layers",
type=str,

View File

@@ -272,7 +272,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--trainable_layers",
type=str,

View File

@@ -225,7 +225,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--model_size",
type=str,

View File

@@ -534,7 +534,7 @@ if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Instruction finetune a GPT model"
)
options = {"baseline", "mask_instructions", "alpaca_52k", "phi3_prompt", "lora"}

View File

@@ -333,7 +333,7 @@ if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Finetune a GPT model for classification"
)
parser.add_argument(

View File

@@ -103,7 +103,7 @@ if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Evaluate model responses with ollama"
)
parser.add_argument(

View File

@@ -100,7 +100,7 @@ def find_print_and_remove_near_duplicates(json_data, remove_duplicates=False, th
if __name__ == "__main__":
print("scikit-learn version:", sklearn_version)
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--json_file",
type=str,