Batched KV Cache Inference for Qwen3 (#735)

2026-04-10 12:33:42 +00:00 · 2025-07-10 08:09:35 -05:00
parent b8c8237251
commit a354555049
8 changed files with 506 additions and 6 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "llms-from-scratch"
-version = "1.0.16"
+version = "1.0.17"
 description = "Implement a ChatGPT-like LLM in PyTorch from scratch, step by step"
 readme = "README.md"
 requires-python = ">=3.10"