Batched KV Cache Inference for Qwen3 (#735)

This commit is contained in:
Sebastian Raschka
2025-07-10 08:09:35 -05:00
committed by GitHub
parent b8c8237251
commit a354555049
8 changed files with 506 additions and 6 deletions

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "llms-from-scratch"
-version = "1.0.16"
+version = "1.0.17"
description = "Implement a ChatGPT-like LLM in PyTorch from scratch, step by step"
readme = "README.md"
requires-python = ">=3.10"