LLMs-from-scratch/ch05/12_gemma3/tests/gemma3-transformers-ref.ipynb
Sebastian Raschka 8447d70b18 Some gemma 3 improvements (#1000)
* some gemma 3 improvements

* update url
2026-04-05 21:05:05 -05:00


{
"cells": [
{
"cell_type": "markdown",
"id": "book-header",
"metadata": {},
"source": [
"<table style=\"width:100%\">\n",
"<tr>\n",
"<td style=\"vertical-align:middle; text-align:left;\">\n",
"<font size=\"2\">\n",
"Supplementary code for the <a href=\"http://mng.bz/orYv\">Build a Large Language Model From Scratch</a> book by <a href=\"https://sebastianraschka.com\">Sebastian Raschka</a><br>\n",
"<br>Code repository: <a href=\"https://github.com/rasbt/LLMs-from-scratch\">https://github.com/rasbt/LLMs-from-scratch</a>\n",
"</font>\n",
"</td>\n",
"<td style=\"vertical-align:middle; text-align:left;\">\n",
"<a href=\"http://mng.bz/orYv\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp\" width=\"100px\"></a>\n",
"</td>\n",
"</tr>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"id": "title-cell",
"metadata": {},
"source": [
"# Gemma 3 270M With Hugging Face Transformers"
]
},
{
"cell_type": "markdown",
"id": "intro-cell",
"metadata": {},
"source": [
"- This notebook uses the minimal `AutoTokenizer` / `AutoModelForCausalLM` workflow from the Transformers tutorials.\n",
"- It uses the same user prompt as [standalone-gemma3.ipynb](../standalone-gemma3.ipynb): `Give me a short introduction to large language models.`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "install-cell",
"metadata": {},
"outputs": [],
"source": [
"# pip install transformers sentencepiece"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "login-cell",
"metadata": {},
"outputs": [],
"source": [
"# Uncomment and run the following code if you are executing the notebook for the first time\n",
"\n",
"# from huggingface_hub import login\n",
"# login()"
]
},
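{
"cell_type": "markdown",
"id": "version-check-note",
"metadata": {},
"source": [
"- Optional check (not part of the original workflow): Gemma 3 support was only added in relatively recent `transformers` releases, so printing the installed version up front can save a confusing model-loading error later; see the model card for the minimum supported version."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "version-check-cell",
"metadata": {},
"outputs": [],
"source": [
"# Optional: confirm the installed transformers version before loading Gemma 3\n",
"import transformers\n",
"\n",
"print(\"transformers version:\", transformers.__version__)"
]
},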
{
"cell_type": "code",
"execution_count": 3,
"id": "load-cell",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c3b335b4a1da4658b90e1ef960de8b49",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading weights: 0%| | 0/236 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"model_id = \"google/gemma-3-270m-it\"\n",
"prompt = \"Give me a short introduction to large language models.\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = AutoModelForCausalLM.from_pretrained(model_id)\n",
"model.generation_config.do_sample = False\n",
"model.generation_config.top_p = None\n",
"model.generation_config.top_k = None\n",
"model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
"model.eval();"
]
},
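{
"cell_type": "markdown",
"id": "param-count-note",
"metadata": {},
"source": [
"- Optional sanity check (not part of the original workflow): counting the model's parameters confirms that the 270M-class checkpoint loaded as expected."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "param-count-cell",
"metadata": {},
"outputs": [],
"source": [
"# Optional: rough parameter count as a sanity check\n",
"num_params = sum(p.numel() for p in model.parameters())\n",
"print(f\"Total parameters: {num_params:,}\")"
]
},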
{
"cell_type": "code",
"execution_count": 4,
"id": "generate-cell",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Large language models (LLMs) are sophisticated artificial intelligence systems that can understand, generate, and manipulate human language. They are trained on massive amounts of text data to learn patterns and relationships within language, enabling them to perform a wide range of tasks, from writing articles and answering questions to translating languages and summarizing information.\n",
"\n"
]
}
],
"source": [
"messages = [{\"role\": \"user\", \"content\": prompt}]\n",
"\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True,\n",
" return_tensors=\"pt\",\n",
" return_dict=True,\n",
")\n",
"\n",
"outputs = model.generate(\n",
" **inputs,\n",
" max_new_tokens=500,\n",
" do_sample=False,\n",
" num_beams=1,\n",
" pad_token_id=tokenizer.pad_token_id,\n",
")\n",
"\n",
"response = tokenizer.decode(\n",
" outputs[0][inputs[\"input_ids\"].shape[-1]:],\n",
" skip_special_tokens=True,\n",
")\n",
"print(response)"
]
}
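,
{
"cell_type": "markdown",
"id": "token-count-note",
"metadata": {},
"source": [
"- Optional follow-up (not part of the original workflow): comparing the prompt length with the total output length shows how many of the `max_new_tokens=500` budget were actually generated before the model emitted its end-of-turn token."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "token-count-cell",
"metadata": {},
"outputs": [],
"source": [
"# Optional: prompt vs. generated token counts\n",
"prompt_len = inputs[\"input_ids\"].shape[-1]\n",
"total_len = outputs.shape[-1]\n",
"print(f\"Prompt tokens: {prompt_len}, generated tokens: {total_len - prompt_len}\")"
]
}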
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}