mirror of https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
add mmap=True comparison
@@ -752,7 +752,99 @@
    "metadata": {},
    "source": [
     " \n",
-    "## 6. Other methods"
+    "## 6. Using `mmap=True`"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- As an intermediate or advanced `torch.load` user, you may wonder how these approaches compare to the `mmap=True` setting in PyTorch\n",
+    "- The `mmap=True` setting in PyTorch enables memory-mapped file I/O, which allows the tensor to access data directly from disk storage, thus reducing memory usage by not loading the entire file into RAM\n",
+    "- However, in practice, I found it to be less efficient than the sequential approaches above"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "7AX3vPrpv5c_",
+    "outputId": "e6fca10b-55c3-4e89-8674-075df5ce26e7"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum GPU memory allocated: 6.4 GB\n",
+      "-> Maximum CPU memory allocated: 9.9 GB\n"
+     ]
+    }
+   ],
+   "source": [
+    "def baseline_mmap():\n",
+    "    start_memory_tracking()\n",
+    "\n",
+    "    model = GPTModel(BASE_CONFIG) # load model on CPU\n",
+    "\n",
+    "    model.load_state_dict(\n",
+    "        torch.load(\"model.pth\", map_location=\"cpu\", weights_only=True, mmap=True)\n",
+    "    )\n",
+    "    model.to(device) # Move model to GPU\n",
+    "    model.eval();\n",
+    "\n",
+    "    print_memory_usage()\n",
+    "\n",
+    "peak_memory_used = memory_usage_in_gb(baseline_mmap)\n",
+    "print(f\"-> Maximum CPU memory allocated: {peak_memory_used:.1f} GB\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "KUyK3QVRwmjR",
+    "outputId": "a77c191a-2f9e-4ae5-be19-8ce128e704e9"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum GPU memory allocated: 12.8 GB\n",
+      "-> Maximum CPU memory allocated: 7.0 GB\n"
+     ]
+    }
+   ],
+   "source": [
+    "def baseline_mmap_2():\n",
+    "    start_memory_tracking()\n",
+    "\n",
+    "    model = GPTModel(BASE_CONFIG).to(device) # create model directly on the GPU\n",
+    "\n",
+    "    model.load_state_dict(\n",
+    "        torch.load(\"model.pth\", map_location=device, weights_only=True, mmap=True)\n",
+    "    )\n",
+    "    model.eval();\n",
+    "\n",
+    "    print_memory_usage()\n",
+    "\n",
+    "peak_memory_used = memory_usage_in_gb(baseline_mmap_2)\n",
+    "print(f\"-> Maximum CPU memory allocated: {peak_memory_used:.1f} GB\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " \n",
+    "## 7. Other methods"
+   ]
+  },
   {
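
A note on the two new cells: baseline_mmap builds the model on the CPU, loads the memory-mapped checkpoint there, and only then moves the model to the GPU, whereas baseline_mmap_2 creates the model on the GPU and maps the checkpoint straight onto it. The recorded outputs show the trade-off: 6.4 GB GPU / 9.9 GB CPU for the first variant versus 12.8 GB GPU / 7.0 GB CPU for the second, presumably because in the second case the GPU briefly holds both the freshly initialized weights and the loaded checkpoint copy.

Both cells depend on GPTModel, BASE_CONFIG, and the helpers start_memory_tracking, print_memory_usage, and memory_usage_in_gb defined earlier in the notebook. For trying mmap=True outside the notebook, a minimal self-contained sketch follows; the stand-in model and the psutil-based RSS probe are illustrative assumptions, not part of this commit, so absolute numbers will differ from the outputs above.

    import os

    import psutil  # assumption: psutil is installed; used only as a rough RSS probe
    import torch
    import torch.nn as nn


    def cpu_memory_in_gb():
        # Resident set size of the current process in GB, a coarse stand-in
        # for the notebook's memory_usage_in_gb helper
        return psutil.Process(os.getpid()).memory_info().rss / 1024**3


    # Stand-in for GPTModel(BASE_CONFIG): any module with a sizable state dict
    model = nn.Sequential(*[nn.Linear(2048, 2048) for _ in range(16)])
    torch.save(model.state_dict(), "sketch_model.pth")

    before = cpu_memory_in_gb()

    # mmap=True (available since PyTorch 2.1) memory-maps the checkpoint
    # instead of reading it into RAM up front; tensor data is paged in from
    # disk as it is accessed
    state_dict = torch.load(
        "sketch_model.pth", map_location="cpu", weights_only=True, mmap=True
    )
    model.load_state_dict(state_dict)

    after = cpu_memory_in_gb()
    print(f"CPU memory increase during load: {after - before:.2f} GB")

One plausible reason mmap=True trails the sequential approaches from the earlier sections is that tensor data is pulled in from disk via page faults on first access, which can be slower than one large sequential read of the checkpoint file.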