huggingface update

This commit is contained in:
Aladdin Persson
2023-03-18 09:51:16 +01:00
parent 94f6c024fe
commit e4659fe56a
14 changed files with 718 additions and 9968 deletions

View File

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -6,7 +6,7 @@ from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.data import random_split
import pytorch_lightning as pl
import pytorch_lightning as pl
class NN(pl.LightningModule):
@@ -23,28 +23,28 @@ class NN(pl.LightningModule):
def training_step(self, batch, batch_idx):
    """Run one training step: compute the loss on `batch` and log it.

    Lightning calls this once per training batch; the returned loss is
    what the optimizer backpropagates.
    """
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: the old single-quoted log call was retained
    # alongside the new one, logging "train_loss" twice per step.
    self.log("train_loss", loss)
    return loss
def validation_step(self, batch, batch_idx):
    """Run one validation step: compute the loss on `batch` and log it."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: duplicate self.log('val_loss', ...) removed —
    # only the post-image double-quoted call is kept.
    self.log("val_loss", loss)
    return loss
def test_step(self, batch, batch_idx):
    """Run one test step: compute the loss on `batch` and log it."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: duplicate self.log('test_loss', ...) removed —
    # only the post-image double-quoted call is kept.
    self.log("test_loss", loss)
    return loss
def _common_step(self, batch, batch_idx):
    """Shared forward/loss computation for the train/val/test steps.

    Returns:
        (loss, scores, y): scalar loss, raw model scores, and targets.
    """
    # Diff residue fixed: `x, y = batch` appeared twice (pre- and
    # post-image of the diff); one unpack suffices.
    x, y = batch
    # Flatten everything after the batch dimension into one feature axis
    # (assumes image-like input fed to a fully-connected net — TODO confirm).
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    loss = self.loss_fn(scores, y)
    return loss, scores, y
# Inference hook: flatten the batch, run the forward pass, and take the
# argmax class per sample. NOTE(review): the tail of this method (its
# return statement) is elided by the diff hunk below — confirm against
# the full file.
def predict_step(self, batch, batch_idx):
# NOTE(review): diff residue — this unpack line appears twice; only one
# is needed.
x, y = batch
x, y = batch
x = x.reshape(x.size(0), -1)
scores = self.forward(x)
# Predicted class = index of the highest score along dim 1.
preds = torch.argmax(scores, dim=1)
@@ -53,6 +53,7 @@ class NN(pl.LightningModule):
def configure_optimizers(self):
    """Build the optimizer Lightning uses for this module: Adam, lr=0.001."""
    learning_rate = 0.001
    return optim.Adam(self.parameters(), lr=learning_rate)
# Select CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = NN(input_size=input_size, num_classes=num_classes).to(device)

# NOTE(review): criterion/optimizer are unused once Lightning drives the
# loop (NN.configure_optimizers builds its own Adam) — kept for
# backward compatibility with any code that still references them.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Diff residue fixed: the old one-line pl.Trainer(...) was retained
# alongside the new multi-line construction, building the trainer twice;
# only the post-image construction is kept.
trainer = pl.Trainer(
    accelerator="gpu",
    devices=1,
    min_epochs=1,
    max_epochs=3,
    precision=16,
)
trainer.fit(model, train_loader, val_loader)
trainer.validate(model, val_loader)
trainer.test(model, test_loader)