huggingface update

Aladdin Persson
2023-03-18 09:51:16 +01:00
parent 94f6c024fe
commit e4659fe56a
14 changed files with 718 additions and 9968 deletions

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

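Alternatively, recent transformers releases can do this grouping inside the pipeline itself: passing aggregation_strategy="simple" fuses consecutive tokens of the same entity and reports each group under an entity_group key, with the word rebuilt from character offsets. A minimal sketch, assuming a transformers version that accepts this argument:

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("Dizex/FoodBaseBERT")
model = AutoModelForTokenClassification.from_pretrained("Dizex/FoodBaseBERT")

# "simple" aggregation merges consecutive same-entity tokens, so no
# manual "##" cleanup is needed afterwards
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

example = "Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box"
for entity in pipe(example):
    # results now carry 'entity_group' instead of the per-token 'entity' key
    print(entity["entity_group"], entity["word"], float(entity["score"]))
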
File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}