huggingface update

This commit is contained in:
Aladdin Persson
2023-03-18 09:51:16 +01:00
parent 94f6c024fe
commit e4659fe56a
14 changed files with 718 additions and 9968 deletions

View File

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
{}

View File

@@ -6,7 +6,7 @@ from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.data import random_split
import pytorch_lightning as pl
import pytorch_lightning as pl
class NN(pl.LightningModule):
@@ -23,28 +23,28 @@ class NN(pl.LightningModule):
def training_step(self, batch, batch_idx):
    """Run one training step: compute the loss on `batch` and log it.

    Lightning calls this once per training batch; the returned loss is
    what the optimizer backpropagates.
    """
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: the old single-quoted log call was retained
    # alongside the new one, logging "train_loss" twice per step.
    self.log("train_loss", loss)
    return loss
def validation_step(self, batch, batch_idx):
    """Run one validation step: compute the loss on `batch` and log it."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: duplicate self.log('val_loss', ...) removed —
    # only the post-image double-quoted call is kept.
    self.log("val_loss", loss)
    return loss
def test_step(self, batch, batch_idx):
    """Run one test step: compute the loss on `batch` and log it."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Diff residue fixed: duplicate self.log('test_loss', ...) removed —
    # only the post-image double-quoted call is kept.
    self.log("test_loss", loss)
    return loss
def _common_step(self, batch, batch_idx):
    """Shared forward/loss computation for the train/val/test steps.

    Returns:
        (loss, scores, y): scalar loss, raw model scores, and targets.
    """
    # Diff residue fixed: `x, y = batch` appeared twice (pre- and
    # post-image of the diff); one unpack suffices.
    x, y = batch
    # Flatten everything after the batch dimension into one feature axis
    # (assumes image-like input fed to a fully-connected net — TODO confirm).
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    loss = self.loss_fn(scores, y)
    return loss, scores, y
# Inference hook: flatten the batch, run the forward pass, and take the
# argmax class per sample. NOTE(review): the tail of this method (its
# return statement) is elided by the diff hunk below — confirm against
# the full file.
def predict_step(self, batch, batch_idx):
# NOTE(review): diff residue — this unpack line appears twice; only one
# is needed.
x, y = batch
x, y = batch
x = x.reshape(x.size(0), -1)
scores = self.forward(x)
# Predicted class = index of the highest score along dim 1.
preds = torch.argmax(scores, dim=1)
@@ -53,6 +53,7 @@ class NN(pl.LightningModule):
def configure_optimizers(self):
    """Build the optimizer Lightning uses for this module: Adam, lr=0.001."""
    learning_rate = 0.001
    return optim.Adam(self.parameters(), lr=learning_rate)
# Select CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = NN(input_size=input_size, num_classes=num_classes).to(device)

# NOTE(review): criterion/optimizer are unused once Lightning drives the
# loop (NN.configure_optimizers builds its own Adam) — kept for
# backward compatibility with any code that still references them.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Diff residue fixed: the old one-line pl.Trainer(...) was retained
# alongside the new multi-line construction, building the trainer twice;
# only the post-image construction is kept.
trainer = pl.Trainer(
    accelerator="gpu",
    devices=1,
    min_epochs=1,
    max_epochs=3,
    precision=16,
)
trainer.fit(model, train_loader, val_loader)
trainer.validate(model, val_loader)
trainer.test(model, test_loader)