huggingface update

Aladdin Persson
2023-03-18 09:51:16 +01:00
parent 94f6c024fe
commit e4659fe56a
14 changed files with 718 additions and 9968 deletions

@@ -1,5 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "7d5e92c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-FOOD', 'score': 0.49999642, 'index': 5, 'word': 'Turtle', 'start': 8, 'end': 14}, {'entity': 'I-FOOD', 'score': 0.6096488, 'index': 6, 'word': '##s', 'start': 14, 'end': 15}, {'entity': 'B-FOOD', 'score': 0.45608267, 'index': 7, 'word': 'Original', 'start': 16, 'end': 24}, {'entity': 'I-FOOD', 'score': 0.6613699, 'index': 8, 'word': 'Cara', 'start': 25, 'end': 29}, {'entity': 'I-FOOD', 'score': 0.5776781, 'index': 9, 'word': '##mel', 'start': 29, 'end': 32}, {'entity': 'I-FOOD', 'score': 0.86556953, 'index': 10, 'word': 'Chocolate', 'start': 33, 'end': 42}, {'entity': 'I-FOOD', 'score': 0.96111995, 'index': 11, 'word': 'P', 'start': 43, 'end': 44}, {'entity': 'I-FOOD', 'score': 0.8003402, 'index': 12, 'word': '##eca', 'start': 44, 'end': 47}, {'entity': 'I-FOOD', 'score': 0.9277613, 'index': 13, 'word': '##n', 'start': 47, 'end': 48}, {'entity': 'I-FOOD', 'score': 0.9217512, 'index': 15, 'word': '##luster', 'start': 50, 'end': 56}]\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Dizex/FoodBaseBERT\")\n",
"\n",
"pipe = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"example = \"Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box\"\n",
"\n",
"ner_entity_results = pipe(example)\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "bf67ee76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turtle s Original Cara mel Chocolate P eca n luster\n"
]
}
],
"source": [
"ner_entity_results = pipe(example)\n",
"\n",
"# Initialize the entity words list with an empty string\n",
"entity_words = [\"\"]\n",
"\n",
"# Loop through each dictionary in the list and extract the entity word\n",
"for result in ner_entity_results:\n",
" if result[\"entity\"] == \"B-FOOD\":\n",
" entity_words.append(result[\"word\"])\n",
" elif result[\"entity\"] == \"I-FOOD\":\n",
" entity_words[-1] += \" \" + result[\"word\"]\n",
"\n",
"# Remove any remaining ## symbols and extra spaces\n",
"entity_words = [word.replace(\"##\", \"\").strip() for word in entity_words]\n",
"\n",
"# Join the entity words into a single string\n",
"output = \" \".join(entity_words)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "code",
"execution_count": null,

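Alternatively, recent transformers releases can do this grouping inside the pipeline itself: passing aggregation_strategy="simple" fuses consecutive tokens of the same entity and reports each group under an entity_group key, with the word rebuilt from character offsets. A minimal sketch, assuming a transformers version that accepts this argument:

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("Dizex/FoodBaseBERT")
model = AutoModelForTokenClassification.from_pretrained("Dizex/FoodBaseBERT")

# "simple" aggregation merges consecutive same-entity tokens, so no
# manual "##" cleanup is needed afterwards
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

example = "Demet's Turtles Original Caramel Chocolate Pecan Clusters 9.3 oz Holiday Gift Box"
for entity in pipe(example):
    # results now carry 'entity_group' instead of the per-token 'entity' key
    print(entity["entity_group"], entity["word"], float(entity["score"]))
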
File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}

@@ -0,0 +1 @@
{}