mirror of
https://github.com/frankwxu/AI4DigitalForensics.git
synced 2026-02-20 13:40:40 +00:00
add lecture 10
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
pip install ipywidgets
|
||||
pip install scikit-learn
|
||||
pip install ultralytics
|
||||
pip install ultralytics opencv-python
|
||||
pip install ultralytics opencv-python
|
||||
pip install transformers
|
||||
@@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "739c5173",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -23,7 +23,7 @@
|
||||
"'2.6.0+cu126'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -38,12 +38,14 @@
|
||||
"id": "75acf7d8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multi-dimensional"
|
||||
"### Multi-dimensional\n",
|
||||
"\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "0e82be1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -53,7 +55,7 @@
|
||||
"tensor(5)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -66,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "7c239759",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -76,7 +78,7 @@
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -85,9 +87,18 @@
|
||||
"x.ndim"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "24ec3101",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "d176548d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -97,7 +108,7 @@
|
||||
"torch.Size([])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -108,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "07e03145",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -118,7 +129,7 @@
|
||||
"5"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -129,7 +140,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "41fcc46e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -139,7 +150,7 @@
|
||||
"tensor([1, 2, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -152,7 +163,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "f9894c37",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -162,7 +173,7 @@
|
||||
"1"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -173,7 +184,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "7dc166eb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -183,7 +194,7 @@
|
||||
"torch.Size([3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -195,7 +206,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"id": "2581817b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -206,7 +217,7 @@
|
||||
" [ 9, 10]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -220,7 +231,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"id": "46961042",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -230,7 +241,7 @@
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -241,7 +252,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"id": "9669fda8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -251,7 +262,7 @@
|
||||
"torch.Size([2, 2])"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -262,7 +273,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"id": "15297945",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -274,7 +285,7 @@
|
||||
" [2, 4, 5]]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -289,7 +300,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"id": "5bbed071",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -299,7 +310,7 @@
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -310,9 +321,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"id": "483d25c7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"torch.Size([1, 3, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"TENSOR.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "c4e76ef2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -325,34 +357,13 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"TENSOR.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c4e76ef2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"torch.Size([1, 3, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"TENSOR.size()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 16,
|
||||
"id": "b56abf50",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -364,7 +375,7 @@
|
||||
" [6, 9]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -376,7 +387,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 17,
|
||||
"id": "cdd39ae8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -391,7 +402,7 @@
|
||||
" [9]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -403,7 +414,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 18,
|
||||
"id": "adf1ab41",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -415,7 +426,7 @@
|
||||
" [2., 4., 5.]]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -430,7 +441,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 19,
|
||||
"id": "a368079f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -440,7 +451,7 @@
|
||||
"torch.float32"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -451,20 +462,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 20,
|
||||
"id": "4d00ea95",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(tensor([[0.9019, 0.8531],\n",
|
||||
" [0.9996, 0.5826]]),\n",
|
||||
" tensor([[0.0682, 0.6102],\n",
|
||||
" [0.5610, 0.0305]]))"
|
||||
"(tensor([[0.0440, 0.2059],\n",
|
||||
" [0.1639, 0.4233]]),\n",
|
||||
" tensor([[0.1890, 0.7100],\n",
|
||||
" [0.9819, 0.5552]]))"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -485,9 +496,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 21,
|
||||
"id": "aeed7a0a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tensor([1, 2])"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x=torch.tensor([1, 2, 3, 4, 5, 6])\n",
|
||||
"x[0:2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "721ce7eb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -500,28 +533,6 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x=torch.tensor([1, 2, 3, 4, 5, 6])\n",
|
||||
"x[0:2]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "721ce7eb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tensor([1, 2])"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x=torch.tensor([1, 2, 3, 4, 5, 6])\n",
|
||||
"x[:2]"
|
||||
@@ -529,7 +540,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 23,
|
||||
"id": "6423f4d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -539,7 +550,7 @@
|
||||
"tensor(6)"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -551,7 +562,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 24,
|
||||
"id": "0125386f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -561,7 +572,7 @@
|
||||
"tensor([3, 4, 5, 6])"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -573,7 +584,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 25,
|
||||
"id": "97373387",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -584,7 +595,7 @@
|
||||
" [4, 5]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -604,7 +615,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"id": "bba6b1b4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -614,7 +625,7 @@
|
||||
"tensor([4, 5, 6])"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -626,7 +637,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"id": "12a96c84",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -636,7 +647,7 @@
|
||||
"tensor([3, 6, 9])"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -648,7 +659,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"id": "a0f73c88",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -658,7 +669,7 @@
|
||||
"tensor([[5, 6]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -673,6 +684,32 @@
|
||||
"x[1:2, 1:3] # tensor([[5, 6]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "485c115b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 4\n",
|
||||
"2 5\n",
|
||||
"3 6\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# combine multiple iterables (like lists or tuples) element-wise \n",
|
||||
"# into a single iterable of tuples.\n",
|
||||
"a = torch.tensor([1, 2, 3])\n",
|
||||
"b = torch.tensor([4, 5, 6])\n",
|
||||
"\n",
|
||||
"for x, y in zip(a, b):\n",
|
||||
" print(x.item(), y.item())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a3c1d8b5",
|
||||
@@ -749,7 +786,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 33,
|
||||
"id": "cfa1dcae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -759,7 +796,7 @@
|
||||
"tensor(6)"
|
||||
]
|
||||
},
|
||||
"execution_count": 54,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -771,7 +808,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 34,
|
||||
"id": "f27ae72f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -781,7 +818,7 @@
|
||||
"tensor(3)"
|
||||
]
|
||||
},
|
||||
"execution_count": 55,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -793,19 +830,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 35,
|
||||
"id": "e10312d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_outputs = torch.tensor([[2.5, 0.8, 1.3], # Sample 1\n",
|
||||
" [0.4, 3.2, 1.9]]) # Sample 2\n",
|
||||
"max_values, max_indices = torch.max(test_outputs,1)"
|
||||
"max_values, max_indices = torch.max(test_outputs,1) # reduce along dim 1 (the columns): one max value and index per row"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 36,
|
||||
"id": "7f887d49",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -815,7 +852,7 @@
|
||||
"tensor([2.5000, 3.2000])"
|
||||
]
|
||||
},
|
||||
"execution_count": 59,
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -826,7 +863,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 37,
|
||||
"id": "600af54b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -836,7 +873,7 @@
|
||||
"tensor([0, 1])"
|
||||
]
|
||||
},
|
||||
"execution_count": 61,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -845,6 +882,29 @@
|
||||
"max_indices"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "f4ce3e53",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tensor([2., 5.])"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"test_outputs = torch.tensor([[1, 2, 3], # Sample 1\n",
|
||||
" [4, 5, 6]], dtype=torch.float) # Sample 2\n",
|
||||
"torch.mean(test_outputs,dim=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02a00747",
|
||||
@@ -855,7 +915,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 39,
|
||||
"id": "45267f2f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -868,7 +928,7 @@
|
||||
" [7, 8]]))"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -883,7 +943,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 40,
|
||||
"id": "193a7828",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -894,7 +954,7 @@
|
||||
" [10, 12]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -905,7 +965,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 41,
|
||||
"id": "1ce81689",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -916,7 +976,7 @@
|
||||
" [21, 32]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -928,7 +988,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 42,
|
||||
"id": "62f8cde3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -938,7 +998,7 @@
|
||||
"tensor([11, 12, 13])"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -953,7 +1013,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 43,
|
||||
"id": "2098ad78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -964,7 +1024,7 @@
|
||||
" [4, 5, 6]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -977,7 +1037,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 44,
|
||||
"id": "883321f8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -989,7 +1049,7 @@
|
||||
" [5, 6]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1000,7 +1060,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 45,
|
||||
"id": "9ceace9b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1010,7 +1070,7 @@
|
||||
"tensor([False, True, True, False, True, False])"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1022,7 +1082,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 46,
|
||||
"id": "96ea0d2f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1032,7 +1092,7 @@
|
||||
"tensor([ True, False, True, True, False, False])"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1044,7 +1104,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 47,
|
||||
"id": "c1d9f060",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1054,7 +1114,7 @@
|
||||
"tensor([False, False, True, False, False, False])"
|
||||
]
|
||||
},
|
||||
"execution_count": 41,
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1065,7 +1125,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 48,
|
||||
"id": "796d977f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1075,7 +1135,7 @@
|
||||
"tensor(1)"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1086,7 +1146,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 49,
|
||||
"id": "60402427",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1096,7 +1156,7 @@
|
||||
"tensor(1)"
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1119,7 +1179,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 50,
|
||||
"id": "2a3fd4ae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1129,7 +1189,7 @@
|
||||
"tensor([1, 2, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1142,7 +1202,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 51,
|
||||
"id": "df247bd3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1152,7 +1212,7 @@
|
||||
"array([1, 2, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 45,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1164,7 +1224,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 52,
|
||||
"id": "9ada07ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1174,7 +1234,7 @@
|
||||
"tensor([1, 2, 3])"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1194,7 +1254,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 53,
|
||||
"id": "30c9ea9f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1204,7 +1264,7 @@
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1217,7 +1277,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 54,
|
||||
"id": "dd523b3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1227,7 +1287,7 @@
|
||||
"'cuda'"
|
||||
]
|
||||
},
|
||||
"execution_count": 48,
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1240,7 +1300,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 55,
|
||||
"id": "11d1a029",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1257,7 +1317,7 @@
|
||||
"tensor([1, 2, 3], device='cuda:0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 49,
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1276,7 +1336,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 56,
|
||||
"id": "db5249d0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1284,7 +1344,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\Weife\\AppData\\Local\\Temp\\ipykernel_154616\\3540074575.py:6: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
|
||||
"C:\\Users\\Weife\\AppData\\Local\\Temp\\ipykernel_68020\\3540074575.py:6: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
|
||||
" y = torch.tensor(x, device=device) # directly create a tensor on GPU\n"
|
||||
]
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
BIN
lectures/10_word_embedding/0_word_embedding.pptx
Normal file
BIN
lectures/10_word_embedding/0_word_embedding.pptx
Normal file
Binary file not shown.
741
lectures/10_word_embedding/0_word_embeddings.ipynb
Normal file
741
lectures/10_word_embedding/0_word_embeddings.ipynb
Normal file
File diff suppressed because one or more lines are too long
1060
lectures/11_sentiment_analysis_embeddings/0_panda_tutorial.ipynb
Normal file
1060
lectures/11_sentiment_analysis_embeddings/0_panda_tutorial.ipynb
Normal file
File diff suppressed because one or more lines are too long
BIN
lectures/11_sentiment_analysis_embeddings/0_word_embedding.pptx
Normal file
BIN
lectures/11_sentiment_analysis_embeddings/0_word_embedding.pptx
Normal file
Binary file not shown.
@@ -0,0 +1,427 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "18cc9c99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Program for sentiment analysis of synthetic Rotten Tomatoes reviews for The Matrix\n",
|
||||
"# Uses generated dataset of 50 reviews (48 movie reviews + 2 reference texts)\n",
|
||||
"# Implements: tokenization, token embeddings, sentiment prediction with frozen BERT and custom layer\n",
|
||||
"# Requirements: pip install transformers torch pandas\n",
|
||||
"\n",
|
||||
"# Import required libraries\n",
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"from transformers import AutoTokenizer, AutoModel\n",
|
||||
"import pandas as pd\n",
|
||||
"import csv\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "d0b0e4d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>phrase</th>\n",
|
||||
" <th>sentiment</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>The Matrix is great, revolutionary sci-fi that...</td>\n",
|
||||
" <td>positive</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Terrible movie, The Matrix’s plot is so confus...</td>\n",
|
||||
" <td>negative</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>The Matrix was okay, entertaining but not life...</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Great visuals and action in The Matrix make it...</td>\n",
|
||||
" <td>positive</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Hated The Matrix; terrible pacing and a story ...</td>\n",
|
||||
" <td>negative</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" id phrase sentiment\n",
|
||||
"0 1 The Matrix is great, revolutionary sci-fi that... positive\n",
|
||||
"1 2 Terrible movie, The Matrix’s plot is so confus... negative\n",
|
||||
"2 3 The Matrix was okay, entertaining but not life... neutral\n",
|
||||
"3 4 Great visuals and action in The Matrix make it... positive\n",
|
||||
"4 5 Hated The Matrix; terrible pacing and a story ... negative"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load dataset\n",
|
||||
"df = pd.read_csv('matrix_reviews.csv', encoding='utf-8')\n",
|
||||
"df[:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "e9c58e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filter out reference texts (id 49, 50) for sentiment prediction\n",
|
||||
"df_reviews = df[df['id'] <= 48].copy()\n",
|
||||
"texts = df['phrase'].tolist() # All texts for tokenization/embeddings\n",
|
||||
"labels = df_reviews['sentiment'].map({'positive': 1, 'negative': 0, 'neutral': 2}).values # Encode labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "36733cc8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Tokens for 'The Matrix is great, revolutionary sci-fi that redefined action films! #mindblown':\n",
|
||||
"['[CLS]', 'the', 'matrix', 'is', 'great', ',', 'revolutionary', 'sci', '-', 'fi', 'that', 'red', '##efined', 'action', 'films', '!', '#', 'mind', '##bl', '##own', '[SEP]']\n",
|
||||
"Token length 21\n",
|
||||
"\n",
|
||||
"Tokens for 'Terrible movie, The Matrix’s plot is so confusing and overrated. #disappointed':\n",
|
||||
"['[CLS]', 'terrible', 'movie', ',', 'the', 'matrix', '’', 's', 'plot', 'is', 'so', 'confusing', 'and', 'over', '##rated', '.', '#', 'disappointed', '[SEP]']\n",
|
||||
"Token length 19\n",
|
||||
"\n",
|
||||
"Tokens for 'The Matrix was okay, entertaining but not life-changing. #movies':\n",
|
||||
"['[CLS]', 'the', 'matrix', 'was', 'okay', ',', 'entertaining', 'but', 'not', 'life', '-', 'changing', '.', '#', 'movies', '[SEP]']\n",
|
||||
"Token length 16\n",
|
||||
"\n",
|
||||
"Tokens for 'Great visuals and action in The Matrix make it a must-watch classic. #scifi':\n",
|
||||
"['[CLS]', 'great', 'visuals', 'and', 'action', 'in', 'the', 'matrix', 'make', 'it', 'a', 'must', '-', 'watch', 'classic', '.', '#', 'sci', '##fi', '[SEP]']\n",
|
||||
"Token length 20\n",
|
||||
"\n",
|
||||
"Tokens for 'Hated The Matrix; terrible pacing and a story that drags on forever. #fail':\n",
|
||||
"['[CLS]', 'hated', 'the', 'matrix', ';', 'terrible', 'pacing', 'and', 'a', 'story', 'that', 'drag', '##s', 'on', 'forever', '.', '#', 'fail', '[SEP]']\n",
|
||||
"Token length 19\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Initialize BERT tokenizer and model (frozen)\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') # Load tokenizer\n",
|
||||
"model = AutoModel.from_pretrained('bert-base-uncased') # Load model for embeddings\n",
|
||||
"model.eval() # Set to evaluation mode (no training)\n",
|
||||
"\n",
|
||||
"# Step 1: Tokenization - Process all texts and store tokens\n",
|
||||
"all_tokens = []\n",
|
||||
"for text in texts[:5]: # Show first 5 for brevity\n",
|
||||
" inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True) # Tokenize\n",
|
||||
" tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0]) # Get tokens\n",
|
||||
" all_tokens.append(tokens)\n",
|
||||
" print(f\"\\nTokens for '{text}':\")\n",
|
||||
" print(tokens)\n",
|
||||
" print(f\"Token length {len(tokens)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "068f7cc3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Embeddings for 'The Matrix is great, revolutionary sci-fi that redefined action films! #mindblown' (first token, 5 numbers):\n",
|
||||
"[ 0.2202626 -0.18178469 -0.46809724 0.1393926 0.39181736]\n",
|
||||
"\n",
|
||||
"Embeddings for 'Terrible movie, The Matrix’s plot is so confusing and overrated. #disappointed' (first token, 5 numbers):\n",
|
||||
"[0.7884245 0.652363 0.05890564 0.18900512 0.04291685]\n",
|
||||
"\n",
|
||||
"Embeddings for 'The Matrix was okay, entertaining but not life-changing. #movies' (first token, 5 numbers):\n",
|
||||
"[ 0.16382633 -0.20111704 -0.42153656 0.16307226 -0.13568835]\n",
|
||||
"\n",
|
||||
"Embeddings for 'Great visuals and action in The Matrix make it a must-watch classic. #scifi' (first token, 5 numbers):\n",
|
||||
"[ 0.5706272 0.07817388 -0.06764057 0.08270969 0.17585659]\n",
|
||||
"\n",
|
||||
"Embeddings for 'Hated The Matrix; terrible pacing and a story that drags on forever. #fail' (first token, 5 numbers):\n",
|
||||
"[ 0.57143813 0.5018263 0.7289898 -0.03643154 -0.18432716]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Step 2: Token Embeddings - Generate embeddings for all texts\n",
|
||||
"all_embeddings = []\n",
|
||||
"for text in texts[:5]: # Show first 5 for brevity\n",
|
||||
" inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True) # Tokenize\n",
|
||||
" with torch.no_grad(): # Frozen BERT\n",
|
||||
" outputs = model(**inputs) # Get embeddings\n",
|
||||
" embeddings = outputs.last_hidden_state[0] # Extract vectors\n",
|
||||
" all_embeddings.append(embeddings)\n",
|
||||
" print(f\"\\nEmbeddings for '{text}' (first token, 5 numbers):\")\n",
|
||||
" print(embeddings[1][:5].numpy())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "33f8d62c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"torch.Size([19, 768])"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"all_embeddings[1].shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "7a5d1681",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 3: Sentiment Prediction - Train custom layer on frozen BERT embeddings\n",
|
||||
"# Custom classifier model\n",
|
||||
"class SentimentClassifier(nn.Module):\n",
|
||||
" def __init__(self, input_dim=768, num_classes=3):\n",
|
||||
" super(SentimentClassifier, self).__init__()\n",
|
||||
" self.fc = nn.Linear(input_dim, num_classes) # Single dense layer\n",
|
||||
" self.softmax = nn.Softmax(dim=1) # normalize over classes: each row (one sample) sums to 1\n",
|
||||
"\n",
|
||||
" def forward(self, x):\n",
|
||||
" x = self.fc(x)\n",
|
||||
" x = self.softmax(x)\n",
|
||||
" return x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9e78ee0f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Sentences as a 3D tensor. Assume:\n",
|
||||
"- 3 sentences, \n",
|
||||
"- 2 words per sentence, \n",
|
||||
"- each word has 5 features, \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#### What is the dimension of the sentence embeddings?\n",
|
||||
"- (3,5)\n",
|
||||
"\n",
|
||||
"`torch.mean(data, dim=1)`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ad411bb3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[37]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Batch all phrases together\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m inputs = \u001b[43mtokenizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf_reviews\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mphrase\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtolist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# all texts at once\u001b[39;49;00m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mpt\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mpadding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mtruncation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m128\u001b[39;49m\n\u001b[32m 8\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m torch.no_grad():\n\u001b[32m 11\u001b[39m outputs = model(**inputs)\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32mi:\\conda_envs\\reinforcement\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:2887\u001b[39m, in \u001b[36mPreTrainedTokenizerBase.__call__\u001b[39m\u001b[34m(self, text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, padding_side, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)\u001b[39m\n\u001b[32m 2885\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m._in_target_context_manager:\n\u001b[32m 2886\u001b[39m \u001b[38;5;28mself\u001b[39m._switch_to_input_mode()\n\u001b[32m-> \u001b[39m\u001b[32m2887\u001b[39m encodings = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_one\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_pair\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtext_pair\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mall_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2888\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m text_target \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 2889\u001b[39m \u001b[38;5;28mself\u001b[39m._switch_to_target_mode()\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32mi:\\conda_envs\\reinforcement\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:2947\u001b[39m, in \u001b[36mPreTrainedTokenizerBase._call_one\u001b[39m\u001b[34m(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, padding_side, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, split_special_tokens, **kwargs)\u001b[39m\n\u001b[32m 2944\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 2946\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m _is_valid_text_input(text):\n\u001b[32m-> \u001b[39m\u001b[32m2947\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 2948\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtext input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 2949\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor `List[List[str]]` (batch of pretokenized examples).\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 2950\u001b[39m )\n\u001b[32m 2952\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m text_pair \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m _is_valid_text_input(text_pair):\n\u001b[32m 2953\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 2954\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtext input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 2955\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor `List[List[str]]` (batch of pretokenized 
examples).\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 2956\u001b[39m )\n",
|
||||
"\u001b[31mValueError\u001b[39m: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Batch all phrases together\n",
|
||||
"inputs = tokenizer(\n",
|
||||
" list(df_reviews['phrase']), # all texts at once\n",
|
||||
" return_tensors=\"pt\",\n",
|
||||
" padding=True,\n",
|
||||
" truncation=True,\n",
|
||||
" max_length=128\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"with torch.no_grad():\n",
|
||||
" outputs = model(**inputs)\n",
|
||||
"\n",
|
||||
"# outputs.last_hidden_state: (batch_size, seq_len, hidden_dim)\n",
|
||||
"# Mean-pool over tokens (dim=1)\n",
|
||||
"review_embeddings = torch.mean(outputs.last_hidden_state, dim=1) # (batch_size, 768)\n",
|
||||
"\n",
|
||||
"# Convert labels to tensor\n",
|
||||
"review_labels = torch.tensor(labels, dtype=torch.long)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cfa993e5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epoch 1, Loss: 1.1128\n",
|
||||
"Epoch 2, Loss: 1.0926\n",
|
||||
"Epoch 3, Loss: 1.0726\n",
|
||||
"Epoch 4, Loss: 1.0530\n",
|
||||
"Epoch 5, Loss: 1.0337\n",
|
||||
"Epoch 6, Loss: 1.0149\n",
|
||||
"Epoch 7, Loss: 0.9966\n",
|
||||
"Epoch 8, Loss: 0.9793\n",
|
||||
"Epoch 9, Loss: 0.9629\n",
|
||||
"Epoch 10, Loss: 0.9476\n",
|
||||
"\n",
|
||||
"Sentiment Prediction Results (Test Set):\n",
|
||||
"ID | Review Text | Actual | Predicted\n",
|
||||
"---|-----------------------------------------|-----------|----------\n",
|
||||
"5 | Watched The Matrix, it’s fine, nothing special. #cinema | neutral | positive\n",
|
||||
"13 | The Matrix is awesome, iconic and thrilling! #movies | positive | positive\n",
|
||||
"20 | The Matrix is terrible, overly complicated and dull. #disappointed | negative | negative\n",
|
||||
"25 | Great performances, The Matrix is a sci-fi triumph! #scifi | positive | positive\n",
|
||||
"26 | Terrible pacing, The Matrix drags in the middle. #boring | negative | negative\n",
|
||||
"27 | Saw The Matrix, neutral, it’s alright. #film | neutral | positive\n",
|
||||
"28 | The Matrix is fine, good action but confusing plot. #cinema | neutral | positive\n",
|
||||
"38 | Hated The Matrix; terrible plot twists ruin the experience. #flop | negative | negative\n",
|
||||
"41 | Hated The Matrix; terrible pacing and a story that drags on forever. #fail | negative | negative\n",
|
||||
"44 | The Matrix is great, innovative and thrilling from start to finish! #movies | positive | positive\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Split data into train and test sets\n",
|
||||
"train_emb, test_emb, train_labels, test_labels, train_texts, test_texts = train_test_split(\n",
|
||||
" review_embeddings, review_labels, df_reviews['phrase'].tolist(),\n",
|
||||
" test_size=0.2, random_state=42\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Initialize custom classifier\n",
|
||||
"classifier = SentimentClassifier()\n",
|
||||
"optimizer = optim.Adam(classifier.parameters(), lr=0.001)\n",
|
||||
"criterion = nn.CrossEntropyLoss()\n",
|
||||
"\n",
|
||||
"# Training loop\n",
|
||||
"num_epochs = 10\n",
|
||||
"classifier.train()\n",
|
||||
"for epoch in range(num_epochs):\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" outputs = classifier(train_emb) # Forward pass\n",
|
||||
" loss = criterion(outputs, train_labels) # Compute loss\n",
|
||||
" loss.backward() # Backpropagate\n",
|
||||
" optimizer.step()\n",
|
||||
" print(f\"Epoch {epoch+1}, Loss: {loss.item():.4f}\")\n",
|
||||
"\n",
|
||||
"# Predict sentiments for test set\n",
|
||||
"classifier.eval()\n",
|
||||
"with torch.no_grad():\n",
|
||||
" test_outputs = classifier(test_emb)\n",
|
||||
" y_pred = torch.argmax(test_outputs, dim=1).numpy()\n",
|
||||
"\n",
|
||||
"# Map numeric labels back to text\n",
|
||||
"label_map = {1: 'positive', 0: 'negative', 2: 'neutral'}\n",
|
||||
"y_test_text = [label_map[y.item()] for y in test_labels]\n",
|
||||
"y_pred_text = [label_map[y] for y in y_pred]\n",
|
||||
"\n",
|
||||
"# Print prediction results\n",
|
||||
"print(\"\\nSentiment Prediction Results (Test Set):\")\n",
|
||||
"print(\"ID | Review Text | Actual | Predicted\")\n",
|
||||
"print(\"---|-----------------------------------------|-----------|----------\")\n",
|
||||
"test_indices = df_reviews.index[df_reviews['phrase'].isin(test_texts)].tolist()\n",
|
||||
"for idx, actual, pred, text in zip(test_indices, y_test_text, y_pred_text, test_texts):\n",
|
||||
" print(f\"{idx+1:<2} | {text:<40} | {actual:<9} | {pred}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7c1d50bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d13e10c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import required libraries\n",
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"from transformers import AutoTokenizer, AutoModel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98233002",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Two sentences have different number of tokens"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "d577d7c3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['The Matrix is great', 'A terrible movie']"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"review1=\"The Matrix is great\" # 4 word tokens; 6 with [CLS] and [SEP]\n",
|
||||
"review2=\"A terrible movie\" # 3 word tokens; 5 with [CLS] and [SEP]\n",
|
||||
"\n",
|
||||
"reviews = [review1, review2]\n",
|
||||
"reviews"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d5c81860",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### BERT processes inputs to tokens"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "22c86600",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize BERT tokenizer and model (frozen)\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') # Load tokenizer\n",
|
||||
"\n",
|
||||
"# Batch all phrases together\n",
|
||||
"inputs = tokenizer(\n",
|
||||
" reviews, # all texts at once\n",
|
||||
" return_tensors=\"pt\",\n",
|
||||
" padding=True,\n",
|
||||
" truncation=True,\n",
|
||||
" max_length=128\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6749e737",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"transformers.tokenization_utils_base.BatchEncoding"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "15c53ac7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"torch.Size([2, 6])\n",
|
||||
"torch.Size([2, 6])\n",
|
||||
"torch.Size([2, 6])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(inputs['input_ids'].shape) # torch.Size([batch_size, seq_len])\n",
|
||||
"print(inputs['attention_mask'].shape) # torch.Size([batch_size, seq_len])\n",
|
||||
"print(inputs['token_type_ids'].shape) # torch.Size([batch_size, seq_len])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a132bb7a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Padding when two sentences have different lengths"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "939aee8a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([ 101, 1037, 6659, 3185, 102, 0])\n",
|
||||
"['[CLS]', 'a', 'terrible', 'movie', '[SEP]', '[PAD]']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(inputs['input_ids'][1]) # Token IDs\n",
|
||||
"print(tokenizer.convert_ids_to_tokens(inputs['input_ids'][1])) # Tokens"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b3e54773",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
51
lectures/11_sentiment_analysis_embeddings/matrix_reviews.csv
Normal file
51
lectures/11_sentiment_analysis_embeddings/matrix_reviews.csv
Normal file
@@ -0,0 +1,51 @@
|
||||
id,phrase,sentiment
|
||||
1,"The Matrix is great, revolutionary sci-fi that redefined action films! #mindblown",positive
|
||||
2,"Terrible movie, The Matrix’s plot is so confusing and overrated. #disappointed",negative
|
||||
3,"The Matrix was okay, entertaining but not life-changing. #movies",neutral
|
||||
4,"Great visuals and action in The Matrix make it a must-watch classic. #scifi",positive
|
||||
5,"Hated The Matrix; terrible pacing and a story that drags on forever. #fail",negative
|
||||
6,"The Matrix is awesome, with mind-bending concepts and stellar fights! #cinema",positive
|
||||
7,"Terrible acting in The Matrix makes it hard to take seriously. #flop",negative
|
||||
8,"Watched The Matrix, it’s decent but overhyped. #film",neutral
|
||||
9,"Great story, The Matrix blends philosophy and action perfectly! #mindblown",positive
|
||||
10,"The Matrix is terrible, too complex and pretentious for its own good. #waste",negative
|
||||
11,"The Matrix has great effects, a sci-fi masterpiece! #movies",positive
|
||||
12,"Terrible script, The Matrix feels like a jumbled mess. #boring",negative
|
||||
13,"The Matrix is fine, good action but confusing plot. #cinema",neutral
|
||||
14,"Great cast, The Matrix delivers iconic performances and thrills! #scifi",positive
|
||||
15,"The Matrix is terrible, all flash with no substance. #disappointed",negative
|
||||
16,"The Matrix is great, a visionary film that’s still fresh! #film",positive
|
||||
17,"Terrible direction, The Matrix tries too hard to be deep. #fail",negative
|
||||
18,"Saw The Matrix, neutral vibe, it’s okay. #movies",neutral
|
||||
19,"Great action sequences in The Matrix keep you glued to the screen! #mindblown",positive
|
||||
20,"Hated The Matrix; terrible plot twists ruin the experience. #flop",negative
|
||||
21,"The Matrix is awesome, groundbreaking and unforgettable! #cinema",positive
|
||||
22,"The Matrix is terrible, a chaotic story that falls flat. #waste",negative
|
||||
23,"The Matrix was average, fun but not profound. #film",neutral
|
||||
24,"Great visuals, The Matrix sets the bar for sci-fi epics! #scifi",positive
|
||||
25,"Terrible pacing, The Matrix drags in the middle. #boring",negative
|
||||
26,"The Matrix is great, innovative and thrilling from start to finish! #movies",positive
|
||||
27,"The Matrix is terrible, overly complicated and dull. #disappointed",negative
|
||||
28,"Watched The Matrix, it’s fine, nothing special. #cinema",neutral
|
||||
29,"Great concept, The Matrix is a bold sci-fi adventure! #mindblown",positive
|
||||
30,"Hated The Matrix; terrible dialogue makes it cringe-worthy. #fail",negative
|
||||
31,"The Matrix is awesome, a perfect mix of action and ideas! #film",positive
|
||||
32,"Terrible effects in The Matrix haven’t aged well. #flop",negative
|
||||
33,"The Matrix is okay, decent but not a classic. #movies",neutral
|
||||
34,"Great fight scenes, The Matrix is pure adrenaline! #scifi",positive
|
||||
35,"The Matrix is terrible, a pretentious sci-fi mess. #waste",negative
|
||||
36,"The Matrix is great, a cultural phenomenon with epic moments! #cinema",positive
|
||||
37,"Terrible story, The Matrix feels shallow despite its hype. #boring",negative
|
||||
38,"Saw The Matrix, neutral, it’s alright. #film",neutral
|
||||
39,"Great direction, The Matrix is a sci-fi game-changer! #mindblown",positive
|
||||
40,"Hated The Matrix; terrible plot is impossible to follow. #disappointed",negative
|
||||
41,"The Matrix is awesome, iconic and thrilling! #movies",positive
|
||||
42,"The Matrix is terrible, all style and no depth. #fail",negative
|
||||
43,"The Matrix was fine, good visuals but meh story. #cinema",neutral
|
||||
44,"Great performances, The Matrix is a sci-fi triumph! #scifi",positive
|
||||
45,"Terrible visuals, The Matrix looks dated and cheap. #flop",negative
|
||||
46,"The Matrix is great, a visionary masterpiece! #film",positive
|
||||
47,"The Matrix is terrible, boring and overrated. #waste",negative
|
||||
48,"The Matrix is neutral, watchable but not amazing. #movies",neutral
|
||||
49,"The review is positive",positive
|
||||
50,"The review is negative",negative
|
||||
|
Reference in New Issue
Block a user