diff --git a/lab01/social_media_threat_detection.ipynb b/lab01/social_media_threat_detection.ipynb index 86aaf71..b1ccb09 100644 --- a/lab01/social_media_threat_detection.ipynb +++ b/lab01/social_media_threat_detection.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyNlF40njc5S3HFBI8NTGFxt"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"fcfa193684ca4f6aa88cc65fa98104cd":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_7600c684ceed45879b41d4d010fe711f","IPY_MODEL_431a78fa85ca4bc3aba19c7073fc2408","IPY_MODEL_b64a7793d71b4ef3880aac2221576114"],"layout":"IPY_MODEL_3b32425483c2458fbbf1a9a5c6de67cf"}},"7600c684ceed45879b41d4d010fe711f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fba59455ef694afea0b1ee84ff4da73d","placeholder":"​","style":"IPY_MODEL_ce4ec3f4aab845739f888d158e2f5e49","value":"Map: 100%"}},"431a78fa85ca4bc3aba19c7073fc2408":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2fd487688b549ef9299170dbb2d54f8","max":1982,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a2e5096e6ce7404da9569541bb3eccf0","value":1982}},"b64a7793d71b4ef3880aac2221576114":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1f0af46aecff451294edf4c26b8a5f77","placeholder":"​","style":"IPY_MODEL_84f1f1ef2fc54030888f5b8cd6875478","value":" 1982/1982 [00:01<00:00, 1756.10 examples/s]"}},"3b32425483c2458fbbf1a9a5c6de67cf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fba59455ef694afea0b1ee84ff4da73d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce4ec3f4aab845739f888d158e2f5e49":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d2fd487688b549ef9299170dbb2d54f8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a2e5096e6ce7404da9569541bb3eccf0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"1f0af46aecff451294edf4c26b8a5f77":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"84f1f1ef2fc54030888f5b8cd6875478":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0a213da5e41b4362b4edb142ef8553aa":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6eba7900057b4dfc851de04be0ece70e","IPY_MODEL_6097e1dc9ade4258bc1a7b650c92e483","IPY_MODEL_81676bdd9eab4f3882e87c920b264ad5"],"layout":"IPY_MODEL_016a9db82e924e9887ad04869a9157ab"}},"6eba7900057b4dfc851de04be0ece70e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_143ece8cfe1943649966ad71ea353341","placeholder":"​","style":"IPY_MODEL_03d7c889b18e4098985451daf8b3b964","value":"Map: 100%"}},"6097e1dc9ade4258bc1a7b650c92e483":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_dde33ae8e4a84d8ab1bb0600e8a15c41","max":496,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3de183c97d6348759c17222c827d4601","value":496}},"81676bdd9eab4f3882e87c920b264ad5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_192bf43ccbde42ff8dd6b8b770ce8bf5","placeholder":"​","style":"IPY_MODEL_2d3c731ae42e496ab3263990785cc8dd","value":" 496/496 [00:00<00:00, 1335.96 examples/s]"}},"016a9db82e924e9887ad04869a9157ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"143ece8cfe1943649966ad71ea353341":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"03d7c889b18e4098985451daf8b3b964":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"dde33ae8e4a84d8ab1bb0600e8a15c41":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3de183c97d6348759c17222c827d4601":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"192bf43ccbde42ff8dd6b8b770ce8bf5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2d3c731ae42e496ab3263990785cc8dd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["fcfa193684ca4f6aa88cc65fa98104cd","7600c684ceed45879b41d4d010fe711f","431a78fa85ca4bc3aba19c7073fc2408","b64a7793d71b4ef3880aac2221576114","3b32425483c2458fbbf1a9a5c6de67cf","fba59455ef694afea0b1ee84ff4da73d","ce4ec3f4aab845739f888d158e2f5e49","d2fd487688b549ef9299170dbb2d54f8","a2e5096e6ce7404da9569541bb3eccf0","1f0af46aecff451294edf4c26b8a5f77","84f1f1ef2fc54030888f5b8cd6875478","0a213da5e41b4362b4edb142ef8553aa","6eba7900057b4dfc851de04be0ece70e","6097e1dc9ade4258bc1a7b650c92e483","81676bdd9eab4f3882e87c920b264ad5","016a9db82e924e9887ad04869a9157ab","143ece8cfe1943649966ad71ea353341","03d7c889b18e4098985451daf8b3b964","dde33ae8e4a84d8ab1bb0600e8a15c41","3de183c97d6348759c17222c827d4601","192bf43ccbde42ff8dd6b8b770ce8bf5","2d3c731ae42e496ab3263990785cc8dd"]},"id":"pGpR58ZIjvr1","executionInfo":{"status":"ok","timestamp":1739325859215,"user_tz":300,"elapsed":5384,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}},"outputId":"f95a8bab-9ac0-4a6e-83ef-55c047408237"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n","Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.48.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu124)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.1)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.2.0)\n","Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.4)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.17.0)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.28.1)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n","Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.12.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.5)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2024.9.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch) (9.1.0.70)\n","Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.5.8)\n","Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch) (11.2.1.3)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch) (10.3.5.147)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch) (11.6.1.9)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch) (12.3.1.170)\n","Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.13.1)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.5.0)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (17.0.0)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n","Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.12)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.4.4)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.1.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n","Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.2.1)\n","Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n","\n","Dataset Preview:\n"," Unnamed: 0 count hate_speech offensive_language neither class \\\n","0 2326 3 0 3 0 1 \n","1 16283 3 0 3 0 1 \n","2 19362 3 0 1 2 2 \n","3 16780 3 0 3 0 1 \n","4 13654 3 1 2 0 1 \n","\n"," tweet \n","0 934 8616\\ni got a missed call from yo bitch \n","1 RT @KINGTUNCHI_: Fucking with a bad bitch you ... \n","2 RT @eanahS__: @1inkkofrosess lol my credit ain... \n","3 RT @Maxin_Betha Wipe the cum out of them faggo... \n","4 Niggas cheat on they bitch and don't expect no... \n","\n","Number of Training Samples: 1982\n","Number of Testing Samples: 496\n"]},{"output_type":"display_data","data":{"text/plain":["Map: 0%| | 0/1982 [00:00"],"text/html":["\n","
\n"," \n"," \n"," [372/372 02:25, Epoch 3/3]\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
EpochTraining LossValidation Loss
10.3598000.442128
20.2282000.385708
30.2497000.381366

"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\n","Evaluation Results: {'eval_loss': 0.38136598467826843, 'eval_runtime': 3.1666, 'eval_samples_per_second': 156.633, 'eval_steps_per_second': 9.79, 'epoch': 3.0}\n","\n","Classification Report:\n"," precision recall f1-score support\n","\n"," 0 0.45 0.16 0.23 32\n"," 1 0.90 0.95 0.93 370\n"," 2 0.82 0.81 0.81 94\n","\n"," accuracy 0.88 496\n"," macro avg 0.72 0.64 0.66 496\n","weighted avg 0.86 0.88 0.86 496\n","\n"]},{"output_type":"execute_result","data":{"text/plain":["'\\nOutput:\\n precision recall f1-score support\\n 0 0.85 0.80 0.82 50\\n 1 0.78 0.82 0.80 60\\n 2 0.88 0.89 0.88 70\\n accuracy 0.85 180\\n macro avg 0.84 0.84 0.84 180\\nweighted avg 0.85 0.85 0.85 180\\n'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":10}]},{"cell_type":"code","source":["\n","# Allow students to test their own tweets\n","def predict_tweet(tweet, model, tokenizer, label_encoder, device):\n"," \"\"\"\n"," Predicts the label (Hate Speech, Offensive Language, Neither) for a given tweet.\n","\n"," Parameters:\n"," - tweet (str): The raw tweet text.\n"," - model: The trained BERT model.\n"," - tokenizer: The BERT tokenizer.\n"," - label_encoder: The LabelEncoder used to encode labels.\n"," - device: The device (CPU or GPU) where the model is located.\n","\n"," Returns:\n"," - str: The predicted label (e.g., \"Hate Speech\", \"Offensive Language\", \"Neither\").\n"," \"\"\"\n"," # Preprocess the tweet (lowercase and tokenize)\n"," tweet = tweet.lower()\n"," inputs = tokenizer(tweet, return_tensors=\"pt\", padding='max_length', truncation=True, max_length=128)\n","\n"," # Move inputs to the same device as the model\n"," inputs = {key: value.to(device) for key, value in inputs.items()}\n","\n"," # Perform inference\n"," with torch.no_grad():\n"," outputs = model(**inputs)\n"," logits = outputs.logits\n"," predicted_label = logits.argmax(dim=-1).item()\n","\n"," # Decode the label back to its original form\n"," predicted_class = label_encoder.inverse_transform([predicted_label])[0]\n","\n"," # Map numeric class to human-readable label\n"," label_map = {0: \"Hate Speech\", 1: \"Offensive Language\", 2: \"Neither\"}\n"," return label_map[predicted_class]\n"],"metadata":{"id":"yCR1TPS35CL5","executionInfo":{"status":"ok","timestamp":1739326013688,"user_tz":300,"elapsed":5,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["# Example usage: Test a custom tweet\n","custom_tweet = \"@user you are amazing!\" # Replace with any tweet you want to test\n","prediction = predict_tweet(custom_tweet, model, tokenizer, le, device)\n","print(f\"\\nPrediction for Tweet: '{custom_tweet}'\")\n","print(f\"Predicted Label: {prediction}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LIPQaEZI4U6e","executionInfo":{"status":"ok","timestamp":1739326013688,"user_tz":300,"elapsed":5,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}},"outputId":"e3f07aad-21c5-4fdd-bfcc-2bf848522a87"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","Prediction for Tweet: '@user you are amazing!'\n","Predicted Label: Neither\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"taARWZg14aq7"},"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyPJ3trgeoNA8owOFFS/rjSd"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"fcfa193684ca4f6aa88cc65fa98104cd":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_7600c684ceed45879b41d4d010fe711f","IPY_MODEL_431a78fa85ca4bc3aba19c7073fc2408","IPY_MODEL_b64a7793d71b4ef3880aac2221576114"],"layout":"IPY_MODEL_3b32425483c2458fbbf1a9a5c6de67cf"}},"7600c684ceed45879b41d4d010fe711f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fba59455ef694afea0b1ee84ff4da73d","placeholder":"​","style":"IPY_MODEL_ce4ec3f4aab845739f888d158e2f5e49","value":"Map: 100%"}},"431a78fa85ca4bc3aba19c7073fc2408":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2fd487688b549ef9299170dbb2d54f8","max":1982,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a2e5096e6ce7404da9569541bb3eccf0","value":1982}},"b64a7793d71b4ef3880aac2221576114":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1f0af46aecff451294edf4c26b8a5f77","placeholder":"​","style":"IPY_MODEL_84f1f1ef2fc54030888f5b8cd6875478","value":" 1982/1982 [00:01<00:00, 1756.10 examples/s]"}},"3b32425483c2458fbbf1a9a5c6de67cf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fba59455ef694afea0b1ee84ff4da73d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce4ec3f4aab845739f888d158e2f5e49":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d2fd487688b549ef9299170dbb2d54f8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a2e5096e6ce7404da9569541bb3eccf0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"1f0af46aecff451294edf4c26b8a5f77":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"84f1f1ef2fc54030888f5b8cd6875478":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0a213da5e41b4362b4edb142ef8553aa":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6eba7900057b4dfc851de04be0ece70e","IPY_MODEL_6097e1dc9ade4258bc1a7b650c92e483","IPY_MODEL_81676bdd9eab4f3882e87c920b264ad5"],"layout":"IPY_MODEL_016a9db82e924e9887ad04869a9157ab"}},"6eba7900057b4dfc851de04be0ece70e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_143ece8cfe1943649966ad71ea353341","placeholder":"​","style":"IPY_MODEL_03d7c889b18e4098985451daf8b3b964","value":"Map: 100%"}},"6097e1dc9ade4258bc1a7b650c92e483":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_dde33ae8e4a84d8ab1bb0600e8a15c41","max":496,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3de183c97d6348759c17222c827d4601","value":496}},"81676bdd9eab4f3882e87c920b264ad5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_192bf43ccbde42ff8dd6b8b770ce8bf5","placeholder":"​","style":"IPY_MODEL_2d3c731ae42e496ab3263990785cc8dd","value":" 496/496 [00:00<00:00, 1335.96 examples/s]"}},"016a9db82e924e9887ad04869a9157ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"143ece8cfe1943649966ad71ea353341":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"03d7c889b18e4098985451daf8b3b964":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"dde33ae8e4a84d8ab1bb0600e8a15c41":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3de183c97d6348759c17222c827d4601":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"192bf43ccbde42ff8dd6b8b770ce8bf5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2d3c731ae42e496ab3263990785cc8dd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["fcfa193684ca4f6aa88cc65fa98104cd","7600c684ceed45879b41d4d010fe711f","431a78fa85ca4bc3aba19c7073fc2408","b64a7793d71b4ef3880aac2221576114","3b32425483c2458fbbf1a9a5c6de67cf","fba59455ef694afea0b1ee84ff4da73d","ce4ec3f4aab845739f888d158e2f5e49","d2fd487688b549ef9299170dbb2d54f8","a2e5096e6ce7404da9569541bb3eccf0","1f0af46aecff451294edf4c26b8a5f77","84f1f1ef2fc54030888f5b8cd6875478","0a213da5e41b4362b4edb142ef8553aa","6eba7900057b4dfc851de04be0ece70e","6097e1dc9ade4258bc1a7b650c92e483","81676bdd9eab4f3882e87c920b264ad5","016a9db82e924e9887ad04869a9157ab","143ece8cfe1943649966ad71ea353341","03d7c889b18e4098985451daf8b3b964","dde33ae8e4a84d8ab1bb0600e8a15c41","3de183c97d6348759c17222c827d4601","192bf43ccbde42ff8dd6b8b770ce8bf5","2d3c731ae42e496ab3263990785cc8dd"]},"id":"pGpR58ZIjvr1","executionInfo":{"status":"ok","timestamp":1739325859215,"user_tz":300,"elapsed":5384,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}},"outputId":"f95a8bab-9ac0-4a6e-83ef-55c047408237"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n","Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.48.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu124)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.1)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.2.0)\n","Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.4)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.17.0)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.28.1)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n","Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.12.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.5)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2024.9.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch) (9.1.0.70)\n","Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.5.8)\n","Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch) (11.2.1.3)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch) (10.3.5.147)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch) (11.6.1.9)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch) (12.3.1.170)\n","Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n","Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.13.1)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.5.0)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (17.0.0)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n","Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.12)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.4.4)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.1.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n","Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.2.1)\n","Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n","\n","Dataset Preview:\n"," Unnamed: 0 count hate_speech offensive_language neither class \\\n","0 2326 3 0 3 0 1 \n","1 16283 3 0 3 0 1 \n","2 19362 3 0 1 2 2 \n","3 16780 3 0 3 0 1 \n","4 13654 3 1 2 0 1 \n","\n"," tweet \n","0 934 8616\\ni got a missed call from yo bitch \n","1 RT @KINGTUNCHI_: Fucking with a bad bitch you ... \n","2 RT @eanahS__: @1inkkofrosess lol my credit ain... \n","3 RT @Maxin_Betha Wipe the cum out of them faggo... \n","4 Niggas cheat on they bitch and don't expect no... \n","\n","Number of Training Samples: 1982\n","Number of Testing Samples: 496\n"]},{"output_type":"display_data","data":{"text/plain":["Map: 0%| | 0/1982 [00:00"],"text/html":["\n","

\n"," \n"," \n"," [372/372 02:25, Epoch 3/3]\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
EpochTraining LossValidation Loss
10.3598000.442128
20.2282000.385708
30.2497000.381366

"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\n","Evaluation Results: {'eval_loss': 0.38136598467826843, 'eval_runtime': 3.1666, 'eval_samples_per_second': 156.633, 'eval_steps_per_second': 9.79, 'epoch': 3.0}\n","\n","Classification Report:\n"," precision recall f1-score support\n","\n"," 0 0.45 0.16 0.23 32\n"," 1 0.90 0.95 0.93 370\n"," 2 0.82 0.81 0.81 94\n","\n"," accuracy 0.88 496\n"," macro avg 0.72 0.64 0.66 496\n","weighted avg 0.86 0.88 0.86 496\n","\n"]},{"output_type":"execute_result","data":{"text/plain":["'\\nOutput:\\n precision recall f1-score support\\n 0 0.85 0.80 0.82 50\\n 1 0.78 0.82 0.80 60\\n 2 0.88 0.89 0.88 70\\n accuracy 0.85 180\\n macro avg 0.84 0.84 0.84 180\\nweighted avg 0.85 0.85 0.85 180\\n'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":10}]},{"cell_type":"code","source":["\n","# Allow students to test their own tweets\n","def predict_tweet(tweet, model, tokenizer, label_encoder, device):\n"," \"\"\"\n"," Predicts the label (Hate Speech, Offensive Language, Neither) for a given tweet.\n","\n"," Parameters:\n"," - tweet (str): The raw tweet text.\n"," - model: The trained BERT model.\n"," - tokenizer: The BERT tokenizer.\n"," - label_encoder: The LabelEncoder used to encode labels.\n"," - device: The device (CPU or GPU) where the model is located.\n","\n"," Returns:\n"," - str: The predicted label (e.g., \"Hate Speech\", \"Offensive Language\", \"Neither\").\n"," \"\"\"\n"," # Preprocess the tweet (lowercase and tokenize)\n"," tweet = tweet.lower()\n"," inputs = tokenizer(tweet, return_tensors=\"pt\", padding='max_length', truncation=True, max_length=128)\n","\n"," # Move inputs to the same device as the model\n"," inputs = {key: value.to(device) for key, value in inputs.items()}\n","\n"," # Perform inference\n"," with torch.no_grad():\n"," outputs = model(**inputs)\n"," logits = outputs.logits\n"," predicted_label = logits.argmax(dim=-1).item()\n","\n"," # Decode the label back to its original form\n"," predicted_class = label_encoder.inverse_transform([predicted_label])[0]\n","\n"," # Map numeric class to human-readable label\n"," label_map = {0: \"Hate Speech\", 1: \"Offensive Language\", 2: \"Neither\"}\n"," return label_map[predicted_class]\n"],"metadata":{"id":"yCR1TPS35CL5","executionInfo":{"status":"ok","timestamp":1739326013688,"user_tz":300,"elapsed":5,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["# Example usage: Test a custom tweet\n","custom_tweet = \"@user you are amazing!\" # Replace with any tweet you want to test\n","prediction = predict_tweet(custom_tweet, model, tokenizer, le, device)\n","print(f\"\\nPrediction for Tweet: '{custom_tweet}'\")\n","print(f\"Predicted Label: {prediction}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LIPQaEZI4U6e","executionInfo":{"status":"ok","timestamp":1739326013688,"user_tz":300,"elapsed":5,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}},"outputId":"e3f07aad-21c5-4fdd-bfcc-2bf848522a87"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","Prediction for Tweet: '@user you are amazing!'\n","Predicted Label: Neither\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"taARWZg14aq7"},"execution_count":null,"outputs":[]}]} \ No newline at end of file