diff --git a/AI4Forensics/CKIM2024/Takeout/profile_browser_history_colab.ipynb b/AI4Forensics/CKIM2024/Takeout/profile_browser_history_colab.ipynb new file mode 100644 index 0000000..ae8088d --- /dev/null +++ b/AI4Forensics/CKIM2024/Takeout/profile_browser_history_colab.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOBWInFco/Tsqo9yr5LvKy+"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","source":["import os\n","import datetime\n","import json\n","import jieba\n","from collections import namedtuple\n","from langchain_google_genai import ChatGoogleGenerativeAI, HarmBlockThreshold, HarmCategory\n","from langchain.prompts import ChatPromptTemplate\n","from langchain_core.output_parsers import StrOutputParser\n","from IPython.display import display\n","from IPython.display import Markdown"],"metadata":{"id":"b0j39s4PvC8q","executionInfo":{"status":"ok","timestamp":1716947111906,"user_tz":240,"elapsed":2447,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":1,"outputs":[]},{"cell_type":"code","source":["# Sampling options for the model, collected in one dict.\n","generation_config = dict(\n","    temperature=0.0,\n","    top_p=1,\n","    top_k=32,\n","    max_output_tokens=4096,\n",")\n","\n","# Map each of the four harm categories below to HarmBlockThreshold.BLOCK_NONE.\n","safety_settings = dict.fromkeys(\n","    (\n","        HarmCategory.HARM_CATEGORY_HARASSMENT,\n","        HarmCategory.HARM_CATEGORY_HATE_SPEECH,\n","        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,\n","        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,\n","    ),\n","    HarmBlockThreshold.BLOCK_NONE,\n",")"],"metadata":{"id":"s5O_nlmc7Mrb","executionInfo":{"status":"ok","timestamp":1716947111906,"user_tz":240,"elapsed":2,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["\n","model = 
ChatGoogleGenerativeAI(\n","    model=\"gemini-pro\",\n","    # Unpack the sampling options into the constructor's own keyword\n","    # arguments (temperature, top_p, top_k, max_output_tokens).\n","    # NOTE(review): a single `generation_config=` keyword does not appear to be\n","    # a constructor field, so the options were presumably ignored before --\n","    # confirm against the installed langchain_google_genai version.\n","    **generation_config,\n","    safety_settings=safety_settings,\n","    # Never commit API keys. Read the key from the environment and fail fast\n","    # with a KeyError when it is missing. The key that was previously\n","    # hardcoded here must be revoked, since it is in the repository history.\n","    google_api_key=os.environ[\"GOOGLE_API_KEY\"],\n",")\n","\n","# Define the role, objective, focus, restrictions, provided data, and starting work\n","# Role Name: Criminal profiler.\n","# Role Objective: Create a psychological profile based on browsing history.\n","# Role Focus: Motivations, psychological characteristics, behavioral patterns, relevant insights.\n","# Role Restrictions: Avoid identification or accusations, no legal advice.\n","# Provided Data: List of web pages visited with titles and timestamps.\n","# Starting Work: Asking the role to perform the task with the provided data.\n","\n","\n","# Create the prompt template. The trailing backslashes join the three\n","# placeholders without inserting a newline between them.\n","template = \"\"\"\n","{role}\\\n","{provided_data}\\\n","{start}\n","\"\"\"\n","prompt = ChatPromptTemplate.from_template(template)"],"metadata":{"id":"OyqoVaYpvlcC","executionInfo":{"status":"ok","timestamp":1716947112094,"user_tz":240,"elapsed":190,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["! wget -q https://raw.githubusercontent.com/frankwxu/digital-forensics-lab/main/AI4Forensics/CKIM2024/Takeout/role.txt\n","! wget -q https://raw.githubusercontent.com/frankwxu/digital-forensics-lab/main/AI4Forensics/CKIM2024/Takeout/titles_with_timestamp.txt\n","! 
wget -q https://raw.githubusercontent.com/frankwxu/digital-forensics-lab/main/AI4Forensics/CKIM2024/Takeout/start.txt"],"metadata":{"id":"uHFvHDPO7Avu","executionInfo":{"status":"ok","timestamp":1716947113034,"user_tz":240,"elapsed":942,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"XEALbQpQSa0U"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["output_parser = StrOutputParser()\n","chain = prompt | model | output_parser\n","\n","\n","def read_prompt_part(path):\n","    \"\"\"Return the full text of the file at `path`, decoded as UTF-8.\n","\n","    The encoding is explicit so decoding does not depend on the platform's\n","    default locale.\n","    \"\"\"\n","    with open(path, \"r\", encoding=\"utf-8\") as file:\n","        return file.read()\n","\n","\n","# The three text files fill the {role}, {provided_data} and {start}\n","# placeholders of the prompt template.\n","role = read_prompt_part(\"role.txt\")\n","provided_data = read_prompt_part(\"titles_with_timestamp.txt\")\n","start = read_prompt_part(\"start.txt\")\n","\n","result = chain.invoke(\n","    {\n","        \"role\": role,\n","        \"provided_data\": provided_data,\n","        \"start\": start,\n","    }\n",")\n","# Render the model's reply as Markdown (last expression = cell output).\n","Markdown(result)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":551},"id":"8P0Xk-4Ovm6Z","executionInfo":{"status":"ok","timestamp":1716947125270,"user_tz":240,"elapsed":12237,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}},"outputId":"2dab6b78-6784-4ad8-b9c5-df610a33ac95"},"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"text/markdown":"**Psychological Profile of the Suspect**\n\n**Possible Motivations:**\n\n* **Academic or Career Advancement:** The suspect's browsing history suggests an interest in career development in the tech industry, particularly in building a successful career. This could indicate a desire for recognition, status, or financial gain.\n* **Knowledge Acquisition:** The suspect has accessed resources on data science, browser history analysis, and human subject research. 
This may indicate a need to acquire specific knowledge or skills for academic or professional purposes.\n* **Data Collection:** The repetitive searches for \"internet browsers history\" and \"pull user browser history\" suggest an interest in collecting or analyzing browsing data. This could be motivated by research, curiosity, or potential misuse.\n\n**Psychological Characteristics:**\n\n* **Curiosity and Intellectual Engagement:** The suspect's browsing history demonstrates an inquisitive nature and a desire to learn about different topics.\n* **Methodical and Analytical:** The suspect has conducted systematic searches and explored multiple resources, indicating an organized and analytical approach to problem-solving.\n* **Secrecy and Privacy Concerns:** The suspect has shown an interest in human subject research and ethical considerations related to data collection. This may reflect a concern for privacy or a desire to operate discreetly.\n\n**Behavioral Patterns:**\n\n* **Extensive Web Browsing:** The suspect has spent significant time browsing the internet, particularly focusing on topics related to technology, data science, and web browser history.\n* **Targeted Searches:** The suspect has conducted specific searches for information on data collection techniques and browser history analysis.\n* **Repeated Access:** The suspect has repeatedly accessed the same websites and resources, suggesting a consistent interest in these topics.\n\n**Other Relevant Insights:**\n\n* **Possible Academic Affiliation:** The suspect's access to the University of Baltimore's MyUB portal and Canvas platform suggests a possible affiliation with the university.\n* **Interest in Technology and Data:** The browsing history reveals a strong interest in technology, data science, and data collection.\n* **Potential Ethical Concerns:** The suspect's interest in human subject research and ethical considerations related to data collection may indicate an awareness of potential ethical 
implications of their actions.\n\n**Caution:** It is important to note that this profile is based solely on the provided web history and does not account for other factors that may influence the suspect's behavior. A comprehensive psychological assessment would be necessary to fully understand the suspect's motivations and intentions."},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":[],"metadata":{"id":"lwhQTTP041tb","executionInfo":{"status":"ok","timestamp":1716947125270,"user_tz":240,"elapsed":2,"user":{"displayName":"Bowie Lab","userId":"13364973424497456678"}}},"execution_count":5,"outputs":[]}]} \ No newline at end of file diff --git a/README.md b/README.md index eb53fe2..8e49b2b 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,8 @@ Here are some political insights based on the leaked email summaries obtained fr - PI of the project - Dr. Frank Xu (Email: fxu at ubalt dot edu) - Students: - - Sarfraz Shaikh (Echo Show) + - Eric Xu: University of Maryland (LLM for Digital Forensics) + - Sarfraz Shaikh: University of Baltimore (Echo Show) - Danny Ferreira (iPhone) - Harleen Kaur (Partial of Android) - Malcolm Hayward (P2P Leakage)