digital-forensics-lab/AI4Forensics/CKIM2024/BrowserHistory/browser_analysis.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip -q install google-generativeai\n",
    "# !pip -q install google-ai-generativelanguage\n",
    "# !pip install python-dotenv\n",
    "# !pip install --upgrade langchain\n",
    "# !pip -q install langchain_experimental langchain_core\n",
    "# !pip -q install langchain-google-genai\n",
    "# !pip show langchain langchain-core\n",
    "# !pip install python-pptx\n",
    "# !pip install numpy\n",
    "# !pip install tenacity\n",
    "# !pip install configparser\n",
    "# !pip install tiktoken\n",
    "# !pip install jieba\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "import os\n",
    "import re\n",
    "import datetime\n",
    "import time\n",
    "import tenacity\n",
    "import argparse\n",
    "import configparser\n",
    "import json\n",
    "import tiktoken\n",
    "import jieba\n",
    "from collections import namedtuple\n",
    "\n",
    "# setup\n",
    "import google.generativeai as genai\n",
    "\n",
    "from IPython.display import display\n",
    "from IPython.display import Markdown\n",
    "\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Read the variables declared in my_config.env\n",
    "load_dotenv(\"my_config.env\")\n",
    "\n",
    "# Fetch the API key from the environment and register it with the genai client\n",
    "GOOGLE_AI_STUDIO = os.environ.get(\"GOOGLE_AI_STUDIO2\")\n",
    "genai.configure(api_key=GOOGLE_AI_STUDIO)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up the model\n",
    "generation_config = {\n",
    "  \"temperature\": 0.0,\n",
    "  \"top_p\": 1,\n",
    "  \"top_k\": 32,\n",
    "  \"max_output_tokens\": 4096,\n",
    "}\n",
    "\n",
    "safety_settings = [\n",
    "    {\"category\": \"HARM_CATEGORY_HARASSMENT\", \"threshold\": \"BLOCK_NONE\"},\n",
    "    {\"category\": \"HARM_CATEGORY_HATE_SPEECH\", \"threshold\": \"BLOCK_NONE\"},\n",
    "    {\"category\": \"HARM_CATEGORY_SEXUALLY_EXPLICIT\", \"threshold\": \"BLOCK_NONE\"},\n",
    "    {\"category\": \"HARM_CATEGORY_DANGEROUS_CONTENT\", \"threshold\": \"BLOCK_NONE\"},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Gemini chat model and the prompt template used for profiling\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "# ChatGoogleGenerativeAI takes the sampling settings as individual\n",
    "# constructor fields (temperature, top_p, top_k, max_output_tokens) rather\n",
    "# than as a single `generation_config` dict, so each value is passed\n",
    "# explicitly to make sure the configured settings are actually applied.\n",
    "model = ChatGoogleGenerativeAI(\n",
    "    model=\"gemini-pro\",\n",
    "    temperature=generation_config[\"temperature\"],\n",
    "    top_p=generation_config[\"top_p\"],\n",
    "    top_k=generation_config[\"top_k\"],\n",
    "    max_output_tokens=generation_config[\"max_output_tokens\"],\n",
    "    safety_settings=safety_settings,\n",
    "    google_api_key=GOOGLE_AI_STUDIO,\n",
    ")\n",
    "\n",
    "# Prompt design notes:\n",
    "# Role Name: Criminal profiler.\n",
    "# Role Task: Create a psychological profile based on browsing history.\n",
    "# Role Focus: Motivations, psychological characteristics, behavioral patterns, relevant insights.\n",
    "# Role Restrictions: Avoid identification or accusations, no legal advice.\n",
    "# Provided Data: List of web pages visited with titles and timestamps.\n",
    "# Starting Work: Asking the role to perform the task with the provided data.\n",
    "\n",
    "# The trailing backslashes are line continuations inside the string, so the\n",
    "# three placeholders are concatenated with no separator between them.\n",
    "template = \"\"\" \n",
    "{role}\\\n",
    "{provided_data}\\\n",
    "{start} \n",
    "\"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**Psychological Profile of the Suspect**\n",
       "\n",
       "**Possible Motivations:**\n",
       "\n",
       "* **Academic curiosity or research:** Browsing websites and papers related to technology, career development, and academic topics suggests a keen interest in knowledge acquisition and professional advancement. \n",
       "* **Interest in human behavior:** The visits to YouTube videos on the topics of digital evidence investigation and psychological warfare could indicate an intrigue in understanding human behavior and possible vulnerabilities.\n",
       "* **Exploration of personal interests:** The suspect may be exploring topics of personal interest, such as history (e.g.,南海仲裁案) and entertainment (e.g., YouTube videos).\n",
       "\n",
       "**Psychological Characteristics:**\n",
       "\n",
       "* **Inquisitive and learning-oriented:** The suspect's browsing history shows a desire for knowledge and a willingness to engage with a variety of subjects.\n",
       "* **Skeptical and critical:** The suspect seems to be discerning in their information consumption, as evidenced by the visit to a website on how to analyze browser history critically.\n",
       "* **Attention to detail:** The suspect exhibits a methodical approach in their research, as seen in the numerous visits to websites on the same topic (e.g., browser history analysis).\n",
       "* **Cautious:** The suspect's searches for information on ethics and human subject research indicate a concern for privacy and the consequences of their actions.\n",
       "\n",
       "**Behavioral Patterns:**\n",
       "\n",
       "* **Secretive or private:** The majority of the browsing history revolves around topics not typically shared publicly, suggesting the suspect may have a secretive or private nature.\n",
       "* **Solitary:** The absence of social media visits and lack of interaction with others online suggest a solitary lifestyle or a preference for solitary activities.\n",
       "* **Organized:** The suspect's browsing history shows a systematic pattern, with repeated visits to specific websites and a focus on particular topics.\n",
       "\n",
       "**Other Relevant Insights:**\n",
       "\n",
       "* **Technical proficiency:** The suspect demonstrates a level of technical proficiency in their ability to navigate various websites and research tools.\n",
       "* **Possible academic affiliation:** The visits to the University of Baltimore's website and the use of academic resources suggest the suspect may be affiliated with an educational institution.\n",
       "* **Potential employment in the technology field:** The browsing history related to building a successful career in tech and Kaggle (a platform for data science) hints at a possible career in the technology industry.\n",
       "\n",
       "**Disclaimer:** This analysis is based solely on the provided web browsing history and should not be taken as a definitive or exhaustive assessment of the suspect's psychological profile or motivations. Further investigation and context would be necessary for a more comprehensive understanding of the individual's behavior and mindset."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def read_text_file(path):\n",
    "    \"\"\"Return the entire contents of the text file at `path`.\"\"\"\n",
    "    # NOTE(review): the file is decoded with the platform default encoding;\n",
    "    # confirm the encoding of the input files before pinning one here.\n",
    "    with open(path, \"r\") as file:\n",
    "        return file.read()\n",
    "\n",
    "\n",
    "# Pipeline: fill the prompt, call the model, reduce the reply to a string\n",
    "output_parser = StrOutputParser()\n",
    "chain = prompt | model | output_parser\n",
    "\n",
    "# Plain relative names resolve against the working directory on every OS;\n",
    "# a \".\\name\" path is only a relative path on Windows.\n",
    "role = read_text_file(\"role.txt\")\n",
    "provided_data = read_text_file(\"titles_with_timestamp.txt\")\n",
    "start = read_text_file(\"start.txt\")\n",
    "\n",
    "result = chain.invoke(\n",
    "    {\n",
    "        \"role\": role,\n",
    "        \"provided_data\": provided_data,\n",
    "        \"start\": start,\n",
    "    }\n",
    ")\n",
    "# Render the model's answer as formatted Markdown in the cell output\n",
    "Markdown(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File saved successfully.\n"
     ]
    }
   ],
   "source": [
    "# Save the generated profile to result.txt in the working directory.\n",
    "# The path is a plain relative name so it works on any OS, and the encoding\n",
    "# is pinned to UTF-8 because the model output can contain non-ASCII text\n",
    "# that a locale default encoding may be unable to encode.\n",
    "with open(\"result.txt\", \"w\", encoding=\"utf-8\") as file:\n",
    "    file.write(result)\n",
    "\n",
    "print(\"File saved successfully.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}