digital-forensics-lab/AI4Forensics/CKIM2024/Takeout/browser_analysis.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-time environment setup (run in a terminal, or prefix with %pip in a cell):\n",
    "# pip -q install google-generativeai==0.3.0\n",
    "# pip -q install google-ai-generativelanguage==0.4.0\n",
    "# pip install python-dotenv\n",
    "# pip install --upgrade langchain\n",
    "# pip -q install langchain_experimental langchain_core\n",
    "# pip -q install langchain-google-genai\n",
    "# pip show langchain langchain-core\n",
    "# pip install python-pptx\n",
    "\n",
    "# Standard library\n",
    "import argparse\n",
    "import configparser\n",
    "import datetime\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "import warnings\n",
    "from collections import namedtuple\n",
    "\n",
    "# Third-party\n",
    "import google.generativeai as genai\n",
    "import jieba\n",
    "import numpy as np\n",
    "import tenacity\n",
    "import tiktoken\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "# Load environment variables from the local .env-style file\n",
    "load_dotenv(\"my_config.env\")\n",
    "\n",
    "# The API key is read from the GOOGLE_AI_STUDIO2 environment variable\n",
    "GOOGLE_AI_STUDIO = os.getenv(\"GOOGLE_AI_STUDIO2\")\n",
    "if not GOOGLE_AI_STUDIO:\n",
    "    # Flag a missing key right away; os.getenv returns None when the variable is absent\n",
    "    warnings.warn(\"GOOGLE_AI_STUDIO2 is not set (checked my_config.env and the process environment); no API key was loaded.\")\n",
    "genai.configure(api_key=GOOGLE_AI_STUDIO)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sampling parameters for the model\n",
    "generation_config = dict(\n",
    "    temperature=0.1,\n",
    "    top_p=1,\n",
    "    top_k=32,\n",
    "    max_output_tokens=4096,\n",
    ")\n",
    "\n",
    "# Every harm category is given the same \"NONE\" threshold\n",
    "safety_settings = [\n",
    "    {\"category\": harm_category, \"threshold\": \"NONE\"}\n",
    "    for harm_category in (\n",
    "        \"HARM_CATEGORY_HARASSMENT\",\n",
    "        \"HARM_CATEGORY_HATE_SPEECH\",\n",
    "        \"HARM_CATEGORY_SEXUALLY_EXPLICIT\",\n",
    "        \"HARM_CATEGORY_DANGEROUS_CONTENT\",\n",
    "    )\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Gemini chat model and the prompt template used for the profiling request\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "# ChatGoogleGenerativeAI takes temperature / top_p / top_k / max_output_tokens as\n",
    "# individual constructor fields and has no `generation_config` constructor field,\n",
    "# so the dict is unpacked into those fields to make sure the settings are applied.\n",
    "# NOTE(review): confirm the installed langchain-google-genai version accepts\n",
    "# safety_settings in this list-of-dicts form.\n",
    "model = ChatGoogleGenerativeAI(\n",
    "    model=\"gemini-pro\",\n",
    "    safety_settings=safety_settings,\n",
    "    google_api_key=GOOGLE_AI_STUDIO,\n",
    "    **generation_config,\n",
    ")\n",
    "\n",
    "# Role Name: Criminal profiler.\n",
    "# Role Task: Create a psychological profile based on browsing history.\n",
    "# Role Focus: Motivations, psychological characteristics, behavioral patterns, relevant insights.\n",
    "# Role Restrictions: Avoid identification or accusations, no legal advice.\n",
    "# Provided Data: List of web pages visited with titles and timestamps.\n",
    "# Starting Work: Asking the role to perform the task with the provided data.\n",
    "\n",
    "# The trailing backslashes are line continuations inside the string literal, so\n",
    "# {role}, {provided_data} and {start} are concatenated with no separator between them.\n",
    "template = \"\"\" \n",
    "{role}\\\n",
    "{provided_data}\\\n",
    "{start} \n",
    "\"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**Psychological Profile of the Suspect:**\n",
       "\n",
       "**Motivations:**\n",
       "\n",
       "* **Curiosity and Exploration:** The suspect appears to have a strong curiosity about various topics, as evidenced by their browsing history, which includes academic papers, technology articles, and news.\n",
       "* **Academic or Professional Development:** The suspect may be seeking knowledge and skills to advance their career or academic pursuits, as suggested by their visits to Kaggle and resources related to data science and tech.\n",
       "* **Personal Interests:** The suspect's browsing history also reveals a personal interest in history, human rights, and various hobbies, indicating a diverse range of interests.\n",
       "* **Information Gathering:** The suspect may be gathering information for personal, educational, or professional purposes, as seen in their searches related to browser history analysis, ethical human subject research, and how to capture internet browsing data.\n",
       "\n",
       "**Psychological Characteristics:**\n",
       "\n",
       "* **Intellectually Curious and Driven:** The suspect's browsing behavior suggests a high level of intellectual curiosity and a drive for self-improvement through knowledge acquisition.\n",
       "* **Methodical and Analytical:** The suspect appears to approach information gathering systematically, as seen in their searches for specific datasets and tutorials on data analysis methods.\n",
       "* **Tech-Savvy and Resourceful:** The suspect demonstrates proficiency in navigating online resources and utilizing various tools and software related to data analysis and technology.\n",
       "* **Cautious and Privacy-Conscious:** The suspect's searches related to obtaining internet browser histories and human subject research ethics indicate a certain level of caution and concern for privacy issues.\n",
       "\n",
       "**Behavioral Patterns:**\n",
       "\n",
       "* **Persistent and Goal-Oriented:** The suspect's repeated visits to specific websites and resources over several days suggest persistence and a focused approach to their information-gathering activities.\n",
       "* **Methodical and Organized:** The suspect's searches appear well-organized and targeted, indicating a methodical approach to exploring and obtaining information.\n",
       "* **Selective and Discerning:** The suspect's browsing history reveals a selective focus on specific topics and resources, suggesting they are actively filtering and selecting the information they consume.\n",
       "\n",
       "**Additional Insights:**\n",
       "\n",
       "* The suspect may be a student or researcher engaged in an academic or professional project that requires data analysis or information gathering.\n",
       "* The suspect may be a tech enthusiast or entrepreneur seeking knowledge and resources to develop their skills and advance their career.\n",
       "* The suspect's privacy concerns and ethical considerations suggest a responsible and ethical approach to information gathering.\n",
       "\n",
       "**Important Note:** This analysis is based solely on the browsing history provided. Actual profiling and assessment of a suspect would require a comprehensive investigation involving other factors and evidence, such as interviews, background checks, and behavioral observations."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pipeline: fill the prompt template, call the model, return the reply as a plain string\n",
    "output_parser = StrOutputParser()\n",
    "chain = prompt | model | output_parser\n",
    "\n",
    "\n",
    "def read_prompt_part(filename):\n",
    "    \"\"\"Return the entire text content of `filename`.\n",
    "\n",
    "    The file is opened in text mode with the platform's default encoding.\n",
    "    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.\n",
    "    \"\"\"\n",
    "    with open(filename, \"r\") as file:\n",
    "        return file.read()\n",
    "\n",
    "\n",
    "# Plain relative file names resolve against the working directory on every OS\n",
    "role = read_prompt_part(\"role.txt\")\n",
    "provided_data = read_prompt_part(\"titles_with_timestamp.txt\")\n",
    "start = read_prompt_part(\"start.txt\")\n",
    "\n",
    "result = chain.invoke(\n",
    "    {\n",
    "        \"role\": role,\n",
    "        \"provided_data\": provided_data,\n",
    "        \"start\": start,\n",
    "    }\n",
    ")\n",
    "Markdown(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File saved successfully.\n"
     ]
    }
   ],
   "source": [
    "# Save the model's reply to result.txt in the working directory;\n",
    "# 'w' mode creates the file if needed and overwrites any previous content.\n",
    "# UTF-8 is set explicitly so characters outside the platform's default\n",
    "# encoding cannot make the write fail.\n",
    "with open(\"result.txt\", \"w\", encoding=\"utf-8\") as file:\n",
    "    file.write(result)\n",
    "\n",
    "print(\"File saved successfully.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}