diff --git a/agent_evidence_many_tables.ipynb b/agent_evidence_many_tables.ipynb index 7752c2f..07e9ca5 100644 --- a/agent_evidence_many_tables.ipynb +++ b/agent_evidence_many_tables.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a10c9a6a", "metadata": {}, "outputs": [ @@ -62,9 +62,9 @@ "\n", "print(response.content)\n", "\n", - "# DB_PATH = r\"msgstore.db\"\n", + "DB_PATH = r\"msgstore.db\"\n", "# DB_PATH = r\"users4.db\"\n", - "DB_PATH = r\"test2.db\"\n", + "# DB_PATH = r\"test2.db\"\n", "# DB_PATH = r\"F:\\mobile_images\\Cellebriate_2024\\Cellebrite_CTF_File1\\CellebriteCTF24_Sharon\\Sharon\\EXTRACTION_FFS 01\\EXTRACTION_FFS\\Dump\\data\\data\\com.whatsapp\\databases\\stickers.db\"\n", "# DB_PATH = r\"F:\\mobile_images\\Cellebriate_2024\\Cellebrite_CTF_File1\\CellebriteCTF24_Sharon\\Sharon\\EXTRACTION_FFS 01\\EXTRACTION_FFS\\Dump\\data\\data\\com.android.vending\\databases\\localappstate.db\"\n", "\n", @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "48eda3ec", "metadata": {}, "outputs": [], @@ -201,10 +201,11 @@ " return SystemMessage(\n", " content=(\n", " \"You are a SQL planner. You are provided databases that are extracted from Android or iOS devices.\\n\"\n", - " f\"Goal: discover if any column contains {target}.\\n\\n\"\n", + " f\"Goal: discover if any column contains {target} from databases.\\n\\n\"\n", " \"Rules:\\n\"\n", " \"- Use 'REGEXP' for pattern matching.\\n\"\n", - " f\"- Example: SELECT col FROM table WHERE col REGEXP '{regex}' LIMIT 5\\n\"\n", + " f\"- Example: SELECT col FROM table WHERE col REGEXP '{regex}' LIMIT 10\\n\"\n", + " f\"- pay special attention to tables and/or columns related to message/chat/text. {target} may be embedded in these text.\\n\"\n", " \"- Validate your SQL and make sure all tables and columns do exist.\\n\"\n", " \"- If multiple SQL statements are provided, combine them using UNION ALL.\\n\"\n", " \"- Return ONLY SQL.\"\n", @@ -510,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "0b1fce49", "metadata": {}, "outputs": [ @@ -522,18 +523,34 @@ "=== STATE SNAPSHOT ===\n", "\n", "--- MESSAGES ---\n", - "0: HUMAN -> Find PERSON_NAME in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n", + "0: HUMAN -> Find PHONE in the database\n", + "1: AI -> SELECT * FROM call_log WHERE call_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n", + "SELECT * FROM missed_call_logs WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM missed_call_log_participant WHERE jid REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_call_log WHERE call_log_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM bcall_session WHERE caption REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_bcall_session WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d';\n", "\n", "--- BEGIN METADATA ---\n", "attempt : 1\n", "max_attempts : 1\n", "phase : discovery\n", - "sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n", + "sql : SELECT * FROM call_log WHERE call_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n", + "SELECT * FROM missed_call_logs WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM missed_call_log_participant WHERE jid REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_call_log WHERE call_log_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM bcall_session WHERE caption REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_bcall_session WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d';\n", "discovered sql : []\n", "rows : None\n", "classification: None\n", @@ -542,37 +559,53 @@ "\n", "--- END METADATA ---\n", "[EXECUTE] Running query\n", - "[SQL ERROR]: near \"[A-Za-z\\u4e00-\\u9fff]\": syntax error\n", + "[SQL ERROR]: SELECTs to the left and right of UNION ALL do not have the same number of result columns\n", "[SQL EXEC] Retrieved []\n", "\n", "=== STATE SNAPSHOT ===\n", "\n", "--- MESSAGES ---\n", - "0: HUMAN -> Find PERSON_NAME in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n", + "0: HUMAN -> Find PHONE in the database\n", + "1: AI -> SELECT * FROM call_log WHERE call_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n", + "SELECT * FROM missed_call_logs WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM missed_call_log_participant WHERE jid REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_call_log WHERE call_log_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM bcall_session WHERE caption REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_bcall_session WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d';\n", "2: AI -> Retrieved 0 rows\n", "\n", "--- BEGIN METADATA ---\n", "attempt : 1\n", "max_attempts : 1\n", "phase : discovery\n", - "sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n", + "sql : SELECT * FROM call_log WHERE call_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n", + "SELECT * FROM missed_call_logs WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM missed_call_log_participant WHERE jid REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_call_log WHERE call_log_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM bcall_session WHERE caption REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d'\n", + "UNION ALL\n", + "SELECT * FROM message_bcall_session WHERE message_row_id REGEXP '\\+?\\d[\\d\\s().-]{7,}\\d';\n", "discovered sql : []\n", "rows : []\n", - "classification: {'found': False, 'confidence': 0, 'reason': 'The text does not contain any names.'}\n", + "classification: {'found': False, 'confidence': 0, 'reason': 'No phone-related content provided.'}\n", "evidence : []\n", "Source Columns: []\n", "\n", "--- END METADATA ---\n", "\n", "========================================\n", - " 🏁 FORENSIC REPORT: PERSON_NAME \n", + " 🏁 FORENSIC REPORT: PHONE \n", "========================================\n", - "❌ No PERSON_NAME were extracted.\n", + "❌ No PHONE were extracted.\n", "Last Phase : discovery\n", "Attempts : 1\n", "========================================\n" @@ -583,9 +616,9 @@ "\n", "# Set your target here once\n", "# TARGET = \"EMAIL\" \n", - "# TARGET = \"PHONE\"\n", + "TARGET = \"PHONE\"\n", "# TARGET = \"USERNAME\"\n", - "TARGET = \"PERSON_NAME\"\n", + "# TARGET = \"PERSON_NAME\"\n", "\n", "result = app.invoke({\n", " \"messages\": [HumanMessage(content=f\"Find {TARGET} in the database\")],\n",