mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-04-10 12:13:44 +00:00
Improve code to deal with a database that has lots of tables
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 1,
|
||||
"id": "2648a1f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -26,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"id": "a10c9a6a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -64,8 +64,9 @@
|
||||
"\n",
|
||||
"# DB_PATH = r\"msgstore.db\"\n",
|
||||
"# DB_PATH = r\"users4.db\"\n",
|
||||
"DB_PATH = r\"test2.db\"\n",
|
||||
"# DB_PATH = r\"F:\\mobile_images\\Cellebriate_2024\\Cellebrite_CTF_File1\\CellebriteCTF24_Sharon\\Sharon\\EXTRACTION_FFS 01\\EXTRACTION_FFS\\Dump\\data\\data\\com.whatsapp\\databases\\stickers.db\"\n",
|
||||
"DB_PATH = r\"F:\\mobile_images\\Cellebriate_2024\\Cellebrite_CTF_File1\\CellebriteCTF24_Sharon\\Sharon\\EXTRACTION_FFS 01\\EXTRACTION_FFS\\Dump\\data\\data\\com.android.vending\\databases\\localappstate.db\"\n",
|
||||
"# DB_PATH = r\"F:\\mobile_images\\Cellebriate_2024\\Cellebrite_CTF_File1\\CellebriteCTF24_Sharon\\Sharon\\EXTRACTION_FFS 01\\EXTRACTION_FFS\\Dump\\data\\data\\com.android.vending\\databases\\localappstate.db\"\n",
|
||||
"\n",
|
||||
"ENTITY_CONFIG = {\n",
|
||||
" \"EMAIL\": {\n",
|
||||
@@ -81,7 +82,7 @@
|
||||
" \"desc\": \"application-specific usernames, handles, or account identifiers\"\n",
|
||||
" },\n",
|
||||
" \"PERSON_NAME\": {\n",
|
||||
" \"regex\": r\"[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\",\n",
|
||||
" \"regex\": r\"[A-Za-z][A-Za-z\\s\\.\\-]{1,50}\",\n",
|
||||
" \"desc\": (\n",
|
||||
" \"loosely structured human name-like strings used only for discovery \"\n",
|
||||
" \"and column pre-filtering; final identification is performed during extraction\"\n",
|
||||
@@ -93,7 +94,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 3,
|
||||
"id": "48eda3ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -422,7 +423,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 4,
|
||||
"id": "0f5259d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -509,7 +510,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "0b1fce49",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -521,30 +522,18 @@
|
||||
"=== STATE SNAPSHOT ===\n",
|
||||
"\n",
|
||||
"--- MESSAGES ---\n",
|
||||
"0: HUMAN -> Find PHONE in the database\n",
|
||||
"1: AI -> SELECT package_name FROM appstate WHERE package_name REGEXP 'PHONE' \n",
|
||||
"0: HUMAN -> Find PERSON_NAME in the database\n",
|
||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n",
|
||||
"UNION ALL\n",
|
||||
"SELECT account FROM appstate WHERE account REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT title FROM appstate WHERE title REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT referrer FROM appstate WHERE referrer REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT continue_url FROM appstate WHERE continue_url REGEXP 'PHONE';\n",
|
||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n",
|
||||
"\n",
|
||||
"--- BEGIN METADATA ---\n",
|
||||
"attempt : 1\n",
|
||||
"max_attempts : 1\n",
|
||||
"phase : discovery\n",
|
||||
"sql : SELECT package_name FROM appstate WHERE package_name REGEXP 'PHONE' \n",
|
||||
"sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n",
|
||||
"UNION ALL\n",
|
||||
"SELECT account FROM appstate WHERE account REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT title FROM appstate WHERE title REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT referrer FROM appstate WHERE referrer REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT continue_url FROM appstate WHERE continue_url REGEXP 'PHONE';\n",
|
||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n",
|
||||
"discovered sql : []\n",
|
||||
"rows : None\n",
|
||||
"classification: None\n",
|
||||
@@ -553,48 +542,37 @@
|
||||
"\n",
|
||||
"--- END METADATA ---\n",
|
||||
"[EXECUTE] Running query\n",
|
||||
"[SQL ERROR]: near \"[A-Za-z\\u4e00-\\u9fff]\": syntax error\n",
|
||||
"[SQL EXEC] Retrieved []\n",
|
||||
"\n",
|
||||
"=== STATE SNAPSHOT ===\n",
|
||||
"\n",
|
||||
"--- MESSAGES ---\n",
|
||||
"0: HUMAN -> Find PHONE in the database\n",
|
||||
"1: AI -> SELECT package_name FROM appstate WHERE package_name REGEXP 'PHONE' \n",
|
||||
"0: HUMAN -> Find PERSON_NAME in the database\n",
|
||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n",
|
||||
"UNION ALL\n",
|
||||
"SELECT account FROM appstate WHERE account REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT title FROM appstate WHERE title REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT referrer FROM appstate WHERE referrer REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT continue_url FROM appstate WHERE continue_url REGEXP 'PHONE';\n",
|
||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n",
|
||||
"2: AI -> Retrieved 0 rows\n",
|
||||
"\n",
|
||||
"--- BEGIN METADATA ---\n",
|
||||
"attempt : 1\n",
|
||||
"max_attempts : 1\n",
|
||||
"phase : discovery\n",
|
||||
"sql : SELECT package_name FROM appstate WHERE package_name REGEXP 'PHONE' \n",
|
||||
"sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}\n",
|
||||
"UNION ALL\n",
|
||||
"SELECT account FROM appstate WHERE account REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT title FROM appstate WHERE title REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT referrer FROM appstate WHERE referrer REGEXP 'PHONE' \n",
|
||||
"UNION ALL\n",
|
||||
"SELECT continue_url FROM appstate WHERE continue_url REGEXP 'PHONE';\n",
|
||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z\\u4e00-\\u9fff][A-Za-z\\u4e00-\\u9fff\\s\\.\\-]{1,50}'\n",
|
||||
"discovered sql : []\n",
|
||||
"rows : []\n",
|
||||
"classification: {'found': False, 'confidence': 0, 'reason': 'The text does not contain any information or context related to a phone.'}\n",
|
||||
"classification: {'found': False, 'confidence': 0, 'reason': 'The text does not contain any names.'}\n",
|
||||
"evidence : []\n",
|
||||
"Source Columns: []\n",
|
||||
"\n",
|
||||
"--- END METADATA ---\n",
|
||||
"\n",
|
||||
"========================================\n",
|
||||
" 🏁 FORENSIC REPORT: PHONE \n",
|
||||
" 🏁 FORENSIC REPORT: PERSON_NAME \n",
|
||||
"========================================\n",
|
||||
"❌ No PHONE were extracted.\n",
|
||||
"❌ No PERSON_NAME were extracted.\n",
|
||||
"Last Phase : discovery\n",
|
||||
"Attempts : 1\n",
|
||||
"========================================\n"
|
||||
@@ -604,10 +582,10 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"# Set your target here once\n",
|
||||
"TARGET = \"EMAIL\" \n",
|
||||
"# TARGET = \"EMAIL\" \n",
|
||||
"# TARGET = \"PHONE\"\n",
|
||||
"# TARGET = \"USERNAME\"\n",
|
||||
"# TARGET = \"PERSON_NAME\"\n",
|
||||
"TARGET = \"PERSON_NAME\"\n",
|
||||
"\n",
|
||||
"result = app.invoke({\n",
|
||||
" \"messages\": [HumanMessage(content=f\"Find {TARGET} in the database\")],\n",
|
||||
Reference in New Issue
Block a user