mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-04-10 12:13:44 +00:00
update RQs
This commit is contained in:
@@ -164,7 +164,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -207,7 +207,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -168,7 +168,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -318,7 +318,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
\begin{table}[th]
|
||||
\centering
|
||||
\caption{Search space reduction during row-level PII extraction.}
|
||||
\label{tab:search_space_reduction}
|
||||
\small
|
||||
\begin{tabular}{|l|l|p{1.3cm}|p{1.7cm}|p{1.0cm}|}
|
||||
\hline
|
||||
\textbf{ID} & \textbf{Apps} & \textbf{Candidate Cols (Total)} & \textbf{Cols Scanned (Extraction)} & \textbf{Reduc. (\%)} \\
|
||||
\hline
|
||||
A1 & WhatsApp & 1637 & 7 & 99.57\% \\
|
||||
\hline
|
||||
A2 & Snapchat & 848 & 107 & 87.38\% \\
|
||||
\hline
|
||||
A3 & Telegram & 1197 & 0 & 100.00\% \\
|
||||
\hline
|
||||
A4 & Google Maps & 80 & 2 & 97.50\% \\
|
||||
\hline
|
||||
A5 & Samsung Internet & 185 & 0 & 100.00\% \\
|
||||
\hline
|
||||
I1 & WhatsApp & 328 & 0 & 100.00\% \\
|
||||
\hline
|
||||
I2 & Contacts & 13 & 0 & 100.00\% \\
|
||||
\hline
|
||||
I3 & Apple Messages & 186 & 0 & 100.00\% \\
|
||||
\hline
|
||||
I4 & Safari & 74 & 0 & 100.00\% \\
|
||||
\hline
|
||||
I5 & Calendar & 541 & 0 & 100.00\% \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
@@ -21,11 +21,11 @@
|
||||
"\\hline\n",
|
||||
"Phone Number & 1051 & 1184 & 734 & 69.8\\% & 62.0\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"User Name & 3446 & 1234 & 1232 & 35.8\\% & 99.8\\% \\\\\n",
|
||||
"User Name & 2269 & 1234 & 1232 & 54.3\\% & 99.8\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"Person Name & 1134 & 2157 & 1087 & 95.9\\% & 50.4\\% \\\\\n",
|
||||
"Person Name & 1640 & 2157 & 1262 & 77.0\\% & 58.5\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"Postal Address & 21 & 2 & 2 & 9.5\\% & 100.0\\% \\\\\n",
|
||||
"Postal Address & 11 & 2 & 2 & 18.2\\% & 100.0\\% \\\\\n",
|
||||
"\\hline\n"
|
||||
]
|
||||
}
|
||||
@@ -106,7 +106,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "1affac71",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -14,11 +14,11 @@
|
||||
"\n",
|
||||
"PII Type | GT DBs | System DBs | Overlap | Coverage\n",
|
||||
"---------------+--------+------------+---------+---------\n",
|
||||
"Email Address | 0 | 6 | 0 | 0.0% \n",
|
||||
"Phone Number | 0 | 6 | 0 | 0.0% \n",
|
||||
"User Name | 0 | 10 | 0 | 0.0% \n",
|
||||
"Person Name | 0 | 7 | 0 | 0.0% \n",
|
||||
"Postal Address | 0 | 1 | 0 | 0.0% \n",
|
||||
"Email Address | 6 | 7 | 6 | 100.0% \n",
|
||||
"Phone Number | 9 | 7 | 6 | 66.7% \n",
|
||||
"User Name | 6 | 4 | 4 | 66.7% \n",
|
||||
"Person Name | 12 | 11 | 9 | 75.0% \n",
|
||||
"Postal Address | 2 | 1 | 1 | 50.0% \n",
|
||||
"\n",
|
||||
"LATEX TABULAR\n",
|
||||
"\n",
|
||||
@@ -26,15 +26,15 @@
|
||||
"\\hline\n",
|
||||
"\\textbf{PII Type} &\\textbf{DBs with PII (GT)} &\\textbf{DBs with discoveries (System)} &\\textbf{Overlap} &\\textbf{Coverage} \\\\\n",
|
||||
"\\hline\n",
|
||||
"Email Address & 0 & 6 & 0 & 0.0\\% \\\\\n",
|
||||
"Email Address & 6 & 7 & 6 & 100.0\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"Phone Number & 0 & 6 & 0 & 0.0\\% \\\\\n",
|
||||
"Phone Number & 9 & 7 & 6 & 66.7\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"User Name & 0 & 10 & 0 & 0.0\\% \\\\\n",
|
||||
"User Name & 6 & 4 & 4 & 66.7\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"Person Name & 0 & 7 & 0 & 0.0\\% \\\\\n",
|
||||
"Person Name & 12 & 11 & 9 & 75.0\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"Postal Address & 0 & 1 & 0 & 0.0\\% \\\\\n",
|
||||
"Postal Address & 2 & 1 & 1 & 50.0\\% \\\\\n",
|
||||
"\\hline\n",
|
||||
"\\end{tabular}\n"
|
||||
]
|
||||
@@ -196,7 +196,7 @@
|
||||
"def main() -> None:\n",
|
||||
" # Define these inside main so importing this module has no side effects.\n",
|
||||
" SYSTEM_DIR = Path(r\"..\\normalized_PII_results\\GPT-5.1\\db_level\")\n",
|
||||
" GT_DIR = Path(r\"..\\normalized_PII_results\\GPT-5.1\\app_level\")\n",
|
||||
" GT_DIR = Path(r\"..\\normalized_PII_results\\ground_truth\\db_level\")\n",
|
||||
" \n",
|
||||
" gt_sets = collect_db_sets(GT_DIR, [k for k, _ in PII_TYPES])\n",
|
||||
" sys_sets = collect_db_sets(SYSTEM_DIR, [k for k, _ in PII_TYPES])\n",
|
||||
@@ -216,7 +216,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -29,10 +29,17 @@
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Add the parent directory (RQs) to the path to find the config file\n",
|
||||
"if '..' not in sys.path:\n",
|
||||
" sys.path.insert(1, os.path.abspath('..'))\n",
|
||||
"import config"
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# Resolve paths relative to this notebook so execution is independent of kernel cwd.\n",
|
||||
"NOTEBOOK_DIR = Path.cwd()\n",
|
||||
"if not (NOTEBOOK_DIR / 'RQ2_t8.ipynb').exists():\n",
|
||||
" NOTEBOOK_DIR = Path(r'i:/project2026/llmagent/RQs/RQ2')\n",
|
||||
"RQS_DIR = NOTEBOOK_DIR.parent\n",
|
||||
"\n",
|
||||
"if str(RQS_DIR) not in sys.path:\n",
|
||||
" sys.path.insert(1, str(RQS_DIR))\n",
|
||||
"import config\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,8 +51,10 @@
|
||||
"def parse_filename(filepath):\n",
|
||||
" \"\"\"Parses a filename to extract the app ID and database name.\"\"\"\n",
|
||||
" base_name = os.path.basename(filepath)\n",
|
||||
" # Format: PII_{APP_ID}_{DB_NAME}_{TIMESTAMP}.jsonl\n",
|
||||
" match = re.match(r'PII_([A-Z0-9]+)_(.*)_\\d{8}T\\d{6}Z\\.jsonl', base_name)\n",
|
||||
" # Support both formats:\n",
|
||||
" # 1) PII_{APP_ID}_{DB_NAME}_{TIMESTAMP}.jsonl\n",
|
||||
" # 2) PII_{APP_ID}_{DB_NAME}.jsonl\n",
|
||||
" match = re.match(r'PII_([A-Z0-9]+)_(.*?)(?:_\\d{8}T\\d{6}Z)?\\.jsonl$', base_name)\n",
|
||||
" if match:\n",
|
||||
" app_id = match.group(1)\n",
|
||||
" db_name = match.group(2)\n",
|
||||
@@ -69,8 +78,8 @@
|
||||
" data[app_id][db_name][pii_type] = True\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"gt_data = load_data(os.path.join('..', config.GROUND_TRUTH_DIR))\n",
|
||||
"system_data = load_data(os.path.join('..', config.GPT4O_RESULTS_DIR))"
|
||||
"gt_data = load_data(str(RQS_DIR / config.GROUND_TRUTH_DIR))\n",
|
||||
"system_data = load_data(str(RQS_DIR / config.GPT4O_RESULTS_DIR))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -178,8 +187,8 @@
|
||||
" <th>A1</th>\n",
|
||||
" <td>WhatsApp</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>1/2</td>\n",
|
||||
" <td>1/2</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
@@ -189,8 +198,8 @@
|
||||
" <td>Snapchat</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" <td>1/2</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" </tr>\n",
|
||||
@@ -208,9 +217,9 @@
|
||||
" <th>A4</th>\n",
|
||||
" <td>Google Maps</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" </tr>\n",
|
||||
@@ -219,20 +228,20 @@
|
||||
" <td>Samsung Internet</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I1</th>\n",
|
||||
" <td>WhatsApp (iOS)</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>1/2</td>\n",
|
||||
" <td>0/1</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>2/2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I2</th>\n",
|
||||
@@ -241,7 +250,7 @@
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0/1</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@@ -249,8 +258,8 @@
|
||||
" <td>Apple Messages</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>0/1</td>\n",
|
||||
" <td>0/1</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" </tr>\n",
|
||||
@@ -259,10 +268,10 @@
|
||||
" <td>Safari</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0/2</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>0/2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I5</th>\n",
|
||||
@@ -270,7 +279,7 @@
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1/1</td>\n",
|
||||
" </tr>\n",
|
||||
@@ -281,16 +290,16 @@
|
||||
"text/plain": [
|
||||
" Application Email Phone User Name Person Name Postal Address All PII\n",
|
||||
"ID \n",
|
||||
"A1 WhatsApp - 2/2 1/1 2/2 - 2/2\n",
|
||||
"A2 Snapchat 1/1 2/2 1/1 2/2 - 2/2\n",
|
||||
"A1 WhatsApp - 1/2 1/2 2/2 - 2/2\n",
|
||||
"A2 Snapchat 1/1 2/2 2/2 1/2 - 2/2\n",
|
||||
"A3 Telegram - - - - - -\n",
|
||||
"A4 Google Maps 1/1 - 1/1 - - 1/1\n",
|
||||
"A5 Samsung Internet 1/1 - 1/1 - - 1/1\n",
|
||||
"I1 WhatsApp (iOS) - - - 1/1 1/1 1/1\n",
|
||||
"I2 Contacts 1/1 1/1 - 1/1 - 1/1\n",
|
||||
"I3 Apple Messages 1/1 0/1 0/1 1/1 - 1/1\n",
|
||||
"I4 Safari - - 1/1 - - 1/1\n",
|
||||
"I5 Calendar 1/1 - - - - 1/1"
|
||||
"A4 Google Maps 1/1 1/1 1/1 1/1 - 1/1\n",
|
||||
"A5 Samsung Internet 1/1 - - 1/1 - 2/2\n",
|
||||
"I1 WhatsApp (iOS) - 1/2 0/1 2/2 1/1 2/2\n",
|
||||
"I2 Contacts 1/1 1/1 - 1/1 0/1 1/1\n",
|
||||
"I3 Apple Messages 1/1 0/1 - - - 1/1\n",
|
||||
"I4 Safari - - - 0/2 - 0/2\n",
|
||||
"I5 Calendar 1/1 - - 1/1 - 1/1"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -328,16 +337,16 @@
|
||||
" & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n",
|
||||
"ID & & & & & & & \\\\\n",
|
||||
"\\midrule\n",
|
||||
"A1 & WhatsApp & - & 2/2 & 1/1 & 2/2 & - & 2/2 \\\\\n",
|
||||
"A2 & Snapchat & 1/1 & 2/2 & 1/1 & 2/2 & - & 2/2 \\\\\n",
|
||||
"A1 & WhatsApp & - & 1/2 & 1/2 & 2/2 & - & 2/2 \\\\\n",
|
||||
"A2 & Snapchat & 1/1 & 2/2 & 2/2 & 1/2 & - & 2/2 \\\\\n",
|
||||
"A3 & Telegram & - & - & - & - & - & - \\\\\n",
|
||||
"A4 & Google Maps & 1/1 & - & 1/1 & - & - & 1/1 \\\\\n",
|
||||
"A5 & Samsung Internet & 1/1 & - & 1/1 & - & - & 1/1 \\\\\n",
|
||||
"I1 & WhatsApp (iOS) & - & - & - & 1/1 & 1/1 & 1/1 \\\\\n",
|
||||
"I2 & Contacts & 1/1 & 1/1 & - & 1/1 & - & 1/1 \\\\\n",
|
||||
"I3 & Apple Messages & 1/1 & 0/1 & 0/1 & 1/1 & - & 1/1 \\\\\n",
|
||||
"I4 & Safari & - & - & 1/1 & - & - & 1/1 \\\\\n",
|
||||
"I5 & Calendar & 1/1 & - & - & - & - & 1/1 \\\\\n",
|
||||
"A4 & Google Maps & 1/1 & 1/1 & 1/1 & 1/1 & - & 1/1 \\\\\n",
|
||||
"A5 & Samsung Internet & 1/1 & - & - & 1/1 & - & 2/2 \\\\\n",
|
||||
"I1 & WhatsApp (iOS) & - & 1/2 & 0/1 & 2/2 & 1/1 & 2/2 \\\\\n",
|
||||
"I2 & Contacts & 1/1 & 1/1 & - & 1/1 & 0/1 & 1/1 \\\\\n",
|
||||
"I3 & Apple Messages & 1/1 & 0/1 & - & - & - & 1/1 \\\\\n",
|
||||
"I4 & Safari & - & - & - & 0/2 & - & 0/2 \\\\\n",
|
||||
"I5 & Calendar & 1/1 & - & - & 1/1 & - & 1/1 \\\\\n",
|
||||
"\\bottomrule\n",
|
||||
"\\end{tabular}\n",
|
||||
"\\end{table}\n",
|
||||
@@ -354,7 +363,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -187,21 +187,21 @@
|
||||
" <th>A1</th>\n",
|
||||
" <td>WhatsApp</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.96</td>\n",
|
||||
" <td>0.50</td>\n",
|
||||
" <td>0.68</td>\n",
|
||||
" <td>0.91</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.45</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.79</td>\n",
|
||||
" <td>0.21</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>A2</th>\n",
|
||||
" <td>Snapchat</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>0.74</td>\n",
|
||||
" <td>0.33</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.79</td>\n",
|
||||
" <td>0.64</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>A3</th>\n",
|
||||
@@ -217,9 +217,9 @@
|
||||
" <th>A4</th>\n",
|
||||
" <td>Google Maps</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" </tr>\n",
|
||||
@@ -228,50 +228,50 @@
|
||||
" <td>Samsung Internet</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.20</td>\n",
|
||||
" <td>0.50</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I1</th>\n",
|
||||
" <td>WhatsApp (iOS)</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.98</td>\n",
|
||||
" <td>0.50</td>\n",
|
||||
" <td>0.51</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I2</th>\n",
|
||||
" <td>Contacts</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>0.47</td>\n",
|
||||
" <td>0.71</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.86</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.65</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.36</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I3</th>\n",
|
||||
" <td>Apple Messages</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.33</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.19</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I4</th>\n",
|
||||
" <td>Safari</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.02</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.02</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>I5</th>\n",
|
||||
@@ -279,9 +279,9 @@
|
||||
" <td>1.00</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>0.20</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>1.00</td>\n",
|
||||
" <td>0.33</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
@@ -290,16 +290,16 @@
|
||||
"text/plain": [
|
||||
" Application Email Phone User Name Person Name Postal Address All PII\n",
|
||||
"ID \n",
|
||||
"A1 WhatsApp - 0.96 0.50 0.68 - 0.79\n",
|
||||
"A2 Snapchat 1.00 1.00 0.33 1.00 - 0.79\n",
|
||||
"A1 WhatsApp - 0.91 0.00 0.45 - 0.21\n",
|
||||
"A2 Snapchat 1.00 1.00 0.74 0.33 - 0.64\n",
|
||||
"A3 Telegram - - - - - -\n",
|
||||
"A4 Google Maps 1.00 - 1.00 - - 1.00\n",
|
||||
"A5 Samsung Internet 1.00 - 0.00 - - 0.20\n",
|
||||
"I1 WhatsApp (iOS) - - - 1.00 1.00 1.00\n",
|
||||
"I2 Contacts 1.00 0.47 - 0.86 - 0.65\n",
|
||||
"I3 Apple Messages 1.00 0.00 0.00 1.00 - 0.33\n",
|
||||
"I4 Safari - - 0.02 - - 0.02\n",
|
||||
"I5 Calendar 1.00 - - - - 1.00"
|
||||
"A4 Google Maps 1.00 1.00 1.00 1.00 - 1.00\n",
|
||||
"A5 Samsung Internet 1.00 - - 0.00 - 0.50\n",
|
||||
"I1 WhatsApp (iOS) - 0.00 0.00 0.98 0.50 0.51\n",
|
||||
"I2 Contacts 1.00 0.71 - 0.00 0.00 0.36\n",
|
||||
"I3 Apple Messages 1.00 0.00 - - - 0.19\n",
|
||||
"I4 Safari - - - 0.00 - 0.00\n",
|
||||
"I5 Calendar 1.00 - - 0.20 - 0.33"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -342,16 +342,16 @@
|
||||
" & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n",
|
||||
"ID & & & & & & & \\\\\n",
|
||||
"\\midrule\n",
|
||||
"A1 & WhatsApp & - & 0.96 & 0.50 & 0.68 & - & 0.79 \\\\\n",
|
||||
"A2 & Snapchat & 1.00 & 1.00 & 0.33 & 1.00 & - & 0.79 \\\\\n",
|
||||
"A1 & WhatsApp & - & 0.91 & 0.00 & 0.45 & - & 0.21 \\\\\n",
|
||||
"A2 & Snapchat & 1.00 & 1.00 & 0.74 & 0.33 & - & 0.64 \\\\\n",
|
||||
"A3 & Telegram & - & - & - & - & - & - \\\\\n",
|
||||
"A4 & Google Maps & 1.00 & - & 1.00 & - & - & 1.00 \\\\\n",
|
||||
"A5 & Samsung Internet & 1.00 & - & 0.00 & - & - & 0.20 \\\\\n",
|
||||
"I1 & WhatsApp (iOS) & - & - & - & 1.00 & 1.00 & 1.00 \\\\\n",
|
||||
"I2 & Contacts & 1.00 & 0.47 & - & 0.86 & - & 0.65 \\\\\n",
|
||||
"I3 & Apple Messages & 1.00 & 0.00 & 0.00 & 1.00 & - & 0.33 \\\\\n",
|
||||
"I4 & Safari & - & - & 0.02 & - & - & 0.02 \\\\\n",
|
||||
"I5 & Calendar & 1.00 & - & - & - & - & 1.00 \\\\\n",
|
||||
"A4 & Google Maps & 1.00 & 1.00 & 1.00 & 1.00 & - & 1.00 \\\\\n",
|
||||
"A5 & Samsung Internet & 1.00 & - & - & 0.00 & - & 0.50 \\\\\n",
|
||||
"I1 & WhatsApp (iOS) & - & 0.00 & 0.00 & 0.98 & 0.50 & 0.51 \\\\\n",
|
||||
"I2 & Contacts & 1.00 & 0.71 & - & 0.00 & 0.00 & 0.36 \\\\\n",
|
||||
"I3 & Apple Messages & 1.00 & 0.00 & - & - & - & 0.19 \\\\\n",
|
||||
"I4 & Safari & - & - & - & 0.00 & - & 0.00 \\\\\n",
|
||||
"I5 & Calendar & 1.00 & - & - & 0.20 & - & 0.33 \\\\\n",
|
||||
"\\bottomrule\n",
|
||||
"\\end{tabular}\n",
|
||||
"\\end{table}\n",
|
||||
@@ -362,13 +362,23 @@
|
||||
"source": [
|
||||
"# Optional: Save to LaTeX\n",
|
||||
"latex_output = df.to_latex(index=True, caption='Per-application distinct recall.', label='tab:app_level_recall', na_rep='-')\n",
|
||||
"print(latex_output)"
|
||||
"\n",
|
||||
"# Add \\hline after each LaTeX row line.\n",
|
||||
"latex_lines = []\n",
|
||||
"for line in latex_output.splitlines():\n",
|
||||
" stripped = line.strip()\n",
|
||||
" if stripped.endswith(chr(92) * 2):\n",
|
||||
" line = f\"{line} \\\\hline\"\n",
|
||||
" latex_lines.append(line)\n",
|
||||
"\n",
|
||||
"latex_output = '\\n'.join(latex_lines)\n",
|
||||
"print(latex_output)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -387,4 +397,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "2c488f9e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -10,7 +10,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Wrote: I:\\project2026\\llmagent\\RQs\\RQ2\\app_total_columns.csv\n"
|
||||
"Wrote: I:\\project2026\\llmagent\\RQs\\RQ3\\app_total_columns.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -174,7 +174,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "a30eef73",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -37,7 +37,7 @@
|
||||
" I4: records=5, total_cols=72, scanned_unique_cols=0, reduction=100.00%\n",
|
||||
" I5: records=5, total_cols=539, scanned_unique_cols=35, reduction=93.51%\n",
|
||||
"\n",
|
||||
"Wrote LaTeX: I:\\project2026\\llmagent\\RQs\\RQ2\\RQ2_search_space_reduction.tex\n"
|
||||
"Wrote LaTeX: I:\\project2026\\llmagent\\RQs\\RQ3\\RQ2_search_space_reduction.tex\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -54,7 +54,7 @@
|
||||
"# -----------------------------\n",
|
||||
"CSV_PATH = Path(r\"app_total_columns.csv\")\n",
|
||||
"JSONL_PATH = Path(r\"..\\normalized_PII_results\\GPT-5.1\\app_level\\app_level.jsonl\")\n",
|
||||
"OUT_TEX = Path(\"RQ2_search_space_reduction.tex\")\n",
|
||||
"OUT_TEX = Path(\"RQ3_search_space_reduction.tex\")\n",
|
||||
"\n",
|
||||
"# Locked app order + labels (table ID and display name)\n",
|
||||
"APP_NAME_PLAIN = OrderedDict([\n",
|
||||
@@ -401,7 +401,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "bnl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -8,4 +8,4 @@ I1,WhatsApp,328
|
||||
I2,Contacts,219
|
||||
I3,Apple Messages,181
|
||||
I4,Safari,72
|
||||
I5,Calendar,539
|
||||
I5,Calendar,539
|
||||
|
Binary file not shown.
BIN
RQs/__pycache__/config.cpython-312.pyc
Normal file
BIN
RQs/__pycache__/config.cpython-312.pyc
Normal file
Binary file not shown.
BIN
RQs/usenixsecurity2026.pdf
Normal file
BIN
RQs/usenixsecurity2026.pdf
Normal file
Binary file not shown.
Reference in New Issue
Block a user