update RQs

This commit is contained in:
Frank Xu
2026-02-21 22:53:10 -05:00
parent efd042b0e0
commit a706fd95ae
16 changed files with 139 additions and 151 deletions

View File

@@ -164,7 +164,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -207,7 +207,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -168,7 +168,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -318,7 +318,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -1,31 +0,0 @@
\begin{table}[th]
\centering
\caption{Search space reduction during row-level PII extraction.}
\label{tab:search_space_reduction}
\small
\begin{tabular}{|l|l|p{1.3cm}|p{1.7cm}|p{1.0cm}|}
\hline
\textbf{ID} & \textbf{Apps} & \textbf{Candidate Cols (Total)} & \textbf{Cols Scanned (Extraction)} & \textbf{Reduc. (\%)} \\
\hline
A1 & WhatsApp & 1637 & 7 & 99.57\% \\
\hline
A2 & Snapchat & 848 & 107 & 87.38\% \\
\hline
A3 & Telegram & 1197 & 0 & 100.00\% \\
\hline
A4 & Google Maps & 80 & 2 & 97.50\% \\
\hline
A5 & Samsung Internet & 185 & 0 & 100.00\% \\
\hline
I1 & WhatsApp & 328 & 0 & 100.00\% \\
\hline
I2 & Contacts & 13 & 0 & 100.00\% \\
\hline
I3 & Apple Messages & 186 & 0 & 100.00\% \\
\hline
I4 & Safari & 74 & 0 & 100.00\% \\
\hline
I5 & Calendar & 541 & 0 & 100.00\% \\
\hline
\end{tabular}
\end{table}

View File

@@ -21,11 +21,11 @@
"\\hline\n", "\\hline\n",
"Phone Number & 1051 & 1184 & 734 & 69.8\\% & 62.0\\% \\\\\n", "Phone Number & 1051 & 1184 & 734 & 69.8\\% & 62.0\\% \\\\\n",
"\\hline\n", "\\hline\n",
"User Name & 3446 & 1234 & 1232 & 35.8\\% & 99.8\\% \\\\\n", "User Name & 2269 & 1234 & 1232 & 54.3\\% & 99.8\\% \\\\\n",
"\\hline\n", "\\hline\n",
"Person Name & 1134 & 2157 & 1087 & 95.9\\% & 50.4\\% \\\\\n", "Person Name & 1640 & 2157 & 1262 & 77.0\\% & 58.5\\% \\\\\n",
"\\hline\n", "\\hline\n",
"Postal Address & 21 & 2 & 2 & 9.5\\% & 100.0\\% \\\\\n", "Postal Address & 11 & 2 & 2 & 18.2\\% & 100.0\\% \\\\\n",
"\\hline\n" "\\hline\n"
] ]
} }
@@ -106,7 +106,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"id": "1affac71", "id": "1affac71",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -14,11 +14,11 @@
"\n", "\n",
"PII Type | GT DBs | System DBs | Overlap | Coverage\n", "PII Type | GT DBs | System DBs | Overlap | Coverage\n",
"---------------+--------+------------+---------+---------\n", "---------------+--------+------------+---------+---------\n",
"Email Address | 0 | 6 | 0 | 0.0% \n", "Email Address | 6 | 7 | 6 | 100.0% \n",
"Phone Number | 0 | 6 | 0 | 0.0% \n", "Phone Number | 9 | 7 | 6 | 66.7% \n",
"User Name | 0 | 10 | 0 | 0.0% \n", "User Name | 6 | 4 | 4 | 66.7% \n",
"Person Name | 0 | 7 | 0 | 0.0% \n", "Person Name | 12 | 11 | 9 | 75.0% \n",
"Postal Address | 0 | 1 | 0 | 0.0% \n", "Postal Address | 2 | 1 | 1 | 50.0% \n",
"\n", "\n",
"LATEX TABULAR\n", "LATEX TABULAR\n",
"\n", "\n",
@@ -26,15 +26,15 @@
"\\hline\n", "\\hline\n",
"\\textbf{PII Type} &\\textbf{DBs with PII (GT)} &\\textbf{DBs with discoveries (System)} &\\textbf{Overlap} &\\textbf{Coverage} \\\\\n", "\\textbf{PII Type} &\\textbf{DBs with PII (GT)} &\\textbf{DBs with discoveries (System)} &\\textbf{Overlap} &\\textbf{Coverage} \\\\\n",
"\\hline\n", "\\hline\n",
"Email Address & 0 & 6 & 0 & 0.0\\% \\\\\n", "Email Address & 6 & 7 & 6 & 100.0\\% \\\\\n",
"\\hline\n", "\\hline\n",
"Phone Number & 0 & 6 & 0 & 0.0\\% \\\\\n", "Phone Number & 9 & 7 & 6 & 66.7\\% \\\\\n",
"\\hline\n", "\\hline\n",
"User Name & 0 & 10 & 0 & 0.0\\% \\\\\n", "User Name & 6 & 4 & 4 & 66.7\\% \\\\\n",
"\\hline\n", "\\hline\n",
"Person Name & 0 & 7 & 0 & 0.0\\% \\\\\n", "Person Name & 12 & 11 & 9 & 75.0\\% \\\\\n",
"\\hline\n", "\\hline\n",
"Postal Address & 0 & 1 & 0 & 0.0\\% \\\\\n", "Postal Address & 2 & 1 & 1 & 50.0\\% \\\\\n",
"\\hline\n", "\\hline\n",
"\\end{tabular}\n" "\\end{tabular}\n"
] ]
@@ -196,7 +196,7 @@
"def main() -> None:\n", "def main() -> None:\n",
" # Define these inside main so importing this module has no side effects.\n", " # Define these inside main so importing this module has no side effects.\n",
" SYSTEM_DIR = Path(r\"..\\normalized_PII_results\\GPT-5.1\\db_level\")\n", " SYSTEM_DIR = Path(r\"..\\normalized_PII_results\\GPT-5.1\\db_level\")\n",
" GT_DIR = Path(r\"..\\normalized_PII_results\\GPT-5.1\\app_level\")\n", " GT_DIR = Path(r\"..\\normalized_PII_results\\ground_truth\\db_level\")\n",
" \n", " \n",
" gt_sets = collect_db_sets(GT_DIR, [k for k, _ in PII_TYPES])\n", " gt_sets = collect_db_sets(GT_DIR, [k for k, _ in PII_TYPES])\n",
" sys_sets = collect_db_sets(SYSTEM_DIR, [k for k, _ in PII_TYPES])\n", " sys_sets = collect_db_sets(SYSTEM_DIR, [k for k, _ in PII_TYPES])\n",
@@ -216,7 +216,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -29,10 +29,17 @@
"source": [ "source": [
"import sys\n", "import sys\n",
"import os\n", "import os\n",
"# Add the parent directory (RQs) to the path to find the config file\n", "from pathlib import Path\n",
"if '..' not in sys.path:\n", "\n",
" sys.path.insert(1, os.path.abspath('..'))\n", "# Resolve paths relative to this notebook so execution is independent of kernel cwd.\n",
"import config" "NOTEBOOK_DIR = Path.cwd()\n",
"if not (NOTEBOOK_DIR / 'RQ2_t8.ipynb').exists():\n",
" NOTEBOOK_DIR = Path(r'i:/project2026/llmagent/RQs/RQ2')\n",
"RQS_DIR = NOTEBOOK_DIR.parent\n",
"\n",
"if str(RQS_DIR) not in sys.path:\n",
" sys.path.insert(1, str(RQS_DIR))\n",
"import config\n"
] ]
}, },
{ {
@@ -44,8 +51,10 @@
"def parse_filename(filepath):\n", "def parse_filename(filepath):\n",
" \"\"\"Parses a filename to extract the app ID and database name.\"\"\"\n", " \"\"\"Parses a filename to extract the app ID and database name.\"\"\"\n",
" base_name = os.path.basename(filepath)\n", " base_name = os.path.basename(filepath)\n",
" # Format: PII_{APP_ID}_{DB_NAME}_{TIMESTAMP}.jsonl\n", " # Support both formats:\n",
" match = re.match(r'PII_([A-Z0-9]+)_(.*)_\\d{8}T\\d{6}Z\\.jsonl', base_name)\n", " # 1) PII_{APP_ID}_{DB_NAME}_{TIMESTAMP}.jsonl\n",
" # 2) PII_{APP_ID}_{DB_NAME}.jsonl\n",
" match = re.match(r'PII_([A-Z0-9]+)_(.*?)(?:_\\d{8}T\\d{6}Z)?\\.jsonl$', base_name)\n",
" if match:\n", " if match:\n",
" app_id = match.group(1)\n", " app_id = match.group(1)\n",
" db_name = match.group(2)\n", " db_name = match.group(2)\n",
@@ -69,8 +78,8 @@
" data[app_id][db_name][pii_type] = True\n", " data[app_id][db_name][pii_type] = True\n",
" return data\n", " return data\n",
"\n", "\n",
"gt_data = load_data(os.path.join('..', config.GROUND_TRUTH_DIR))\n", "gt_data = load_data(str(RQS_DIR / config.GROUND_TRUTH_DIR))\n",
"system_data = load_data(os.path.join('..', config.GPT4O_RESULTS_DIR))" "system_data = load_data(str(RQS_DIR / config.GPT4O_RESULTS_DIR))\n"
] ]
}, },
{ {
@@ -178,8 +187,8 @@
" <th>A1</th>\n", " <th>A1</th>\n",
" <td>WhatsApp</td>\n", " <td>WhatsApp</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>2/2</td>\n", " <td>1/2</td>\n",
" <td>1/1</td>\n", " <td>1/2</td>\n",
" <td>2/2</td>\n", " <td>2/2</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>2/2</td>\n", " <td>2/2</td>\n",
@@ -189,8 +198,8 @@
" <td>Snapchat</td>\n", " <td>Snapchat</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>2/2</td>\n", " <td>2/2</td>\n",
" <td>1/1</td>\n",
" <td>2/2</td>\n", " <td>2/2</td>\n",
" <td>1/2</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>2/2</td>\n", " <td>2/2</td>\n",
" </tr>\n", " </tr>\n",
@@ -208,9 +217,9 @@
" <th>A4</th>\n", " <th>A4</th>\n",
" <td>Google Maps</td>\n", " <td>Google Maps</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>1/1</td>\n",
" <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" </tr>\n", " </tr>\n",
@@ -219,20 +228,20 @@
" <td>Samsung Internet</td>\n", " <td>Samsung Internet</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n",
" <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n",
" <td>2/2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I1</th>\n", " <th>I1</th>\n",
" <td>WhatsApp (iOS)</td>\n", " <td>WhatsApp (iOS)</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>1/2</td>\n",
" <td>-</td>\n", " <td>0/1</td>\n",
" <td>1/1</td>\n", " <td>2/2</td>\n",
" <td>1/1</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>2/2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I2</th>\n", " <th>I2</th>\n",
@@ -241,7 +250,7 @@
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>0/1</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@@ -249,8 +258,8 @@
" <td>Apple Messages</td>\n", " <td>Apple Messages</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>0/1</td>\n", " <td>0/1</td>\n",
" <td>0/1</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" </tr>\n", " </tr>\n",
@@ -259,10 +268,10 @@
" <td>Safari</td>\n", " <td>Safari</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0/2</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>0/2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I5</th>\n", " <th>I5</th>\n",
@@ -270,7 +279,7 @@
" <td>1/1</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>1/1</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1/1</td>\n", " <td>1/1</td>\n",
" </tr>\n", " </tr>\n",
@@ -281,16 +290,16 @@
"text/plain": [ "text/plain": [
" Application Email Phone User Name Person Name Postal Address All PII\n", " Application Email Phone User Name Person Name Postal Address All PII\n",
"ID \n", "ID \n",
"A1 WhatsApp - 2/2 1/1 2/2 - 2/2\n", "A1 WhatsApp - 1/2 1/2 2/2 - 2/2\n",
"A2 Snapchat 1/1 2/2 1/1 2/2 - 2/2\n", "A2 Snapchat 1/1 2/2 2/2 1/2 - 2/2\n",
"A3 Telegram - - - - - -\n", "A3 Telegram - - - - - -\n",
"A4 Google Maps 1/1 - 1/1 - - 1/1\n", "A4 Google Maps 1/1 1/1 1/1 1/1 - 1/1\n",
"A5 Samsung Internet 1/1 - 1/1 - - 1/1\n", "A5 Samsung Internet 1/1 - - 1/1 - 2/2\n",
"I1 WhatsApp (iOS) - - - 1/1 1/1 1/1\n", "I1 WhatsApp (iOS) - 1/2 0/1 2/2 1/1 2/2\n",
"I2 Contacts 1/1 1/1 - 1/1 - 1/1\n", "I2 Contacts 1/1 1/1 - 1/1 0/1 1/1\n",
"I3 Apple Messages 1/1 0/1 0/1 1/1 - 1/1\n", "I3 Apple Messages 1/1 0/1 - - - 1/1\n",
"I4 Safari - - 1/1 - - 1/1\n", "I4 Safari - - - 0/2 - 0/2\n",
"I5 Calendar 1/1 - - - - 1/1" "I5 Calendar 1/1 - - 1/1 - 1/1"
] ]
}, },
"execution_count": 5, "execution_count": 5,
@@ -328,16 +337,16 @@
" & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n", " & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n",
"ID & & & & & & & \\\\\n", "ID & & & & & & & \\\\\n",
"\\midrule\n", "\\midrule\n",
"A1 & WhatsApp & - & 2/2 & 1/1 & 2/2 & - & 2/2 \\\\\n", "A1 & WhatsApp & - & 1/2 & 1/2 & 2/2 & - & 2/2 \\\\\n",
"A2 & Snapchat & 1/1 & 2/2 & 1/1 & 2/2 & - & 2/2 \\\\\n", "A2 & Snapchat & 1/1 & 2/2 & 2/2 & 1/2 & - & 2/2 \\\\\n",
"A3 & Telegram & - & - & - & - & - & - \\\\\n", "A3 & Telegram & - & - & - & - & - & - \\\\\n",
"A4 & Google Maps & 1/1 & - & 1/1 & - & - & 1/1 \\\\\n", "A4 & Google Maps & 1/1 & 1/1 & 1/1 & 1/1 & - & 1/1 \\\\\n",
"A5 & Samsung Internet & 1/1 & - & 1/1 & - & - & 1/1 \\\\\n", "A5 & Samsung Internet & 1/1 & - & - & 1/1 & - & 2/2 \\\\\n",
"I1 & WhatsApp (iOS) & - & - & - & 1/1 & 1/1 & 1/1 \\\\\n", "I1 & WhatsApp (iOS) & - & 1/2 & 0/1 & 2/2 & 1/1 & 2/2 \\\\\n",
"I2 & Contacts & 1/1 & 1/1 & - & 1/1 & - & 1/1 \\\\\n", "I2 & Contacts & 1/1 & 1/1 & - & 1/1 & 0/1 & 1/1 \\\\\n",
"I3 & Apple Messages & 1/1 & 0/1 & 0/1 & 1/1 & - & 1/1 \\\\\n", "I3 & Apple Messages & 1/1 & 0/1 & - & - & - & 1/1 \\\\\n",
"I4 & Safari & - & - & 1/1 & - & - & 1/1 \\\\\n", "I4 & Safari & - & - & - & 0/2 & - & 0/2 \\\\\n",
"I5 & Calendar & 1/1 & - & - & - & - & 1/1 \\\\\n", "I5 & Calendar & 1/1 & - & - & 1/1 & - & 1/1 \\\\\n",
"\\bottomrule\n", "\\bottomrule\n",
"\\end{tabular}\n", "\\end{tabular}\n",
"\\end{table}\n", "\\end{table}\n",
@@ -354,7 +363,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -187,21 +187,21 @@
" <th>A1</th>\n", " <th>A1</th>\n",
" <td>WhatsApp</td>\n", " <td>WhatsApp</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.96</td>\n", " <td>0.91</td>\n",
" <td>0.50</td>\n", " <td>0.00</td>\n",
" <td>0.68</td>\n", " <td>0.45</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.79</td>\n", " <td>0.21</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>A2</th>\n", " <th>A2</th>\n",
" <td>Snapchat</td>\n", " <td>Snapchat</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>0.74</td>\n",
" <td>0.33</td>\n", " <td>0.33</td>\n",
" <td>1.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.79</td>\n", " <td>0.64</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>A3</th>\n", " <th>A3</th>\n",
@@ -217,9 +217,9 @@
" <th>A4</th>\n", " <th>A4</th>\n",
" <td>Google Maps</td>\n", " <td>Google Maps</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>-</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>-</td>\n", " <td>1.00</td>\n",
" <td>1.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" </tr>\n", " </tr>\n",
@@ -228,50 +228,50 @@
" <td>Samsung Internet</td>\n", " <td>Samsung Internet</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>0.50</td>\n",
" <td>0.20</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I1</th>\n", " <th>I1</th>\n",
" <td>WhatsApp (iOS)</td>\n", " <td>WhatsApp (iOS)</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>0.00</td>\n",
" <td>-</td>\n", " <td>0.00</td>\n",
" <td>1.00</td>\n", " <td>0.98</td>\n",
" <td>1.00</td>\n", " <td>0.50</td>\n",
" <td>1.00</td>\n", " <td>0.51</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I2</th>\n", " <th>I2</th>\n",
" <td>Contacts</td>\n", " <td>Contacts</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>0.47</td>\n", " <td>0.71</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.86</td>\n", " <td>0.00</td>\n",
" <td>-</td>\n", " <td>0.00</td>\n",
" <td>0.65</td>\n", " <td>0.36</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I3</th>\n", " <th>I3</th>\n",
" <td>Apple Messages</td>\n", " <td>Apple Messages</td>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>1.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.33</td>\n", " <td>-</td>\n",
" <td>-</td>\n",
" <td>0.19</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I4</th>\n", " <th>I4</th>\n",
" <td>Safari</td>\n", " <td>Safari</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.02</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.02</td>\n", " <td>0.00</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>I5</th>\n", " <th>I5</th>\n",
@@ -279,9 +279,9 @@
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>0.20</td>\n",
" <td>-</td>\n", " <td>-</td>\n",
" <td>-</td>\n", " <td>0.33</td>\n",
" <td>1.00</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@@ -290,16 +290,16 @@
"text/plain": [ "text/plain": [
" Application Email Phone User Name Person Name Postal Address All PII\n", " Application Email Phone User Name Person Name Postal Address All PII\n",
"ID \n", "ID \n",
"A1 WhatsApp - 0.96 0.50 0.68 - 0.79\n", "A1 WhatsApp - 0.91 0.00 0.45 - 0.21\n",
"A2 Snapchat 1.00 1.00 0.33 1.00 - 0.79\n", "A2 Snapchat 1.00 1.00 0.74 0.33 - 0.64\n",
"A3 Telegram - - - - - -\n", "A3 Telegram - - - - - -\n",
"A4 Google Maps 1.00 - 1.00 - - 1.00\n", "A4 Google Maps 1.00 1.00 1.00 1.00 - 1.00\n",
"A5 Samsung Internet 1.00 - 0.00 - - 0.20\n", "A5 Samsung Internet 1.00 - - 0.00 - 0.50\n",
"I1 WhatsApp (iOS) - - - 1.00 1.00 1.00\n", "I1 WhatsApp (iOS) - 0.00 0.00 0.98 0.50 0.51\n",
"I2 Contacts 1.00 0.47 - 0.86 - 0.65\n", "I2 Contacts 1.00 0.71 - 0.00 0.00 0.36\n",
"I3 Apple Messages 1.00 0.00 0.00 1.00 - 0.33\n", "I3 Apple Messages 1.00 0.00 - - - 0.19\n",
"I4 Safari - - 0.02 - - 0.02\n", "I4 Safari - - - 0.00 - 0.00\n",
"I5 Calendar 1.00 - - - - 1.00" "I5 Calendar 1.00 - - 0.20 - 0.33"
] ]
}, },
"execution_count": 5, "execution_count": 5,
@@ -342,16 +342,16 @@
" & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n", " & Application & Email & Phone & User Name & Person Name & Postal Address & All PII \\\\\n",
"ID & & & & & & & \\\\\n", "ID & & & & & & & \\\\\n",
"\\midrule\n", "\\midrule\n",
"A1 & WhatsApp & - & 0.96 & 0.50 & 0.68 & - & 0.79 \\\\\n", "A1 & WhatsApp & - & 0.91 & 0.00 & 0.45 & - & 0.21 \\\\\n",
"A2 & Snapchat & 1.00 & 1.00 & 0.33 & 1.00 & - & 0.79 \\\\\n", "A2 & Snapchat & 1.00 & 1.00 & 0.74 & 0.33 & - & 0.64 \\\\\n",
"A3 & Telegram & - & - & - & - & - & - \\\\\n", "A3 & Telegram & - & - & - & - & - & - \\\\\n",
"A4 & Google Maps & 1.00 & - & 1.00 & - & - & 1.00 \\\\\n", "A4 & Google Maps & 1.00 & 1.00 & 1.00 & 1.00 & - & 1.00 \\\\\n",
"A5 & Samsung Internet & 1.00 & - & 0.00 & - & - & 0.20 \\\\\n", "A5 & Samsung Internet & 1.00 & - & - & 0.00 & - & 0.50 \\\\\n",
"I1 & WhatsApp (iOS) & - & - & - & 1.00 & 1.00 & 1.00 \\\\\n", "I1 & WhatsApp (iOS) & - & 0.00 & 0.00 & 0.98 & 0.50 & 0.51 \\\\\n",
"I2 & Contacts & 1.00 & 0.47 & - & 0.86 & - & 0.65 \\\\\n", "I2 & Contacts & 1.00 & 0.71 & - & 0.00 & 0.00 & 0.36 \\\\\n",
"I3 & Apple Messages & 1.00 & 0.00 & 0.00 & 1.00 & - & 0.33 \\\\\n", "I3 & Apple Messages & 1.00 & 0.00 & - & - & - & 0.19 \\\\\n",
"I4 & Safari & - & - & 0.02 & - & - & 0.02 \\\\\n", "I4 & Safari & - & - & - & 0.00 & - & 0.00 \\\\\n",
"I5 & Calendar & 1.00 & - & - & - & - & 1.00 \\\\\n", "I5 & Calendar & 1.00 & - & - & 0.20 & - & 0.33 \\\\\n",
"\\bottomrule\n", "\\bottomrule\n",
"\\end{tabular}\n", "\\end{tabular}\n",
"\\end{table}\n", "\\end{table}\n",
@@ -362,13 +362,23 @@
"source": [ "source": [
"# Optional: Save to LaTeX\n", "# Optional: Save to LaTeX\n",
"latex_output = df.to_latex(index=True, caption='Per-application distinct recall.', label='tab:app_level_recall', na_rep='-')\n", "latex_output = df.to_latex(index=True, caption='Per-application distinct recall.', label='tab:app_level_recall', na_rep='-')\n",
"print(latex_output)" "\n",
"# Add \\hline after each LaTeX row line.\n",
"latex_lines = []\n",
"for line in latex_output.splitlines():\n",
" stripped = line.strip()\n",
" if stripped.endswith(chr(92) * 2):\n",
" line = f\"{line} \\\\hline\"\n",
" latex_lines.append(line)\n",
"\n",
"latex_output = '\\n'.join(latex_lines)\n",
"print(latex_output)\n"
] ]
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -387,4 +397,4 @@
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 4 "nbformat_minor": 4
} }

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "2c488f9e", "id": "2c488f9e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -10,7 +10,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Wrote: I:\\project2026\\llmagent\\RQs\\RQ2\\app_total_columns.csv\n" "Wrote: I:\\project2026\\llmagent\\RQs\\RQ3\\app_total_columns.csv\n"
] ]
} }
], ],
@@ -174,7 +174,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"id": "a30eef73", "id": "a30eef73",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -37,7 +37,7 @@
" I4: records=5, total_cols=72, scanned_unique_cols=0, reduction=100.00%\n", " I4: records=5, total_cols=72, scanned_unique_cols=0, reduction=100.00%\n",
" I5: records=5, total_cols=539, scanned_unique_cols=35, reduction=93.51%\n", " I5: records=5, total_cols=539, scanned_unique_cols=35, reduction=93.51%\n",
"\n", "\n",
"Wrote LaTeX: I:\\project2026\\llmagent\\RQs\\RQ2\\RQ2_search_space_reduction.tex\n" "Wrote LaTeX: I:\\project2026\\llmagent\\RQs\\RQ3\\RQ2_search_space_reduction.tex\n"
] ]
} }
], ],
@@ -54,7 +54,7 @@
"# -----------------------------\n", "# -----------------------------\n",
"CSV_PATH = Path(r\"app_total_columns.csv\")\n", "CSV_PATH = Path(r\"app_total_columns.csv\")\n",
"JSONL_PATH = Path(r\"..\\normalized_PII_results\\GPT-5.1\\app_level\\app_level.jsonl\")\n", "JSONL_PATH = Path(r\"..\\normalized_PII_results\\GPT-5.1\\app_level\\app_level.jsonl\")\n",
"OUT_TEX = Path(\"RQ2_search_space_reduction.tex\")\n", "OUT_TEX = Path(\"RQ3_search_space_reduction.tex\")\n",
"\n", "\n",
"# Locked app order + labels (table ID and display name)\n", "# Locked app order + labels (table ID and display name)\n",
"APP_NAME_PLAIN = OrderedDict([\n", "APP_NAME_PLAIN = OrderedDict([\n",
@@ -401,7 +401,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "bnl",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@@ -8,4 +8,4 @@ I1,WhatsApp,328
I2,Contacts,219 I2,Contacts,219
I3,Apple Messages,181 I3,Apple Messages,181
I4,Safari,72 I4,Safari,72
I5,Calendar,539 I5,Calendar,539
1 app_code app_name total_columns
8 I2 Contacts 219
9 I3 Apple Messages 181
10 I4 Safari 72
11 I5 Calendar 539

Binary file not shown.

BIN
RQs/usenixsecurity2026.pdf Normal file

Binary file not shown.