From 5f3192c3d1dab6d8e424b7427c6f86c84be31038 Mon Sep 17 00:00:00 2001 From: Frank Xu Date: Thu, 2 Oct 2025 09:45:16 -0400 Subject: [PATCH] fix cyfi445 lab 7 --- .../0_predict_breast_cancer_A_steps.ipynb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CYFI445/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb b/CYFI445/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb index 3f2ad0c..9a6c546 100644 --- a/CYFI445/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb +++ b/CYFI445/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb @@ -217,14 +217,20 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "3aeb88da", "metadata": {}, "outputs": [], "source": [ "# Scale features\n", "scaler = StandardScaler()\n", + "# fit() computes the mean and std of each feature using the training data only.\n", + "# transform() applies the standardization formula to scale the data.\n", + "# fit_transform() does both in one step.\n", "X_train = scaler.fit_transform(X_train)\n", + "# Very important: we do not fit again on the test set.\n", + "# Instead, we use the same mean and std from the training set (stored inside scaler) to transform the test data.\n", + "# This prevents data leakage — making sure the test set stays “unseen” during training.\n", "X_test = scaler.transform(X_test)\n", "\n", "# Convert to PyTorch tensors\n",