{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "joint-electric", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from tqdm import tqdm" ] },
 { "cell_type": "code", "execution_count": 2, "id": "quantitative-beverage", "metadata": {}, "outputs": [], "source": [
  "# Read the train and test tables from CSV files in the working directory.\n",
  "train, test = (pd.read_csv(path) for path in (\"train.csv\", \"test.csv\"))"
 ] },
 { "cell_type": "code", "execution_count": 3, "id": "twelve-insulin", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ID_codetargetvar_0var_1var_2var_3var_4var_5var_6var_7...var_190var_191var_192var_193var_194var_195var_196var_197var_198var_199
0train_008.9255-6.786311.90815.093011.4607-9.28345.118718.6266...4.43543.96423.13641.691018.5227-2.39787.87848.563512.7803-1.0914
1train_1011.5006-4.147313.85885.389012.36227.04335.620816.5338...7.64217.72142.583710.951615.43052.03398.12678.788918.35601.9518
2train_208.6093-2.745712.08057.892810.5825-9.08376.942714.6155...2.90579.79051.67041.685821.60423.1417-6.52138.267514.72220.3965
3train_3011.0604-2.15188.95227.195712.5846-1.83615.842814.9250...4.46664.74330.71781.421423.0347-1.2706-2.927510.292217.9697-8.9996
4train_409.8369-1.483412.87466.637512.27722.44865.940519.2514...-1.49059.5214-0.15089.194213.2876-1.51213.92679.503117.9974-8.8104
\n", "

5 rows × 202 columns

\n", "
" ], "text/plain": [ " ID_code target var_0 var_1 var_2 var_3 var_4 var_5 var_6 \\\n", "0 train_0 0 8.9255 -6.7863 11.9081 5.0930 11.4607 -9.2834 5.1187 \n", "1 train_1 0 11.5006 -4.1473 13.8588 5.3890 12.3622 7.0433 5.6208 \n", "2 train_2 0 8.6093 -2.7457 12.0805 7.8928 10.5825 -9.0837 6.9427 \n", "3 train_3 0 11.0604 -2.1518 8.9522 7.1957 12.5846 -1.8361 5.8428 \n", "4 train_4 0 9.8369 -1.4834 12.8746 6.6375 12.2772 2.4486 5.9405 \n", "\n", " var_7 ... var_190 var_191 var_192 var_193 var_194 var_195 \\\n", "0 18.6266 ... 4.4354 3.9642 3.1364 1.6910 18.5227 -2.3978 \n", "1 16.5338 ... 7.6421 7.7214 2.5837 10.9516 15.4305 2.0339 \n", "2 14.6155 ... 2.9057 9.7905 1.6704 1.6858 21.6042 3.1417 \n", "3 14.9250 ... 4.4666 4.7433 0.7178 1.4214 23.0347 -1.2706 \n", "4 19.2514 ... -1.4905 9.5214 -0.1508 9.1942 13.2876 -1.5121 \n", "\n", " var_196 var_197 var_198 var_199 \n", "0 7.8784 8.5635 12.7803 -1.0914 \n", "1 8.1267 8.7889 18.3560 1.9518 \n", "2 -6.5213 8.2675 14.7222 0.3965 \n", "3 -2.9275 10.2922 17.9697 -8.9996 \n", "4 3.9267 9.5031 17.9974 -8.8104 \n", "\n", "[5 rows x 202 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head(5)" ] },
 { "cell_type": "code", "execution_count": 4, "id": "appreciated-affairs", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:03<00:00, 59.75it/s]\n" ] } ], "source": [
  "col_names = [f\"var_{i}\" for i in range(200)]\n",
  "flag_cols = []\n",
  "for col in tqdm(col_names):\n",
  "    # Values that occur exactly once in this test column.\n",
  "    freq = test[col].value_counts()\n",
  "    seen_once = freq[freq == 1].index\n",
  "    flag = f\"{col}_u\"\n",
  "    test[flag] = test[col].isin(seen_once)\n",
  "    flag_cols.append(flag)\n",
  "\n",
  "# A row is flagged when at least one of its 200 values is unique in its column.\n",
  "test[\"has_unique\"] = test[flag_cols].any(axis=1)"
 ] },
 { "cell_type": "code", "execution_count": 5, "id": "mighty-basics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "100000" ] }, "execution_count": 5, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "test[\"has_unique\"].sum()" ] },
 { "cell_type": "code", "execution_count": 6, "id": "sustainable-palestinian", "metadata": {}, "outputs": [], "source": [
  "# Split test on the has_unique flag. The explicit .copy() calls make both parts\n",
  "# independent frames, so adding columns to them later never touches `test`.\n",
  "feature_cols = [\"ID_code\"] + col_names\n",
  "real_test = test.loc[test[\"has_unique\"], feature_cols].copy()\n",
  "fake_test = test.loc[~test[\"has_unique\"], feature_cols].copy()\n",
  "train_and_test = pd.concat([train, real_test], axis=0)"
 ] },
 { "cell_type": "code", "execution_count": 7, "id": "military-tiger", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:43<00:00, 4.64it/s]\n" ] } ], "source": [
  "for col in tqdm(col_names):\n",
  "    # A value occurs exactly once in train + real test iff duplicated(keep=False)\n",
  "    # is False for it. This is one vectorized pass per column instead of a Python\n",
  "    # dict lookup per row, and a NaN no longer raises KeyError.\n",
  "    is_unique = ~train_and_test[col].duplicated(keep=False)\n",
  "    # to_numpy() assigns by position: train_and_test carries repeated index labels\n",
  "    # after the concat, so label alignment must be avoided here.\n",
  "    train_and_test[col + \"_unique\"] = is_unique.to_numpy(dtype=\"int64\")\n",
  "    fake_test[col + \"_unique\"] = 0"
 ] },
 { "cell_type": "code", "execution_count": 8, "id": "extraordinary-phrase", "metadata": {}, "outputs": [], "source": [
  "# Split the combined frame back apart by ID_code prefix; drop() and .copy()\n",
  "# both return new frames, so train_and_test itself is left unmodified.\n",
  "id_codes = train_and_test[\"ID_code\"]\n",
  "real_test = train_and_test[id_codes.str.startswith(\"test\")].drop(columns=[\"target\"])\n",
  "train = train_and_test[id_codes.str.startswith(\"train\")].copy()"
 ] },
 { "cell_type": "code", "execution_count": 9, "id": "quantitative-iraqi", "metadata": {}, "outputs": [], "source": [
  "# Real rows first, then the fake rows (whose *_unique flags are all 0).\n",
  "test = pd.concat([real_test, fake_test], axis=0)"
 ] },
 { "cell_type": "code", "execution_count": 10, "id": "instant-kitty", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ID_codetargetvar_0var_1var_2var_3var_4var_5var_6var_7...var_190_uniquevar_191_uniquevar_192_uniquevar_193_uniquevar_194_uniquevar_195_uniquevar_196_uniquevar_197_uniquevar_198_uniquevar_199_unique
0train_00.08.9255-6.786311.90815.093011.4607-9.28345.118718.6266...0000000000
1train_10.011.5006-4.147313.85885.389012.36227.04335.620816.5338...0000000000
2train_20.08.6093-2.745712.08057.892810.5825-9.08376.942714.6155...0000000000
3train_30.011.0604-2.15188.95227.195712.5846-1.83615.842814.9250...0000000000
4train_40.09.8369-1.483412.87466.637512.27722.44865.940519.2514...0011100000
..................................................................
199995train_1999950.011.4880-0.49568.26223.514210.340411.60815.670915.1516...0100001000
199996train_1999960.04.9149-2.448416.70526.63458.3096-10.56285.880221.5940...0001000000
199997train_1999970.011.2232-5.051810.51275.64569.3410-5.40864.555521.5571...0000000001
199998train_1999980.09.7148-8.609813.61045.793012.51730.53396.047917.0152...0000000001
199999train_1999990.010.8762-5.710512.11838.032811.55770.34885.283915.2058...0000000001
\n", "

200000 rows × 402 columns

\n", "
" ], "text/plain": [ " ID_code target var_0 var_1 var_2 var_3 var_4 \\\n", "0 train_0 0.0 8.9255 -6.7863 11.9081 5.0930 11.4607 \n", "1 train_1 0.0 11.5006 -4.1473 13.8588 5.3890 12.3622 \n", "2 train_2 0.0 8.6093 -2.7457 12.0805 7.8928 10.5825 \n", "3 train_3 0.0 11.0604 -2.1518 8.9522 7.1957 12.5846 \n", "4 train_4 0.0 9.8369 -1.4834 12.8746 6.6375 12.2772 \n", "... ... ... ... ... ... ... ... \n", "199995 train_199995 0.0 11.4880 -0.4956 8.2622 3.5142 10.3404 \n", "199996 train_199996 0.0 4.9149 -2.4484 16.7052 6.6345 8.3096 \n", "199997 train_199997 0.0 11.2232 -5.0518 10.5127 5.6456 9.3410 \n", "199998 train_199998 0.0 9.7148 -8.6098 13.6104 5.7930 12.5173 \n", "199999 train_199999 0.0 10.8762 -5.7105 12.1183 8.0328 11.5577 \n", "\n", " var_5 var_6 var_7 ... var_190_unique var_191_unique \\\n", "0 -9.2834 5.1187 18.6266 ... 0 0 \n", "1 7.0433 5.6208 16.5338 ... 0 0 \n", "2 -9.0837 6.9427 14.6155 ... 0 0 \n", "3 -1.8361 5.8428 14.9250 ... 0 0 \n", "4 2.4486 5.9405 19.2514 ... 0 0 \n", "... ... ... ... ... ... ... \n", "199995 11.6081 5.6709 15.1516 ... 0 1 \n", "199996 -10.5628 5.8802 21.5940 ... 0 0 \n", "199997 -5.4086 4.5555 21.5571 ... 0 0 \n", "199998 0.5339 6.0479 17.0152 ... 0 0 \n", "199999 0.3488 5.2839 15.2058 ... 0 0 \n", "\n", " var_192_unique var_193_unique var_194_unique var_195_unique \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 1 1 1 0 \n", "... ... ... ... ... \n", "199995 0 0 0 0 \n", "199996 0 1 0 0 \n", "199997 0 0 0 0 \n", "199998 0 0 0 0 \n", "199999 0 0 0 0 \n", "\n", " var_196_unique var_197_unique var_198_unique var_199_unique \n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "... ... ... ... ... 
\n", "199995 1 0 0 0 \n", "199996 0 0 0 0 \n", "199997 0 0 0 1 \n", "199998 0 0 0 1 \n", "199999 0 0 0 1 \n", "\n", "[200000 rows x 402 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train" ] },
 { "cell_type": "code", "execution_count": 11, "id": "human-japanese", "metadata": {}, "outputs": [], "source": [
  "# Write both frames to CSV; index=False leaves the row labels out of the files.\n",
  "for frame, path in ((train, \"new_shiny_train.csv\"), (test, \"new_shiny_test.csv\")):\n",
  "    frame.to_csv(path, index=False)"
 ] },
 { "cell_type": "code", "execution_count": 12, "id": "outer-walter", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ID_codetargetvar_0var_1var_2var_3var_4var_5var_6var_7...var_190_uniquevar_191_uniquevar_192_uniquevar_193_uniquevar_194_uniquevar_195_uniquevar_196_uniquevar_197_uniquevar_198_uniquevar_199_unique
0train_00.08.9255-6.786311.90815.093011.4607-9.28345.118718.6266...0000000000
1train_10.011.5006-4.147313.85885.389012.36227.04335.620816.5338...0000000000
2train_20.08.6093-2.745712.08057.892810.5825-9.08376.942714.6155...0000000000
3train_30.011.0604-2.15188.95227.195712.5846-1.83615.842814.9250...0000000000
4train_40.09.8369-1.483412.87466.637512.27722.44865.940519.2514...0011100000
..................................................................
199995train_1999950.011.4880-0.49568.26223.514210.340411.60815.670915.1516...0100001000
199996train_1999960.04.9149-2.448416.70526.63458.3096-10.56285.880221.5940...0001000000
199997train_1999970.011.2232-5.051810.51275.64569.3410-5.40864.555521.5571...0000000001
199998train_1999980.09.7148-8.609813.61045.793012.51730.53396.047917.0152...0000000001
199999train_1999990.010.8762-5.710512.11838.032811.55770.34885.283915.2058...0000000001
\n", "

200000 rows × 402 columns

\n", "
" ], "text/plain": [ " ID_code target var_0 var_1 var_2 var_3 var_4 \\\n", "0 train_0 0.0 8.9255 -6.7863 11.9081 5.0930 11.4607 \n", "1 train_1 0.0 11.5006 -4.1473 13.8588 5.3890 12.3622 \n", "2 train_2 0.0 8.6093 -2.7457 12.0805 7.8928 10.5825 \n", "3 train_3 0.0 11.0604 -2.1518 8.9522 7.1957 12.5846 \n", "4 train_4 0.0 9.8369 -1.4834 12.8746 6.6375 12.2772 \n", "... ... ... ... ... ... ... ... \n", "199995 train_199995 0.0 11.4880 -0.4956 8.2622 3.5142 10.3404 \n", "199996 train_199996 0.0 4.9149 -2.4484 16.7052 6.6345 8.3096 \n", "199997 train_199997 0.0 11.2232 -5.0518 10.5127 5.6456 9.3410 \n", "199998 train_199998 0.0 9.7148 -8.6098 13.6104 5.7930 12.5173 \n", "199999 train_199999 0.0 10.8762 -5.7105 12.1183 8.0328 11.5577 \n", "\n", " var_5 var_6 var_7 ... var_190_unique var_191_unique \\\n", "0 -9.2834 5.1187 18.6266 ... 0 0 \n", "1 7.0433 5.6208 16.5338 ... 0 0 \n", "2 -9.0837 6.9427 14.6155 ... 0 0 \n", "3 -1.8361 5.8428 14.9250 ... 0 0 \n", "4 2.4486 5.9405 19.2514 ... 0 0 \n", "... ... ... ... ... ... ... \n", "199995 11.6081 5.6709 15.1516 ... 0 1 \n", "199996 -10.5628 5.8802 21.5940 ... 0 0 \n", "199997 -5.4086 4.5555 21.5571 ... 0 0 \n", "199998 0.5339 6.0479 17.0152 ... 0 0 \n", "199999 0.3488 5.2839 15.2058 ... 0 0 \n", "\n", " var_192_unique var_193_unique var_194_unique var_195_unique \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 1 1 1 0 \n", "... ... ... ... ... \n", "199995 0 0 0 0 \n", "199996 0 1 0 0 \n", "199997 0 0 0 0 \n", "199998 0 0 0 0 \n", "199999 0 0 0 0 \n", "\n", " var_196_unique var_197_unique var_198_unique var_199_unique \n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "... ... ... ... ... 
\n", "199995 1 0 0 0 \n", "199996 0 0 0 0 \n", "199997 0 0 0 1 \n", "199998 0 0 0 1 \n", "199999 0 0 0 1 \n", "\n", "[200000 rows x 402 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train" ] }, { "cell_type": "code", "execution_count": 13, "id": "therapeutic-scratch", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ID_codevar_0var_1var_2var_3var_4var_5var_6var_7var_8...var_190_uniquevar_191_uniquevar_192_uniquevar_193_uniquevar_194_uniquevar_195_uniquevar_196_uniquevar_197_uniquevar_198_uniquevar_199_unique
3test_38.5374-1.322212.02206.57498.84583.17444.939720.56603.3755...0000000001
7test_717.3035-2.421213.39898.399811.07779.64495.959617.8477-4.8068...0000000010
11test_1110.6137-2.18988.90903.801413.8602-5.98025.551515.4716-0.1714...0000001001
15test_1514.8595-4.537813.64835.64809.91441.51905.035813.4524-2.5419...0010000001
16test_1614.1732-5.14909.75913.731610.3700-21.92027.713018.87490.4680...0000000000
\n", "

5 rows × 401 columns

\n", "
" ], "text/plain": [ " ID_code var_0 var_1 var_2 var_3 var_4 var_5 var_6 \\\n", "3 test_3 8.5374 -1.3222 12.0220 6.5749 8.8458 3.1744 4.9397 \n", "7 test_7 17.3035 -2.4212 13.3989 8.3998 11.0777 9.6449 5.9596 \n", "11 test_11 10.6137 -2.1898 8.9090 3.8014 13.8602 -5.9802 5.5515 \n", "15 test_15 14.8595 -4.5378 13.6483 5.6480 9.9144 1.5190 5.0358 \n", "16 test_16 14.1732 -5.1490 9.7591 3.7316 10.3700 -21.9202 7.7130 \n", "\n", " var_7 var_8 ... var_190_unique var_191_unique var_192_unique \\\n", "3 20.5660 3.3755 ... 0 0 0 \n", "7 17.8477 -4.8068 ... 0 0 0 \n", "11 15.4716 -0.1714 ... 0 0 0 \n", "15 13.4524 -2.5419 ... 0 0 1 \n", "16 18.8749 0.4680 ... 0 0 0 \n", "\n", " var_193_unique var_194_unique var_195_unique var_196_unique \\\n", "3 0 0 0 0 \n", "7 0 0 0 0 \n", "11 0 0 0 1 \n", "15 0 0 0 0 \n", "16 0 0 0 0 \n", "\n", " var_197_unique var_198_unique var_199_unique \n", "3 0 0 1 \n", "7 0 1 0 \n", "11 0 0 1 \n", "15 0 0 1 \n", "16 0 0 0 \n", "\n", "[5 rows x 401 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head(5)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 5 }