From efd37aac38c0885ad513c5a94761e6c4ddce559a Mon Sep 17 00:00:00 2001 From: arminwitte <110226001+arminwitte@users.noreply.github.com> Date: Sat, 11 Feb 2023 21:48:48 +0100 Subject: [PATCH] Add files via upload --- titanic.ipynb | 245 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 219 insertions(+), 26 deletions(-) diff --git a/titanic.ipynb b/titanic.ipynb index 6ff452e..c150720 100644 --- a/titanic.ipynb +++ b/titanic.ipynb @@ -55,13 +55,13 @@ "outputs": [], "source": [ "\n", - "rf_titanic = RandomForest(df_titanic.copy(),\"Survived\", cart_settings={\"max_depth\":2,\"min_leaf_samples\":10, \"min_split_samples\":10},sample_frac=0.5,n_attributes=6,metrics_type=\"classification\"\n", + "rf_titanic = RandomForest(df_titanic.copy(),\"Survived\", cart_settings={\"max_depth\":2,\"min_leaf_samples\":10, \"min_split_samples\":10},sample_frac=0.1,n_attributes=4,metrics_type=\"classification\"\n", " )" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "87df1e22-7a6a-4c77-b407-5a7aeb70e358", "metadata": {}, "outputs": [ @@ -69,10 +69,26 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:binarybeech.binarybeech:A tree with 8 leafs was created\n", - "INFO:binarybeech.binarybeech:A tree with 8 leafs was created\n", - "INFO:binarybeech.binarybeech:A tree with 8 leafs was created\n", - "INFO:binarybeech.binarybeech:A tree with 8 leafs was created\n" + "INFO:binarybeech.binarybeech:A tree with 5 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 2 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 3 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 5 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 3 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 3 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 5 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 4 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 1 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 5 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 2 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 2 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 3 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 2 leafs was created\n" ] } ], @@ -83,10 +99,82 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, + "id": "966e7fee-0943-4dc7-bd4e-736f60056d26", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Survived votes \\\n", + "708 1 [1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ... \n", + "758 0 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, ... \n", + "377 0 [0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1] \n", + "732 0 [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0] \n", + "658 0 [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, ... \n", + ".. ... ... \n", + "810 0 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ... \n", + "320 0 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... \n", + "745 0 [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, ... \n", + "730 1 [1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ... \n", + "652 0 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " majority_vote \n", + "708 1 \n", + "758 0 \n", + "377 0 \n", + "732 0 \n", + "658 0 \n", + ".. ... \n", + "810 0 \n", + "320 0 \n", + "745 0 \n", + "730 1 \n", + "652 0 \n", + "\n", + "[668 rows x 3 columns]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'precision': array([0.94244604, 0.5936255 ]),\n", + " 'recall': array([0.79393939, 0.86127168]),\n", + " 'F-score': 0.7823361469712016,\n", + " 'accuracy': 0.811377245508982}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "rf_titanic.validate_oob()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "3cb61adc-0d85-420b-8b35-a037a4a45f49", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'precision': array([0.95443645, 0.59760956]),\n", + " 'recall': array([0.79759519, 0.88757396]),\n", + " 'F-score': 0.7916406737367436,\n", + " 'accuracy': 0.8203592814371258}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", "rf_titanic.validate()\n" @@ -94,10 +182,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "274c3cce-667e-4dcf-a08c-4ba4205e0969", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'precision': array([0.95264117, 0.58479532]),\n", + " 'recall': array([0.78646617, 0.88495575]),\n", + " 'F-score': 0.7829199248207531,\n", + " 'accuracy': 0.8114478114478114}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", "rf_titanic.validate(df_test)\n" @@ -105,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "34bca363-d0c4-4cc1-8cce-e35223c4e2dc", "metadata": {}, "outputs": [], @@ -117,47 +219,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "69e10b97-63a3-427e-a6a8-a6fcbe75f23d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['PassengerId',\n", + " 'Pclass',\n", + " 'Name',\n", + " 'Sex',\n", + " 'Age',\n", + " 'SibSp',\n", + " 'Parch',\n", + " 'Ticket',\n", + " 'Fare',\n", + " 'Cabin',\n", + " 'Embarked']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "gbt_titanic.X_names" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "3a3c384c-9750-4efb-8691-17146eb9b0e4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:binarybeech.binarybeech:A tree with 2 leafs was created\n" + ] + } + ], "source": [ "it=gbt_titanic._initial_tree()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "a798aebf-12bf-49df-87d7-da623d708b64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.37574850299401197\n", + "\n" + ] + } + ], "source": [ "it.tree.show()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "c52850c8-e78a-43d6-863a-442fc59f485b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "it.tree.root" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "7edbef2d-a34b-41d1-9903-3a1fb0d56cbf", "metadata": {}, "outputs": [], @@ -169,10 +320,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "ab483568-cbe5-452a-8524-d1700c4f2efc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.624251497005988" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", "gbt_titanic.predict(df_titanic.iloc[0])" @@ -180,10 +342,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "24a349c8-371f-4749-b0e5-e64362395322", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jovyan/binarybeech/metrics.py:106: RuntimeWarning: invalid value encountered in true_divide\n", + " return np.diag(m) / np.sum(m, axis=0)\n" + ] + }, + { + "data": { + "text/plain": [ + "{'precision': array([0., 1.]),\n", + " 'recall': array([ nan, 0.38383838]),\n", + " 'F-score': nan,\n", + " 'accuracy': 0.3838383838383838}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", "gbt_titanic.validate(df_test)\n" @@ -201,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "2ca1ac16-51e1-4b8c-b24e-adb14805c846", "metadata": {}, "outputs": [], @@ -215,7 +399,16 @@ "execution_count": null, "id": "c009583e-2907-489f-b5bf-fa7dee70aae8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:binarybeech.binarybeech:A tree with 106 leafs was created\n", + "INFO:binarybeech.binarybeech:A tree with 89 leafs was created\n" + ] + } + ], "source": [ "c.train(slack=1e-3)" ]