{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Chapter 9 - Handling Imbalanced Datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 9.2. Example of Imbalanced Dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Imports, then plotting defaults (8x6 figures, seaborn darkgrid style).\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n", "\n", "plt.rcParams[\"figure.figsize\"] = [8,6]\n", "sns.set_style(\"darkgrid\")\n", "\n", "# Keep the unmodified download under its own name so the column-dropping\n", "# cell below can be re-run without fetching the CSV again.\n", "churn_raw = pd.read_csv(\"https://raw.githubusercontent.com/albayraktaroglu/Datasets/master/churn.csv\")\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Drop the State, Phone, VMail Plan and Int'l Plan columns in one call.\n", "# Deriving churn_data from churn_raw (instead of reassigning churn_data from\n", "# itself) keeps this cell idempotent: re-running it no longer raises KeyError.\n", "churn_data = churn_raw.drop(columns=[\"State\", \"Phone\", \"VMail Plan\", \"Int'l Plan\"])\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Account Length | \n", "Area Code | \n", "VMail Message | \n", "Day Mins | \n", "Day Calls | \n", "Day Charge | \n", "Eve Mins | \n", "Eve Calls | \n", "Eve Charge | \n", "Night Mins | \n", "Night Calls | \n", "Night Charge | \n", "Intl Mins | \n", "Intl Calls | \n", "Intl Charge | \n", "CustServ Calls | \n", "Churn? | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "128 | \n", "415 | \n", "25 | \n", "265.1 | \n", "110 | \n", "45.07 | \n", "197.4 | \n", "99 | \n", "16.78 | \n", "244.7 | \n", "91 | \n", "11.01 | \n", "10.0 | \n", "3 | \n", "2.70 | \n", "1 | \n", "False. | \n", "
| 1 | \n", "107 | \n", "415 | \n", "26 | \n", "161.6 | \n", "123 | \n", "27.47 | \n", "195.5 | \n", "103 | \n", "16.62 | \n", "254.4 | \n", "103 | \n", "11.45 | \n", "13.7 | \n", "3 | \n", "3.70 | \n", "1 | \n", "False. | \n", "
| 2 | \n", "137 | \n", "415 | \n", "0 | \n", "243.4 | \n", "114 | \n", "41.38 | \n", "121.2 | \n", "110 | \n", "10.30 | \n", "162.6 | \n", "104 | \n", "7.32 | \n", "12.2 | \n", "5 | \n", "3.29 | \n", "0 | \n", "False. | \n", "
| 3 | \n", "84 | \n", "408 | \n", "0 | \n", "299.4 | \n", "71 | \n", "50.90 | \n", "61.9 | \n", "88 | \n", "5.26 | \n", "196.9 | \n", "89 | \n", "8.86 | \n", "6.6 | \n", "7 | \n", "1.78 | \n", "2 | \n", "False. | \n", "
| 4 | \n", "75 | \n", "415 | \n", "0 | \n", "166.7 | \n", "113 | \n", "28.34 | \n", "148.3 | \n", "122 | \n", "12.61 | \n", "186.9 | \n", "121 | \n", "8.41 | \n", "10.1 | \n", "3 | \n", "2.73 | \n", "3 | \n", "False. | \n", "
| \n", " | Time | \n", "V1 | \n", "V2 | \n", "V3 | \n", "V4 | \n", "V5 | \n", "V6 | \n", "V7 | \n", "V8 | \n", "V9 | \n", "... | \n", "V21 | \n", "V22 | \n", "V23 | \n", "V24 | \n", "V25 | \n", "V26 | \n", "V27 | \n", "V28 | \n", "Amount | \n", "Class | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "10 | \n", "0.385 | \n", "0.616 | \n", "-0.874 | \n", "-0.094 | \n", "2.925 | \n", "3.317 | \n", "0.470 | \n", "0.538 | \n", "-0.559 | \n", "... | \n", "0.050 | \n", "0.238 | \n", "0.009 | \n", "0.997 | \n", "-0.767 | \n", "-0.492 | \n", "0.042 | \n", "-0.054 | \n", "9.99 | \n", "0 | \n", "
| 1 | \n", "12 | \n", "-0.752 | \n", "0.345 | \n", "2.057 | \n", "-1.469 | \n", "-1.158 | \n", "-0.078 | \n", "-0.609 | \n", "0.004 | \n", "-0.436 | \n", "... | \n", "0.500 | \n", "1.354 | \n", "-0.257 | \n", "-0.065 | \n", "-0.039 | \n", "-0.087 | \n", "-0.181 | \n", "0.129 | \n", "15.99 | \n", "0 | \n", "
| 2 | \n", "25 | \n", "1.114 | \n", "0.086 | \n", "0.494 | \n", "1.336 | \n", "-0.300 | \n", "-0.011 | \n", "-0.119 | \n", "0.189 | \n", "0.206 | \n", "... | \n", "-0.053 | \n", "-0.005 | \n", "-0.031 | \n", "0.198 | \n", "0.565 | \n", "-0.338 | \n", "0.029 | \n", "0.004 | \n", "4.45 | \n", "0 | \n", "
| 3 | \n", "33 | \n", "-0.936 | \n", "0.170 | \n", "2.746 | \n", "-1.078 | \n", "-0.306 | \n", "0.012 | \n", "-0.296 | \n", "0.403 | \n", "-0.040 | \n", "... | \n", "0.401 | \n", "1.065 | \n", "-0.158 | \n", "0.296 | \n", "-0.259 | \n", "0.754 | \n", "0.047 | \n", "0.094 | \n", "9.10 | \n", "0 | \n", "
| 4 | \n", "35 | \n", "1.199 | \n", "0.130 | \n", "0.864 | \n", "1.003 | \n", "-0.784 | \n", "-0.885 | \n", "-0.041 | \n", "-0.208 | \n", "0.392 | \n", "... | \n", "-0.042 | \n", "0.198 | \n", "-0.033 | \n", "1.013 | \n", "0.559 | \n", "0.402 | \n", "-0.006 | \n", "0.018 | \n", "0.99 | \n", "0 | \n", "
5 rows × 31 columns
\n", "