{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Chapter 9 - Handling Imbalanced Datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 9.2. Example of Imbalanced Dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n", "\n", "# Plot defaults applied to the figures drawn below.\n", "plt.rcParams[\"figure.figsize\"] = [8, 6]\n", "sns.set_style(\"darkgrid\")\n", "\n", "# Load the customer churn dataset straight from GitHub.\n", "churn_data = pd.read_csv(\n", "    \"https://raw.githubusercontent.com/albayraktaroglu/Datasets/master/churn.csv\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Drop the State, Phone, VMail Plan and Int'l Plan columns in a single call.\n", "columns_to_drop = [\"State\", \"Phone\", \"VMail Plan\", \"Int'l Plan\"]\n", "churn_data = churn_data.drop(columns=columns_to_drop)\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Account LengthArea CodeVMail MessageDay MinsDay CallsDay ChargeEve MinsEve CallsEve ChargeNight MinsNight CallsNight ChargeIntl MinsIntl CallsIntl ChargeCustServ CallsChurn?
012841525265.111045.07197.49916.78244.79111.0110.032.701False.
110741526161.612327.47195.510316.62254.410311.4513.733.701False.
21374150243.411441.38121.211010.30162.61047.3212.253.290False.
3844080299.47150.9061.9885.26196.9898.866.671.782False.
4754150166.711328.34148.312212.61186.91218.4110.132.733False.
\n", "
" ], "text/plain": [ " Account Length Area Code VMail Message Day Mins Day Calls Day Charge \\\n", "0 128 415 25 265.1 110 45.07 \n", "1 107 415 26 161.6 123 27.47 \n", "2 137 415 0 243.4 114 41.38 \n", "3 84 408 0 299.4 71 50.90 \n", "4 75 415 0 166.7 113 28.34 \n", "\n", " Eve Mins Eve Calls Eve Charge Night Mins Night Calls Night Charge \\\n", "0 197.4 99 16.78 244.7 91 11.01 \n", "1 195.5 103 16.62 254.4 103 11.45 \n", "2 121.2 110 10.30 162.6 104 7.32 \n", "3 61.9 88 5.26 196.9 89 8.86 \n", "4 148.3 122 12.61 186.9 121 8.41 \n", "\n", " Intl Mins Intl Calls Intl Charge CustServ Calls Churn? \n", "0 10.0 3 2.70 1 False. \n", "1 13.7 3 3.70 1 False. \n", "2 12.2 5 3.29 0 False. \n", "3 6.6 7 1.78 2 False. \n", "4 10.1 3 2.73 3 False. " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3333, 17)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_data.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfcAAAFxCAYAAABunOHiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZU0lEQVR4nO3df2xV9f3H8dftbW+F3lsLo04NtPyyM8C3gZZQl4aSJpqqiZsaWuk1l2GBxUbR1q22IiKzRspYux/E6oImxjqoVVlk6pJp59oFkenNJrOkg3XOiUMtYKX3Ym9re75/fL/ejTmh4L299N3n4y/uuZ/e8z5/HJ+ec29vXY7jOAIAAGYkJXoAAAAQW8QdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADAmOdEDxMrIyIiGh/mtPgDAxJCS4v7S58zEfXjYUV/fyUSPAQDAmMjM9H3pc9yWBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwx81fh4sWbfoEmpaYkegzgK/s0MqTQiYFEjwFgDBD3M5iUmqL8micTPQbwlQW3rlRIxB2YCLgtDwCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxybF+waGhIa1fv17vv/++BgcHVVlZqYsvvli33nqrZs6cKUkqLy/Xtddeq7a2NrW2tio5OVmVlZUqLi7WwMCAampqdOzYMaWlpWnLli2aOnVqrMcEAMCsmMd99+7dysjI0NatW/Xxxx/rhhtu0G233aZbbrlFFRUV0XW9vb1qaWnRc889p0gkIr/fr8LCQu3cuVM5OTlat26dXnzxRTU3N2vDhg2xHhMAALNiflv+6quv1p133hl97Ha79fbbb+t3v/udbr75Zq1fv16hUEj79+/XokWL5PF45PP5lJWVpe7ubgWDQS1dulSSVFRUpL1798Z6RAAATIv5lXtaWpokKRQK6Y477lBVVZUGBwdVWlqqBQsW6JFHHtHDDz+syy+/XD6f75SfC4VCCoVC0e1paWnq7+8f1X7dbpcyMibH+nAAUzhHgIkh5nGXpCNHjui2226T3+/XddddpxMnTig9PV2SdNVVV6m+vl6LFy9WOByO/kw4HJbP55PX641uD4fD0Z87k+FhR319J2N+LJmZvjMvAsaJeJwjABLjdH2K+W35o0ePqqKiQjU1NVq+fLkkafXq1dq/f78kae/evZo/f75yc3MVDAYViUTU39+vnp4e5eTkKC8vTx0dHZKkzs5O5efnx3pEAABMi/mV+6OPPqoTJ06oublZzc3NkqS6ujo99NBDSklJ0bRp01RfXy+v16tAICC/3y/HcVRdXa3U1FSVl5ertrZW5eXlSklJUWNjY6xHBADANJfjOE6ih4iFoaHhuN2Wz695MuavC4y14NaV6u0d3WdYAJz/xvS2PAAASCziDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwJjnWLzg0NKT169fr/fff1+DgoCorKzV
37lzV1dXJ5XLpsssu0/3336+kpCS1tbWptbVVycnJqqysVHFxsQYGBlRTU6Njx44pLS1NW7Zs0dSpU2M9JgAAZsX8yn337t3KyMjQjh07tH37dtXX12vz5s2qqqrSjh075DiO2tvb1dvbq5aWFrW2turxxx9XU1OTBgcHtXPnTuXk5GjHjh26/vrr1dzcHOsRAQAwLeZX7ldffbVKSkqij91ut7q6urRkyRJJUlFRkfbs2aOkpCQtWrRIHo9HHo9HWVlZ6u7uVjAY1Jo1a6JriTsAAGcn5nFPS0uTJIVCId1xxx2qqqrSli1b5HK5os/39/crFArJ5/Od8nOhUOiU7Z+vHQ2326WMjMkxPhrAFs4RYGKIedwl6ciRI7rtttvk9/t13XXXaevWrdHnwuGw0tPT5fV6FQ6HT9nu8/lO2f752tEYHnbU13cytgciKTPTd+ZFwDgRj3MEQGKcrk8xf8/96NGjqqioUE1NjZYvXy5Jmjdvnvbt2ydJ6uzs1OLFi5Wbm6tgMKhIJKL+/n719PQoJydHeXl56ujoiK7Nz8+P9YgAAJgW8yv3Rx99VCdOnFBzc3P0/fJ7771XDz74oJqamjR79myVlJTI7XYrEAjI7/fLcRxVV1crNTVV5eXlqq2tVXl5uVJSUtTY2BjrEQEAMM3lOI6T6CFiYWhoOG635fNrnoz56wJjLbh1pXp7R/cZFgDnvzG9LQ8AABKLuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGDOquD/zzDOnPH7yySfjMgwAAPjqkk/35AsvvKDf/va32rdvn15//XVJ0vDwsA4dOqSVK1eOyYAAAODsnDbuS5cuVWZmpvr6+nTTTTdJkpKSkjRjxowxGQ4AAJy908b9wgsvVEFBgQoKCnTs2DFFIhFJ/3f1DgAAzk+njfvnfvCDH6ijo0MXXXSRHMeRy+VSa2trvGcDAADnYFRxf+utt/TKK68oKYkP1wMAcL4bVa2zs7Ojt+RH66233lIgEJAkdXV1aenSpQoEAgoEAnrppZckSW1tbbrxxhtVVlamV199VZI0MDCgdevWye/3a+3atTp+/PhZ7RcAgIluVFfuR44cUXFxsbKzsyXpjLflt2/frt27d2vSpEmSpAMHDuiWW25RRUVFdE1vb69aWlr03HPPKRKJyO/3q7CwUDt37lROTo7WrVunF198Uc3NzdqwYcNXOUYAACaUUcW9sbHxrF40KytL27Zt09133y1Jevvtt/XOO++ovb1d2dnZWr9+vfbv369FixbJ4/HI4/EoKytL3d3dCgaDWrNmjSSpqKhIzc3NZ3lIAABMbKOK+y9/+csvbLv99tu/dH1JSYkOHz4cfZybm6vS0lItWLBAjzzyiB5++GFdfvnl8vl80TVpaWkKhUIKhULR7Wlpaerv7x/VgbjdLmVkTB7VWmCi4hwBJoZRxX3atGmSJMdxdODAAY2MjJzVTq666iqlp6dH/11fX6/FixcrHA5H14TDYfl8Pnm93uj2cDgc/bkzGR521Nd38qzmGo3MTN+ZFwHjRDzOEQCJcbo+jeoDdStWrNCKFStUXl6u+vp6ffjhh2c1wOrVq7V//35J0t69ezV//nzl5uYqGAwqEomov79fPT09ysnJUV5enjo6OiRJnZ2dys/PP6t9AQAw0Y3qyv2dd96J/ru3t1dHjhw5q51s2rRJ9fX1SklJ0bRp01RfXy+v16tAICC/3y/HcVRdXa3U1FSVl5ertrZW5eXlSklJOev3+wEAmOh
cjuM4Z1r0+a+0SVJqaqoCgYCWLVsW18HO1tDQcNxuy+fX8IdyMP4Ft65Ub+/oPsMC4Px3utvyo7pyb2lp0ccff6z33ntP06dP19SpU2M2HAAAiK1Rvef+61//WitWrNCjjz6qm266Sc8//3y85wIAAOdoVFfuTzzxhHbt2hX9dbXvfOc7+va3vx3v2QAAwDkY1ZW7y+VSWlqaJMnr9So1NTWuQwEAgHM3qiv3rKwsNTQ0aPHixQoGg8rKyor3XAAA4ByN6sq9rKxMF154oV577TXt2rVLN998c7znAgAA52hUcW9oaNBVV12ljRs36tlnn1VDQ0O85wIAAOdoVHFPTk7W3LlzJUkzZszg77oDAHAeG9V77pdeeqmampq0cOFC7d+/XxdddFG85wIAAOdoVJfgmzdv1tSpU9XR0aGpU6dq8+bN8Z4LAACco1FduaempmrVqlVxHgUAAMQCb54DAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGPiFve33npLgUBAkvTuu++qvLxcfr9f999/v0ZGRiRJbW1tuvHGG1VWVqZXX31VkjQwMKB169bJ7/dr7dq1On78eLxGBADApLjEffv27dqwYYMikYgkafPmzaqqqtKOHTvkOI7a29vV29urlpYWtba26vHHH1dTU5MGBwe1c+dO5eTkaMeOHbr++uvV3NwcjxEBADArLnHPysrStm3boo+7urq0ZMkSSVJRUZFee+017d+/X4sWLZLH45HP51NWVpa6u7sVDAa1dOnS6Nq9e/fGY0QAAMxKjseLlpSU6PDhw9HHjuPI5XJJktLS0tTf369QKCSfzxddk5aWplAodMr2z9eOhtvtUkbG5BgeBWAP5wgwMcQl7v8pKelfNwjC4bDS09Pl9XoVDodP2e7z+U7Z/vna0RgedtTXdzK2g0vKzPSdeREwTsTjHAGQGKfr05h8Wn7evHnat2+fJKmzs1OLFy9Wbm6ugsGgIpGI+vv71dPTo5ycHOXl5amjoyO6Nj8/fyxGBADAjDG5cq+trdV9992npqYmzZ49WyUlJXK73QoEAvL7/XIcR9XV1UpNTVV5eblqa2tVXl6ulJQUNTY2jsWIAACY4XIcx0n0ELEwNDQct9vy+TVPxvx1gbEW3LpSvb2j+wwLgPNfwm/LAwCAsUPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY5LHcmfXX3+9fD6fJGn69Om69dZbVVdXJ5fLpcsuu0z333+/kpKS1NbWptbWViUnJ6uyslLFxcVjOSYAAOPamMU9EolIklpaWqLbbr31VlVVVamgoEAbN25Ue3u7Fi5cqJaWFj333HOKRCLy+/0qLCyUx+MZq1EBABjXxizu3d3d+vTTT1VRUaHPPvtMd911l7q6urRkyRJJUlFRkfbs2aOkpCQtWrRIHo9HHo9HWVlZ6u7uVm5u7liNCgDAuDZmcb/gggu0evVqlZaW6u9//7vWrl0rx3HkcrkkSWlpaerv71coFIreuv98eygUOuP
ru90uZWRMjtv8gAWcI8DEMGZxnzVrlrKzs+VyuTRr1ixlZGSoq6sr+nw4HFZ6erq8Xq/C4fAp2/899l9meNhRX9/JmM+dmXnmfQPjRTzOEQCJcbo+jVncn332WR08eFCbNm3Shx9+qFAopMLCQu3bt08FBQXq7OzUFVdcodzcXP3kJz9RJBLR4OCgenp6lJOTM1ZjAjhPTL0wRW7PBYkeA/jKhgcHdPyToTHd55jFffny5brnnntUXl4ul8ulhx56SFOmTNF9992npqYmzZ49WyUlJXK73QoEAvL7/XIcR9XV1UpNTR2rMQGcJ9yeC/SPB/4n0WMAX1nWxj9LMhp3j8ejxsbGL2x/6qmnvrCtrKxMZWVlYzEWAADm8CU2AAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYkJ3qA/2ZkZESbNm3SX/7yF3k8Hj344IPKzs5O9FgAAIwL5+WV+yuvvKLBwUE9/fTT+t73vqeGhoZEjwQAwLhxXsY9GAxq6dKlkqSFCxfq7bffTvBEAACMH+flbflQKCSv1xt97Ha79dlnnyk5+cvHTUlxKzPTF5d5gltXxuV1gbEWr3MkXrI2/jnRIwAxMdbn3nl55e71ehUOh6OPR0ZGTht2AADwL+dl3PPy8tTZ2SlJ+tOf/qScnJwETwQAwPjhchzHSfQQ/+nzT8sfPHhQjuPooYce0pw5cxI9FgAA48J5GXcAAHDuzsvb8gAA4NwRdwAAjOEj6Iipw4cP61vf+pbmz58f3VZQUKDbb7/9C2vr6up07bXXqqioaCxHBExraGhQV1eXent7NTAwoBkzZmjKlCn62c9+lujRMIaIO2Ju7ty5amlpSfQYwIRUV1cnSdq1a5f+9re/6fvf/36CJ0IiEHfE3fDwsDZu3KgPPvhAH3/8sYqKilRVVRV9/p133tE999yj5ORkud1u/fCHP9TXv/51NTY26o033pDjOFq1apWuueaaxB0EMI7V1dWpr69PfX19Wr16tV566SX9+Mc/liQVFhZqz549OnLkiO677z5FIhGlpqaqvr5el1xySYInx7ki7oi5v/71rwoEAtHHVVVVWrhwoUpLSxWJRL4Q99dee03z589XXV2d3nzzTX3yySfq7u7W4cOH1draqkgkorKyMhUWFio9PT0BRwSMf1dccYVWrVqlffv2/dfnt2zZokAgoGXLlmnv3r360Y9+pMbGxjGeErFC3BFz/3lbPhQK6fnnn9frr78ur9erwcHBU9YvX75c27dv15o1a+Tz+VRdXa2DBw+qq6sr+j8Jn332mf75z38Sd+AczZo1679u//y3oQ8ePKif//zneuyxx+Q4jlJSUsZyPMQYcUfc7dq1Sz6fTw888IDeffddtbW16d+/XqG9vV35+fm6/fbb9cILL+ixxx7TlVdeqYKCAtXX12tkZETNzc2aPn16Ao8CGN9cLpckKTU1Vb29vZKk999/X5988okkafbs2aqoqFBeXp56enr0xhtvJGxWfHXEHXH3zW9+U3fddZeCwaAmTZqk7OxsffTRR9HnFyxYoJqaGm3btk1JSUm65557NG/ePP3hD3+Q3+/XyZMndeWVV8rr9epXv/qVTp48qZtuuimBRwSMXwsWLJDP51NpaanmzJkT/Z/m2tpabdq0SZFIRAMDA7r33nslSXfffbeqqqp06aWXJnJsnCW+oQ4AAGP4EhsAAIwh7gAAGEP
cAQAwhrgDAGAMcQcAwBh+FQ6ADh06pK1bt+rTTz/VyZMntWzZMi1ZskRPP/109GtKY+GRRx7Ryy+/LJfLpZKSEn33u9+N2WsD+BfiDkxwJ06c0F133aVt27Zp5syZGh4e1p133qnMzMyY7qe7u1u/+c1v9Mwzz8hxHJWVlWnZsmX6xje+EdP9ACDuwITX3t6ugoICzZw5U5Lkdru1ZcsW/fGPf9QzzzyjNWvW6Pjx4youLta6desUCAS0adMmzZkzRzt37tTRo0d1ww03qLKyUhkZGSoqKlJnZ6cuv/xyHTp0SKFQSD/96U81a9Ysbd++XcnJyRoeHpbb7dbkyZMTe/CAUbznDkxwH330kWbMmHHKtrS0NKWkpCgSiai5uVm/+MUv9NRTT532dXp7e/X4449r7dq1kqTc3Fw98cQTKiws1IsvvqjU1FRNmzZNIyMjuvPOO3XNNdd8Yb8AYoO4AxPcpZdeqg8++OCUbe+9957eeOMNXXbZZfJ4PJo0aZKSk794o+/fv+By+vTp8ng80cfz5s2TJF188cWKRCLR7S+//LIyMzO1evXqWB8KgP9H3IEJrri4WL///e/1j3/8Q5I0NDSkhoYGTZkyJfrHRv6dx+OJ/uGRAwcORLcnJY3uPydf+9rXVFpaGoPJAXwZ3nMHJjiv16uGhgZt2LBBjuMoHA6ruLhYc+bM0ZtvvvmF9StXrtQDDzygSy65RBdddNFZ76+np0eTJ0+OXtkDiD3+cAwAAMZwWx4AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDH/C2LlWhIzWKQNAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.countplot(x='Churn?', data=churn_data)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False. 2850\n", "True. 483\n", "Name: Churn?, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_data[\"Churn?\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(483, 17)\n", "(2850, 17)\n" ] } ], "source": [ "# Split the data by class: churn_true is the minority, churn_false the majority.\n", "churn_true = churn_data[churn_data[\"Churn?\"] == \"True.\"]\n", "churn_false = churn_data[churn_data[\"Churn?\"] == \"False.\"]\n", "print(churn_true.shape)\n", "print(churn_false.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 9.3. Downsampling " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from sklearn.utils import resample\n", "\n", "# Downsample the majority class to the size of the minority class.\n", "# Sampling WITHOUT replacement keeps every retained row distinct; with\n", "# replace=True the same majority row could be drawn several times, so the\n", "# downsampled set would contain duplicates and fewer unique observations.\n", "churn_falseds = resample(\n", "    churn_false,\n", "    replace=False,\n", "    n_samples=len(churn_true),\n", "    random_state=27,  # fixed seed so the selected subset is reproducible\n", ")\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(483, 17)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_falseds.shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Combine all minority rows with the downsampled majority rows.\n", "churn_downsampled = pd.concat([churn_true, churn_falseds])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfEAAAFxCAYAAABjgpGlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAX0klEQVR4nO3df2xV9f3H8de9t9wC97YWRtkkUKRA55A1RZt2S0NZE7bOJfuhkQteU0YAN8nYLIwfRaEwJKMdo4uSMB2SGIuuFGFzc1sy7JhdQDtsNt3qOljHUARnVaq9F7kt7fn+8Z13MLQUuafX9+X5+Kv3nNtz3/ePD0/Oubf3ehzHcQQAAMzxJnsAAADw4RBxAACMIuIAABhFxAEAMIqIAwBgFBEHAMCotGQPcLn6+/vV18dfxQEArg7Dhvk+cJ9rEf/a176mjIwMSdL48eN11113qaqqSh6PR1OnTtX69evl9XrV2NiohoYGpaWlacmSJSorKxvwuH19jrq6zrg1NgAAHynZ2RkfuM+ViMdiMUlSfX19fNtdd92lyspKFRcXq7q6Wk1NTSooKFB9fb327t2rWCymcDiskpIS+f1+N8YCACCluBLx9vZ2vfvuu1q4cKHOnTun5cuXq62tTUVFRZKk0tJSHTx4UF6vVzNmzJDf75ff71dOTo7a29uVn5/vxlgAAKQUVyI+fPhwLVq0SHPmzNG//vUv3XnnnXIcRx6PR5IUCATU3d2tSCQSv+T+3vZIJDLgsX0+j7KyRroxNgAAprgS8UmTJmnixInyeDyaNGmSsrKy1NbWFt8fjUaVmZmpYDCoaDR6wfbzo/5+eE0cAHA1Geg1cVf+xOyJJ55QTU2NJOnf//63IpGISkpK1NLSIklqbm5WYWGh8vPz1draqlgspu7ubnV0dCgvL8+NkQAASDkeN77FrKenR2vWrNHJkyfl8Xi0YsUKjRo1SuvWrVNvb69yc3O1adMm+Xw+NTY2avfu3XIcR9/85jdVXl4+4LF7e/s4EwcAXDUGOhN3JeJuIuIAgKvJkF9OBwAA7iPiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBR5r6K1C3BzOEakT4s2WMAV+zdWK8i75xN9hiXZfQ1w+TzD0/2GMAV6es5q7fe7h3SxyTi/zEifZhuWvlosscArljrlvmKyFbEff7hennjp5M9BnBFcqr/ImloI87ldAAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjHIt4m+++aZmzZqljo4OHT9+XLfffrvC4bDWr1+v/v5+SVJjY6NuvfVWhUIhHThwwK1RAABISa5EvLe3V9XV1Ro+fLgkafPmzaqsrNTjjz8ux3HU1NSkzs5O1dfXq6GhQTt37lRdXZ16enrcGAcAgJTkSsRra2s1b948jR07VpLU1tamoqIiSVJpaakOHTqkF198UTNmzJDf71dGRoZycnLU3t7uxjgAAKSktEQfcN++fRo9erRmzpypn/zkJ5Ikx3Hk8XgkSYFAQN3d3YpEIsrIyIj/XiAQUCQSueTxfT6PsrJGJnpsIKWwRoDkGOq1l/CI7927Vx6PR88++6z+9re/afXq1Xrrrbfi+6PRqDIzMxUMBhWNRi/Yfn7UP0hfn6OurjOJHlvZ2Zd+bMAKN9aIm1h/SBVD3aeEX05/7LHHtGvXLtXX1+tTn/qUamtrVVpaqpaWFkl
Sc3OzCgsLlZ+fr9bWVsViMXV3d6ujo0N5eXmJHgcAgJSV8DPx97N69WqtW7dOdXV1ys3NVXl5uXw+nyoqKhQOh+U4jpYtW6b09PShGAcAgJTgasTr6+vjP+/ateui/aFQSKFQyM0RAABIWXzYCwAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo9LcOGhfX5/Wrl2rY8eOyefzafPmzXIcR1VVVfJ4PJo6darWr18vr9erxsZGNTQ0KC0tTUuWLFFZWZkbIwEAkHJcifiBAwckSQ0NDWppaYlHvLKyUsXFxaqurlZTU5MKCgpUX1+vvXv3KhaLKRwOq6SkRH6/342xAABIKa5EfPbs2frc5z4nSTp58qTGjBmj3//+9yoqKpIklZaW6uDBg/J6vZoxY4b8fr/8fr9ycnLU3t6u/Px8N8YCACCluBJxSUpLS9Pq1au1f/9+PfDAAzpw4IA8Ho8kKRAIqLu7W5FIRBkZGfHfCQQCikQiAx7X5/MoK2ukW2MDKYE1AiTHUK891yIuSbW1tVqxYoVCoZBisVh8ezQaVWZmpoLBoKLR6AXbz4/6++nrc9TVdSbhs2ZnD/y4gCVurBE3sf6QKoa6T668O/3nP/+5HnroIUnSiBEj5PF4NH36dLW0tEiSmpubVVhYqPz8fLW2tioWi6m7u1sdHR3Ky8tzYyQAAFKOK2fiX/jCF7RmzRrdcccdOnfunO655x5NnjxZ69atU11dnXJzc1VeXi6fz6eKigqFw2E5jqNly5YpPT3djZEAAEg5rkR85MiRuv/++y/avmvXrou2hUIhhUIhN8YAACCl8WEvAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGDSrie/bsueD2o48+6sowAABg8NIG2vnUU0/pd7/7nVpaWvTcc89Jkvr6+nT06FHNnz9/SAYEAADvb8CIz5w5U9nZ2erq6tLcuXMlSV6vVxMmTBiS4QAAwAcbMOLXXHONiouLVVxcrDfffFOxWEzS/5+NAwCA5Bow4u/53ve+p2eeeUZjx46V4zjyeDxqaGhwezYAADCAQUX8hRde0NNPPy2vlzezAwDwUTGoKk+cODF+KR0AAHw0DOpM/NSpUyorK9PEiRMlicvpAAB8BAwq4lu3bnV7DgAAcJkGFfGf/exnF21bunRpwocBAACDN6iIjxkzRpLkOI5eeukl9ff3uzoUAAC4tEFFfN68eRfcXrx4sSvDAACAwRtUxI8dOxb/ubOzU6dOnXJtIAAAMDiDinh1dXX85/T0dK1atcq1gQAAwOAMKuL19fU6ffq0XnnlFY0fP16jR492ey4AAHAJg/qwl9/85jeaN2+eHnzwQc2dO1dPPvmk23MBAIBLGNSZ+COPPKJ9+/YpEAgoEono61//ur761a+6PRsAABjAoM7EPR6PAoGAJCkYDCo9Pd3VoQAAwKUN6kw8JydHNTU1KiwsVGtrq3JyctyeCwAAXMKgzsRDoZCuueYaHTp0SPv27dMdd9zh9lwAAOASBhXxmpoaff7zn1d1dbWeeOIJ1dTUuD0XAAC4hEFFPC0tTVOmTJEkTZgwge8VBwDgI2BQr4mPGzd
OdXV1Kigo0IsvvqixY8e6PRcAALiEQZ1Sb968WaNHj9Yzzzyj0aNHa/PmzW7PBQAALmFQZ+Lp6elasGCBy6MAAIDLwYvbAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIxKS/QBe3t7dc899+jVV19VT0+PlixZoilTpqiqqkoej0dTp07V+vXr5fV61djYqIaGBqWlpWnJkiUqKytL9DgAAKSshEf8F7/4hbKysrRlyxadPn1at9xyi66//npVVlaquLhY1dXVampqUkFBgerr67V3717FYjGFw2GVlJTI7/cneiQAAFJSwiP+xS9+UeXl5fHbPp9PbW1tKioqkiSVlpbq4MGD8nq9mjFjhvx+v/x+v3JyctTe3q78/PxEjwQAQEpKeMQDgYAkKRKJ6Dvf+Y4qKytVW1srj8cT39/d3a1IJKKMjIwLfi8SiVzy+D6fR1lZIxM9NpBSWCNAcgz12kt4xCXp1KlT+ta3vqVwOKwvf/nL2rJlS3xfNBpVZmamgsGgotHoBdvPj/oH6etz1NV1JuEzZ2df+rEBK9xYI25i/SFVDHWfEv7u9DfeeEMLFy7UypUrddttt0mSpk2bppaWFklSc3OzCgsLlZ+fr9bWVsViMXV3d6ujo0N5eXmJHgcAgJSV8DPxBx98UO+88462b9+u7du3S5Luvfdebdq0SXV1dcrNzVV5ebl8Pp8qKioUDoflOI6WLVum9PT0RI8DAEDK8jiO4yR7iMvR29vn2uWKm1Y+mvDjAkOtdct8dXZ2J3uMy5KdnaGXN3462WMAVySn+i+urL0hvZwOAACGBhEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGCUaxF/4YUXVFFRIUk6fvy4br/9doXDYa1fv179/f2SpMbGRt16660KhUI6cOCAW6MAAJCSXIn4jh07tHbtWsViMUnS5s2bVVlZqccff1yO46ipqUmdnZ2qr69XQ0ODdu7cqbq6OvX09LgxDgAAKSnNjYPm5ORo27ZtWrVqlSSpra1NRUVFkqTS0lIdPHhQXq9XM2bMkN/vl9/vV05Ojtrb25Wfnz/gsX0+j7KyRroxNpAyWCNAcgz12nMl4uXl5Tpx4kT8tuM48ng8kqRAIKDu7m5FIhFlZGTE7xMIBBSJRC557L4+R11dZxI+c3Z2xqXvBBjhxhpxE+sPqWKo+zQkb2zzev/7MNFoVJmZmQoGg4pGoxdsPz/qAABgYEMS8WnTpqmlpUWS1NzcrMLCQuXn56u1tVWxWEzd3d3q6OhQXl7eUIwDAEBKcOVy+v9avXq11q1bp7q6OuXm5qq8vFw+n08VFRUKh8NyHEfLli1Tenr6UIwDAEBKcC3i48ePV2NjoyRp0qRJ2rVr10X3CYVCCoVCbo0AAEBK48NeAAAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCg
iDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBRRBwAAKOIOAAARhFxAACMIuIAABhFxAEAMIqIAwBgFBEHAMAoIg4AgFFEHAAAo4g4AABGEXEAAIwi4gAAGEXEAQAwiogDAGAUEQcAwCgiDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYlZbsAfr7+7Vhwwb9/e9/l9/v16ZNmzRx4sRkjwUAwEde0s/En376afX09Gj37t367ne/q5qammSPBACACUmPeGtrq2bOnClJKigo0F//+tckTwQAgA1Jv5weiUQUDAbjt30+n86dO6e0tPcfbdgwn7KzM1yZpXXLfFeOCww1t9aIm3Kq/5LsEYArNtRrL+ln4sFgUNFoNH67v7//AwMOAAD+K+kRv/HGG9Xc3CxJ+vOf/6y8vLwkTwQAgA0ex3GcZA7w3rvTjxw5Isdx9P3vf1+TJ09O5kgAAJiQ9IgDAIAPJ+mX0wEAwIdDxAEAMIq3geOK1NTUqK2tTZ2dnTp79qwmTJigUaNG6YEHHkj2aEDKOnHihL7yla/ohhtuiG8rLi7W0qVLL7pvVVWVvvSlL6m0tHQoR8QQIeK4IlVVVZKkffv26Z///KdWrFiR5ImAq8OUKVNUX1+f7DGQZEQcCVdVVaWuri51dXVp0aJF+vWvf60f/ehHkqSSkhIdPHhQp06d0rp16xSLxZSenq777rtP1157bZInB+zq6+tTdXW1XnvtNZ0+fVqlpaWqrKyM7z927JjWrFmjtLQ0+Xw+/eAHP9DHP/5xbd26VYcPH5bjOFqwYIFuvvnm5D0JXDYiDld85jOf0YIFC9TS0vK++2tra1VRUaFZs2bp2Wef1Q9/+ENt3bp1iKcE7PrHP/6hioqK+O3KykoVFBRozpw5isViF0X80KFDuuGGG1RVVaXnn39eb7/9ttrb23XixAk1NDQoFospFAqppKREmZmZSXhG+DCIOFwxadKk993+3l80HjlyRA899JAefvhhOY6jYcOGDeV4gHn/ezk9EonoySef1HPPPadgMKienp4L7n/bbbdpx44dWrx4sTIyMrRs2TIdOXJEbW1t8f8MnDt3TidPniTihhBxuMLj8UiS0tPT1dnZKUl69dVX9fbbb0uScnNztXDhQt14443q6OjQ4cOHkzYrkAr27dunjIwMbdy4UcePH1djY6PO/xiQpqYm3XTTTVq6dKmeeuopPfzww5o9e7aKi4t13333qb+/X9u3b9f48eOT+CxwuYg4XDV9+nRlZGRozpw5mjx5cvwfiNWrV2vDhg2KxWI6e/as7r33XknSqlWrVFlZqXHjxiVzbMCcz372s1q+fLlaW1s1YsQITZw4Ua+//np8//Tp07Vy5Upt27ZNXq9Xa9as0bRp0/THP/5R4XBYZ86c0ezZsxUMBvXLX/5SZ86c0dy5c5P4jDAYfGIbAABG8WEvAAAYRcQBADCKiAMAYBQRBwDAKCIOAIBR/IkZcBU5evSotmzZonfffVdnzpzRrFmzVFRUpN27d8c/GjcRfvzjH2v//v3yeDwqLy/XN77xjYQdG8B/EXHgKvHOO+9o+fLl2rZtm6677jr19fXp7rvvVnZ2dkIfp729Xb/97W+1Z88eOY6jUCikWbNm6ZOf/GRCHwcAEQeuGk1NTSouLtZ1110nSfL5fKqtrdWf/vQn7dmzR4sXL9Zbb72lsrIyffvb31ZFRYU2bNigyZMn66c//aneeOMN3XLLLVqyZImysrJUWlqq5uZmXX/99Tp69KgikYjuv/9+TZo0STt27FBaWpr6+vrk8/k0cuTI5D55IEXxmjhwlXj99dc1YcKEC7YFAgENGzZMsVhM27dv12OPPaZdu3YNeJzOzk7t3LlTd955pyQpPz9fjzzyiEpKSvSrX/1K6enpGjNmjPr7+3X33Xfr5ptvvuhxASQGEQeuEuPGjdNrr712wbZXXnlFhw8f1tSpU+X3+zVixAi
lpV18ge78D3YcP368/H5//Pa0adMkSZ/4xCcUi8Xi2/fv36/s7GwtWrQo0U8FwH8QceAqUVZWpj/84Q96+eWXJUm9vb2qqanRqFGj4l9Ycz6/3x//8pqXXnopvt3rHdw/Gx/72Mc0Z86cBEwO4IPwmjhwlQgGg6qpqdHatWvlOI6i0ajKyso0efJkPf/88xfdf/78+dq4caOuvfZajR079rIfr6OjQyNHjoyfqQNIPL4ABQAAo7icDgCAUUQcAACjiDgAAEYRcQAAjCLiAAAYRcQBADCKiAMAYBQRBwDAqP8DCbiyWEZEtQsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.countplot(x='Churn?', data=churn_downsampled)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True. 483\n", "False. 483\n", "Name: Churn?, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_downsampled[\"Churn?\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 9.4. Upsampling" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from sklearn.utils import resample\n", "\n", "# Upsample the minority class: draw rows with replacement until there are as\n", "# many of them as there are majority-class rows.\n", "churn_trueus = resample(\n", "    churn_true,\n", "    n_samples=churn_false.shape[0],\n", "    replace=True,\n", "    random_state=27,\n", ")\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Stack the resampled minority rows on top of the untouched majority rows.\n", "churn_upsampled = pd.concat([churn_trueus, churn_false], axis=0)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfcAAAFxCAYAAABunOHiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZrklEQVR4nO3db2yV9f3/8dfpaU+FnlMLo0wNFAHpDGADbUO3NJQ00VRN3NRA8RxzmBZYIIK2bJWKiIwaKWNl2YjVBU2MdVBRWHTqkilzdBFkeLJRLelgnVNhqEWs9BzsaW2v743fzzOZUwqcP/TN83GLc52r53pfNz4+va5zeupyHMcRAAAwIy3VAwAAgPgi7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMempHiBeBgcHNTDAb/UBAC4OGRnur33OTNwHBhx1d59K9RgAACRFbq7va5/jtjwAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgjJm/Cpco3uxLNCIzI9VjAOfts2i/wid7Uz3GkI2+NENuzyWpHgM4bwN9vTrxaX9Sj0ncz2BEZoaKap9K9RjAeQttXKCwhk/c3Z5L9N66a1I9BnDe8ta8JSm5cee2PAAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGpMf7Bfv7+7Vq1SodPXpUfX19Wrp0qS677DItWbJEV155pSTJ7/frxhtv1Pbt29XS0qL09HQtXbpU5eXl6u3tVW1trT7++GNlZWVpw4YNGj16dLzHBADArLjH/YUXXlBOTo42btyoTz75RLfccovuuusu3Xnnnaqqqort19XVpebmZu3YsUPRaFSBQEClpaXatm2b8vPztXz5cr300ktqamrS6tWr4z0mAABmxf22/PXXX6977rkn9tjtduvtt9/Wn/70J91+++1atWqVwuGw2traNHPmTHk8Hvl8PuXl5amjo0OhUEizZ8+WJJWVlWnv3r3xHhEAANPifuWelZUlSQqHw7r77rtVXV2tvr4+zZs3T9OnT9ejjz6qRx55RFdffbV8Pt9pPxcOhxUOh2Pbs7Ky1NPTM6Tjut0u5eSMjPfpAKawRoDUSPbai3vcJenYsWO66667FAgEdNNNN+nkyZPKzs6WJF133XWqr69XcXGxIpFI7GcikYh8Pp+8Xm9seyQSif3cmQwMOOruPhX3c8nN9Z15J2CYSMQaSRTWHixJdp/iflv++PHjqqqqUm1trebOnStJWrhwodra2iRJe/fu1bRp01RQUKBQKKRoNKqenh51dnYqPz9fhYWF2r17tySptbVVRUVF8R4RAADT4n7l/thjj+nkyZNqampSU1OTJKmurk4PP/ywMjIyNGbMGNXX18vr9SoYDCoQCMhxHNXU1CgzM1N+v18rV66U3+9XRkaGGhsb4z0iAACmuRzHcVI9RDz09w8k7LZHUe1TcX9dINlCGxeoq2ton2G5EOTm+vTeumtSPQZw3vLWvJWQtZfU2/IAACC1iDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDH
EHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwJj0eL9gf3+/Vq1apaNHj6qvr09Lly7VVVddpbq6OrlcLk2ZMkUPPvig0tLStH37drW0tCg9PV1Lly5VeXm5ent7VVtbq48//lhZWVnasGGDRo8eHe8xAQAwK+5X7i+88IJycnK0detWbdmyRfX19Vq/fr2qq6u1detWOY6jXbt2qaurS83NzWppadETTzyhTZs2qa+vT9u2bVN+fr62bt2qm2++WU1NTfEeEQAA0+J+5X799deroqIi9tjtdqu9vV2zZs2SJJWVlen1119XWlqaZs6cKY/HI4/Ho7y8PHV0dCgUCmnRokWxfYk7AABnJ+5xz8rKkiSFw2Hdfffdqq6u1oYNG+RyuWLP9/T0KBwOy+fznfZz4XD4tO1f7DsUbrdLOTkj43w2gC2sESA1kr324h53STp27JjuuusuBQIB3XTTTdq4cWPsuUgkouzsbHm9XkUikdO2+3y+07Z/se9QDAw46u4+Fd8TkZSb6zvzTsAwkYg1kiisPViS7D7F/T3348ePq6qqSrW1tZo7d64kaerUqdq3b58kqbW1VcXFxSooKFAoFFI0GlVPT486OzuVn5+vwsJC7d69O7ZvUVFRvEcEAMC0uF+5P/bYYzp58qSamppi75fff//9euihh7Rp0yZNmjRJFRUVcrvdCgaDCgQCchxHNTU1yszMlN/v18qVK+X3+5WRkaHGxsZ4jwgAgGkux3GcVA8RD/39Awm77VFU+1TcXxdIttDGBerqGtpnWC4Eubk+vbfumlSPAZy3vDVvJWTtJfW2PAAASC3iDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgzJDi/uyzz572+KmnnkrIMAAA4Pylf9OTL774ov74xz9q3759euONNyRJAwMDOnz4sBYsWJCUAQEAwNn5xrjPnj1bubm56u7u1vz58yVJaWlpGj9+fFKGAwAAZ+8b437ppZeqpKREJSUl+vjjjxWNRiX9v6t3AABwYfrGuH/hpz/9qXbv3q2xY8fKcRy5XC61tLQkejYAAHAOhhT3AwcO6NVXX1VaGh+uBwDgQjekWk+YMCF2S36oDhw4oGAwKElqb2/X7NmzFQwGFQwG9fLLL0uStm/frltvvVWVlZV67bXXJEm9vb1avny5AoGAFi9erBMnTpzVcQEAuNgN6cr92LFjKi8v14QJEyTpjLflt2zZohdeeEEjRoyQJB08eFB33nmnqqqqYvt0dXWpublZO3bsUDQaVSAQUGlpqbZt26b8/HwtX75cL730kpqamrR69erzOUcAAC4qQ4p7Y2PjWb1oXl6eNm/erHvvvVeS9Pbbb+udd97Rrl27NGHCBK1atUptbW2aOXOmPB6PPB6P8vLy1NHRoVAopEWLFkmSysrK1NTUdJanBADAxW1Icf/tb3/7lW3Lli372v0rKip05MiR2OOCggLNmzdP06dP16OPPqpHHnlEV199tXw+X2yfrKwshcNhhcPh2PasrCz19PQM6UTcbpdyckYOaV/gYsUaAVIj2WtvSHEfM2aMJMlxHB08eFCDg4NndZDrrrtO2dnZsX/X19eruLhYkUgktk8kEpHP55PX641tj0QisZ87k4EBR93dp85qrqHIzfWdeSdgmEjEGkkU1h4sSXafhvSButtuu0233Xab/H6/6uvr9eGHH57VAAsXLlRbW5skae/evZo2bZoKCgoUCoUUjUbV09Ojzs5O5efnq7CwULt375Y
ktba2qqio6KyOBQDAxW5IV+7vvPNO7N9dXV06duzYWR1k7dq1qq+vV0ZGhsaMGaP6+np5vV4Fg0EFAgE5jqOamhplZmbK7/dr5cqV8vv9ysjIOOv3+wEAuNi5HMdxzrTTF7/SJkmZmZkKBoOaM2dOQgc7W/39Awm77VFUyx/KwfAX2rhAXV1D+wzLhSA316f31l2T6jGA85a35q2ErL1vui0/pCv35uZmffLJJ3r//fc1btw4jR49Om7DAQCA+BrSe+6///3vddttt+mxxx7T/Pnz9fzzzyd6LgAAcI6GdOX+5JNPaufOnbFfV/vhD3+oH/zgB4meDQAAnIMhXbm7XC5lZWVJkrxerzIzMxM6FAAAOHdDunLPy8tTQ0ODiouLFQqFlJeXl+i5AADAORrSlXtlZaUuvfRS7dmzRzt37tTtt9+e6LkAAMA5GlLcGxoadN1112nNmjV67rnn1NDQkOi5AADAORpS3NPT03XVVVdJksaPH8/fdQcA4AI2pPfcr7jiCm3atEkzZsxQW1ubxo4dm+i5AADAORrSJfj69es1evRo7d69W6NHj9b69esTPRcAADhHQ7pyz8zM1B133JHgUQAAQDzw5jkAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMCZhcT9w4ICCwaAk6d1335Xf71cgENCDDz6owcFBSdL27dt16623qrKyUq+99pokqbe3V8uXL1cgENDixYt14sSJRI0IAIBJCYn7li1btHr1akWjUUnS+vXrVV1dra1bt8pxHO3atUtdXV1qbm5WS0uLnnjiCW3atEl9fX3atm2b8vPztXXrVt18881qampKxIgAAJiVkLjn5eVp8+bNscft7e2aNWuWJKmsrEx79uxRW1ubZs6cKY/HI5/Pp7y8PHV0dCgUCmn27Nmxfffu3ZuIEQEAMCs9ES9aUVGhI0eOxB47jiOXyyVJysrKUk9Pj8LhsHw+X2yfrKwshcPh07Z/se9QuN0u5eSMjONZAPawRoDUSPbaS0jc/1ta2n9uEEQiEWVnZ8vr9SoSiZy23efznbb9i32HYmDAUXf3qfgOLik313fmnYBhIhFrJFFYe7Ak2X1Kyqflp06dqn379kmSWltbVVxcrIKCAoVCIUWjUfX09Kizs1P5+fkqLCzU7t27Y/sWFRUlY0QAAMxIypX7ypUr9cADD2jTpk2aNGmSKioq5Ha7FQwGFQgE5DiOampqlJmZKb/fr5UrV8rv9ysjI0ONjY3JGBEAADNcjuM4qR4iHvr7BxJ226Oo9qm4vy6QbKGNC9TVNbTPsFwIcnN9em/dNakeAzhveWveSsjaS/lteQAAkDzEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMCY9mQe7+eab5fP5JEnjxo3TkiVLVFdXJ5fLpSlTpujBBx9UWlqatm/frpaWFqWnp2vp0qUqLy9P5pgAAAxrSYt7NBqVJDU3N8e2LVmyRNXV1SopKdGaNWu0a9cuzZgxQ83NzdqxY4ei0agCgYB
KS0vl8XiSNSoAAMNa0uLe0dGhzz77TFVVVfr888+1YsUKtbe3a9asWZKksrIyvf7660pLS9PMmTPl8Xjk8XiUl5enjo4OFRQUJGtUAACGtaTF/ZJLLtHChQs1b948/etf/9LixYvlOI5cLpckKSsrSz09PQqHw7Fb919sD4fDZ3x9t9ulnJyRCZsfsIA1AqRGstde0uI+ceJETZgwQS6XSxMnTlROTo7a29tjz0ciEWVnZ8vr9SoSiZy2/cux/zoDA466u0/Ffe7c3DMfGxguErFGEoW1B0uS3aekfVr+ueeeU0NDgyTpww8/VDgcVmlpqfbt2ydJam1tVXFxsQoKChQKhRSNRtXT06POzk7l5+cna0wAAIa9pF25z507V/fdd5/8fr9cLpcefvhhjRo1Sg888IA2bdqkSZMmqaKiQm63W8FgUIFAQI7jqKamRpmZmckaEwCAYS9pcfd4PGpsbPzK9qeffvor2yorK1VZWZmMsQAAMIcvsQEAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAYQ9wBADCGuAMAYAxxBwDAGOIOAIAxxB0AAGOIOwAAxhB3AACMIe4AABhD3AEAMIa4AwBgDHEHAMAY4g4AgDHEHQAAY4g7AADGEHcAAIwh7gAAGEPcAQAwJj3VA/wvg4ODWrt2rf7+97/L4/HooYce0oQJE1I9FgAAw8IFeeX+6quvqq+vT88884x+/OMfq6GhIdUjAQAwbFyQcQ+FQpo9e7YkacaMGXr77bdTPBEAAMPHBXlbPhwOy+v1xh673W59/vnnSk//+nEzMtzKzfUlZJ7QxgUJeV0g2RK1RhIlb81bqR4BiItkr70L8srd6/UqEonEHg8ODn5j2AEAwH9ckHEvLCxUa2urJOlvf/ub8vPzUzwRAADDh8txHCfVQ/y3Lz4tf+jQITmOo4cffliTJ09O9VgAAAwLF2TcAQDAubsgb8sDAIBzR9wBADCGj6AjIRoaGtTe3q6uri719vZq/PjxGjVqlH71q1+lejTArCNHjuj73/++pk2bFttWUlKiZcuWfWXfuro63XjjjSorK0vmiEgS4o6EqKurkyTt3LlT//znP/WTn/wkxRMBF4errrpKzc3NqR4DKUbckTR1dXXq7u5Wd3e3Fi5cqJdfflm/+MUvJEmlpaV6/fXXdezYMT3wwAOKRqPKzMxUfX29Lr/88hRPDgxfAwMDWrNmjT744AN98sknKisrU3V1dez5d955R/fdd5/S09Pldrv1s5/9TN/+9rfV2Nio/fv3y3Ec3XHHHbrhhhtSdxI4a8QdSfXd735Xd9xxh/bt2/c/n9+wYYOCwaDmzJmjvXv36uc//7kaGxuTPCUwfP3jH/9QMBiMPa6urtaMGTM0b948RaPRr8R9z549mjZtmurq6vTmm2/q008/VUdHh44cOaKWlhZFo1FVVlaqtLRU2dnZKTgjnAvijqSaOHHi/9z+xW9kHjp0SL/+9a/1+OOPy3EcZWRkJHM8YNj779vy4XBYzz//vN544w15vV719fWdtv/cuXO1ZcsWLVq0SD6fTzU1NTp06JDa29tj/5Pw+eef69///jdxH0aIO5LK5XJJkjIzM9XV1SVJOnr0qD799FNJ0qRJk1RVVaXCwkJ1dnZq//79KZsVsGDnzp3y+Xxat26d3n33XW3fvl1f/nqTXbt2qaioSMuWLdOLL76oxx9/XNdee61KSkpUX1+vwcFBNTU1ady4cSk8C5wt4o6UmD59unw+n+bNm6fJkyfH/sOxcuVKrV27VtFoVL29vbr//vslSffee6+qq6t1xRVXpHJsYNj53ve+pxUrVig
UCmnEiBGaMGGCPvroo9jz06dPV21trTZv3qy0tDTdd999mjp1qv7yl78oEAjo1KlTuvbaa+X1evW73/1Op06d0vz581N4RhgKvqEOAABj+BIbAACMIe4AABhD3AEAMIa4AwBgDHEHAMAYfhUOgA4fPqyNGzfqs88+06lTpzRnzhzNmjVLzzzzTOwrguPh0Ucf1SuvvCKXy6WKigr96Ec/ittrA/gP4g5c5E6ePKkVK1Zo8+bNuvLKKzUwMKB77rlHubm5cT1OR0eH/vCHP+jZZ5+V4ziqrKzUnDlz9J3vfCeuxwFA3IGL3q5du1RSUqIrr7xSkuR2u7Vhwwb99a9/1bPPPqtFixbpxIkTKi8v1/LlyxUMBrV27VpNnjxZ27Zt0/Hjx3XLLbdo6dKlysnJUVlZmVpbW3X11Vfr8OHDCofD+uUvf6mJEydqy5YtSk9P18DAgNxut0aOHJnakweM4j134CL30Ucfafz48adty8rKUkZGhqLRqJqamvSb3/xGTz/99De+TldXl5544gktXrxYklRQUKAnn3xSpaWleumll5SZmakxY8ZocHBQ99xzj2644YavHBdAfBB34CJ3xRVX6IMPPjht2/vvv6/9+/drypQp8ng8GjFihNLTv3qj78tfcDlu3Dh5PJ7Y46lTp0qSLrvsMkWj0dj2V155Rbm5uVq4cGG8TwXA/0fcgYtceXm5/vznP+u9996TJPX396uhoUGjRo2K/aGfL/N4PLE/+nPw4MHY9rS0of3n5Fvf+pbmzZsXh8kBfB3ecwcucl6vVw0NDVq9erUcx1EkElF5ebkmT56sN9988yv7L1iwQOvWrdPll1+usWPHnvXxOjs7NXLkyNiVPYD44w/HAABgDLflAQAwhrgDAGAMcQcAwBjiDgCAMcQdAABjiDsAAMYQdwAAjCHuAAAY83+Zhn+e/SOlsgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.countplot(x='Churn?', data=churn_upsampled)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False. 2850\n", "True. 2850\n", "Name: Churn?, dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "churn_upsampled[\"Churn?\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 9.5. SMOTE Upsampling" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "churn_data['Churn?'] = churn_data['Churn?'].map({'True.': 1, 'False.': 0})" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2850\n", "1 483\n", "Name: Churn?, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = churn_data[[\"Churn?\"]]\n", "X = churn_data.drop(\"Churn?\", axis = 1)\n", "y[\"Churn?\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# install imblearn using the following pip command\n", "# pip install imbalanced-learn\n", "\n", "from imblearn.over_sampling import SMOTE\n", "\n", "sm = SMOTE(random_state=2)\n", "X_us, y_us = sm.fit_resample(X, y)\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 2850\n", "0 2850\n", "Name: Churn?, dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_us[\"Churn?\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exercise 9.1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Look at the following dataset. It is highly imbalanced. Try to upsample it using SMOTE."
] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
0100.3850.616-0.874-0.0942.9253.3170.4700.538-0.559...0.0500.2380.0090.997-0.767-0.4920.042-0.0549.990
112-0.7520.3452.057-1.469-1.158-0.078-0.6090.004-0.436...0.5001.354-0.257-0.065-0.039-0.087-0.1810.12915.990
2251.1140.0860.4941.336-0.300-0.011-0.1190.1890.206...-0.053-0.005-0.0310.1980.565-0.3380.0290.0044.450
333-0.9360.1702.746-1.078-0.3060.012-0.2960.403-0.040...0.4011.065-0.1580.296-0.2590.7540.0470.0949.100
4351.1990.1300.8641.003-0.784-0.885-0.041-0.2080.392...-0.0420.198-0.0331.0130.5590.402-0.0060.0180.990
\n", "

5 rows × 31 columns

\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... \\\n", "0 10 0.385 0.616 -0.874 -0.094 2.925 3.317 0.470 0.538 -0.559 ... \n", "1 12 -0.752 0.345 2.057 -1.469 -1.158 -0.078 -0.609 0.004 -0.436 ... \n", "2 25 1.114 0.086 0.494 1.336 -0.300 -0.011 -0.119 0.189 0.206 ... \n", "3 33 -0.936 0.170 2.746 -1.078 -0.306 0.012 -0.296 0.403 -0.040 ... \n", "4 35 1.199 0.130 0.864 1.003 -0.784 -0.885 -0.041 -0.208 0.392 ... \n", "\n", " V21 V22 V23 V24 V25 V26 V27 V28 Amount Class \n", "0 0.050 0.238 0.009 0.997 -0.767 -0.492 0.042 -0.054 9.99 0 \n", "1 0.500 1.354 -0.257 -0.065 -0.039 -0.087 -0.181 0.129 15.99 0 \n", "2 -0.053 -0.005 -0.031 0.198 0.565 -0.338 0.029 0.004 4.45 0 \n", "3 0.401 1.065 -0.158 0.296 -0.259 0.754 0.047 0.094 9.10 0 \n", "4 -0.042 0.198 -0.033 1.013 0.559 0.402 -0.006 0.018 0.99 0 \n", "\n", "[5 rows x 31 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the credit card fraud data into its own variable; reusing churn_data\n", "# would silently replace the churn DataFrame from the earlier sections.\n", "creditcard_data = pd.read_csv(\"https://raw.githubusercontent.com/IBM/xgboost-smote-detect-fraud/master/data/creditcard.csv\")\n", "creditcard_data.head()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Solution Here" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 56772\n", "1 102\n", "Name: Class, dtype: int64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split the frame into the target column (Class) and the remaining feature columns.\n", "y = creditcard_data[[\"Class\"]]\n", "X = creditcard_data.drop(\"Class\", axis = 1)\n", "\n", "# Class counts before resampling\n", "y[\"Class\"].value_counts()\n" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 56772\n", "0 56772\n", "Name: Class, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from imblearn.over_sampling import SMOTE\n", "\n", "# Fixed random_state so the resampling is repeatable across runs\n", "sm = SMOTE(random_state=2)\n", "X_us, y_us = sm.fit_resample(X, y)\n", "\n", "# Class counts after resampling\n", "y_us[\"Class\"].value_counts()" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }