{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Chapter 8 - Handling Mixed and Date Time Variables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 8.2. Handling Mixed Values" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameQualification
0Jon9
1NickGraduate
2Ben7
3SallyGraduate
4AlicePhD
\n", "
" ], "text/plain": [ " name Qualification\n", "0 Jon 9\n", "1 Nick Graduate\n", "2 Ben 7\n", "3 Sally Graduate\n", "4 Alice PhD" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "name = ['Jon', 'Nick', 'Ben', 'Sally', 'Alice', 'Josh']\n", "eduation = [9, 'Graduate', 7, 'Graduate', 'PhD', 8]\n", "\n", "std = {'name':name,'Qualification':eduation}\n", "\n", "student_df = pd.DataFrame(std)\n", "student_df.head()\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "student_df['q_numeric'] = pd.to_numeric(student_df[\"Qualification\"],\n", " errors='coerce',\n", " downcast='integer')\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameQualificationq_numericq_categoric
0Jon99.0NaN
1NickGraduateNaNGraduate
2Ben77.0NaN
3SallyGraduateNaNGraduate
4AlicePhDNaNPhD
\n", "
" ], "text/plain": [ " name Qualification q_numeric q_categoric\n", "0 Jon 9 9.0 NaN\n", "1 Nick Graduate NaN Graduate\n", "2 Ben 7 7.0 NaN\n", "3 Sally Graduate NaN Graduate\n", "4 Alice PhD NaN PhD" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "student_df['q_categoric'] = np.where(student_df['q_numeric'].isnull(),\n", " student_df['Qualification'],\n", " np.nan)\n", "\n", "student_df.head()\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic_data = pd.read_csv(\"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv\")\n", "\n", "titanic_data.head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TicketCabin
0A/5 21171NaN
1PC 17599C85
2STON/O2. 3101282NaN
3113803C123
4373450NaN
\n", "
" ], "text/plain": [ " Ticket Cabin\n", "0 A/5 21171 NaN\n", "1 PC 17599 C85\n", "2 STON/O2. 3101282 NaN\n", "3 113803 C123\n", "4 373450 NaN" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic_data = titanic_data[['Ticket', 'Cabin']]\n", "titanic_data.head()\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TicketTicket_NumTicket_Cat
0A/5 211715A
1PC 1759917599P
2STON/O2. 31012822S
31138031138031
43734503734503
\n", "
" ], "text/plain": [ " Ticket Ticket_Num Ticket_Cat\n", "0 A/5 21171 5 A\n", "1 PC 17599 17599 P\n", "2 STON/O2. 3101282 2 S\n", "3 113803 113803 1\n", "4 373450 373450 3" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic_data ['Ticket_Num'] = titanic_data['Ticket'].str.extract('(\\d+)') \n", "titanic_data ['Ticket_Cat'] = titanic_data['Ticket'].str[0] \n", "\n", "titanic_data[['Ticket', 'Ticket_Num', 'Ticket_Cat']].head()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 8.3. Handling Date Data Type" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateclosevolumeopenhighlow
02018/10/15259.596189026.0000259.06263.28254.5367
12018/10/12258.787189257.0000261.00261.99252.0100
22018/10/11252.238128184.0000257.53262.25249.0300
32018/10/10256.8812781560.0000264.61265.51247.7700
42018/10/09262.8012037780.0000255.25266.77253.3000
\n", "
" ], "text/plain": [ " date close volume open high low\n", "0 2018/10/15 259.59 6189026.0000 259.06 263.28 254.5367\n", "1 2018/10/12 258.78 7189257.0000 261.00 261.99 252.0100\n", "2 2018/10/11 252.23 8128184.0000 257.53 262.25 249.0300\n", "3 2018/10/10 256.88 12781560.0000 264.61 265.51 247.7700\n", "4 2018/10/09 262.80 12037780.0000 255.25 266.77 253.3000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tesla_stock = pd.read_csv(\"https://raw.githubusercontent.com/plotly/datasets/master/tesla-stock-price.csv\")\n", "tesla_stock = tesla_stock.shift(-1)\n", "tesla_stock.dropna(inplace = True)\n", "tesla_stock.head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "tesla_stock['date'] = pd.to_datetime(tesla_stock['date'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":1: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n", " tesla_stock['week'] = tesla_stock['date'].dt.week\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateweek
02018-10-1542
12018-10-1241
22018-10-1141
32018-10-1041
42018-10-0941
\n", "
" ], "text/plain": [ " date week\n", "0 2018-10-15 42\n", "1 2018-10-12 41\n", "2 2018-10-11 41\n", "3 2018-10-10 41\n", "4 2018-10-09 41" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tesla_stock['week'] = tesla_stock['date'].dt.week\n", "tesla_stock[['date', 'week']].head()\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datemonth
02018-10-1510
12018-10-1210
22018-10-1110
32018-10-1010
42018-10-0910
\n", "
" ], "text/plain": [ " date month\n", "0 2018-10-15 10\n", "1 2018-10-12 10\n", "2 2018-10-11 10\n", "3 2018-10-10 10\n", "4 2018-10-09 10" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tesla_stock['month'] = tesla_stock['date'].dt.month\n", "tesla_stock[['date', 'month']].head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateday_month
02018-10-1515
12018-10-1212
22018-10-1111
32018-10-1010
42018-10-099
\n", "
" ], "text/plain": [ " date day_month\n", "0 2018-10-15 15\n", "1 2018-10-12 12\n", "2 2018-10-11 11\n", "3 2018-10-10 10\n", "4 2018-10-09 9" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tesla_stock['day_month'] = tesla_stock['date'].dt.day\n", "tesla_stock[['date', 'day_month']].head()\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateday_week
02018-10-15Monday
12018-10-12Friday
22018-10-11Thursday
32018-10-10Wednesday
42018-10-09Tuesday
\n", "
" ], "text/plain": [ " date day_week\n", "0 2018-10-15 Monday\n", "1 2018-10-12 Friday\n", "2 2018-10-11 Thursday\n", "3 2018-10-10 Wednesday\n", "4 2018-10-09 Tuesday" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tesla_stock['day_week'] = tesla_stock['date'].dt.day_name()\n", "tesla_stock[['date', 'day_week']].head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2018-10-15 00:00:00\n", "2018-10-09 00:00:00\n", "6 days 00:00:00\n" ] } ], "source": [ "diff = tesla_stock[\"date\"].iloc[0] - tesla_stock[\"date\"].iloc[4]\n", "print(tesla_stock[\"date\"].iloc[0])\n", "print(tesla_stock[\"date\"].iloc[4])\n", "print(diff)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 8.4. Handling Time Data Type" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usertimestampxyzclass
024c92000-01-01 12:00:00-0.1906855.4753808.185829bike
124c92000-01-01 12:00:00-0.2996485.3664178.294792bike
224c92000-01-01 12:00:00-0.1225835.6252048.485476bike
324c92000-01-01 12:00:00-0.2996485.5707228.376513bike
424c92000-01-01 12:00:00-0.4767125.3391768.526338bike
\n", "
" ], "text/plain": [ " user timestamp x y z class\n", "0 24c9 2000-01-01 12:00:00 -0.190685 5.475380 8.185829 bike\n", "1 24c9 2000-01-01 12:00:00 -0.299648 5.366417 8.294792 bike\n", "2 24c9 2000-01-01 12:00:00 -0.122583 5.625204 8.485476 bike\n", "3 24c9 2000-01-01 12:00:00 -0.299648 5.570722 8.376513 bike\n", "4 24c9 2000-01-01 12:00:00 -0.476712 5.339176 8.526338 bike" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bike_data = pd.read_csv(\"https://raw.githubusercontent.com/QROWD/TR/master/datasets/bike.csv\")\n", "bike_data.dropna(inplace = True)\n", "bike_data.head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "bike_data['timestamp'] = pd.to_datetime(bike_data['timestamp'])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usertimestampxyzclasshourminsec
024c92000-01-01 12:00:0310.215261-3.5412917.109821bike12.00.03.0
124c92000-01-01 12:00:0312.394516-4.2495487.722737bike12.00.03.0
224c92000-01-01 12:00:0316.807508-3.2416436.170018bike12.00.03.0
324c92000-01-01 12:00:0316.003908-1.6889234.916945bike12.00.03.0
424c92000-01-01 12:00:038.580819-2.165635-0.721878bike12.00.03.0
524c92000-01-01 12:00:037.722737-5.039529-0.204305bike12.00.03.0
624c92000-01-01 12:00:038.294792-5.2710750.980665bike12.00.03.0
724c92000-01-01 12:00:039.370799-5.2710750.817221bike12.00.03.0
824c92000-01-01 12:00:038.008764-4.5083350.544814bike12.00.03.0
924c92000-01-01 12:00:038.226690-5.4072781.784266bike12.00.03.0
1024c92000-01-01 12:00:046.183638-6.891896-2.329079bike12.00.04.0
1124c92000-01-01 12:00:047.314126-7.790839-0.626536bike12.00.04.0
1224c92000-01-01 12:00:046.687591-10.3650852.710449bike12.00.04.0
1324c92000-01-01 12:00:046.633110-11.0597221.675303bike12.00.04.0
1424c92000-01-01 12:00:046.687591-7.0553401.893228bike12.00.04.0
1524c92000-01-01 12:00:045.652444-6.782933-0.953424bike12.00.04.0
1624c92000-01-01 12:00:045.883990-6.101916-0.626536bike12.00.04.0
1724c92000-01-01 12:00:043.541291-6.0065731.076007bike12.00.04.0
1824c92000-01-01 12:00:042.506144-7.0962010.926184bike12.00.04.0
1924c92000-01-01 12:00:04-1.212211-5.5843420.653777bike12.00.04.0
\n", "
" ], "text/plain": [ " user timestamp x y z class hour min \\\n", "0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n", "1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n", "2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n", "3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n", "4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n", "5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n", "6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n", "7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n", "8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n", "9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n", "10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n", "11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n", "12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n", "13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n", "14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n", "15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n", "16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n", "17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n", "18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n", "19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n", "\n", " sec \n", "0 3.0 \n", "1 3.0 \n", "2 3.0 \n", "3 3.0 \n", "4 3.0 \n", "5 3.0 \n", "6 3.0 \n", "7 3.0 \n", "8 3.0 \n", "9 3.0 \n", "10 4.0 \n", "11 4.0 \n", "12 4.0 \n", "13 4.0 \n", "14 4.0 \n", "15 4.0 \n", "16 4.0 \n", "17 4.0 \n", "18 4.0 \n", "19 4.0 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bike_data['hour'] = bike_data['timestamp'].dt.hour\n", "bike_data['min'] = bike_data['timestamp'].dt.minute\n", "bike_data['sec'] = bike_data['timestamp'].dt.second\n", "\n", "bike_data.shift(-50).head(20)\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usertimestampxyzclasshourminsectime
024c92000-01-01 12:00:0310.215261-3.5412917.109821bike12.00.03.012:00:03
124c92000-01-01 12:00:0312.394516-4.2495487.722737bike12.00.03.012:00:03
224c92000-01-01 12:00:0316.807508-3.2416436.170018bike12.00.03.012:00:03
324c92000-01-01 12:00:0316.003908-1.6889234.916945bike12.00.03.012:00:03
424c92000-01-01 12:00:038.580819-2.165635-0.721878bike12.00.03.012:00:03
524c92000-01-01 12:00:037.722737-5.039529-0.204305bike12.00.03.012:00:03
624c92000-01-01 12:00:038.294792-5.2710750.980665bike12.00.03.012:00:03
724c92000-01-01 12:00:039.370799-5.2710750.817221bike12.00.03.012:00:03
824c92000-01-01 12:00:038.008764-4.5083350.544814bike12.00.03.012:00:03
924c92000-01-01 12:00:038.226690-5.4072781.784266bike12.00.03.012:00:03
1024c92000-01-01 12:00:046.183638-6.891896-2.329079bike12.00.04.012:00:04
1124c92000-01-01 12:00:047.314126-7.790839-0.626536bike12.00.04.012:00:04
1224c92000-01-01 12:00:046.687591-10.3650852.710449bike12.00.04.012:00:04
1324c92000-01-01 12:00:046.633110-11.0597221.675303bike12.00.04.012:00:04
1424c92000-01-01 12:00:046.687591-7.0553401.893228bike12.00.04.012:00:04
1524c92000-01-01 12:00:045.652444-6.782933-0.953424bike12.00.04.012:00:04
1624c92000-01-01 12:00:045.883990-6.101916-0.626536bike12.00.04.012:00:04
1724c92000-01-01 12:00:043.541291-6.0065731.076007bike12.00.04.012:00:04
1824c92000-01-01 12:00:042.506144-7.0962010.926184bike12.00.04.012:00:04
1924c92000-01-01 12:00:04-1.212211-5.5843420.653777bike12.00.04.012:00:04
\n", "
" ], "text/plain": [ " user timestamp x y z class hour min \\\n", "0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n", "1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n", "2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n", "3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n", "4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n", "5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n", "6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n", "7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n", "8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n", "9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n", "10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n", "11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n", "12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n", "13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n", "14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n", "15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n", "16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n", "17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n", "18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n", "19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n", "\n", " sec time \n", "0 3.0 12:00:03 \n", "1 3.0 12:00:03 \n", "2 3.0 12:00:03 \n", "3 3.0 12:00:03 \n", "4 3.0 12:00:03 \n", "5 3.0 12:00:03 \n", "6 3.0 12:00:03 \n", "7 3.0 12:00:03 \n", "8 3.0 12:00:03 \n", "9 3.0 12:00:03 \n", "10 4.0 12:00:04 \n", "11 4.0 12:00:04 \n", "12 4.0 12:00:04 \n", "13 4.0 12:00:04 \n", "14 4.0 12:00:04 \n", "15 4.0 12:00:04 \n", "16 4.0 12:00:04 \n", "17 4.0 12:00:04 \n", "18 4.0 12:00:04 \n", "19 4.0 12:00:04 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bike_data['time'] = bike_data['timestamp'].dt.time\n", "bike_data.shift(-50).head(20)\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usertimestampxyzclasshourminsectime
024c92000-01-01 12:00:0310.215261-3.5412917.109821bike12.00.03.012:00:03
124c92000-01-01 12:00:0312.394516-4.2495487.722737bike12.00.03.012:00:03
224c92000-01-01 12:00:0316.807508-3.2416436.170018bike12.00.03.012:00:03
324c92000-01-01 12:00:0316.003908-1.6889234.916945bike12.00.03.012:00:03
424c92000-01-01 12:00:038.580819-2.165635-0.721878bike12.00.03.012:00:03
524c92000-01-01 12:00:037.722737-5.039529-0.204305bike12.00.03.012:00:03
624c92000-01-01 12:00:038.294792-5.2710750.980665bike12.00.03.012:00:03
724c92000-01-01 12:00:039.370799-5.2710750.817221bike12.00.03.012:00:03
824c92000-01-01 12:00:038.008764-4.5083350.544814bike12.00.03.012:00:03
924c92000-01-01 12:00:038.226690-5.4072781.784266bike12.00.03.012:00:03
1024c92000-01-01 12:00:046.183638-6.891896-2.329079bike12.00.04.012:00:04
1124c92000-01-01 12:00:047.314126-7.790839-0.626536bike12.00.04.012:00:04
1224c92000-01-01 12:00:046.687591-10.3650852.710449bike12.00.04.012:00:04
1324c92000-01-01 12:00:046.633110-11.0597221.675303bike12.00.04.012:00:04
1424c92000-01-01 12:00:046.687591-7.0553401.893228bike12.00.04.012:00:04
1524c92000-01-01 12:00:045.652444-6.782933-0.953424bike12.00.04.012:00:04
1624c92000-01-01 12:00:045.883990-6.101916-0.626536bike12.00.04.012:00:04
1724c92000-01-01 12:00:043.541291-6.0065731.076007bike12.00.04.012:00:04
1824c92000-01-01 12:00:042.506144-7.0962010.926184bike12.00.04.012:00:04
1924c92000-01-01 12:00:04-1.212211-5.5843420.653777bike12.00.04.012:00:04
\n", "
" ], "text/plain": [ " user timestamp x y z class hour min \\\n", "0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n", "1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n", "2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n", "3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n", "4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n", "5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n", "6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n", "7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n", "8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n", "9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n", "10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n", "11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n", "12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n", "13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n", "14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n", "15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n", "16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n", "17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n", "18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n", "19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n", "\n", " sec time \n", "0 3.0 12:00:03 \n", "1 3.0 12:00:03 \n", "2 3.0 12:00:03 \n", "3 3.0 12:00:03 \n", "4 3.0 12:00:03 \n", "5 3.0 12:00:03 \n", "6 3.0 12:00:03 \n", "7 3.0 12:00:03 \n", "8 3.0 12:00:03 \n", "9 3.0 12:00:03 \n", "10 4.0 12:00:04 \n", "11 4.0 12:00:04 \n", "12 4.0 12:00:04 \n", "13 4.0 12:00:04 \n", "14 4.0 12:00:04 \n", "15 4.0 12:00:04 \n", "16 4.0 12:00:04 \n", "17 4.0 12:00:04 \n", "18 4.0 12:00:04 \n", "19 4.0 12:00:04 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "bike_data.shift(-50).head(20)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exercise 8.1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Question 1\n", "\n", "Which function is used to convert string type dataframe column to datetime type?:\n", "\n", "A. convertToDate() \\\n", "B. convertToDateTime() \\\n", "C. to_datetime() \\\n", "D. None of the above\n", "\n", "Answer: C" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Question 2\n", "\n", "Which attribute is used to find the day of the week from the datetime type column?:\n", "\n", "A. dt.weekday_name \\\n", "B. dt_day_week \\\n", "C. dt_name_of_weekday \\\n", "D. None of the above\n", "\n", "Answer: A" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Question 3\n", "\n", "Which attribute is used to find the time portion from a datetime type column of a pandas dataframe?:\n", "\n", "A. dt.get_time \\\n", "B. dt.show_time \\\n", "C. dt.time \\\n", "D. dt.display_time\n", "\n", "Answer: C" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exercise 8.2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "From the titanic dataset below, the Cabin column contains mixed data. Handle the mixed data in the \n", "Cabin column by creating new columns that contain numerical and categorical portion from the original values in the Cabin column." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "1 2 1 1 \n", "3 4 1 1 \n", "6 7 0 1 \n", "10 11 1 3 \n", "11 12 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "1 0 PC 17599 71.2833 C85 C \n", "3 0 113803 53.1000 C123 S \n", "6 0 17463 51.8625 E46 S \n", "10 1 PP 9549 16.7000 G6 S \n", "11 0 113783 26.5500 C103 S " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "titanic_data = pd.read_csv(\"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv\")\n", "titanic_data.dropna(inplace = True)\n", "titanic_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Solution" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CabinCabin_NCabin_C
1C8585C
3C123123C
6E4646E
10G66G
11C103103C
\n", "
" ], "text/plain": [ " Cabin Cabin_N Cabin_C\n", "1 C85 85 C\n", "3 C123 123 C\n", "6 E46 46 E\n", "10 G6 6 G\n", "11 C103 103 C" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic_data = titanic_data[['Ticket', 'Cabin']]\n", "titanic_data.head()\n", "\n", "titanic_data ['Cabin_N'] = titanic_data['Cabin'].str.extract('(\\d+)') \n", "titanic_data ['Cabin_C'] = titanic_data['Cabin'].str[0] \n", "\n", "titanic_data[['Cabin', 'Cabin_N', 'Cabin_C']].head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }