{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Chapter 8 - Handling Mixed and Date Time Variables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 8.2. Handling Mixed Values"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import seaborn as sns\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" Qualification | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Jon | \n",
" 9 | \n",
"
\n",
" \n",
" | 1 | \n",
" Nick | \n",
" Graduate | \n",
"
\n",
" \n",
" | 2 | \n",
" Ben | \n",
" 7 | \n",
"
\n",
" \n",
" | 3 | \n",
" Sally | \n",
" Graduate | \n",
"
\n",
" \n",
" | 4 | \n",
" Alice | \n",
" PhD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name Qualification\n",
"0 Jon 9\n",
"1 Nick Graduate\n",
"2 Ben 7\n",
"3 Sally Graduate\n",
"4 Alice PhD"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"name = ['Jon', 'Nick', 'Ben', 'Sally', 'Alice', 'Josh']\n",
"eduation = [9, 'Graduate', 7, 'Graduate', 'PhD', 8]\n",
"\n",
"std = {'name':name,'Qualification':eduation}\n",
"\n",
"student_df = pd.DataFrame(std)\n",
"student_df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"student_df['q_numeric'] = pd.to_numeric(student_df[\"Qualification\"],\n",
" errors='coerce',\n",
" downcast='integer')\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" Qualification | \n",
" q_numeric | \n",
" q_categoric | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Jon | \n",
" 9 | \n",
" 9.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" Nick | \n",
" Graduate | \n",
" NaN | \n",
" Graduate | \n",
"
\n",
" \n",
" | 2 | \n",
" Ben | \n",
" 7 | \n",
" 7.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" Sally | \n",
" Graduate | \n",
" NaN | \n",
" Graduate | \n",
"
\n",
" \n",
" | 4 | \n",
" Alice | \n",
" PhD | \n",
" NaN | \n",
" PhD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name Qualification q_numeric q_categoric\n",
"0 Jon 9 9.0 NaN\n",
"1 Nick Graduate NaN Graduate\n",
"2 Ben 7 7.0 NaN\n",
"3 Sally Graduate NaN Graduate\n",
"4 Alice PhD NaN PhD"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"student_df['q_categoric'] = np.where(student_df['q_numeric'].isnull(),\n",
" student_df['Qualification'],\n",
" np.nan)\n",
"\n",
"student_df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Name | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" Braund, Mr. Owen Harris | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" A/5 21171 | \n",
" 7.2500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" PC 17599 | \n",
" 71.2833 | \n",
" C85 | \n",
" C | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 1 | \n",
" 3 | \n",
" Heikkinen, Miss. Laina | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" STON/O2. 3101282 | \n",
" 7.9250 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 113803 | \n",
" 53.1000 | \n",
" C123 | \n",
" S | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 0 | \n",
" 3 | \n",
" Allen, Mr. William Henry | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 373450 | \n",
" 8.0500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic_data = pd.read_csv(\"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv\")\n",
"\n",
"titanic_data.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Ticket | \n",
" Cabin | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A/5 21171 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" PC 17599 | \n",
" C85 | \n",
"
\n",
" \n",
" | 2 | \n",
" STON/O2. 3101282 | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" 113803 | \n",
" C123 | \n",
"
\n",
" \n",
" | 4 | \n",
" 373450 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Ticket Cabin\n",
"0 A/5 21171 NaN\n",
"1 PC 17599 C85\n",
"2 STON/O2. 3101282 NaN\n",
"3 113803 C123\n",
"4 373450 NaN"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic_data = titanic_data[['Ticket', 'Cabin']]\n",
"titanic_data.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Ticket | \n",
" Ticket_Num | \n",
" Ticket_Cat | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A/5 21171 | \n",
" 5 | \n",
" A | \n",
"
\n",
" \n",
" | 1 | \n",
" PC 17599 | \n",
" 17599 | \n",
" P | \n",
"
\n",
" \n",
" | 2 | \n",
" STON/O2. 3101282 | \n",
" 2 | \n",
" S | \n",
"
\n",
" \n",
" | 3 | \n",
" 113803 | \n",
" 113803 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 373450 | \n",
" 373450 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Ticket Ticket_Num Ticket_Cat\n",
"0 A/5 21171 5 A\n",
"1 PC 17599 17599 P\n",
"2 STON/O2. 3101282 2 S\n",
"3 113803 113803 1\n",
"4 373450 373450 3"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic_data ['Ticket_Num'] = titanic_data['Ticket'].str.extract('(\\d+)') \n",
"titanic_data ['Ticket_Cat'] = titanic_data['Ticket'].str[0] \n",
"\n",
"titanic_data[['Ticket', 'Ticket_Num', 'Ticket_Cat']].head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 8.3. Handling Date Data Type"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" close | \n",
" volume | \n",
" open | \n",
" high | \n",
" low | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2018/10/15 | \n",
" 259.59 | \n",
" 6189026.0000 | \n",
" 259.06 | \n",
" 263.28 | \n",
" 254.5367 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2018/10/12 | \n",
" 258.78 | \n",
" 7189257.0000 | \n",
" 261.00 | \n",
" 261.99 | \n",
" 252.0100 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2018/10/11 | \n",
" 252.23 | \n",
" 8128184.0000 | \n",
" 257.53 | \n",
" 262.25 | \n",
" 249.0300 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2018/10/10 | \n",
" 256.88 | \n",
" 12781560.0000 | \n",
" 264.61 | \n",
" 265.51 | \n",
" 247.7700 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2018/10/09 | \n",
" 262.80 | \n",
" 12037780.0000 | \n",
" 255.25 | \n",
" 266.77 | \n",
" 253.3000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date close volume open high low\n",
"0 2018/10/15 259.59 6189026.0000 259.06 263.28 254.5367\n",
"1 2018/10/12 258.78 7189257.0000 261.00 261.99 252.0100\n",
"2 2018/10/11 252.23 8128184.0000 257.53 262.25 249.0300\n",
"3 2018/10/10 256.88 12781560.0000 264.61 265.51 247.7700\n",
"4 2018/10/09 262.80 12037780.0000 255.25 266.77 253.3000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tesla_stock = pd.read_csv(\"https://raw.githubusercontent.com/plotly/datasets/master/tesla-stock-price.csv\")\n",
"tesla_stock = tesla_stock.shift(-1)\n",
"tesla_stock.dropna(inplace = True)\n",
"tesla_stock.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"tesla_stock['date'] = pd.to_datetime(tesla_stock['date'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
":1: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n",
" tesla_stock['week'] = tesla_stock['date'].dt.week\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" week | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2018-10-15 | \n",
" 42 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2018-10-12 | \n",
" 41 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2018-10-11 | \n",
" 41 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2018-10-10 | \n",
" 41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2018-10-09 | \n",
" 41 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date week\n",
"0 2018-10-15 42\n",
"1 2018-10-12 41\n",
"2 2018-10-11 41\n",
"3 2018-10-10 41\n",
"4 2018-10-09 41"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tesla_stock['week'] = tesla_stock['date'].dt.week\n",
"tesla_stock[['date', 'week']].head()\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" month | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2018-10-15 | \n",
" 10 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2018-10-12 | \n",
" 10 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2018-10-11 | \n",
" 10 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2018-10-10 | \n",
" 10 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2018-10-09 | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date month\n",
"0 2018-10-15 10\n",
"1 2018-10-12 10\n",
"2 2018-10-11 10\n",
"3 2018-10-10 10\n",
"4 2018-10-09 10"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tesla_stock['month'] = tesla_stock['date'].dt.month\n",
"tesla_stock[['date', 'month']].head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" day_month | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2018-10-15 | \n",
" 15 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2018-10-12 | \n",
" 12 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2018-10-11 | \n",
" 11 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2018-10-10 | \n",
" 10 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2018-10-09 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date day_month\n",
"0 2018-10-15 15\n",
"1 2018-10-12 12\n",
"2 2018-10-11 11\n",
"3 2018-10-10 10\n",
"4 2018-10-09 9"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tesla_stock['day_month'] = tesla_stock['date'].dt.day\n",
"tesla_stock[['date', 'day_month']].head()\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" day_week | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2018-10-15 | \n",
" Monday | \n",
"
\n",
" \n",
" | 1 | \n",
" 2018-10-12 | \n",
" Friday | \n",
"
\n",
" \n",
" | 2 | \n",
" 2018-10-11 | \n",
" Thursday | \n",
"
\n",
" \n",
" | 3 | \n",
" 2018-10-10 | \n",
" Wednesday | \n",
"
\n",
" \n",
" | 4 | \n",
" 2018-10-09 | \n",
" Tuesday | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date day_week\n",
"0 2018-10-15 Monday\n",
"1 2018-10-12 Friday\n",
"2 2018-10-11 Thursday\n",
"3 2018-10-10 Wednesday\n",
"4 2018-10-09 Tuesday"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tesla_stock['day_week'] = tesla_stock['date'].dt.day_name()\n",
"tesla_stock[['date', 'day_week']].head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-10-15 00:00:00\n",
"2018-10-09 00:00:00\n",
"6 days 00:00:00\n"
]
}
],
"source": [
"diff = tesla_stock[\"date\"].iloc[0] - tesla_stock[\"date\"].iloc[4]\n",
"print(tesla_stock[\"date\"].iloc[0])\n",
"print(tesla_stock[\"date\"].iloc[4])\n",
"print(diff)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 8.4. Handling Time Data Type"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" timestamp | \n",
" x | \n",
" y | \n",
" z | \n",
" class | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:00 | \n",
" -0.190685 | \n",
" 5.475380 | \n",
" 8.185829 | \n",
" bike | \n",
"
\n",
" \n",
" | 1 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:00 | \n",
" -0.299648 | \n",
" 5.366417 | \n",
" 8.294792 | \n",
" bike | \n",
"
\n",
" \n",
" | 2 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:00 | \n",
" -0.122583 | \n",
" 5.625204 | \n",
" 8.485476 | \n",
" bike | \n",
"
\n",
" \n",
" | 3 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:00 | \n",
" -0.299648 | \n",
" 5.570722 | \n",
" 8.376513 | \n",
" bike | \n",
"
\n",
" \n",
" | 4 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:00 | \n",
" -0.476712 | \n",
" 5.339176 | \n",
" 8.526338 | \n",
" bike | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user timestamp x y z class\n",
"0 24c9 2000-01-01 12:00:00 -0.190685 5.475380 8.185829 bike\n",
"1 24c9 2000-01-01 12:00:00 -0.299648 5.366417 8.294792 bike\n",
"2 24c9 2000-01-01 12:00:00 -0.122583 5.625204 8.485476 bike\n",
"3 24c9 2000-01-01 12:00:00 -0.299648 5.570722 8.376513 bike\n",
"4 24c9 2000-01-01 12:00:00 -0.476712 5.339176 8.526338 bike"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bike_data = pd.read_csv(\"https://raw.githubusercontent.com/QROWD/TR/master/datasets/bike.csv\")\n",
"bike_data.dropna(inplace = True)\n",
"bike_data.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"bike_data['timestamp'] = pd.to_datetime(bike_data['timestamp'])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" timestamp | \n",
" x | \n",
" y | \n",
" z | \n",
" class | \n",
" hour | \n",
" min | \n",
" sec | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 10.215261 | \n",
" -3.541291 | \n",
" 7.109821 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 12.394516 | \n",
" -4.249548 | \n",
" 7.722737 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.807508 | \n",
" -3.241643 | \n",
" 6.170018 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.003908 | \n",
" -1.688923 | \n",
" 4.916945 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.580819 | \n",
" -2.165635 | \n",
" -0.721878 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 5 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 7.722737 | \n",
" -5.039529 | \n",
" -0.204305 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.294792 | \n",
" -5.271075 | \n",
" 0.980665 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 7 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 9.370799 | \n",
" -5.271075 | \n",
" 0.817221 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 8 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.008764 | \n",
" -4.508335 | \n",
" 0.544814 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 9 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.226690 | \n",
" -5.407278 | \n",
" 1.784266 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 10 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.183638 | \n",
" -6.891896 | \n",
" -2.329079 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 11 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 7.314126 | \n",
" -7.790839 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 12 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -10.365085 | \n",
" 2.710449 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 13 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.633110 | \n",
" -11.059722 | \n",
" 1.675303 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 14 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -7.055340 | \n",
" 1.893228 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 15 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.652444 | \n",
" -6.782933 | \n",
" -0.953424 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 16 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.883990 | \n",
" -6.101916 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 17 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 3.541291 | \n",
" -6.006573 | \n",
" 1.076007 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 18 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 2.506144 | \n",
" -7.096201 | \n",
" 0.926184 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 19 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" -1.212211 | \n",
" -5.584342 | \n",
" 0.653777 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user timestamp x y z class hour min \\\n",
"0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n",
"1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n",
"2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n",
"3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n",
"4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n",
"5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n",
"6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n",
"7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n",
"8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n",
"9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n",
"10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n",
"11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n",
"12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n",
"13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n",
"14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n",
"15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n",
"16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n",
"17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n",
"18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n",
"19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n",
"\n",
" sec \n",
"0 3.0 \n",
"1 3.0 \n",
"2 3.0 \n",
"3 3.0 \n",
"4 3.0 \n",
"5 3.0 \n",
"6 3.0 \n",
"7 3.0 \n",
"8 3.0 \n",
"9 3.0 \n",
"10 4.0 \n",
"11 4.0 \n",
"12 4.0 \n",
"13 4.0 \n",
"14 4.0 \n",
"15 4.0 \n",
"16 4.0 \n",
"17 4.0 \n",
"18 4.0 \n",
"19 4.0 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bike_data['hour'] = bike_data['timestamp'].dt.hour\n",
"bike_data['min'] = bike_data['timestamp'].dt.minute\n",
"bike_data['sec'] = bike_data['timestamp'].dt.second\n",
"\n",
"bike_data.shift(-50).head(20)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" timestamp | \n",
" x | \n",
" y | \n",
" z | \n",
" class | \n",
" hour | \n",
" min | \n",
" sec | \n",
" time | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 10.215261 | \n",
" -3.541291 | \n",
" 7.109821 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 1 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 12.394516 | \n",
" -4.249548 | \n",
" 7.722737 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 2 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.807508 | \n",
" -3.241643 | \n",
" 6.170018 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 3 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.003908 | \n",
" -1.688923 | \n",
" 4.916945 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 4 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.580819 | \n",
" -2.165635 | \n",
" -0.721878 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 5 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 7.722737 | \n",
" -5.039529 | \n",
" -0.204305 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 6 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.294792 | \n",
" -5.271075 | \n",
" 0.980665 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 7 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 9.370799 | \n",
" -5.271075 | \n",
" 0.817221 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 8 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.008764 | \n",
" -4.508335 | \n",
" 0.544814 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 9 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.226690 | \n",
" -5.407278 | \n",
" 1.784266 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 10 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.183638 | \n",
" -6.891896 | \n",
" -2.329079 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 11 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 7.314126 | \n",
" -7.790839 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 12 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -10.365085 | \n",
" 2.710449 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 13 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.633110 | \n",
" -11.059722 | \n",
" 1.675303 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 14 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -7.055340 | \n",
" 1.893228 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 15 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.652444 | \n",
" -6.782933 | \n",
" -0.953424 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 16 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.883990 | \n",
" -6.101916 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 17 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 3.541291 | \n",
" -6.006573 | \n",
" 1.076007 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 18 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 2.506144 | \n",
" -7.096201 | \n",
" 0.926184 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 19 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" -1.212211 | \n",
" -5.584342 | \n",
" 0.653777 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user timestamp x y z class hour min \\\n",
"0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n",
"1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n",
"2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n",
"3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n",
"4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n",
"5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n",
"6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n",
"7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n",
"8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n",
"9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n",
"10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n",
"11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n",
"12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n",
"13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n",
"14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n",
"15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n",
"16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n",
"17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n",
"18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n",
"19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n",
"\n",
" sec time \n",
"0 3.0 12:00:03 \n",
"1 3.0 12:00:03 \n",
"2 3.0 12:00:03 \n",
"3 3.0 12:00:03 \n",
"4 3.0 12:00:03 \n",
"5 3.0 12:00:03 \n",
"6 3.0 12:00:03 \n",
"7 3.0 12:00:03 \n",
"8 3.0 12:00:03 \n",
"9 3.0 12:00:03 \n",
"10 4.0 12:00:04 \n",
"11 4.0 12:00:04 \n",
"12 4.0 12:00:04 \n",
"13 4.0 12:00:04 \n",
"14 4.0 12:00:04 \n",
"15 4.0 12:00:04 \n",
"16 4.0 12:00:04 \n",
"17 4.0 12:00:04 \n",
"18 4.0 12:00:04 \n",
"19 4.0 12:00:04 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bike_data['time'] = bike_data['timestamp'].dt.time\n",
"bike_data.shift(-50).head(20)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" timestamp | \n",
" x | \n",
" y | \n",
" z | \n",
" class | \n",
" hour | \n",
" min | \n",
" sec | \n",
" time | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 10.215261 | \n",
" -3.541291 | \n",
" 7.109821 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 1 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 12.394516 | \n",
" -4.249548 | \n",
" 7.722737 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 2 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.807508 | \n",
" -3.241643 | \n",
" 6.170018 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 3 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 16.003908 | \n",
" -1.688923 | \n",
" 4.916945 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 4 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.580819 | \n",
" -2.165635 | \n",
" -0.721878 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 5 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 7.722737 | \n",
" -5.039529 | \n",
" -0.204305 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 6 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.294792 | \n",
" -5.271075 | \n",
" 0.980665 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 7 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 9.370799 | \n",
" -5.271075 | \n",
" 0.817221 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 8 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.008764 | \n",
" -4.508335 | \n",
" 0.544814 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 9 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:03 | \n",
" 8.226690 | \n",
" -5.407278 | \n",
" 1.784266 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 12:00:03 | \n",
"
\n",
" \n",
" | 10 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.183638 | \n",
" -6.891896 | \n",
" -2.329079 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 11 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 7.314126 | \n",
" -7.790839 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 12 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -10.365085 | \n",
" 2.710449 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 13 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.633110 | \n",
" -11.059722 | \n",
" 1.675303 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 14 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 6.687591 | \n",
" -7.055340 | \n",
" 1.893228 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 15 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.652444 | \n",
" -6.782933 | \n",
" -0.953424 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 16 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 5.883990 | \n",
" -6.101916 | \n",
" -0.626536 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 17 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 3.541291 | \n",
" -6.006573 | \n",
" 1.076007 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 18 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" 2.506144 | \n",
" -7.096201 | \n",
" 0.926184 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
" | 19 | \n",
" 24c9 | \n",
" 2000-01-01 12:00:04 | \n",
" -1.212211 | \n",
" -5.584342 | \n",
" 0.653777 | \n",
" bike | \n",
" 12.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 12:00:04 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user timestamp x y z class hour min \\\n",
"0 24c9 2000-01-01 12:00:03 10.215261 -3.541291 7.109821 bike 12.0 0.0 \n",
"1 24c9 2000-01-01 12:00:03 12.394516 -4.249548 7.722737 bike 12.0 0.0 \n",
"2 24c9 2000-01-01 12:00:03 16.807508 -3.241643 6.170018 bike 12.0 0.0 \n",
"3 24c9 2000-01-01 12:00:03 16.003908 -1.688923 4.916945 bike 12.0 0.0 \n",
"4 24c9 2000-01-01 12:00:03 8.580819 -2.165635 -0.721878 bike 12.0 0.0 \n",
"5 24c9 2000-01-01 12:00:03 7.722737 -5.039529 -0.204305 bike 12.0 0.0 \n",
"6 24c9 2000-01-01 12:00:03 8.294792 -5.271075 0.980665 bike 12.0 0.0 \n",
"7 24c9 2000-01-01 12:00:03 9.370799 -5.271075 0.817221 bike 12.0 0.0 \n",
"8 24c9 2000-01-01 12:00:03 8.008764 -4.508335 0.544814 bike 12.0 0.0 \n",
"9 24c9 2000-01-01 12:00:03 8.226690 -5.407278 1.784266 bike 12.0 0.0 \n",
"10 24c9 2000-01-01 12:00:04 6.183638 -6.891896 -2.329079 bike 12.0 0.0 \n",
"11 24c9 2000-01-01 12:00:04 7.314126 -7.790839 -0.626536 bike 12.0 0.0 \n",
"12 24c9 2000-01-01 12:00:04 6.687591 -10.365085 2.710449 bike 12.0 0.0 \n",
"13 24c9 2000-01-01 12:00:04 6.633110 -11.059722 1.675303 bike 12.0 0.0 \n",
"14 24c9 2000-01-01 12:00:04 6.687591 -7.055340 1.893228 bike 12.0 0.0 \n",
"15 24c9 2000-01-01 12:00:04 5.652444 -6.782933 -0.953424 bike 12.0 0.0 \n",
"16 24c9 2000-01-01 12:00:04 5.883990 -6.101916 -0.626536 bike 12.0 0.0 \n",
"17 24c9 2000-01-01 12:00:04 3.541291 -6.006573 1.076007 bike 12.0 0.0 \n",
"18 24c9 2000-01-01 12:00:04 2.506144 -7.096201 0.926184 bike 12.0 0.0 \n",
"19 24c9 2000-01-01 12:00:04 -1.212211 -5.584342 0.653777 bike 12.0 0.0 \n",
"\n",
" sec time \n",
"0 3.0 12:00:03 \n",
"1 3.0 12:00:03 \n",
"2 3.0 12:00:03 \n",
"3 3.0 12:00:03 \n",
"4 3.0 12:00:03 \n",
"5 3.0 12:00:03 \n",
"6 3.0 12:00:03 \n",
"7 3.0 12:00:03 \n",
"8 3.0 12:00:03 \n",
"9 3.0 12:00:03 \n",
"10 4.0 12:00:04 \n",
"11 4.0 12:00:04 \n",
"12 4.0 12:00:04 \n",
"13 4.0 12:00:04 \n",
"14 4.0 12:00:04 \n",
"15 4.0 12:00:04 \n",
"16 4.0 12:00:04 \n",
"17 4.0 12:00:04 \n",
"18 4.0 12:00:04 \n",
"19 4.0 12:00:04 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"bike_data.shift(-50).head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise 8.1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Question 1\n",
"\n",
"Which function is used to convert string type dataframe column to datetime type?:\n",
"\n",
"A. convertToDate() \\\n",
"B. convertToDateTime() \\\n",
"C. to_datetime() \\\n",
"D. None of the above\n",
"\n",
"Answer: C"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Question 2\n",
"\n",
"Which attribute is used to find the day of the week from the datetime type column?:\n",
"\n",
"A. dt.weekday_name \\\n",
"B. dt_day_week \\\n",
"C. dt_name_of_weekday \\\n",
"D. None of the above\n",
"\n",
"Answer: A"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Question 3\n",
"\n",
"Which attribute is used to find the time portion from a datetime type column of a pandas dataframe?:\n",
"\n",
"A. dt.get_time \\\n",
"B. dt.show_time \\\n",
"C. dt.time \\\n",
"D. dt.display_time\n",
"\n",
"Answer: C"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise 8.2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From the titanic dataset below, the Cabin column contains mixed data. Handle the mixed data in the \n",
"Cabin column by creating new columns that contain numerical and categorical portion from the original values in the Cabin column."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Name | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" PC 17599 | \n",
" 71.2833 | \n",
" C85 | \n",
" C | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 113803 | \n",
" 53.1000 | \n",
" C123 | \n",
" S | \n",
"
\n",
" \n",
" | 6 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
" McCarthy, Mr. Timothy J | \n",
" male | \n",
" 54.0 | \n",
" 0 | \n",
" 0 | \n",
" 17463 | \n",
" 51.8625 | \n",
" E46 | \n",
" S | \n",
"
\n",
" \n",
" | 10 | \n",
" 11 | \n",
" 1 | \n",
" 3 | \n",
" Sandstrom, Miss. Marguerite Rut | \n",
" female | \n",
" 4.0 | \n",
" 1 | \n",
" 1 | \n",
" PP 9549 | \n",
" 16.7000 | \n",
" G6 | \n",
" S | \n",
"
\n",
" \n",
" | 11 | \n",
" 12 | \n",
" 1 | \n",
" 1 | \n",
" Bonnell, Miss. Elizabeth | \n",
" female | \n",
" 58.0 | \n",
" 0 | \n",
" 0 | \n",
" 113783 | \n",
" 26.5500 | \n",
" C103 | \n",
" S | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"1 2 1 1 \n",
"3 4 1 1 \n",
"6 7 0 1 \n",
"10 11 1 3 \n",
"11 12 1 1 \n",
"\n",
" Name Sex Age SibSp \\\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"6 McCarthy, Mr. Timothy J male 54.0 0 \n",
"10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n",
"11 Bonnell, Miss. Elizabeth female 58.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"1 0 PC 17599 71.2833 C85 C \n",
"3 0 113803 53.1000 C123 S \n",
"6 0 17463 51.8625 E46 S \n",
"10 1 PP 9549 16.7000 G6 S \n",
"11 0 113783 26.5500 C103 S "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"titanic_data = pd.read_csv(\"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv\")\n",
"titanic_data.dropna(inplace = True)\n",
"titanic_data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Solution"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Cabin | \n",
" Cabin_N | \n",
" Cabin_C | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" C85 | \n",
" 85 | \n",
" C | \n",
"
\n",
" \n",
" | 3 | \n",
" C123 | \n",
" 123 | \n",
" C | \n",
"
\n",
" \n",
" | 6 | \n",
" E46 | \n",
" 46 | \n",
" E | \n",
"
\n",
" \n",
" | 10 | \n",
" G6 | \n",
" 6 | \n",
" G | \n",
"
\n",
" \n",
" | 11 | \n",
" C103 | \n",
" 103 | \n",
" C | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Cabin Cabin_N Cabin_C\n",
"1 C85 85 C\n",
"3 C123 123 C\n",
"6 E46 46 E\n",
"10 G6 6 G\n",
"11 C103 103 C"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic_data = titanic_data[['Ticket', 'Cabin']]\n",
"titanic_data.head()\n",
"\n",
"titanic_data ['Cabin_N'] = titanic_data['Cabin'].str.extract('(\\d+)') \n",
"titanic_data ['Cabin_C'] = titanic_data['Cabin'].str[0] \n",
"\n",
"titanic_data[['Cabin', 'Cabin_N', 'Cabin_C']].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}