{
"cells": [
{
"cell_type": "markdown",
"id": "c1d1336e",
"metadata": {},
"source": [
"## MARKET BASKET ANALYSIS USING APRIORI ALGORITHM"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f7ce4c11",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c4e3b1ed",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: apyori in c:\\users\\razas\\anaconda3\\lib\\site-packages (1.1.2)\n"
]
}
],
"source": [
"%pip install apyori==1.1.2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c54381cd",
"metadata": {},
"outputs": [],
"source": [
"import apyori\n",
"from apyori import apriori"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "58980c99",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Member_number | \n",
" Date | \n",
" itemDescription | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1808 | \n",
" 21-07-2015 | \n",
" tropical fruit | \n",
"
\n",
" \n",
" | 1 | \n",
" 2552 | \n",
" 05-01-2015 | \n",
" whole milk | \n",
"
\n",
" \n",
" | 2 | \n",
" 2300 | \n",
" 19-09-2015 | \n",
" pip fruit | \n",
"
\n",
" \n",
" | 3 | \n",
" 1187 | \n",
" 12-12-2015 | \n",
" other vegetables | \n",
"
\n",
" \n",
" | 4 | \n",
" 3037 | \n",
" 01-02-2015 | \n",
" whole milk | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Member_number Date itemDescription\n",
"0 1808 21-07-2015 tropical fruit\n",
"1 2552 05-01-2015 whole milk\n",
"2 2300 19-09-2015 pip fruit\n",
"3 1187 12-12-2015 other vegetables\n",
"4 3037 01-02-2015 whole milk"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv(\"Groceries_dataset.csv\")\n",
"data.head()\n",
"\n",
"#The above dataset is that of a grocery store. We can see that it has three columns: Member_number\n",
"#(the id of the person who bought the item), Date (the date of purchase) and itemDescription (the description of the item)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "17ff3c2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(38765, 3)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ba370672",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 38765 entries, 0 to 38764\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Member_number 38765 non-null int64 \n",
" 1 Date 38765 non-null object\n",
" 2 itemDescription 38765 non-null object\n",
"dtypes: int64(1), object(2)\n",
"memory usage: 908.7+ KB\n"
]
}
],
"source": [
"data.info()\n",
"\n",
"#We can see that there are 38765 rows in the dataset"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7af29b41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Member_number 0\n",
"Date 0\n",
"itemDescription 0\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isna().sum()\n",
"\n",
"#We can see that there are no null values in any of the columns in the dataset, this makes analysis easier"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "947498d4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"whole milk 2502\n",
"other vegetables 1898\n",
"rolls/buns 1716\n",
"soda 1514\n",
"yogurt 1334\n",
"root vegetables 1071\n",
"tropical fruit 1032\n",
"bottled water 933\n",
"sausage 924\n",
"citrus fruit 812\n",
"Name: itemDescription, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Now let's look at the top selling products as well as the least selling products in the dataset\n",
"\n",
"#Let's break down the statement below: we first count the occurrences of each item in the dataset, then sort the counts in descending\n",
"#order and keep the first 10 items, which are the top 10 selling items\n",
"\n",
"# Ten most frequently purchased items, highest count first.\n",
"x = (\n",
"    data['itemDescription']\n",
"    .value_counts()\n",
"    .sort_values(ascending=False)\n",
"    .head(10)\n",
")\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6ed1ac1b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 10 frequently sold products\n"
]
},
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "x=%{x}
y=%{y}",
"legendgroup": "",
"marker": {
"color": "#636efa",
"pattern": {
"shape": ""
}
},
"name": "",
"offsetgroup": "",
"orientation": "v",
"showlegend": false,
"textposition": "auto",
"type": "bar",
"x": [
"whole milk",
"other vegetables",
"rolls/buns",
"soda",
"yogurt",
"root vegetables",
"tropical fruit",
"bottled water",
"sausage",
"citrus fruit"
],
"xaxis": "x",
"y": [
2502,
1898,
1716,
1514,
1334,
1071,
1032,
933,
924,
812
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"legend": {
"tracegroupgap": 0
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 10 frequently sold products "
},
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "Products"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "Number of item sold"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Announce the chart, then draw the item counts held in x as a bar chart.\n",
"print(\"Top 10 frequently sold products\")\n",
"\n",
"fig = px.bar(x=x.index, y=x.values)\n",
"fig.update_layout(\n",
"    title_text=\"Top 10 frequently sold products \",\n",
"    xaxis_title=\"Products\",\n",
"    yaxis_title=\"Number of item sold\",\n",
")\n",
"fig.show()\n",
"\n",
"#We can see that whole milk has the highest count (just over 2500), followed by other vegetables (almost 1900)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ee1f25ae",
"metadata": {},
"outputs": [],
"source": [
"#Now let's look at the 10 least selling products\n",
"#The only change in the code is to sort the counts in ascending order instead of descending order\n",
"\n",
"# Ten least frequently purchased items, lowest count first.\n",
"y = (\n",
"    data['itemDescription']\n",
"    .value_counts()\n",
"    .sort_values(ascending=True)\n",
"    .head(10)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "75b54a62",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 least frequently sold products\n"
]
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "x=%{x}
y=%{y}",
"legendgroup": "",
"marker": {
"color": "#636efa",
"pattern": {
"shape": ""
}
},
"name": "",
"offsetgroup": "",
"orientation": "v",
"showlegend": false,
"textposition": "auto",
"type": "bar",
"x": [
"preservation products",
"kitchen utensil",
"baby cosmetics",
"bags",
"frozen chicken",
"make up remover",
"rubbing alcohol",
"toilet cleaner",
"salad dressing",
"whisky"
],
"xaxis": "x",
"y": [
1,
1,
3,
4,
5,
5,
5,
5,
6,
8
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"legend": {
"tracegroupgap": 0
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "10 least frequently sold products "
},
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "Products"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "Number of item sold"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Announce the chart, then draw the item counts held in y as a bar chart.\n",
"print(\"10 least frequently sold products\")\n",
"\n",
"fig = px.bar(x=y.index, y=y.values)\n",
"fig.update_layout(\n",
"    title_text=\"10 least frequently sold products \",\n",
"    xaxis_title=\"Products\",\n",
"    yaxis_title=\"Number of item sold\",\n",
")\n",
"fig.show()\n",
"\n",
"#We can see that preservation products and kitchen utensil are tied as the least sold items (1 sale each), followed by baby cosmetics (3)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "49db8fc8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Member_number | \n",
"
\n",
" \n",
" \n",
" \n",
" | 3180 | \n",
" 36 | \n",
"
\n",
" \n",
" | 3050 | \n",
" 33 | \n",
"
\n",
" \n",
" | 2051 | \n",
" 33 | \n",
"
\n",
" \n",
" | 3737 | \n",
" 33 | \n",
"
\n",
" \n",
" | 2625 | \n",
" 31 | \n",
"
\n",
" \n",
" | 3915 | \n",
" 31 | \n",
"
\n",
" \n",
" | 2433 | \n",
" 31 | \n",
"
\n",
" \n",
" | 2271 | \n",
" 31 | \n",
"
\n",
" \n",
" | 3872 | \n",
" 30 | \n",
"
\n",
" \n",
" | 2394 | \n",
" 29 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Member_number\n",
"3180 36\n",
"3050 33\n",
"2051 33\n",
"3737 33\n",
"2625 31\n",
"3915 31\n",
"2433 31\n",
"2271 31\n",
"3872 30\n",
"2394 29"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Let's create a dataframe by counting the number of rows (items bought) recorded for each customer. Then sort it in descending order\n",
"#and keep the first 10 values; these are the ids of the customers who have made the most purchases\n",
"\n",
"# Count rows per member, keep the ten largest counts, and show them as a one-column frame.\n",
"(\n",
"    data['Member_number']\n",
"    .value_counts()\n",
"    .sort_values(ascending=False)\n",
"    .head(10)\n",
"    .to_frame()\n",
")\n",
"\n",
"#We can see that the customer with id 3180 has made the most purchases (36), followed by 3050, 2051 and 3737 with 33 items each"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "2756d763",
"metadata": {},
"outputs": [],
"source": [
"#Let's find the dates on which the highest sales were made\n",
"\n",
"#Derive two helper columns from the Date column (strings shaped like day-month-year, e.g. 21-07-2015)\n",
"\n",
"#Split each date on \"-\" once and reuse the pieces for both new columns\n",
"date_parts = data['Date'].str.split(\"-\")\n",
"\n",
"#The last piece of the split date is the year\n",
"data[\"Year\"] = date_parts.str[-1]\n",
"\n",
"#Month-Year joins the second piece (month) and the last piece (year) with a \"-\"\n",
"data[\"Month-Year\"] = date_parts.str[1] + \"-\" + date_parts.str[-1]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7495db0c",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "Date=%{x}
Count=%{y}
Meter=%{marker.color}",
"legendgroup": "",
"marker": {
"color": [
1921,
1797,
1793,
1785,
1724,
1722,
1699,
1694,
1670,
1615,
1591,
1587,
1576,
1575,
1570,
1561,
1560,
1536,
1527,
1473,
1472,
1469,
1437,
1411
],
"coloraxis": "coloraxis",
"pattern": {
"shape": ""
}
},
"name": "",
"offsetgroup": "",
"orientation": "v",
"showlegend": false,
"textposition": "auto",
"type": "bar",
"x": [
"08-2015",
"01-2015",
"05-2015",
"11-2015",
"07-2015",
"03-2015",
"04-2015",
"06-2015",
"10-2015",
"05-2014",
"10-2014",
"09-2015",
"07-2014",
"08-2014",
"06-2014",
"04-2014",
"02-2015",
"12-2015",
"01-2014",
"12-2014",
"09-2014",
"11-2014",
"02-2014",
"03-2014"
],
"xaxis": "x",
"y": [
1921,
1797,
1793,
1785,
1724,
1722,
1699,
1694,
1670,
1615,
1591,
1587,
1576,
1575,
1570,
1561,
1560,
1536,
1527,
1473,
1472,
1469,
1437,
1411
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"coloraxis": {
"colorbar": {
"title": {
"text": "Meter"
}
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"legend": {
"tracegroupgap": 0
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Exploring highest sales by date"
},
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "Date"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "Count"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Plotting a bar graph with number of sales in each month of each year\n",
"fig1 = px.bar(data[\"Month-Year\"].value_counts(ascending=False), \n",
" orientation= \"v\", \n",
" color = data[\"Month-Year\"].value_counts(ascending=False),\n",
" \n",
" labels={'value':'Count', 'index':'Date','color':'Meter'})\n",
"\n",
"fig1.update_layout(title_text=\"Exploring highest sales by date\")\n",
"\n",
"fig1.show()\n",
"\n",
"#We can see that most of the sales is during the months of August and september and the least sales take place in February and March"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "97cdcb63",
"metadata": {},
"outputs": [],
"source": [
"#Implementation of Apriori Algorithm\n",
"\n",
"#Creating a list of names of unique products present in the itemDescription column\n",
"\n",
"products = data['itemDescription'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3434a848",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['tropical fruit', 'whole milk', 'pip fruit', 'other vegetables',\n",
" 'rolls/buns', 'pot plants', 'citrus fruit', 'beef', 'frankfurter',\n",
" 'chicken'], dtype=object)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products[:10]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "46225e63",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Member_number | \n",
" Date | \n",
" Year | \n",
" Month-Year | \n",
" Instant food products | \n",
" UHT-milk | \n",
" abrasive cleaner | \n",
" artif. sweetener | \n",
" baby cosmetics | \n",
" bags | \n",
" ... | \n",
" turkey | \n",
" vinegar | \n",
" waffles | \n",
" whipped/sour cream | \n",
" whisky | \n",
" white bread | \n",
" white wine | \n",
" whole milk | \n",
" yogurt | \n",
" zwieback | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1808 | \n",
" 21-07-2015 | \n",
" 2015 | \n",
" 07-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2552 | \n",
" 05-01-2015 | \n",
" 2015 | \n",
" 01-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2300 | \n",
" 19-09-2015 | \n",
" 2015 | \n",
" 09-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1187 | \n",
" 12-12-2015 | \n",
" 2015 | \n",
" 12-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 3037 | \n",
" 01-02-2015 | \n",
" 2015 | \n",
" 02-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 171 columns
\n",
"
"
],
"text/plain": [
" Member_number Date Year Month-Year Instant food products \\\n",
"0 1808 21-07-2015 2015 07-2015 0 \n",
"1 2552 05-01-2015 2015 01-2015 0 \n",
"2 2300 19-09-2015 2015 09-2015 0 \n",
"3 1187 12-12-2015 2015 12-2015 0 \n",
"4 3037 01-02-2015 2015 02-2015 0 \n",
"\n",
" UHT-milk abrasive cleaner artif. sweetener baby cosmetics bags ... \\\n",
"0 0 0 0 0 0 ... \n",
"1 0 0 0 0 0 ... \n",
"2 0 0 0 0 0 ... \n",
"3 0 0 0 0 0 ... \n",
"4 0 0 0 0 0 ... \n",
"\n",
" turkey vinegar waffles whipped/sour cream whisky white bread \\\n",
"0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 \n",
"\n",
" white wine whole milk yogurt zwieback \n",
"0 0 0 0 0 \n",
"1 0 1 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 1 0 0 \n",
"\n",
"[5 rows x 171 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#One hot encoding\n",
"\n",
"#For modelling and finding the relationship between products we need to be working with numerical values, so let's one hot encode the products\n",
"data1=data.copy()\n",
"one_hot = pd.get_dummies(data1['itemDescription'],dtype=int)\n",
"data1.drop(['itemDescription'], inplace =True, axis=1)\n",
"\n",
"data1 = data1.join(one_hot)\n",
"\n",
"data1.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "6c847edb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" tropical fruit | \n",
" whole milk | \n",
" pip fruit | \n",
" other vegetables | \n",
" rolls/buns | \n",
" pot plants | \n",
" citrus fruit | \n",
" beef | \n",
" frankfurter | \n",
" chicken | \n",
" ... | \n",
" flower (seeds) | \n",
" rice | \n",
" tea | \n",
" salad dressing | \n",
" specialty vegetables | \n",
" pudding powder | \n",
" ready soups | \n",
" make up remover | \n",
" toilet cleaner | \n",
" preservation products | \n",
"
\n",
" \n",
" | Member_number | \n",
" Date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1000 | \n",
" 15-03-2015 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 24-06-2014 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 24-07-2015 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
3 rows × 167 columns
\n",
"
"
],
"text/plain": [
" tropical fruit whole milk pip fruit \\\n",
"Member_number Date \n",
"1000 15-03-2015 0 1 0 \n",
" 24-06-2014 0 1 0 \n",
" 24-07-2015 0 0 0 \n",
"\n",
" other vegetables rolls/buns pot plants \\\n",
"Member_number Date \n",
"1000 15-03-2015 0 0 0 \n",
" 24-06-2014 0 0 0 \n",
" 24-07-2015 0 0 0 \n",
"\n",
" citrus fruit beef frankfurter chicken ... \\\n",
"Member_number Date ... \n",
"1000 15-03-2015 0 0 0 0 ... \n",
" 24-06-2014 0 0 0 0 ... \n",
" 24-07-2015 0 0 0 0 ... \n",
"\n",
" flower (seeds) rice tea salad dressing \\\n",
"Member_number Date \n",
"1000 15-03-2015 0 0 0 0 \n",
" 24-06-2014 0 0 0 0 \n",
" 24-07-2015 0 0 0 0 \n",
"\n",
" specialty vegetables pudding powder ready soups \\\n",
"Member_number Date \n",
"1000 15-03-2015 0 0 0 \n",
" 24-06-2014 0 0 0 \n",
" 24-07-2015 0 0 0 \n",
"\n",
" make up remover toilet cleaner \\\n",
"Member_number Date \n",
"1000 15-03-2015 0 0 \n",
" 24-06-2014 0 0 \n",
" 24-07-2015 0 0 \n",
"\n",
" preservation products \n",
"Member_number Date \n",
"1000 15-03-2015 0 \n",
" 24-06-2014 0 \n",
" 24-07-2015 0 \n",
"\n",
"[3 rows x 167 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"##Group the data based on Member_number and then by date and computing the sum by products using the products in the earlier\n",
"#created project list\n",
"\n",
"data2 = data1.groupby(['Member_number', 'Date'])[products[:]].sum()\n",
"\n",
"data2.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "35cf2e0a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tropical fruit | \n",
" whole milk | \n",
" pip fruit | \n",
" other vegetables | \n",
" rolls/buns | \n",
" pot plants | \n",
" citrus fruit | \n",
" beef | \n",
" frankfurter | \n",
" chicken | \n",
" ... | \n",
" flower (seeds) | \n",
" rice | \n",
" tea | \n",
" salad dressing | \n",
" specialty vegetables | \n",
" pudding powder | \n",
" ready soups | \n",
" make up remover | \n",
" toilet cleaner | \n",
" preservation products | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 167 columns
\n",
"
"
],
"text/plain": [
" tropical fruit whole milk pip fruit other vegetables rolls/buns \\\n",
"0 0 1 0 0 0 \n",
"1 0 1 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
" pot plants citrus fruit beef frankfurter chicken ... flower (seeds) \\\n",
"0 0 0 0 0 0 ... 0 \n",
"1 0 0 0 0 0 ... 0 \n",
"2 0 0 0 0 0 ... 0 \n",
"3 0 0 0 0 0 ... 0 \n",
"4 0 0 0 0 0 ... 0 \n",
"\n",
" rice tea salad dressing specialty vegetables pudding powder \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
" ready soups make up remover toilet cleaner preservation products \n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
"[5 rows x 167 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Reset the index of the newly formed dataset.\n",
"data2 = data2.reset_index()[products]\n",
"data2.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f0e50b8b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tropical fruit | \n",
" whole milk | \n",
" pip fruit | \n",
" other vegetables | \n",
" rolls/buns | \n",
" pot plants | \n",
" citrus fruit | \n",
" beef | \n",
" frankfurter | \n",
" chicken | \n",
" ... | \n",
" flower (seeds) | \n",
" rice | \n",
" tea | \n",
" salad dressing | \n",
" specialty vegetables | \n",
" pudding powder | \n",
" ready soups | \n",
" make up remover | \n",
" toilet cleaner | \n",
" preservation products | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" whole milk | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" whole milk | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 167 columns
\n",
"
"
],
"text/plain": [
" tropical fruit whole milk pip fruit other vegetables rolls/buns pot plants \\\n",
"0 0 whole milk 0 0 0 0 \n",
"1 0 whole milk 0 0 0 0 \n",
"2 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 \n",
"\n",
" citrus fruit beef frankfurter chicken ... flower (seeds) rice tea \\\n",
"0 0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 0 ... 0 0 0 \n",
"2 0 0 0 0 ... 0 0 0 \n",
"3 0 0 0 0 ... 0 0 0 \n",
"4 0 0 0 0 ... 0 0 0 \n",
"\n",
" salad dressing specialty vegetables pudding powder ready soups \\\n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" make up remover toilet cleaner preservation products \n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
"[5 rows x 167 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Create a function product_names which takes some data and for each product in the data, if the value of that product in the\n",
"#data is more than zero, then replace the value with the product name from the product list\n",
"\n",
"def product_names(x):\n",
" for product in products:\n",
" if x[product] >0:\n",
" x[product] = product\n",
" return x\n",
"#Apply the created function on data2 dataset.\n",
"data2 = data2.apply(product_names, axis=1)\n",
"data2.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "ab52950f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['whole milk', 'yogurt', 'sausage', 'semi-finished bread'],\n",
" ['whole milk', 'pastry', 'salty snack'],\n",
" ['canned beer', 'misc. beverages'],\n",
" ['sausage', 'hygiene articles'],\n",
" ['soda', 'pickled vegetables'],\n",
" ['frankfurter', 'curd'],\n",
" ['whole milk', 'rolls/buns', 'sausage'],\n",
" ['whole milk', 'soda'],\n",
" ['beef', 'white bread'],\n",
" ['frankfurter', 'soda', 'whipped/sour cream']]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Filter out the values from the data frame data2\n",
"x = data2.values\n",
"#Convert into list values in each row if value is not zero\n",
"x = [sub[~(sub==0)].tolist() for sub in x if sub [sub != 0].tolist()]\n",
"transactions = x\n",
"transactions[0:10]\n",
"\n",
"#The apriori instance takes data as list that is why the above process is required"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "fb34b884",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RelationRecord(items=frozenset({'fruit/vegetable juice', 'liver loaf'}), support=0.00040098910646260775, ordered_statistics=[OrderedStatistic(items_base=frozenset({'liver loaf'}), items_add=frozenset({'fruit/vegetable juice'}), confidence=0.12, lift=3.5276227897838903)])\n"
]
}
],
"source": [
"#Now we have to figure out various assosiations between items in the dataset\n",
"#Create an apriori instance\n",
"#Make a list out of the associations\n",
"\n",
"associations = apriori(transactions, min_support = 0.00030, min_confidence = 0.05, min_lift = 3, max_length = 2, target = \"associations\")\n",
"association_results = list(associations)\n",
"print(association_results[0])\n",
"\n",
"#Parameters\n",
"\n",
"#min_support: The minimum support of relations (float)\n",
"\n",
"#min_confidence: The minimum confidence of relations (float)\n",
"\n",
"#min_lift: The minimum lift of relations (float)\n",
"\n",
"#min_length: The minimum number of items in a rule\n",
"\n",
"#max_length: The maximum number of items in a rule"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "07527258",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rule : fruit/vegetable juice -> liver loaf\n",
"Support : 0.00040098910646260775\n",
"Confidence : 0.12\n",
"Lift : 3.5276227897838903\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : pickled vegetables -> ham\n",
"Support : 0.0005346521419501437\n",
"Confidence : 0.05970149253731344\n",
"Lift : 3.4895055970149254\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : meat -> roll products \n",
"Support : 0.0003341575887188398\n",
"Confidence : 0.06097560975609757\n",
"Lift : 3.620547812620984\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : salt -> misc. beverages\n",
"Support : 0.0003341575887188398\n",
"Confidence : 0.05617977528089888\n",
"Lift : 3.5619405827461437\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : misc. beverages -> spread cheese\n",
"Support : 0.0003341575887188398\n",
"Confidence : 0.05\n",
"Lift : 3.170127118644068\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : seasonal products -> soups\n",
"Support : 0.0003341575887188398\n",
"Confidence : 0.10416666666666667\n",
"Lift : 14.704205974842768\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n",
"Rule : sugar -> spread cheese\n",
"Support : 0.00040098910646260775\n",
"Confidence : 0.06\n",
"Lift : 3.3878490566037733\n",
"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n"
]
}
],
"source": [
"#Now let us observe each itemset, and separately print the support, confidence and list values of each itemset\n",
"\n",
"#iterate through the list of associations and for each item\n",
"for item in association_results:\n",
" \n",
" #for each item filter out the item pair and create item list containing individual items in the itemset\n",
" itemset = item[0]\n",
" items = [x for x in itemset]\n",
" \n",
" #Print the relationship( First value in items to second value in items)\n",
" print(\"Rule : \", items[0], \" -> \" + items[1])\n",
" \n",
" #Print support,confidence and lift value of each itemset\n",
" print(\"Support : \", str(item[1]))\n",
" print(\"Confidence : \",str(item[2][0][2]))\n",
" print(\"Lift : \", str(item[2][0][3]))\n",
" \n",
" print(\"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01ea7c74",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}