{ "cells": [ { "cell_type": "markdown", "id": "c1d1336e", "metadata": {}, "source": [ "## MARKET BASKET ANALYSIS USING APRIORI ALGORITHM" ] }, { "cell_type": "code", "execution_count": 1, "id": "f7ce4c11", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import plotly.express as px\n", "\n", "import matplotlib as plt" ] }, { "cell_type": "code", "execution_count": 2, "id": "c4e3b1ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: apyori in c:\\users\\razas\\anaconda3\\lib\\site-packages (1.1.2)\n" ] } ], "source": [ "!pip install apyori" ] }, { "cell_type": "code", "execution_count": 3, "id": "c54381cd", "metadata": {}, "outputs": [], "source": [ "import apyori\n", "from apyori import apriori" ] }, { "cell_type": "code", "execution_count": 4, "id": "58980c99", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Member_numberDateitemDescription
0180821-07-2015tropical fruit
1255205-01-2015whole milk
2230019-09-2015pip fruit
3118712-12-2015other vegetables
4303701-02-2015whole milk
\n", "
" ], "text/plain": [ " Member_number Date itemDescription\n", "0 1808 21-07-2015 tropical fruit\n", "1 2552 05-01-2015 whole milk\n", "2 2300 19-09-2015 pip fruit\n", "3 1187 12-12-2015 other vegetables\n", "4 3037 01-02-2015 whole milk" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv(\"Groceries_dataset.csv\")\n", "data.head()\n", "\n", "#The above dataset is that of a grocery store, we can see that the dataset has three columns the member_number\n", "#(The is of the person who bought the item), Date of purchase and the description of the item." ] }, { "cell_type": "code", "execution_count": 5, "id": "17ff3c2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(38765, 3)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "ba370672", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 38765 entries, 0 to 38764\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Member_number 38765 non-null int64 \n", " 1 Date 38765 non-null object\n", " 2 itemDescription 38765 non-null object\n", "dtypes: int64(1), object(2)\n", "memory usage: 908.7+ KB\n" ] } ], "source": [ "data.info()\n", "\n", "#We can see that there are 38765 rows in the dataset" ] }, { "cell_type": "code", "execution_count": 7, "id": "7af29b41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Member_number 0\n", "Date 0\n", "itemDescription 0\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isna().sum()\n", "\n", "#We can see that there are no null values in any of the columns in the dataset, this makes analysis easier" ] }, { "cell_type": "code", "execution_count": 8, "id": "947498d4", "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "whole milk 2502\n", "other vegetables 1898\n", "rolls/buns 1716\n", "soda 1514\n", "yogurt 1334\n", "root vegetables 1071\n", "tropical fruit 1032\n", "bottled water 933\n", "sausage 924\n", "citrus fruit 812\n", "Name: itemDescription, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Now let's look at the top selling products as well as the least selling products in the dataset\n", "\n", "#Let's split up this line of code, we first count the occurances of each item in the dataset, then sort the value in asending\n", "#order and filter out the first 10 items, this would be the top 10 selling items\n", "\n", "x = data['itemDescription'].value_counts().sort_values(ascending=False)[:10]\n", "x" ] }, { "cell_type": "code", "execution_count": 9, "id": "6ed1ac1b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Top 10 frequently sold products\n" ] }, { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "x=%{x}
y=%{y}", "legendgroup": "", "marker": { "color": "#636efa", "pattern": { "shape": "" } }, "name": "", "offsetgroup": "", "orientation": "v", "showlegend": false, "textposition": "auto", "type": "bar", "x": [ "whole milk", "other vegetables", "rolls/buns", "soda", "yogurt", "root vegetables", "tropical fruit", "bottled water", "sausage", "citrus fruit" ], "xaxis": "x", "y": [ 2502, 1898, 1716, 1514, 1334, 1071, 1032, 933, 924, 812 ], "yaxis": "y" } ], "layout": { "barmode": "relative", "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { 
"fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 
0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", 
"linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 10 frequently sold products " }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Products" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Number of item sold" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print(\"Top 10 frequently sold products\")\n", "\n", "fig = px.bar(x= x.index, y= x.values)\n", "fig.update_layout(title_text= \"Top 10 frequently sold products \", xaxis_title= \"Products\", yaxis_title=\"Number of item sold\")\n", "fig.show()\n", "\n", "#We can see that whole milk has the highet count (nearly 2500), followed by vegetables(almost 1800)" ] }, { "cell_type": "code", "execution_count": 10, "id": "ee1f25ae", "metadata": {}, "outputs": [], "source": [ "#Now let's look at the 10 least selling products\n", "#The only change in code would be to not sort the values in descending order\n", "\n", "y = data['itemDescription'].value_counts().sort_values(ascending=True)[:10]" ] }, { "cell_type": "code", "execution_count": 11, "id": "75b54a62", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10 least frequently sold products\n" ] }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "x=%{x}
y=%{y}", "legendgroup": "", "marker": { "color": "#636efa", "pattern": { "shape": "" } }, "name": "", "offsetgroup": "", "orientation": "v", "showlegend": false, "textposition": "auto", "type": "bar", "x": [ "preservation products", "kitchen utensil", "baby cosmetics", "bags", "frozen chicken", "make up remover", "rubbing alcohol", "toilet cleaner", "salad dressing", "whisky" ], "xaxis": "x", "y": [ 1, 1, 3, 4, 5, 5, 5, 5, 6, 8 ], "yaxis": "y" } ], "layout": { "barmode": "relative", "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { 
"fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 
0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", 
"linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "10 least frequently sold products " }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Products" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Number of item sold" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print(\"10 least frequently sold products\")\n", "\n", "fig = px.bar(x= y.index, y= y.values)\n", "fig.update_layout(title_text= \"10 least frequently sold products \", xaxis_title= \"Products\", yaxis_title=\"Number of item sold\")\n", "fig.show()\n", "\n", "#We can see that preservation products are the least sold item followed by kitchen utensils" ] }, { "cell_type": "code", "execution_count": 12, "id": "49db8fc8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Member_number
318036
305033
205133
373733
262531
391531
243331
227131
387230
239429
\n", "
" ], "text/plain": [ " Member_number\n", "3180 36\n", "3050 33\n", "2051 33\n", "3737 33\n", "2625 31\n", "3915 31\n", "2433 31\n", "2271 31\n", "3872 30\n", "2394 29" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Let's create a dataframe by counting the number of times each customer has made a purchase. Then sort that in descending order\n", "#and filter out the first 10 values, these are the ids of the customers that has made the most purchases\n", "\n", "pd.DataFrame(data['Member_number'].value_counts().sort_values(ascending=False))[:10]\n", "\n", "#We can see that customer with id 3180 has made the most purchases (36) followed by 3050,2051,3737 buying 33 items each" ] }, { "cell_type": "code", "execution_count": 13, "id": "2756d763", "metadata": {}, "outputs": [], "source": [ "#Let's find the dates on which highest sale was made\n", "\n", "#Let's create few new column by modifying the date column in the dataframe\n", "\n", "#Filtering out the year value from the date by splitting the date on - which gives a list and then taking out the last value which is the year value\n", "data[\"Year\"] = data['Date'].str.split(\"-\").str[-1]\n", "\n", "#Creating a new column in Month-Year format by splitting the date by - and filtering out the second and last value from the list which belongs to month and year respectively\n", "data[\"Month-Year\"] = data['Date'].str.split(\"-\").str[1] + \"-\" + data['Date'].str.split(\"-\").str[-1]" ] }, { "cell_type": "code", "execution_count": 14, "id": "7495db0c", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "Date=%{x}
Count=%{y}
Meter=%{marker.color}", "legendgroup": "", "marker": { "color": [ 1921, 1797, 1793, 1785, 1724, 1722, 1699, 1694, 1670, 1615, 1591, 1587, 1576, 1575, 1570, 1561, 1560, 1536, 1527, 1473, 1472, 1469, 1437, 1411 ], "coloraxis": "coloraxis", "pattern": { "shape": "" } }, "name": "", "offsetgroup": "", "orientation": "v", "showlegend": false, "textposition": "auto", "type": "bar", "x": [ "08-2015", "01-2015", "05-2015", "11-2015", "07-2015", "03-2015", "04-2015", "06-2015", "10-2015", "05-2014", "10-2014", "09-2015", "07-2014", "08-2014", "06-2014", "04-2014", "02-2015", "12-2015", "01-2014", "12-2014", "09-2014", "11-2014", "02-2014", "03-2014" ], "xaxis": "x", "y": [ 1921, 1797, 1793, 1785, 1724, 1722, 1699, 1694, 1670, 1615, 1591, 1587, 1576, 1575, 1570, 1561, 1560, 1536, 1527, 1473, 1472, 1469, 1437, 1411 ], "yaxis": "y" } ], "layout": { "barmode": "relative", "coloraxis": { "colorbar": { "title": { "text": "Meter" } }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": 
"white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 
0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, 
"#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": 
{ "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Exploring highest sales by date" }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Date" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Count" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Plotting a bar graph with number of sales in each month of each year\n", "fig1 = px.bar(data[\"Month-Year\"].value_counts(ascending=False), \n", " orientation= \"v\", \n", " color = data[\"Month-Year\"].value_counts(ascending=False),\n", " \n", " labels={'value':'Count', 'index':'Date','color':'Meter'})\n", "\n", "fig1.update_layout(title_text=\"Exploring highest sales by date\")\n", "\n", "fig1.show()\n", "\n", "#We can see that most of the sales is during the months of August and september and the least sales take place in February and March" ] }, { "cell_type": "code", "execution_count": 15, "id": "97cdcb63", "metadata": {}, "outputs": [], "source": [ "#Implementation of Apriori Algorithm\n", "\n", "#Creating a list of names of unique products present in the itemDescription column\n", "\n", "products = data['itemDescription'].unique()" ] }, { "cell_type": "code", "execution_count": 16, "id": "3434a848", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['tropical fruit', 'whole milk', 'pip fruit', 'other vegetables',\n", " 'rolls/buns', 'pot plants', 'citrus fruit', 'beef', 'frankfurter',\n", " 'chicken'], dtype=object)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "products[:10]" ] }, { "cell_type": "code", "execution_count": 17, "id": "46225e63", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Member_numberDateYearMonth-YearInstant food productsUHT-milkabrasive cleanerartif. sweetenerbaby cosmeticsbags...turkeyvinegarwaffleswhipped/sour creamwhiskywhite breadwhite winewhole milkyogurtzwieback
0180821-07-2015201507-2015000000...0000000000
1255205-01-2015201501-2015000000...0000000100
2230019-09-2015201509-2015000000...0000000000
3118712-12-2015201512-2015000000...0000000000
4303701-02-2015201502-2015000000...0000000100
\n", "

5 rows × 171 columns

\n", "
" ], "text/plain": [ " Member_number Date Year Month-Year Instant food products \\\n", "0 1808 21-07-2015 2015 07-2015 0 \n", "1 2552 05-01-2015 2015 01-2015 0 \n", "2 2300 19-09-2015 2015 09-2015 0 \n", "3 1187 12-12-2015 2015 12-2015 0 \n", "4 3037 01-02-2015 2015 02-2015 0 \n", "\n", " UHT-milk abrasive cleaner artif. sweetener baby cosmetics bags ... \\\n", "0 0 0 0 0 0 ... \n", "1 0 0 0 0 0 ... \n", "2 0 0 0 0 0 ... \n", "3 0 0 0 0 0 ... \n", "4 0 0 0 0 0 ... \n", "\n", " turkey vinegar waffles whipped/sour cream whisky white bread \\\n", "0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 \n", "\n", " white wine whole milk yogurt zwieback \n", "0 0 0 0 0 \n", "1 0 1 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 1 0 0 \n", "\n", "[5 rows x 171 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#One hot encoding\n", "\n", "#For modelling and finding the relationship between products we need to be working with numerical values, so let's one hot encode the products\n", "data1=data.copy()\n", "one_hot = pd.get_dummies(data1['itemDescription'],dtype=int)\n", "data1.drop(['itemDescription'], inplace =True, axis=1)\n", "\n", "data1 = data1.join(one_hot)\n", "\n", "data1.head()" ] }, { "cell_type": "code", "execution_count": 18, "id": "6c847edb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tropical fruitwhole milkpip fruitother vegetablesrolls/bunspot plantscitrus fruitbeeffrankfurterchicken...flower (seeds)riceteasalad dressingspecialty vegetablespudding powderready soupsmake up removertoilet cleanerpreservation products
Member_numberDate
100015-03-20150100000000...0000000000
24-06-20140100000000...0000000000
24-07-20150000000000...0000000000
\n", "

3 rows × 167 columns

\n", "
" ], "text/plain": [ " tropical fruit whole milk pip fruit \\\n", "Member_number Date \n", "1000 15-03-2015 0 1 0 \n", " 24-06-2014 0 1 0 \n", " 24-07-2015 0 0 0 \n", "\n", " other vegetables rolls/buns pot plants \\\n", "Member_number Date \n", "1000 15-03-2015 0 0 0 \n", " 24-06-2014 0 0 0 \n", " 24-07-2015 0 0 0 \n", "\n", " citrus fruit beef frankfurter chicken ... \\\n", "Member_number Date ... \n", "1000 15-03-2015 0 0 0 0 ... \n", " 24-06-2014 0 0 0 0 ... \n", " 24-07-2015 0 0 0 0 ... \n", "\n", " flower (seeds) rice tea salad dressing \\\n", "Member_number Date \n", "1000 15-03-2015 0 0 0 0 \n", " 24-06-2014 0 0 0 0 \n", " 24-07-2015 0 0 0 0 \n", "\n", " specialty vegetables pudding powder ready soups \\\n", "Member_number Date \n", "1000 15-03-2015 0 0 0 \n", " 24-06-2014 0 0 0 \n", " 24-07-2015 0 0 0 \n", "\n", " make up remover toilet cleaner \\\n", "Member_number Date \n", "1000 15-03-2015 0 0 \n", " 24-06-2014 0 0 \n", " 24-07-2015 0 0 \n", "\n", " preservation products \n", "Member_number Date \n", "1000 15-03-2015 0 \n", " 24-06-2014 0 \n", " 24-07-2015 0 \n", "\n", "[3 rows x 167 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "##Group the data based on Member_number and then by date and computing the sum by products using the products in the earlier\n", "#created project list\n", "\n", "data2 = data1.groupby(['Member_number', 'Date'])[products[:]].sum()\n", "\n", "data2.head(3)" ] }, { "cell_type": "code", "execution_count": 19, "id": "35cf2e0a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tropical fruitwhole milkpip fruitother vegetablesrolls/bunspot plantscitrus fruitbeeffrankfurterchicken...flower (seeds)riceteasalad dressingspecialty vegetablespudding powderready soupsmake up removertoilet cleanerpreservation products
00100000000...0000000000
10100000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000000
\n", "

5 rows × 167 columns

\n", "
" ], "text/plain": [ " tropical fruit whole milk pip fruit other vegetables rolls/buns \\\n", "0 0 1 0 0 0 \n", "1 0 1 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "\n", " pot plants citrus fruit beef frankfurter chicken ... flower (seeds) \\\n", "0 0 0 0 0 0 ... 0 \n", "1 0 0 0 0 0 ... 0 \n", "2 0 0 0 0 0 ... 0 \n", "3 0 0 0 0 0 ... 0 \n", "4 0 0 0 0 0 ... 0 \n", "\n", " rice tea salad dressing specialty vegetables pudding powder \\\n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "\n", " ready soups make up remover toilet cleaner preservation products \n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "\n", "[5 rows x 167 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Reset the index of the newly formed dataset.\n", "data2 = data2.reset_index()[products]\n", "data2.head()" ] }, { "cell_type": "code", "execution_count": 20, "id": "f0e50b8b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tropical fruitwhole milkpip fruitother vegetablesrolls/bunspot plantscitrus fruitbeeffrankfurterchicken...flower (seeds)riceteasalad dressingspecialty vegetablespudding powderready soupsmake up removertoilet cleanerpreservation products
00whole milk00000000...0000000000
10whole milk00000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000000
\n", "

5 rows × 167 columns

\n", "
" ], "text/plain": [ " tropical fruit whole milk pip fruit other vegetables rolls/buns pot plants \\\n", "0 0 whole milk 0 0 0 0 \n", "1 0 whole milk 0 0 0 0 \n", "2 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 \n", "\n", " citrus fruit beef frankfurter chicken ... flower (seeds) rice tea \\\n", "0 0 0 0 0 ... 0 0 0 \n", "1 0 0 0 0 ... 0 0 0 \n", "2 0 0 0 0 ... 0 0 0 \n", "3 0 0 0 0 ... 0 0 0 \n", "4 0 0 0 0 ... 0 0 0 \n", "\n", " salad dressing specialty vegetables pudding powder ready soups \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "\n", " make up remover toilet cleaner preservation products \n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", "[5 rows x 167 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Create a function product_names which takes some data and for each product in the data, if the value of that product in the\n", "#data is more than zero, then replace the value with the product name from the product list\n", "\n", "def product_names(x):\n", " for product in products:\n", " if x[product] >0:\n", " x[product] = product\n", " return x\n", "#Apply the created function on data2 dataset.\n", "data2 = data2.apply(product_names, axis=1)\n", "data2.head()" ] }, { "cell_type": "code", "execution_count": 21, "id": "ab52950f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[['whole milk', 'yogurt', 'sausage', 'semi-finished bread'],\n", " ['whole milk', 'pastry', 'salty snack'],\n", " ['canned beer', 'misc. 
beverages'],\n", " ['sausage', 'hygiene articles'],\n", " ['soda', 'pickled vegetables'],\n", " ['frankfurter', 'curd'],\n", " ['whole milk', 'rolls/buns', 'sausage'],\n", " ['whole milk', 'soda'],\n", " ['beef', 'white bread'],\n", " ['frankfurter', 'soda', 'whipped/sour cream']]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Take the labelled data2 frame as an array with one row per grouped purchase\n", "basket_rows = data2.values\n", "#Keep only the non-zero entries (the product names) of each row and skip rows that have none\n", "transactions = [row[row != 0].tolist() for row in basket_rows if (row != 0).any()]\n", "transactions[0:10]\n", "\n", "#apriori takes the transactions as a list of item lists, which is why the conversion above is required" ] }, { "cell_type": "code", "execution_count": 22, "id": "fb34b884", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RelationRecord(items=frozenset({'fruit/vegetable juice', 'liver loaf'}), support=0.00040098910646260775, ordered_statistics=[OrderedStatistic(items_base=frozenset({'liver loaf'}), items_add=frozenset({'fruit/vegetable juice'}), confidence=0.12, lift=3.5276227897838903)])\n" ] } ], "source": [ "#Now we have to figure out the various associations between items in the dataset\n", "#Run apriori on the transactions with the thresholds described below\n", "\n", "associations = apriori(transactions, min_support = 0.00030, min_confidence = 0.05, min_lift = 3, max_length = 2)\n", "\n", "#Make a list out of the associations so the results can be indexed and iterated more than once\n", "association_results = list(associations)\n", "print(association_results[0])\n", "\n", "#Parameters (the keyword arguments documented by apyori's apriori)\n", "\n", "#min_support: The minimum support of relations (float)\n", "\n", "#min_confidence: The minimum confidence of relations (float)\n", "\n", "#min_lift: The minimum lift of relations (float)\n", "\n", "#max_length: The maximum number of items in a rule" ] }, { "cell_type": "code", "execution_count": 23, "id": "07527258", "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rule : fruit/vegetable juice -> liver loaf\n", "Support : 0.00040098910646260775\n", "Confidence : 0.12\n", "Lift : 3.5276227897838903\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : pickled vegetables -> ham\n", "Support : 0.0005346521419501437\n", "Confidence : 0.05970149253731344\n", "Lift : 3.4895055970149254\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : meat -> roll products \n", "Support : 0.0003341575887188398\n", "Confidence : 0.06097560975609757\n", "Lift : 3.620547812620984\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : salt -> misc. beverages\n", "Support : 0.0003341575887188398\n", "Confidence : 0.05617977528089888\n", "Lift : 3.5619405827461437\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : misc. beverages -> spread cheese\n", "Support : 0.0003341575887188398\n", "Confidence : 0.05\n", "Lift : 3.170127118644068\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : seasonal products -> soups\n", "Support : 0.0003341575887188398\n", "Confidence : 0.10416666666666667\n", "Lift : 14.704205974842768\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n", "Rule : sugar -> spread cheese\n", "Support : 0.00040098910646260775\n", "Confidence : 0.06\n", "Lift : 3.3878490566037733\n", "=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\n" ] } ], "source": [ "#Print every mined rule together with its support, confidence and lift values\n", "\n", "#Each RelationRecord carries the itemset, its support and a list of OrderedStatistic rules\n", "for record in association_results:\n", "    for rule in record.ordered_statistics:\n", "\n", "        #The rule direction is items_base -> items_add; iterating the frozenset of items\n", "        #gives an arbitrary order and can print the rule the wrong way round\n", "        antecedent = \", \".join(sorted(rule.items_base))\n", "        consequent = \", \".join(sorted(rule.items_add))\n", "        print(\"Rule : \", antecedent, \" -> \" + consequent)\n", "\n", "        #Support is stored on the record, confidence and lift on the individual rule\n", "        print(\"Support : \", str(record.support))\n", "        print(\"Confidence : \", str(rule.confidence))\n", "        print(\"Lift : \", str(rule.lift))\n", "\n", "        print(\"=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>=>\")" ] }, 
{ "cell_type": "code", "execution_count": null, "id": "01ea7c74", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }