import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
# Load the raw transactions; each row of the "products" column holds one
# basket written as a comma-separated string.
df = pd.read_csv("grocery_data.csv")
# Break every basket string into its list of item names.
data = [basket.split(',') for basket in df["products"]]
data
[['MILK', 'BREAD', 'BISCUIT'], ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'], ['BREAD', 'TEA', 'BOURNVITA'], ['JAM', 'MAGGI', 'BREAD', 'MILK'], ['MAGGI', 'TEA', 'BISCUIT'], ['BREAD', 'TEA', 'BOURNVITA'], ['MAGGI', 'TEA', 'CORNFLAKES'], ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'], ['JAM', 'MAGGI', 'BREAD', 'TEA'], ['BREAD', 'MILK'], ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'], ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'], ['COFFEE', 'SUGER', 'BOURNVITA'], ['BREAD', 'COFFEE', 'COCK'], ['BREAD', 'SUGER', 'BISCUIT'], ['COFFEE', 'SUGER', 'CORNFLAKES'], ['BREAD', 'SUGER', 'BOURNVITA'], ['BREAD', 'COFFEE', 'SUGER'], ['BREAD', 'COFFEE', 'SUGER'], ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]
# One-hot encode the transactions: one row per basket, one column per item.
te = TransactionEncoder()
# fit_transform learns the item vocabulary and encodes the baskets in a
# single call; the encoded matrix is cast to integers so it shows as 0/1.
te_data = te.fit_transform(data).astype("int")
df = pd.DataFrame(data=te_data, columns=te.columns_)
df
# df.to_csv("transformed_data.csv", encoding='utf-8', index=False)

| | BISCUIT | BOURNVITA | BREAD | COCK | COFFEE | CORNFLAKES | JAM | MAGGI | MILK | SUGER | TEA |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 |
| 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 5 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 6 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 |
| 7 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 8 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 |
| 9 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 10 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 11 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 12 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 13 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 15 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
| 16 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 17 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 18 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 19 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 |
# Mine every itemset whose support is at least 0.01, labelled with item
# names (use_colnames=True) rather than column indices.
apriori_data = apriori(df, min_support=0.01, use_colnames=True)
# sort_values returns a new frame instead of sorting in place, so the
# result must be rebound for the ordering (highest support first) to stick.
# The original index labels are kept, so rows can still be matched by label.
apriori_data = apriori_data.sort_values(by="support", ascending=False)
# Record how many items each itemset holds so it can be filtered by size.
apriori_data['length'] = apriori_data['itemsets'].apply(len)
apriori_data

| | support | itemsets | length |
|---|---|---|---|
| 0 | 0.35 | (BISCUIT) | 1 |
| 1 | 0.20 | (BOURNVITA) | 1 |
| 2 | 0.65 | (BREAD) | 1 |
| 3 | 0.15 | (COCK) | 1 |
| 4 | 0.40 | (COFFEE) | 1 |
| 5 | 0.30 | (CORNFLAKES) | 1 |
| 6 | 0.10 | (JAM) | 1 |
| 7 | 0.25 | (MAGGI) | 1 |
| 8 | 0.25 | (MILK) | 1 |
| 9 | 0.30 | (SUGER) | 1 |
| 10 | 0.35 | (TEA) | 1 |
| 11 | 0.20 | (BISCUIT, BREAD) | 2 |
| 12 | 0.10 | (BISCUIT, COCK) | 2 |
| 13 | 0.10 | (BISCUIT, COFFEE) | 2 |
| 14 | 0.15 | (BISCUIT, CORNFLAKES) | 2 |
| 15 | 0.10 | (BISCUIT, MAGGI) | 2 |
| 16 | 0.10 | (MILK, BISCUIT) | 2 |
| 17 | 0.05 | (BISCUIT, SUGER) | 2 |
| 18 | 0.10 | (BISCUIT, TEA) | 2 |
| 19 | 0.15 | (BREAD, BOURNVITA) | 2 |
| 20 | 0.05 | (COFFEE, BOURNVITA) | 2 |
| 21 | 0.10 | (SUGER, BOURNVITA) | 2 |
| 22 | 0.10 | (TEA, BOURNVITA) | 2 |
| 23 | 0.05 | (COCK, BREAD) | 2 |
| 24 | 0.15 | (BREAD, COFFEE) | 2 |
| 25 | 0.05 | (CORNFLAKES, BREAD) | 2 |
| 26 | 0.10 | (JAM, BREAD) | 2 |
| 27 | 0.15 | (BREAD, MAGGI) | 2 |
| 28 | 0.20 | (MILK, BREAD) | 2 |
| 29 | 0.20 | (BREAD, SUGER) | 2 |
| ... | ... | ... | ... |
| 53 | 0.10 | (CORNFLAKES, BISCUIT, COCK) | 3 |
| 54 | 0.10 | (CORNFLAKES, BISCUIT, COFFEE) | 3 |
| 55 | 0.05 | (MILK, BISCUIT, CORNFLAKES) | 3 |
| 56 | 0.10 | (BISCUIT, TEA, MAGGI) | 3 |
| 57 | 0.05 | (SUGER, BREAD, BOURNVITA) | 3 |
| 58 | 0.10 | (TEA, BREAD, BOURNVITA) | 3 |
| 59 | 0.05 | (SUGER, COFFEE, BOURNVITA) | 3 |
| 60 | 0.05 | (COCK, BREAD, COFFEE) | 3 |
| 61 | 0.10 | (BREAD, COFFEE, SUGER) | 3 |
| 62 | 0.05 | (CORNFLAKES, MILK, BREAD) | 3 |
| 63 | 0.10 | (JAM, BREAD, MAGGI) | 3 |
| 64 | 0.05 | (MILK, BREAD, JAM) | 3 |
| 65 | 0.05 | (JAM, TEA, BREAD) | 3 |
| 66 | 0.05 | (MILK, BREAD, MAGGI) | 3 |
| 67 | 0.10 | (TEA, BREAD, MAGGI) | 3 |
| 68 | 0.10 | (CORNFLAKES, COCK, COFFEE) | 3 |
| 69 | 0.05 | (CORNFLAKES, MILK, COFFEE) | 3 |
| 70 | 0.05 | (CORNFLAKES, COFFEE, SUGER) | 3 |
| 71 | 0.05 | (CORNFLAKES, TEA, COFFEE) | 3 |
| 72 | 0.05 | (MILK, TEA, COFFEE) | 3 |
| 73 | 0.05 | (TEA, CORNFLAKES, MAGGI) | 3 |
| 74 | 0.05 | (MILK, TEA, CORNFLAKES) | 3 |
| 75 | 0.05 | (MAGGI, MILK, JAM) | 3 |
| 76 | 0.05 | (JAM, TEA, MAGGI) | 3 |
| 77 | 0.05 | (CORNFLAKES, MILK, BISCUIT, BREAD) | 4 |
| 78 | 0.05 | (TEA, BISCUIT, BREAD, MAGGI) | 4 |
| 79 | 0.10 | (CORNFLAKES, BISCUIT, COCK, COFFEE) | 4 |
| 80 | 0.05 | (MAGGI, MILK, BREAD, JAM) | 4 |
| 81 | 0.05 | (JAM, TEA, BREAD, MAGGI) | 4 |
| 82 | 0.05 | (CORNFLAKES, MILK, TEA, COFFEE) | 4 |
83 rows × 3 columns
apriori_data[(apriori_data['length']==2) & (apriori_data['support']>=0.05)]

| | support | itemsets | length |
|---|---|---|---|
| 11 | 0.20 | (BISCUIT, BREAD) | 2 |
| 12 | 0.10 | (BISCUIT, COCK) | 2 |
| 13 | 0.10 | (BISCUIT, COFFEE) | 2 |
| 14 | 0.15 | (BISCUIT, CORNFLAKES) | 2 |
| 15 | 0.10 | (BISCUIT, MAGGI) | 2 |
| 16 | 0.10 | (MILK, BISCUIT) | 2 |
| 17 | 0.05 | (BISCUIT, SUGER) | 2 |
| 18 | 0.10 | (BISCUIT, TEA) | 2 |
| 19 | 0.15 | (BREAD, BOURNVITA) | 2 |
| 20 | 0.05 | (COFFEE, BOURNVITA) | 2 |
| 21 | 0.10 | (SUGER, BOURNVITA) | 2 |
| 22 | 0.10 | (TEA, BOURNVITA) | 2 |
| 23 | 0.05 | (COCK, BREAD) | 2 |
| 24 | 0.15 | (BREAD, COFFEE) | 2 |
| 25 | 0.05 | (CORNFLAKES, BREAD) | 2 |
| 26 | 0.10 | (JAM, BREAD) | 2 |
| 27 | 0.15 | (BREAD, MAGGI) | 2 |
| 28 | 0.20 | (MILK, BREAD) | 2 |
| 29 | 0.20 | (BREAD, SUGER) | 2 |
| 30 | 0.20 | (TEA, BREAD) | 2 |
| 31 | 0.15 | (COCK, COFFEE) | 2 |
| 32 | 0.10 | (CORNFLAKES, COCK) | 2 |
| 33 | 0.20 | (CORNFLAKES, COFFEE) | 2 |
| 34 | 0.05 | (MILK, COFFEE) | 2 |
| 35 | 0.20 | (COFFEE, SUGER) | 2 |
| 36 | 0.05 | (TEA, COFFEE) | 2 |
| 37 | 0.05 | (CORNFLAKES, MAGGI) | 2 |
| 38 | 0.10 | (MILK, CORNFLAKES) | 2 |
| 39 | 0.05 | (CORNFLAKES, SUGER) | 2 |
| 40 | 0.10 | (TEA, CORNFLAKES) | 2 |
| 41 | 0.10 | (JAM, MAGGI) | 2 |
| 42 | 0.05 | (MILK, JAM) | 2 |
| 43 | 0.05 | (JAM, TEA) | 2 |
| 44 | 0.05 | (MILK, MAGGI) | 2 |
| 45 | 0.20 | (TEA, MAGGI) | 2 |
| 46 | 0.05 | (MILK, TEA) | 2 |