1 Data Understanding

1.1 Load Libraries

# Loading necessary packages
# Each package is installed only when it is missing and is then attached.
# requireNamespace() tests the one package directly; installed.packages() is
# slow and, combined with %in%, compares the name against every cell of its
# result matrix instead of only the package-name column.
## Markdown Update
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown", dependencies = TRUE)
}
library(rmarkdown)

# Installing the other packages if not available, then attaching them
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl", dependencies = TRUE)
}
library(readxl)
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Global Settings
options(digits = 4)    # number of significant digits used when printing
options(scipen = 999)  # large penalty so fixed notation is preferred

# Switch to the project folder only when it exists on this machine; otherwise
# keep the current working directory instead of aborting the whole script.
if (dir.exists("~/AC UNI-ORG/AB SIM/GDBA/R")) {
  setwd("~/AC UNI-ORG/AB SIM/GDBA/R")
} else {
  warning(
    "Project folder not found; keeping working directory: ", getwd(),
    call. = FALSE
  )
}

1.2 Acquire Data

# Get Data from Library
# Loads the 'Groceries' transactions object into the workspace.
data("Groceries")
# head() keeps the first six transactions; printing shows only the dimensions.
head(Groceries)
## transactions in sparse format with
##  6 transactions (rows) and
##  169 items (columns)
# Overview: size and density, most frequent items, basket-size distribution
# and a sample of the item hierarchy (labels, level2, level1).
summary(Groceries)
## transactions as itemMatrix in sparse format with
##  9835 rows (elements/itemsets/transactions) and
##  169 columns (items) and a density of 0.02609 
## 
## most frequent items:
##       whole milk other vegetables       rolls/buns             soda 
##             2513             1903             1809             1715 
##           yogurt          (Other) 
##             1372            34055 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46 
##   17   18   19   20   21   22   23   24   26   27   28   29   32 
##   29   14   14    9   11    4    6    1    1    1    1    3    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    3.00    4.41    6.00   32.00 
## 
## includes extended item information - examples:
##        labels  level2           level1
## 1 frankfurter sausage meat and sausage
## 2     sausage sausage meat and sausage
## 3  liver loaf sausage meat and sausage

1.3 Show Transactions

# Show Transactions
# 'Transactions' is a plain copy of 'Groceries' used by the following steps;
# printing it reports only the number of transactions and items.
Transactions <- Groceries
Transactions
## transactions in sparse format with
##  9835 transactions (rows) and
##  169 items (columns)

1.4 Show Products

# Find the products in the data set
# itemLabels() returns the item names as a character vector (one per column
# of the transaction matrix).
Products <- itemLabels(Transactions)

# View unique products
# Print a heading first, then the full vector of item labels.
cat("\nShow list of items in transactions.\n\n")
## 
## Show list of items in transactions.
Products
##   [1] "frankfurter"               "sausage"                  
##   [3] "liver loaf"                "ham"                      
##   [5] "meat"                      "finished products"        
##   [7] "organic sausage"           "chicken"                  
##   [9] "turkey"                    "pork"                     
##  [11] "beef"                      "hamburger meat"           
##  [13] "fish"                      "citrus fruit"             
##  [15] "tropical fruit"            "pip fruit"                
##  [17] "grapes"                    "berries"                  
##  [19] "nuts/prunes"               "root vegetables"          
##  [21] "onions"                    "herbs"                    
##  [23] "other vegetables"          "packaged fruit/vegetables"
##  [25] "whole milk"                "butter"                   
##  [27] "curd"                      "dessert"                  
##  [29] "butter milk"               "yogurt"                   
##  [31] "whipped/sour cream"        "beverages"                
##  [33] "UHT-milk"                  "condensed milk"           
##  [35] "cream"                     "soft cheese"              
##  [37] "sliced cheese"             "hard cheese"              
##  [39] "cream cheese "             "processed cheese"         
##  [41] "spread cheese"             "curd cheese"              
##  [43] "specialty cheese"          "mayonnaise"               
##  [45] "salad dressing"            "tidbits"                  
##  [47] "frozen vegetables"         "frozen fruits"            
##  [49] "frozen meals"              "frozen fish"              
##  [51] "frozen chicken"            "ice cream"                
##  [53] "frozen dessert"            "frozen potato products"   
##  [55] "domestic eggs"             "rolls/buns"               
##  [57] "white bread"               "brown bread"              
##  [59] "pastry"                    "roll products "           
##  [61] "semi-finished bread"       "zwieback"                 
##  [63] "potato products"           "flour"                    
##  [65] "salt"                      "rice"                     
##  [67] "pasta"                     "vinegar"                  
##  [69] "oil"                       "margarine"                
##  [71] "specialty fat"             "sugar"                    
##  [73] "artif. sweetener"          "honey"                    
##  [75] "mustard"                   "ketchup"                  
##  [77] "spices"                    "soups"                    
##  [79] "ready soups"               "Instant food products"    
##  [81] "sauces"                    "cereals"                  
##  [83] "organic products"          "baking powder"            
##  [85] "preservation products"     "pudding powder"           
##  [87] "canned vegetables"         "canned fruit"             
##  [89] "pickled vegetables"        "specialty vegetables"     
##  [91] "jam"                       "sweet spreads"            
##  [93] "meat spreads"              "canned fish"              
##  [95] "dog food"                  "cat food"                 
##  [97] "pet care"                  "baby food"                
##  [99] "coffee"                    "instant coffee"           
## [101] "tea"                       "cocoa drinks"             
## [103] "bottled water"             "soda"                     
## [105] "misc. beverages"           "fruit/vegetable juice"    
## [107] "syrup"                     "bottled beer"             
## [109] "canned beer"               "brandy"                   
## [111] "whisky"                    "liquor"                   
## [113] "rum"                       "liqueur"                  
## [115] "liquor (appetizer)"        "white wine"               
## [117] "red/blush wine"            "prosecco"                 
## [119] "sparkling wine"            "salty snack"              
## [121] "popcorn"                   "nut snack"                
## [123] "snack products"            "long life bakery product" 
## [125] "waffles"                   "cake bar"                 
## [127] "chewing gum"               "chocolate"                
## [129] "cooking chocolate"         "specialty chocolate"      
## [131] "specialty bar"             "chocolate marshmallow"    
## [133] "candy"                     "seasonal products"        
## [135] "detergent"                 "softener"                 
## [137] "decalcifier"               "dish cleaner"             
## [139] "abrasive cleaner"          "cleaner"                  
## [141] "toilet cleaner"            "bathroom cleaner"         
## [143] "hair spray"                "dental care"              
## [145] "male cosmetics"            "make up remover"          
## [147] "skin care"                 "female sanitary products" 
## [149] "baby cosmetics"            "soap"                     
## [151] "rubbing alcohol"           "hygiene articles"         
## [153] "napkins"                   "dishes"                   
## [155] "cookware"                  "kitchen utensil"          
## [157] "cling film/bags"           "kitchen towels"           
## [159] "house keeping products"    "candles"                  
## [161] "light bulbs"               "sound storage medium"     
## [163] "newspapers"                "photo/film"               
## [165] "pot plants"                "flower soil/fertilizer"   
## [167] "flower (seeds)"            "shopping bags"            
## [169] "bags"
# Absolute purchase count per item (named numeric vector, names = item labels)
ProductFrequencies <- itemFrequency(Transactions, type = "absolute")

# Two-column table: item label and its count
ProductFrequencies_df <- data.frame(
  Product   = names(ProductFrequencies),
  Frequency = as.vector(ProductFrequencies)
)

# Most frequent items first; ordering on the negated counts leaves tied items
# in their original relative order
ProductFrequencies_df <- ProductFrequencies_df[
  order(-ProductFrequencies_df[["Frequency"]]),
]

# Optional: print the sorted count table
# cat("\nShow list of items in transactions.\n\n")
# ProductFrequencies_df

# Print the relative frequency (share of baskets) of every item, largest first
cat("\nShow list of items in transactions incl frequency.\n\n")
## 
## Show list of items in transactions incl frequency.
sort(
  itemFrequency(Transactions, type = "relative"),
  decreasing = TRUE
)
##                whole milk          other vegetables                rolls/buns 
##                 0.2555160                 0.1934926                 0.1839349 
##                      soda                    yogurt             bottled water 
##                 0.1743772                 0.1395018                 0.1105236 
##           root vegetables            tropical fruit             shopping bags 
##                 0.1089985                 0.1049314                 0.0985257 
##                   sausage                    pastry              citrus fruit 
##                 0.0939502                 0.0889680                 0.0827656 
##              bottled beer                newspapers               canned beer 
##                 0.0805287                 0.0798170                 0.0776817 
##                 pip fruit     fruit/vegetable juice        whipped/sour cream 
##                 0.0756482                 0.0722928                 0.0716828 
##               brown bread             domestic eggs               frankfurter 
##                 0.0648704                 0.0634469                 0.0589731 
##                 margarine                    coffee                      pork 
##                 0.0585663                 0.0580580                 0.0576512 
##                    butter                      curd                      beef 
##                 0.0554143                 0.0532791                 0.0524657 
##                   napkins                 chocolate         frozen vegetables 
##                 0.0523640                 0.0496187                 0.0480935 
##                   chicken               white bread             cream cheese  
##                 0.0429080                 0.0420946                 0.0396543 
##                   waffles               salty snack  long life bakery product 
##                 0.0384342                 0.0378241                 0.0374174 
##                   dessert                     sugar                  UHT-milk 
##                 0.0371124                 0.0338587                 0.0334520 
##            hamburger meat                   berries          hygiene articles 
##                 0.0332486                 0.0332486                 0.0329436 
##                    onions       specialty chocolate                     candy 
##                 0.0310117                 0.0304016                 0.0298932 
##              frozen meals           misc. beverages                       oil 
##                 0.0283681                 0.0283681                 0.0280630 
##               butter milk             specialty bar                       ham 
##                 0.0279614                 0.0273513                 0.0260295 
##                 beverages                      meat                 ice cream 
##                 0.0260295                 0.0258261                 0.0250127 
##             sliced cheese               hard cheese                  cat food 
##                 0.0245043                 0.0245043                 0.0232842 
##                    grapes               chewing gum            red/blush wine 
##                 0.0223691                 0.0210473                 0.0192171 
##                 detergent                white wine        pickled vegetables 
##                 0.0192171                 0.0190137                 0.0178953 
##       semi-finished bread             baking powder                    dishes 
##                 0.0176919                 0.0176919                 0.0175902 
##                     flour                pot plants               soft cheese 
##                 0.0173869                 0.0172852                 0.0170819 
##          processed cheese                     herbs                     pasta 
##                 0.0165735                 0.0162684                 0.0150483 
##               canned fish         seasonal products                  cake bar 
##                 0.0150483                 0.0142349                 0.0132181 
## packaged fruit/vegetables                   mustard               frozen fish 
##                 0.0130147                 0.0119980                 0.0116929 
##           cling film/bags             spread cheese                    liquor 
##                 0.0113879                 0.0111845                 0.0110829 
##            frozen dessert                      salt         canned vegetables 
##                 0.0107778                 0.0107778                 0.0107778 
##              dish cleaner            flower (seeds)            condensed milk 
##                 0.0104728                 0.0103711                 0.0102694 
##            roll products                   pet care                photo/film 
##                 0.0102694                 0.0094560                 0.0092527 
##                mayonnaise             sweet spreads     chocolate marshmallow 
##                 0.0091510                 0.0090493                 0.0090493 
##                   candles          specialty cheese                  dog food 
##                 0.0089476                 0.0085409                 0.0085409 
##    frozen potato products    house keeping products                    turkey 
##                 0.0084392                 0.0083376                 0.0081342 
##     Instant food products        liquor (appetizer)                      rice 
##                 0.0080325                 0.0079309                 0.0076258 
##            instant coffee                   popcorn                  zwieback 
##                 0.0074225                 0.0072191                 0.0069141 
##                     soups         finished products                   vinegar 
##                 0.0068124                 0.0065074                 0.0065074 
##  female sanitary products            kitchen towels               dental care 
##                 0.0061007                 0.0059990                 0.0057956 
##                   cereals            sparkling wine                    sauces 
##                 0.0056940                 0.0055923                 0.0054906 
##                  softener                       jam                    spices 
##                 0.0054906                 0.0053889                 0.0051856 
##                liver loaf               curd cheese                   cleaner 
##                 0.0050839                 0.0050839                 0.0050839 
##            male cosmetics                       rum                   ketchup 
##                 0.0045755                 0.0044738                 0.0042705 
##              meat spreads                    brandy               light bulbs 
##                 0.0042705                 0.0041688                 0.0041688 
##                       tea             specialty fat          abrasive cleaner 
##                 0.0038638                 0.0036604                 0.0035587 
##                 skin care               nuts/prunes          artif. sweetener 
##                 0.0035587                 0.0033554                 0.0032537 
##              canned fruit                     syrup                 nut snack 
##                 0.0032537                 0.0032537                 0.0031520 
##            snack products                      fish           potato products 
##                 0.0030503                 0.0029487                 0.0028470 
##          bathroom cleaner                  cookware                      soap 
##                 0.0027453                 0.0027453                 0.0026436 
##         cooking chocolate                   tidbits            pudding powder 
##                 0.0025419                 0.0023386                 0.0023386 
##           organic sausage              cocoa drinks                  prosecco 
##                 0.0022369                 0.0022369                 0.0020336 
##    flower soil/fertilizer               ready soups      specialty vegetables 
##                 0.0019319                 0.0018302                 0.0017285 
##          organic products                     honey               decalcifier 
##                 0.0016268                 0.0015252                 0.0015252 
##                     cream             frozen fruits                hair spray 
##                 0.0013218                 0.0012201                 0.0011185 
##           rubbing alcohol                   liqueur            salad dressing 
##                 0.0010168                 0.0009151                 0.0008134 
##                    whisky           make up remover            toilet cleaner 
##                 0.0008134                 0.0008134                 0.0007117 
##            frozen chicken            baby cosmetics           kitchen utensil 
##                 0.0006101                 0.0006101                 0.0004067 
##                      bags     preservation products                 baby food 
##                 0.0004067                 0.0002034                 0.0001017 
##      sound storage medium 
##                 0.0001017

2 Data Preparation

2.1 Clean and Transform Data

# Show Data
# One row of the transactions object corresponds to one basket.
cat("\nNumber of baskets:", nrow(Groceries))
## 
## Number of baskets: 9835
cat("\n\nThe first five baskets:\n")
## 
## 
## The first five baskets:
# inspect() lists the items contained in each selected basket.
inspect(Groceries[1:5])
##     items                     
## [1] {citrus fruit,            
##      semi-finished bread,     
##      margarine,               
##      ready soups}             
## [2] {tropical fruit,          
##      yogurt,                  
##      coffee}                  
## [3] {whole milk}              
## [4] {pip fruit,               
##      yogurt,                  
##      cream cheese ,           
##      meat spreads}            
## [5] {other vegetables,        
##      whole milk,              
##      condensed milk,          
##      long life bakery product}

2.2 Identify Most Frequent Items

# Installing the packages if not available, then attaching them.
# requireNamespace() tests the one package directly; installed.packages() is
# slow and, combined with %in%, compares the name against every cell of its
# result matrix instead of only the package-name column.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Show Most Frequent Items
# Mine frequent itemsets with Eclat: minimum support 0.10, at most 15 items
# per itemset.
Frequency <- eclat(Groceries, parameter = list(supp = 0.10, maxlen = 15))
## Eclat
## 
## parameter specification:
##  tidLists support minlen maxlen            target  ext
##     FALSE     0.1      1     15 frequent itemsets TRUE
## 
## algorithmic control:
##  sparse sort verbose
##       7   -2    TRUE
## 
## Absolute minimum support count: 983 
## 
## create itemset ... 
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating bit matrix ... [8 row(s), 9835 column(s)] done [0.00s].
## writing  ... [8 set(s)] done [0.00s].
## Creating S4 object  ... done [0.00s].
# Order the frequent itemsets by support (highest support is listed first)
Frequency <- sort(Frequency, by = "support")
cat("\nThe most frequent items, i.e., the items with the highest support are:\n")
## 
## The most frequent items, i.e., the items with the highest support are:
inspect(Frequency)
##     items              support count
## [1] {whole milk}       0.2555  2513 
## [2] {other vegetables} 0.1935  1903 
## [3] {rolls/buns}       0.1839  1809 
## [4] {soda}             0.1744  1715 
## [5] {yogurt}           0.1395  1372 
## [6] {bottled water}    0.1105  1087 
## [7] {root vegetables}  0.1090  1072 
## [8] {tropical fruit}   0.1049  1032
# Bar chart of the ten most frequent items, with a grid drawn over the plot
itemFrequencyPlot(
  Groceries,
  topN      = 10,
  main      = "Item Frequency Plot for 'Groceries' Market Basket",
  col       = "lightblue",
  cex.names = 1.0,
  cex.main  = 1.6
)
grid()

# Write data to working directory
# write.csv(Frequency, file = "GroceriesFrequency.csv")

3 Modelling

3.1 Mining All Association Rules with Confidence ≥ 0.9 and Support ≥ 0.0015

# Installing arules if not available, then attaching it.
# requireNamespace() tests the one package directly; installed.packages() is
# slow and, combined with %in%, compares the name against every cell of its
# result matrix instead of only the package-name column.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)

# Show Mining Association Rules
# Minimum support 0.0015 and minimum confidence 0.90. maxlen = 5 is passed
# outside 'parameter'; the parameter echo printed by apriori() reports
# maxlen 5, so it is applied as a mining parameter.
Rules <- apriori(Groceries, parameter = list(supp = 0.0015, conf = 0.90), maxlen = 5)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5  0.0015      1
##  maxlen target  ext
##       5  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 14 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5
##  done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Report how many rules were mined (length() of the rules object).
cat("\n\nThis apriori function generates", length(Rules), "rules.\n\n")
## 
## 
## This apriori function generates 7 rules.
# Overview of the rule-length distribution and of the quality measures
# (support, confidence, coverage, lift, count).
summary(Rules)
## set of 7 rules
## 
## rule length distribution (lhs + rhs):sizes
## 3 4 5 
## 1 5 1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3       4       4       4       4       5 
## 
## summary of quality measures:
##     support          confidence       coverage            lift      
##  Min.   :0.00153   Min.   :0.900   Min.   :0.00163   Min.   : 3.52  
##  1st Qu.:0.00163   1st Qu.:0.902   1st Qu.:0.00168   1st Qu.: 3.60  
##  Median :0.00183   Median :0.905   Median :0.00203   Median : 3.67  
##  Mean   :0.00176   Mean   :0.926   Mean   :0.00190   Mean   : 4.89  
##  3rd Qu.:0.00188   3rd Qu.:0.938   3rd Qu.:0.00208   3rd Qu.: 4.29  
##  Max.   :0.00193   Max.   :1.000   Max.   :0.00214   Max.   :11.23  
##      count     
##  Min.   :15.0  
##  1st Qu.:16.0  
##  Median :18.0  
##  Mean   :17.3  
##  3rd Qu.:18.5  
##  Max.   :19.0  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835  0.0015        0.9
##                                                                                call
##  apriori(data = Groceries, parameter = list(supp = 0.0015, conf = 0.9), maxlen = 5)
# Inspect Rules
# Lists every rule as lhs => rhs together with its quality measures.
inspect(Rules)
##     lhs                        rhs                 support confidence coverage   lift count
## [1] {liquor,                                                                               
##      red/blush wine}        => {bottled beer}     0.001932     0.9048 0.002135 11.235    19
## [2] {root vegetables,                                                                      
##      whipped/sour cream,                                                                   
##      flour}                 => {whole milk}       0.001729     1.0000 0.001729  3.914    17
## [3] {other vegetables,                                                                     
##      cream cheese ,                                                                        
##      sugar}                 => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [4] {pip fruit,                                                                            
##      butter,                                                                               
##      whipped/sour cream}    => {whole milk}       0.001830     0.9000 0.002034  3.522    18
## [5] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      domestic eggs}         => {whole milk}       0.001830     0.9000 0.002034  3.522    18
## [6] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      fruit/vegetable juice} => {other vegetables} 0.001932     0.9048 0.002135  4.676    19
## [7] {sausage,                                                                              
##      tropical fruit,                                                                       
##      root vegetables,                                                                      
##      yogurt}                => {whole milk}       0.001525     0.9375 0.001627  3.669    15
# Mine the rule set that will be exported: same support and confidence as
# 'Rules', but without the explicit maxlen = 5 (the parameter echo printed
# below reports maxlen 10 for this run).
RulesFull <- apriori(Groceries, parameter = list(supp = 0.0015, conf = 0.90))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5  0.0015      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 14 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Write data to working directory:
# coerce the rules to a data.frame and save them as CSV without row names.
Rules_df  <- as(RulesFull, "data.frame")
write.csv(Rules_df, file = "GroceriesRules.csv", row.names = FALSE)

3.2 Mining Association Details for Specific Rule

# Installing arules if not available, then attaching it.
# requireNamespace() tests the one package directly; installed.packages() is
# slow and, combined with %in%, compares the name against every cell of its
# result matrix instead of only the package-name column.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)

# Show Transactions
# Reload the data set and reset the alias so this section can run on its own.
data("Groceries")
Transactions <- Groceries

# Generate association rules
# No maxlen is given here, so 'Rules' is replaced by a rule set mined with
# the default length limit.
Rules <- apriori(Groceries, parameter = list(supp = 0.0015, conf = 0.90))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5  0.0015      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 14 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
#inspect(Rules)

# Filter for the specific rule {liquor, red/blush wine} => {bottled beer}
SpecificRule <- subset(
  Rules,
  lhs %ain% c("liquor", "red/blush wine") &
    size(lhs) == 2 &
    rhs %ain% c("bottled beer")
)

# Labels reused by the messages below so that every message names the rule
# that was actually filtered above
lhs_label  <- "{liquor, red/blush wine}"
rule_label <- paste(lhs_label, "=> {bottled beer}")

# Inspect Details
cat("\nThe following lists the details of discovered association rules. These definitions apply:\n")
## 
## The following lists the details of discovered association rules. These definitions apply:
cat("lhs: Items on the left side of the rule: Antecedents.\n")
## lhs: Items on the left side of the rule: Antecedents.
cat("rhs: Items on the right side of the rule: Consequents.\n")
## rhs: Items on the right side of the rule: Consequents.
cat("Example:\n Antecedent (lhs): liquor and red/blush wine.\n")
## Example:
##  Antecedent (lhs): liquor and red/blush wine.
cat(" Consequent (rhs): bottled beer.\n")
##  Consequent (rhs): bottled beer.
cat("\nCoverage: The proportion of transactions that contain lhs, i.e., the antecedent.\n")
## 
## Coverage: The proportion of transactions that contain lhs, i.e., the antecedent.
coverage <- quality(SpecificRule)$coverage
cat("Coverage for", paste0(lhs_label, ":"), coverage * 100, "%\n")
## Coverage for {liquor, red/blush wine}: 0.2135 %
cat("\nSupport: Fraction of transactions containing both lhs and rhs.\n")
## 
## Support: Fraction of transactions containing both lhs and rhs.
support <- quality(SpecificRule)$support
cat("Support for", paste0(rule_label, ":"), support * 100, "%\n")
## Support for {liquor, red/blush wine} => {bottled beer}: 0.1932 %
cat("\nConfidence: Fraction of transactions with lhs that also have rhs, i.e., antecedent is true and consequent is true.\n")
## 
## Confidence: Fraction of transactions with lhs that also have rhs, i.e., antecedent is true and consequent is true.
confidence <- quality(SpecificRule)$confidence
cat("Confidence for", paste0(rule_label, ":"), confidence * 100, "%\n")
## Confidence for {liquor, red/blush wine} => {bottled beer}: 90.48 %
cat("\nLift: Measure of how much more likely rhs is given lhs compared to if they were independent.\n")
## 
## Lift: Measure of how much more likely rhs is given lhs compared to if they were independent.
lift <- quality(SpecificRule)$lift
cat("Lift for", paste0(rule_label, ":"), lift, "\n")
## Lift for {liquor, red/blush wine} => {bottled beer}: 11.24
cat("\nCount: The number of times this rule applies.\n")
## 
## Count: The number of times this rule applies.
count <- quality(SpecificRule)$count
cat("Count for", paste0(rule_label, ":"), count, "\n\n\n")
## Count for {liquor, red/blush wine} => {bottled beer}: 19
# Write data to working directory
Rules_df  <- as(SpecificRule, "data.frame")
write.csv(Rules_df, file = "SpecificGroceriesRules.csv", row.names = FALSE)

inspect(SpecificRule)
##     lhs                         rhs            support  confidence coverage
## [1] {liquor, red/blush wine} => {bottled beer} 0.001932 0.9048     0.002135
##     lift  count
## [1] 11.24 19

3.3 Visualise Rules

# Load arulesViz, installing it first if it is missing.
# requireNamespace() checks for this one package directly instead of scanning
# the full installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Show Rules (default plot method)
plot(Rules)

# Grouped view of the same rules
plot(Rules, method = "grouped")

# Graph view. The former `control = list(type = "items")` was dropped: `type`
# is not among the control parameters accepted by the ggplot2 graph engine, so
# it was ignored and only triggered a dump of the available parameters.
plot(Rules, method = "graph")

3.4 Visualise Rules for Yoghurt

# Load arulesViz, installing it first if it is missing.
# requireNamespace() checks for this one package directly instead of scanning
# the full installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Mine rules with yogurt restricted to the left-hand side and every other item
# restricted to the right-hand side
RulesYoghurt <- apriori(
  Groceries,
  parameter  = list(supp = 0.001, conf = 0.2),
  appearance = list(default = "rhs", lhs = "yogurt")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [5 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Static graph of the yogurt rules
plot(RulesYoghurt, method = "graph", measure = "confidence", shading = "lift")

# Show Rules Interactive
# RulesYoghurt is reused as mined above; re-running the identical apriori()
# call only reproduced the same five rules.
plot(
  RulesYoghurt,
  method  = "graph",
  measure = "confidence",
  shading = "lift",
  engine  = "html"
)

3.5 Visualise Rules for Whole Milk

# Load arulesViz, installing it first if it is missing.
# requireNamespace() checks for this one package directly instead of scanning
# the full installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Mine rules with whole milk restricted to the left-hand side and every other
# item restricted to the right-hand side
RulesMilk <- apriori(
  Groceries,
  parameter  = list(supp = 0.001, conf = 0.2),
  appearance = list(default = "rhs", lhs = "whole milk")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [3 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Interactive (HTML) graph of the whole-milk rules
plot(
  RulesMilk,
  method  = "graph",
  measure = "confidence",
  shading = "lift",
  engine  = "html"
)

3.6 Refine Model

# Keep only the rules whose lift exceeds 3, then list them
rules_lift <- subset(Rules, subset = lift > 3)
inspect(rules_lift)
##     lhs                        rhs                 support confidence coverage   lift count
## [1] {liquor,                                                                               
##      red/blush wine}        => {bottled beer}     0.001932     0.9048 0.002135 11.235    19
## [2] {root vegetables,                                                                      
##      whipped/sour cream,                                                                   
##      flour}                 => {whole milk}       0.001729     1.0000 0.001729  3.914    17
## [3] {other vegetables,                                                                     
##      cream cheese ,                                                                        
##      sugar}                 => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [4] {pip fruit,                                                                            
##      butter,                                                                               
##      whipped/sour cream}    => {whole milk}       0.001830     0.9000 0.002034  3.522    18
## [5] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      domestic eggs}         => {whole milk}       0.001830     0.9000 0.002034  3.522    18
## [6] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      fruit/vegetable juice} => {other vegetables} 0.001932     0.9048 0.002135  4.676    19
## [7] {sausage,                                                                              
##      tropical fruit,                                                                       
##      root vegetables,                                                                      
##      yogurt}                => {whole milk}       0.001525     0.9375 0.001627  3.669    15

3.7 Sort Rules by Lift

# Sort Rules by Lift
rules_by_lift <- sort(Rules, by = "lift", decreasing = TRUE)
# head() rather than [1:6]: it returns at most six rules and does not index
# past the end when fewer than six rules were mined
six_rules_highest_lift <- head(rules_by_lift, n = 6)
inspect(six_rules_highest_lift)
##     lhs                        rhs                 support confidence coverage   lift count
## [1] {liquor,                                                                               
##      red/blush wine}        => {bottled beer}     0.001932     0.9048 0.002135 11.235    19
## [2] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      fruit/vegetable juice} => {other vegetables} 0.001932     0.9048 0.002135  4.676    19
## [3] {root vegetables,                                                                      
##      whipped/sour cream,                                                                   
##      flour}                 => {whole milk}       0.001729     1.0000 0.001729  3.914    17
## [4] {other vegetables,                                                                     
##      cream cheese ,                                                                        
##      sugar}                 => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [5] {sausage,                                                                              
##      tropical fruit,                                                                       
##      root vegetables,                                                                      
##      yogurt}                => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [6] {pip fruit,                                                                            
##      butter,                                                                               
##      whipped/sour cream}    => {whole milk}       0.001830     0.9000 0.002034  3.522    18
# Sort Rules by Confidence
rules_by_confidence <- sort(Rules, by = "confidence", decreasing = TRUE)
# head() rather than [1:6]: it returns at most six rules and does not index
# past the end when fewer than six rules were mined
six_rules_highest_confidence <- head(rules_by_confidence, n = 6)
inspect(six_rules_highest_confidence)
##     lhs                        rhs                 support confidence coverage   lift count
## [1] {root vegetables,                                                                      
##      whipped/sour cream,                                                                   
##      flour}                 => {whole milk}       0.001729     1.0000 0.001729  3.914    17
## [2] {other vegetables,                                                                     
##      cream cheese ,                                                                        
##      sugar}                 => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [3] {sausage,                                                                              
##      tropical fruit,                                                                       
##      root vegetables,                                                                      
##      yogurt}                => {whole milk}       0.001525     0.9375 0.001627  3.669    15
## [4] {liquor,                                                                               
##      red/blush wine}        => {bottled beer}     0.001932     0.9048 0.002135 11.235    19
## [5] {tropical fruit,                                                                       
##      whipped/sour cream,                                                                   
##      fruit/vegetable juice} => {other vegetables} 0.001932     0.9048 0.002135  4.676    19
## [6] {pip fruit,                                                                            
##      butter,                                                                               
##      whipped/sour cream}    => {whole milk}       0.001830     0.9000 0.002034  3.522    18

3.8 Show Scatter Plot

# Re-mine the transactions with looser thresholds
# (minimum support 0.5 %, minimum confidence 10 %)
Rules <- apriori(
  Transactions,
  parameter = list(support = 0.005, confidence = 0.1)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5   0.005      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 49 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [1582 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# inspect(Rules)

# Keep the stronger rules (support >= 1 %, confidence >= 50 %) for plotting
library(arulesViz)
filtered_rules <- subset(
  Rules,
  subset = support >= 0.01 & confidence >= 0.5
)
inspect(filtered_rules)
##      lhs                                       rhs                support
## [1]  {curd, yogurt}                         => {whole milk}       0.01007
## [2]  {other vegetables, butter}             => {whole milk}       0.01149
## [3]  {other vegetables, domestic eggs}      => {whole milk}       0.01230
## [4]  {yogurt, whipped/sour cream}           => {whole milk}       0.01088
## [5]  {other vegetables, whipped/sour cream} => {whole milk}       0.01464
## [6]  {pip fruit, other vegetables}          => {whole milk}       0.01352
## [7]  {citrus fruit, root vegetables}        => {other vegetables} 0.01037
## [8]  {tropical fruit, root vegetables}      => {other vegetables} 0.01230
## [9]  {tropical fruit, root vegetables}      => {whole milk}       0.01200
## [10] {tropical fruit, yogurt}               => {whole milk}       0.01515
## [11] {root vegetables, yogurt}              => {other vegetables} 0.01291
## [12] {root vegetables, yogurt}              => {whole milk}       0.01454
## [13] {root vegetables, rolls/buns}          => {other vegetables} 0.01220
## [14] {root vegetables, rolls/buns}          => {whole milk}       0.01271
## [15] {other vegetables, yogurt}             => {whole milk}       0.02227
##      confidence coverage lift  count
## [1]  0.5824     0.01729  2.279  99  
## [2]  0.5736     0.02003  2.245 113  
## [3]  0.5525     0.02227  2.162 121  
## [4]  0.5245     0.02074  2.053 107  
## [5]  0.5070     0.02888  1.984 144  
## [6]  0.5175     0.02613  2.025 133  
## [7]  0.5862     0.01769  3.030 102  
## [8]  0.5845     0.02105  3.021 121  
## [9]  0.5700     0.02105  2.231 118  
## [10] 0.5174     0.02928  2.025 149  
## [11] 0.5000     0.02583  2.584 127  
## [12] 0.5630     0.02583  2.203 143  
## [13] 0.5021     0.02430  2.595 120  
## [14] 0.5230     0.02430  2.047 125  
## [15] 0.5129     0.04342  2.007 219
# Graph view of the filtered rules. The former `control = list(type = "items")`
# was dropped: `type` is not among the control parameters accepted by the
# ggplot2 graph engine, so it was ignored and only triggered a dump of the
# available parameters.
plot(filtered_rules, method = "graph")

4 Load Libraries for OnlineRetail

4.1 Load Packages

# Load the packages used for the OnlineRetail analysis, installing any that
# are missing. requireNamespace() checks for each package directly instead of
# scanning the full installed.packages() matrix.
for (pkg in c("readxl", "arules", "arulesViz")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}

4.2 Data Understanding

# Download Training Data from URL
##   InvoiceNo StockCode                         Description Quantity
## 1    536365    85123A  WHITE HANGING HEART T-LIGHT HOLDER        6
## 2    536365     71053                 WHITE METAL LANTERN        6
## 3    536365    84406B      CREAM CUPID HEARTS COAT HANGER        8
## 4    536365    84029G KNITTED UNION FLAG HOT WATER BOTTLE        6
## 5    536365    84029E      RED WOOLLY HOTTIE WHITE HEART.        6
## 6    536365     22752        SET 7 BABUSHKA NESTING BOXES        2
##       InvoiceDate UnitPrice CustomerID        Country
## 1 1 12 2010 08:26      2.55      17850 United Kingdom
## 2 1 12 2010 08:26      3.39      17850 United Kingdom
## 3 1 12 2010 08:26      2.75      17850 United Kingdom
## 4 1 12 2010 08:26      3.39      17850 United Kingdom
## 5 1 12 2010 08:26      3.39      17850 United Kingdom
## 6 1 12 2010 08:26      7.65      17850 United Kingdom

5 Data Preparation

5.1 Clean and Transform Data

# Packages used for the data preparation steps
library(dplyr)
library(tidyr)

# Drop every row that contains a missing value
OnlineRetail <- na.omit(OnlineRetail)

# Add a TransactionID column of the form "<InvoiceNo>_<CustomerID>" while
# keeping both source columns
OnlineRetail <- unite(
  OnlineRetail,
  col = "TransactionID",
  InvoiceNo, CustomerID,
  sep = "_",
  remove = FALSE
)

#OnlineRetail$TransactionID <- as.factor(OnlineRetail$TransactionID)

# Show the structure of the prepared data
str(OnlineRetail)
## 'data.frame':    406829 obs. of  9 variables:
##  $ TransactionID: chr  "536365_17850" "536365_17850" "536365_17850" "536365_17850" ...
##  $ InvoiceNo    : Factor w/ 25900 levels "536365","536366",..: 1 1 1 1 1 1 1 2 2 3 ...
##  $ StockCode    : Factor w/ 4070 levels "10002","10080",..: 3538 2795 3045 2986 2985 1663 801 1548 1547 3306 ...
##  $ Description  : Factor w/ 4224 levels "Dotcomgiftshop Gift Voucher \x9c20.00",..: 4025 4033 936 1957 2978 3233 1571 1696 1693 263 ...
##  $ Quantity     : int  6 6 8 6 6 2 6 6 6 32 ...
##  $ InvoiceDate  : Factor w/ 23260 levels "1 02 2011 08:23",..: 607 607 607 607 607 607 607 608 608 609 ...
##  $ UnitPrice    : num  2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
##  $ CustomerID   : int  17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
##  $ Country      : Factor w/ 38 levels "Australia","Austria",..: 36 36 36 36 36 36 36 36 36 36 ...
# Create the transactions object: one transaction per invoice.
# na.omit() removed rows, but InvoiceNo is a factor that still carries all of
# its original levels (25900 per the str() output above). Without drop = TRUE,
# split() emits an empty item list for every invoice that no longer has rows,
# which inflates the transaction count and deflates every support value.
# NOTE: the apriori()/summary() output recorded further down was produced
# before this fix and still reflects the inflated transaction count.
Transactions <- as(
  split(OnlineRetail$Description, OnlineRetail$InvoiceNo, drop = TRUE),
  "transactions"
)

6 Modelling

6.1 Mining Association Rules

# Load arules, installing it first if it is missing.
# requireNamespace() checks for this one package directly instead of scanning
# the full installed.packages() matrix.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)

# Mine association rules (minimum support 0.1 %, minimum confidence 80 %)
Rules <- apriori(Transactions, parameter = list(supp = 0.001, conf = 0.8))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 25 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3896 item(s), 25900 transaction(s)] done [0.10s].
## sorting and recoding items ... [2355 item(s)] done [0.01s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
##  done [0.42s].
## writing ... [62672 rule(s)] done [0.05s].
## creating S4 object  ... done [0.02s].
# Summarise the mined rule set
summary(object = Rules)
## set of 62672 rules
## 
## rule length distribution (lhs + rhs):sizes
##     2     3     4     5     6     7     8     9    10 
##    61  2855  8963 21192 19488  7357  2023   614   119 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    5.00    5.00    5.45    6.00   10.00 
## 
## summary of quality measures:
##     support          confidence       coverage            lift      
##  Min.   :0.00100   Min.   :0.800   Min.   :0.00100   Min.   : 10.3  
##  1st Qu.:0.00104   1st Qu.:0.833   1st Qu.:0.00124   1st Qu.: 22.0  
##  Median :0.00120   Median :0.871   Median :0.00135   Median : 28.5  
##  Mean   :0.00135   Mean   :0.881   Mean   :0.00153   Mean   : 61.5  
##  3rd Qu.:0.00143   3rd Qu.:0.929   3rd Qu.:0.00162   3rd Qu.: 55.5  
##  Max.   :0.01533   Max.   :1.000   Max.   :0.01818   Max.   :457.5  
##      count      
##  Min.   : 26.0  
##  1st Qu.: 27.0  
##  Median : 31.0  
##  Mean   : 34.9  
##  3rd Qu.: 37.0  
##  Max.   :397.0  
## 
## mining info:
##          data ntransactions support confidence
##  Transactions         25900   0.001        0.8
##                                                                      call
##  apriori(data = Transactions, parameter = list(supp = 0.001, conf = 0.8))
# Inspect the ten rules with the highest confidence.
# head() rather than [1:10]: it returns at most ten rules and does not index
# past the end when fewer than ten rules were mined.
inspect(head(sort(Rules, by = "confidence"), n = 10))
##      lhs                                      rhs                          support confidence coverage   lift count
## [1]  {HERB MARKER CHIVES ,                                                                                         
##       IVORY GIANT GARDEN THERMOMETER}      => {HERB MARKER THYME}         0.001004          1 0.001004 138.50    26
## [2]  {HERB MARKER ROSEMARY,                                                                                        
##       IVORY GIANT GARDEN THERMOMETER}      => {HERB MARKER MINT}          0.001081          1 0.001081 134.90    28
## [3]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER THYME}                   => {HERB MARKER CHIVES }       0.001004          1 0.001004 155.09    26
## [4]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER MINT}                    => {HERB MARKER CHIVES }       0.001042          1 0.001042 155.09    27
## [5]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER ROSEMARY}                => {HERB MARKER CHIVES }       0.001081          1 0.001081 155.09    28
## [6]  {FELTCRAFT DOLL ROSIE,                                                                                        
##       FELTCRAFT GIRL NICOLE KIT}           => {FELTCRAFT GIRL AMELIE KIT} 0.001120          1 0.001120  99.23    29
## [7]  {BLUE POLKADOT CUP,                                                                                           
##       CHILDRENS CUTLERY RETROSPOT RED }    => {RED RETROSPOT CUP}         0.001004          1 0.001004  90.56    26
## [8]  {GARDENERS KNEELING PAD CUP OF TEA ,                                                                          
##       HERB MARKER THYME}                   => {HERB MARKER PARSLEY}       0.001158          1 0.001158 137.77    30
## [9]  {HERB MARKER PARSLEY,                                                                                         
##       JAM MAKING SET PRINTED}              => {HERB MARKER THYME}         0.001158          1 0.001158 138.50    30
## [10] {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER THYME}                   => {HERB MARKER PARSLEY}       0.001004          1 0.001004 137.77    26

6.2 Visualise Rules

# Load arulesViz, installing it first if it is missing.
# requireNamespace() checks for this one package directly instead of scanning
# the full installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)

# Show the ten rules with the highest confidence.
# head() rather than [1:10]: it returns at most ten rules and does not index
# past the end when fewer than ten rules were mined.
inspect(head(sort(Rules, by = "confidence"), n = 10))
##      lhs                                      rhs                          support confidence coverage   lift count
## [1]  {HERB MARKER CHIVES ,                                                                                         
##       IVORY GIANT GARDEN THERMOMETER}      => {HERB MARKER THYME}         0.001004          1 0.001004 138.50    26
## [2]  {HERB MARKER ROSEMARY,                                                                                        
##       IVORY GIANT GARDEN THERMOMETER}      => {HERB MARKER MINT}          0.001081          1 0.001081 134.90    28
## [3]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER THYME}                   => {HERB MARKER CHIVES }       0.001004          1 0.001004 155.09    26
## [4]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER MINT}                    => {HERB MARKER CHIVES }       0.001042          1 0.001042 155.09    27
## [5]  {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER ROSEMARY}                => {HERB MARKER CHIVES }       0.001081          1 0.001081 155.09    28
## [6]  {FELTCRAFT DOLL ROSIE,                                                                                        
##       FELTCRAFT GIRL NICOLE KIT}           => {FELTCRAFT GIRL AMELIE KIT} 0.001120          1 0.001120  99.23    29
## [7]  {BLUE POLKADOT CUP,                                                                                           
##       CHILDRENS CUTLERY RETROSPOT RED }    => {RED RETROSPOT CUP}         0.001004          1 0.001004  90.56    26
## [8]  {GARDENERS KNEELING PAD CUP OF TEA ,                                                                          
##       HERB MARKER THYME}                   => {HERB MARKER PARSLEY}       0.001158          1 0.001158 137.77    30
## [9]  {HERB MARKER PARSLEY,                                                                                         
##       JAM MAKING SET PRINTED}              => {HERB MARKER THYME}         0.001158          1 0.001158 138.50    30
## [10] {GARDENERS KNEELING PAD KEEP CALM ,                                                                           
##       HERB MARKER THYME}                   => {HERB MARKER PARSLEY}       0.001004          1 0.001004 137.77    26
# Plot Graphs
# plot(Rules)
plot(Rules, method = "grouped")

# Graph view. The former `control = list(type = "items")` was dropped: `type`
# is not among the control parameters accepted by the ggplot2 graph engine, so
# it was ignored and only triggered a dump of the available parameters.
plot(Rules, method = "graph")

# Support against confidence, shaded by lift
plot(
  Rules,
  method  = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift"
)

# plot(Rules, method = "matrix", measure = "lift")

7 Evaluation

7.1 Evaluate Model

# Evaluate the Rules
# Round every quality measure to three decimals (stored back on Rules), then
# list the ten rules with the highest lift
rounded_quality <- round(quality(Rules), digits = 3)
quality(Rules) <- rounded_quality
top_lift_rules <- head(sort(Rules, by = "lift"), n = 10)
inspect(top_lift_rules)
##      lhs                                  rhs                              support confidence coverage  lift count
## [1]  {PARTY PIZZA DISH GREEN POLKADOT} => {PARTY PIZZA DISH PINK POLKADOT}   0.001      0.812    0.001 457.5    26
## [2]  {ENAMEL PINK TEA CONTAINER}       => {ENAMEL PINK COFFEE CONTAINER}     0.001      0.825    0.002 410.9    33
## [3]  {CHILDRENS GARDEN GLOVES PINK,                                                                               
##       CHILDS GARDEN TROWEL BLUE }      => {CHILDRENS GARDEN GLOVES BLUE}     0.001      0.967    0.001 385.2    29
## [4]  {CHILDRENS GARDEN GLOVES PINK,                                                                               
##       CHILDS GARDEN TROWEL BLUE ,                                                                                 
##       CHILDS GARDEN TROWEL PINK}       => {CHILDRENS GARDEN GLOVES BLUE}     0.001      0.966    0.001 384.7    28
## [5]  {CHILDRENS GARDEN GLOVES PINK,                                                                               
##       CHILDS GARDEN FORK BLUE }        => {CHILDRENS GARDEN GLOVES BLUE}     0.001      0.963    0.001 383.7    26
## [6]  {CHILDS GARDEN FORK BLUE ,                                                                                   
##       CHILDS GARDEN FORK PINK,                                                                                    
##       CHILDS GARDEN SPADE BLUE}        => {CHILDS GARDEN SPADE PINK}         0.001      0.929    0.001 375.8    26
## [7]  {FRENCH BLUE METAL DOOR SIGN 6,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 7,                                                                              
##       FRENCH BLUE METAL DOOR SIGN No}  => {FRENCH BLUE METAL DOOR SIGN 9}    0.001      0.900    0.001 370.0    27
## [8]  {FRENCH BLUE METAL DOOR SIGN 0,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 3,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 5,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 7}   => {FRENCH BLUE METAL DOOR SIGN 9}    0.001      0.900    0.002 370.0    36
## [9]  {FRENCH BLUE METAL DOOR SIGN 0,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 2,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 3,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 5,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 7}   => {FRENCH BLUE METAL DOOR SIGN 9}    0.001      0.900    0.002 370.0    36
## [10] {FRENCH BLUE METAL DOOR SIGN 0,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 1,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 3,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 5,                                                                              
##       FRENCH BLUE METAL DOOR SIGN 7}   => {FRENCH BLUE METAL DOOR SIGN 9}    0.001      0.900    0.002 370.0    36
# Count the distinct invoice numbers present in OnlineRetail
invoice_ids <- unique(OnlineRetail$InvoiceNo)
num_transactions <- length(invoice_ids)
print(num_transactions)
## [1] 22190