# Loading necessary packages
## Markdown Update
# Install (only when missing) and attach every package this script relies on.
# requireNamespace() is the documented way to test whether a package is usable;
# scanning the installed.packages() matrix is slow and matches any cell of the
# matrix, not just package names.
for (pkg in c("rmarkdown", "readxl", "arules", "arulesViz")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}
rm(pkg)
# Global Settings
# Print numbers with 4 significant digits and penalise scientific notation so
# that fixed notation is used.
options(digits = 4, scipen = 999)
# Switch to the project folder only when it exists on this machine; otherwise
# keep the current working directory instead of aborting the whole script.
project_dir <- "~/AC UNI-ORG/AB SIM/GDBA/R"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found, keeping working directory: ", getwd())
}
# Load the Groceries data set by name
data(list = "Groceries")
# Preview the first six transactions
head(Groceries, n = 6L)
## transactions in sparse format with
## 6 transactions (rows) and
## 169 items (columns)
# Overall summary of the data set
summary(object = Groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55 46
## 17 18 19 20 21 22 23 24 26 27 28 29 32
## 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 2.00 3.00 4.41 6.00 32.00
##
## includes extended item information - examples:
## labels level2 level1
## 1 frankfurter sausage meat and sausage
## 2 sausage sausage meat and sausage
## 3 liver loaf sausage meat and sausage
# Keep a working copy of the data set under a generic name and print it
Transactions <- Groceries
Transactions
## transactions in sparse format with
## 9835 transactions (rows) and
## 169 items (columns)
# Collect the label of every item (product) in the data set
Products <- itemLabels(Transactions)
# Announce the product list, then print it
cat("\nShow list of items in transactions.\n\n", sep = "")
##
## Show list of items in transactions.
Products
## [1] "frankfurter" "sausage"
## [3] "liver loaf" "ham"
## [5] "meat" "finished products"
## [7] "organic sausage" "chicken"
## [9] "turkey" "pork"
## [11] "beef" "hamburger meat"
## [13] "fish" "citrus fruit"
## [15] "tropical fruit" "pip fruit"
## [17] "grapes" "berries"
## [19] "nuts/prunes" "root vegetables"
## [21] "onions" "herbs"
## [23] "other vegetables" "packaged fruit/vegetables"
## [25] "whole milk" "butter"
## [27] "curd" "dessert"
## [29] "butter milk" "yogurt"
## [31] "whipped/sour cream" "beverages"
## [33] "UHT-milk" "condensed milk"
## [35] "cream" "soft cheese"
## [37] "sliced cheese" "hard cheese"
## [39] "cream cheese " "processed cheese"
## [41] "spread cheese" "curd cheese"
## [43] "specialty cheese" "mayonnaise"
## [45] "salad dressing" "tidbits"
## [47] "frozen vegetables" "frozen fruits"
## [49] "frozen meals" "frozen fish"
## [51] "frozen chicken" "ice cream"
## [53] "frozen dessert" "frozen potato products"
## [55] "domestic eggs" "rolls/buns"
## [57] "white bread" "brown bread"
## [59] "pastry" "roll products "
## [61] "semi-finished bread" "zwieback"
## [63] "potato products" "flour"
## [65] "salt" "rice"
## [67] "pasta" "vinegar"
## [69] "oil" "margarine"
## [71] "specialty fat" "sugar"
## [73] "artif. sweetener" "honey"
## [75] "mustard" "ketchup"
## [77] "spices" "soups"
## [79] "ready soups" "Instant food products"
## [81] "sauces" "cereals"
## [83] "organic products" "baking powder"
## [85] "preservation products" "pudding powder"
## [87] "canned vegetables" "canned fruit"
## [89] "pickled vegetables" "specialty vegetables"
## [91] "jam" "sweet spreads"
## [93] "meat spreads" "canned fish"
## [95] "dog food" "cat food"
## [97] "pet care" "baby food"
## [99] "coffee" "instant coffee"
## [101] "tea" "cocoa drinks"
## [103] "bottled water" "soda"
## [105] "misc. beverages" "fruit/vegetable juice"
## [107] "syrup" "bottled beer"
## [109] "canned beer" "brandy"
## [111] "whisky" "liquor"
## [113] "rum" "liqueur"
## [115] "liquor (appetizer)" "white wine"
## [117] "red/blush wine" "prosecco"
## [119] "sparkling wine" "salty snack"
## [121] "popcorn" "nut snack"
## [123] "snack products" "long life bakery product"
## [125] "waffles" "cake bar"
## [127] "chewing gum" "chocolate"
## [129] "cooking chocolate" "specialty chocolate"
## [131] "specialty bar" "chocolate marshmallow"
## [133] "candy" "seasonal products"
## [135] "detergent" "softener"
## [137] "decalcifier" "dish cleaner"
## [139] "abrasive cleaner" "cleaner"
## [141] "toilet cleaner" "bathroom cleaner"
## [143] "hair spray" "dental care"
## [145] "male cosmetics" "make up remover"
## [147] "skin care" "female sanitary products"
## [149] "baby cosmetics" "soap"
## [151] "rubbing alcohol" "hygiene articles"
## [153] "napkins" "dishes"
## [155] "cookware" "kitchen utensil"
## [157] "cling film/bags" "kitchen towels"
## [159] "house keeping products" "candles"
## [161] "light bulbs" "sound storage medium"
## [163] "newspapers" "photo/film"
## [165] "pot plants" "flower soil/fertilizer"
## [167] "flower (seeds)" "shopping bags"
## [169] "bags"
# Absolute purchase count per product
ProductFrequencies <- itemFrequency(Transactions, type = "absolute")
# Tabulate the counts as a data frame (one row per product)
ProductFrequencies_df <- data.frame(
  Product = names(ProductFrequencies),
  Frequency = unname(ProductFrequencies)
)
# Reorder the rows so the most frequent product comes first
ProductFrequencies_df <- ProductFrequencies_df[
  order(-ProductFrequencies_df[["Frequency"]]),
]
# The table itself is not printed; uncomment the two lines below to view it
# cat("\nShow list of items in transactions.\n\n")
# ProductFrequencies_df
cat("\nShow list of items in transactions incl frequency.\n\n")
##
## Show list of items in transactions incl frequency.
# Relative frequency of every product, largest first
sort(itemFrequency(Transactions, type = "relative"), decreasing = TRUE)
## whole milk other vegetables rolls/buns
## 0.2555160 0.1934926 0.1839349
## soda yogurt bottled water
## 0.1743772 0.1395018 0.1105236
## root vegetables tropical fruit shopping bags
## 0.1089985 0.1049314 0.0985257
## sausage pastry citrus fruit
## 0.0939502 0.0889680 0.0827656
## bottled beer newspapers canned beer
## 0.0805287 0.0798170 0.0776817
## pip fruit fruit/vegetable juice whipped/sour cream
## 0.0756482 0.0722928 0.0716828
## brown bread domestic eggs frankfurter
## 0.0648704 0.0634469 0.0589731
## margarine coffee pork
## 0.0585663 0.0580580 0.0576512
## butter curd beef
## 0.0554143 0.0532791 0.0524657
## napkins chocolate frozen vegetables
## 0.0523640 0.0496187 0.0480935
## chicken white bread cream cheese
## 0.0429080 0.0420946 0.0396543
## waffles salty snack long life bakery product
## 0.0384342 0.0378241 0.0374174
## dessert sugar UHT-milk
## 0.0371124 0.0338587 0.0334520
## hamburger meat berries hygiene articles
## 0.0332486 0.0332486 0.0329436
## onions specialty chocolate candy
## 0.0310117 0.0304016 0.0298932
## frozen meals misc. beverages oil
## 0.0283681 0.0283681 0.0280630
## butter milk specialty bar ham
## 0.0279614 0.0273513 0.0260295
## beverages meat ice cream
## 0.0260295 0.0258261 0.0250127
## sliced cheese hard cheese cat food
## 0.0245043 0.0245043 0.0232842
## grapes chewing gum red/blush wine
## 0.0223691 0.0210473 0.0192171
## detergent white wine pickled vegetables
## 0.0192171 0.0190137 0.0178953
## semi-finished bread baking powder dishes
## 0.0176919 0.0176919 0.0175902
## flour pot plants soft cheese
## 0.0173869 0.0172852 0.0170819
## processed cheese herbs pasta
## 0.0165735 0.0162684 0.0150483
## canned fish seasonal products cake bar
## 0.0150483 0.0142349 0.0132181
## packaged fruit/vegetables mustard frozen fish
## 0.0130147 0.0119980 0.0116929
## cling film/bags spread cheese liquor
## 0.0113879 0.0111845 0.0110829
## frozen dessert salt canned vegetables
## 0.0107778 0.0107778 0.0107778
## dish cleaner flower (seeds) condensed milk
## 0.0104728 0.0103711 0.0102694
## roll products pet care photo/film
## 0.0102694 0.0094560 0.0092527
## mayonnaise sweet spreads chocolate marshmallow
## 0.0091510 0.0090493 0.0090493
## candles specialty cheese dog food
## 0.0089476 0.0085409 0.0085409
## frozen potato products house keeping products turkey
## 0.0084392 0.0083376 0.0081342
## Instant food products liquor (appetizer) rice
## 0.0080325 0.0079309 0.0076258
## instant coffee popcorn zwieback
## 0.0074225 0.0072191 0.0069141
## soups finished products vinegar
## 0.0068124 0.0065074 0.0065074
## female sanitary products kitchen towels dental care
## 0.0061007 0.0059990 0.0057956
## cereals sparkling wine sauces
## 0.0056940 0.0055923 0.0054906
## softener jam spices
## 0.0054906 0.0053889 0.0051856
## liver loaf curd cheese cleaner
## 0.0050839 0.0050839 0.0050839
## male cosmetics rum ketchup
## 0.0045755 0.0044738 0.0042705
## meat spreads brandy light bulbs
## 0.0042705 0.0041688 0.0041688
## tea specialty fat abrasive cleaner
## 0.0038638 0.0036604 0.0035587
## skin care nuts/prunes artif. sweetener
## 0.0035587 0.0033554 0.0032537
## canned fruit syrup nut snack
## 0.0032537 0.0032537 0.0031520
## snack products fish potato products
## 0.0030503 0.0029487 0.0028470
## bathroom cleaner cookware soap
## 0.0027453 0.0027453 0.0026436
## cooking chocolate tidbits pudding powder
## 0.0025419 0.0023386 0.0023386
## organic sausage cocoa drinks prosecco
## 0.0022369 0.0022369 0.0020336
## flower soil/fertilizer ready soups specialty vegetables
## 0.0019319 0.0018302 0.0017285
## organic products honey decalcifier
## 0.0016268 0.0015252 0.0015252
## cream frozen fruits hair spray
## 0.0013218 0.0012201 0.0011185
## rubbing alcohol liqueur salad dressing
## 0.0010168 0.0009151 0.0008134
## whisky make up remover toilet cleaner
## 0.0008134 0.0008134 0.0007117
## frozen chicken baby cosmetics kitchen utensil
## 0.0006101 0.0006101 0.0004067
## bags preservation products baby food
## 0.0004067 0.0002034 0.0001017
## sound storage medium
## 0.0001017
# Show Data
# Report how many baskets (transactions) the data set holds
cat("\nNumber of baskets:", dim(Groceries)[1L])
##
## Number of baskets: 9835
cat("\n\nThe first five baskets:\n")
##
##
## The first five baskets:
# List the items of baskets 1 to 5
inspect(Groceries[seq_len(5)])
## items
## [1] {citrus fruit,
## semi-finished bread,
## margarine,
## ready soups}
## [2] {tropical fruit,
## yogurt,
## coffee}
## [3] {whole milk}
## [4] {pip fruit,
## yogurt,
## cream cheese ,
## meat spreads}
## [5] {other vegetables,
## whole milk,
## condensed milk,
## long life bakery product}
# Make sure arules and arulesViz are installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)
# Show Most Frequent Items
# Mine frequent itemsets with a minimum support of 10% and at most 15 items
Frequency <- eclat(Groceries, parameter = list(support = 0.10, maxlen = 15))
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.1 1 15 frequent itemsets TRUE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 983
##
## create itemset ...
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating bit matrix ... [8 row(s), 9835 column(s)] done [0.00s].
## writing ... [8 set(s)] done [0.00s].
## Creating S4 object ... done [0.00s].
# Order the itemsets by support, highest first
Frequency <- sort(Frequency, decreasing = TRUE, by = "support")
cat("\nThe most frequent items, i.e., the items with the highest support are:\n")
##
## The most frequent items, i.e., the items with the highest support are:
# Print the sorted itemsets with their support and count
inspect(Frequency)
## items support count
## [1] {whole milk} 0.2555 2513
## [2] {other vegetables} 0.1935 1903
## [3] {rolls/buns} 0.1839 1809
## [4] {soda} 0.1744 1715
## [5] {yogurt} 0.1395 1372
## [6] {bottled water} 0.1105 1087
## [7] {root vegetables} 0.1090 1072
## [8] {tropical fruit} 0.1049 1032
# Plot Item Frequency
# Bar chart of the ten most frequent items, with a grid drawn on top
itemFrequencyPlot(
  Groceries,
  topN = 10,
  col = "lightblue",
  main = "Item Frequency Plot for 'Groceries' Market Basket",
  cex.main = 1.6,
  cex.names = 1.0
)
grid()
# Write data to working directory (currently disabled)
# write.csv(Frequency, file = "GroceriesFrequency.csv")
# Make sure arules is installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
# Show Mining Association Rules
# Minimum support 0.15%, minimum confidence 90%. maxlen is passed outside the
# parameter list; the recorded parameter specification below shows it is still
# applied (maxlen 5).
Rules <- apriori(Groceries, parameter = list(supp = 0.0015, conf = 0.90), maxlen = 5)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.0015 1
## maxlen target ext
## 5 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 14
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5
## done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Report how many rules were mined
cat("\n\nThis apriori function generates", length(Rules), "rules.\n\n")
##
##
## This apriori function generates 7 rules.
# Print the rule length distribution, quality measures and mining info
summary(Rules)
## set of 7 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5
## 1 5 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 4 4 4 4 5
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.00153 Min. :0.900 Min. :0.00163 Min. : 3.52
## 1st Qu.:0.00163 1st Qu.:0.902 1st Qu.:0.00168 1st Qu.: 3.60
## Median :0.00183 Median :0.905 Median :0.00203 Median : 3.67
## Mean :0.00176 Mean :0.926 Mean :0.00190 Mean : 4.89
## 3rd Qu.:0.00188 3rd Qu.:0.938 3rd Qu.:0.00208 3rd Qu.: 4.29
## Max. :0.00193 Max. :1.000 Max. :0.00214 Max. :11.23
## count
## Min. :15.0
## 1st Qu.:16.0
## Median :18.0
## Mean :17.3
## 3rd Qu.:18.5
## Max. :19.0
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.0015 0.9
## call
## apriori(data = Groceries, parameter = list(supp = 0.0015, conf = 0.9), maxlen = 5)
# Inspect Rules
# Print every rule with its lhs, rhs and quality measures
inspect(Rules)
## lhs rhs support confidence coverage lift count
## [1] {liquor,
## red/blush wine} => {bottled beer} 0.001932 0.9048 0.002135 11.235 19
## [2] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001729 1.0000 0.001729 3.914 17
## [3] {other vegetables,
## cream cheese ,
## sugar} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [4] {pip fruit,
## butter,
## whipped/sour cream} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
## [5] {tropical fruit,
## whipped/sour cream,
## domestic eggs} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
## [6] {tropical fruit,
## whipped/sour cream,
## fruit/vegetable juice} => {other vegetables} 0.001932 0.9048 0.002135 4.676 19
## [7] {sausage,
## tropical fruit,
## root vegetables,
## yogurt} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
# Mine the rules again without the maxlen argument (same support and
# confidence thresholds); this rule set is the one exported to CSV below
RulesFull <- apriori(
  Groceries,
  parameter = list(support = 0.0015, confidence = 0.90)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.0015 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 14
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Flatten the rules to a data frame and save it as CSV in the working directory
Rules_df <- as(object = RulesFull, Class = "data.frame")
utils::write.csv(
  x = Rules_df,
  file = "GroceriesRules.csv",
  row.names = FALSE
)
# Make sure arules is installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
# Show Transactions
# Reload the data set and refresh the working copy
data("Groceries")
Transactions <- Groceries
# Generate association rules (minimum support 0.15%, minimum confidence 90%)
Rules <- apriori(Groceries, parameter = list(supp = 0.0015, conf = 0.90))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.0015 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 14
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [153 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
#inspect(Rules)
# Filter for the specific rule {liquor, red/blush wine} => {bottled beer}:
# the lhs must contain both items and nothing else, the rhs must be bottled beer
SpecificRule <- subset(Rules,
  lhs %ain% c("liquor", "red/blush wine") &
    size(lhs) == 2 &
    rhs %ain% c("bottled beer")
)
# Labels used in the messages below; they must describe the rule selected above
lhs_label <- "{liquor, red/blush wine}"
rule_label <- paste(lhs_label, "=> {bottled beer}")
# Inspect Details
cat("\nThe following lists the details of discovered association rules. These definitions apply:\n")
##
## The following lists the details of discovered association rules. These definitions apply:
cat("lhs: Items on the left side of the rule: Antecedents.\n")
## lhs: Items on the left side of the rule: Antecedents.
cat("rhs: Items on the right side of the rule: Consequents.\n")
## rhs: Items on the right side of the rule: Consequents.
cat("Example:\n Antecedent (lhs): liquor and red/blush wine.\n")
## Example:
## Antecedent (lhs): liquor and red/blush wine.
cat(" Consequent (rhs): bottled beer.\n")
## Consequent (rhs): bottled beer.
cat("\nCoverage: The proportion of transactions that contain lhs, i.e., the antecedent.\n")
##
## Coverage: The proportion of transactions that contain lhs, i.e., the antecedent.
# Each quality measure is read from the selected rule and printed; coverage,
# support and confidence are shown as percentages
coverage <- quality(SpecificRule)$coverage
cat(paste0("Coverage for ", lhs_label, ":"), coverage * 100, "%\n")
## Coverage for {liquor, red/blush wine}: 0.2135 %
cat("\nSupport: Fraction of transactions containing both lhs and rhs.\n")
##
## Support: Fraction of transactions containing both lhs and rhs.
support <- quality(SpecificRule)$support
cat(paste0("Support for ", rule_label, ":"), support * 100, "%\n")
## Support for {liquor, red/blush wine} => {bottled beer}: 0.1932 %
cat("\nConfidence: Fraction of transactions with lhs that also have rhs, i.e., antecedent is true and consequent is true.\n")
##
## Confidence: Fraction of transactions with lhs that also have rhs, i.e., antecedent is true and consequent is true.
confidence <- quality(SpecificRule)$confidence
cat(paste0("Confidence for ", rule_label, ":"), confidence * 100, "%\n")
## Confidence for {liquor, red/blush wine} => {bottled beer}: 90.48 %
cat("\nLift: Measure of how much more likely rhs is given lhs compared to if they were independent.\n")
##
## Lift: Measure of how much more likely rhs is given lhs compared to if they were independent.
lift <- quality(SpecificRule)$lift
cat(paste0("Lift for ", rule_label, ":"), lift, "\n")
## Lift for {liquor, red/blush wine} => {bottled beer}: 11.24
cat("\nCount: The number of times this rule applies.\n")
##
## Count: The number of times this rule applies.
count <- quality(SpecificRule)$count
cat(paste0("Count for ", rule_label, ":"), count, "\n\n\n")
## Count for {liquor, red/blush wine} => {bottled beer}: 19
# Save the selected rule as CSV in the working directory (this reuses, and so
# overwrites, Rules_df), then print the rule
Rules_df <- as(object = SpecificRule, Class = "data.frame")
utils::write.csv(
  x = Rules_df,
  file = "SpecificGroceriesRules.csv",
  row.names = FALSE
)
inspect(SpecificRule)
## lhs rhs support confidence coverage
## [1] {liquor, red/blush wine} => {bottled beer} 0.001932 0.9048 0.002135
## lift count
## [1] 11.24 19
# Make sure arulesViz is installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)
# Show Rules
# Default plot of the rule set
plot(Rules)
# Grouped view of the same rules
plot(Rules, method = "grouped")
# Graph view. No control list is passed: the graph method does not know a
# `type` control and only answered it by listing its available control
# parameters (layout, circular, ggraphdots, edges, nodes, nodetext, colors,
# engine, max, verbose).
plot(Rules, method = "graph")
# Make sure arulesViz is installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)
# Show Rules
# Mine rules whose lhs is restricted to "yogurt"; every other item may only
# appear on the rhs (minimum support 0.1%, minimum confidence 20%)
RulesYoghurt <- apriori(
  Groceries,
  parameter = list(supp = 0.001, conf = 0.2),
  appearance = list(default = "rhs", lhs = "yogurt")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [5 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Graph of the yogurt rules using the confidence measure and lift shading
plot(
  RulesYoghurt,
  method = "graph",
  measure = "confidence",
  shading = "lift"
)
# Show Rules Interactive
# Same mining call as before, repeated ahead of the interactive plot
RulesYoghurt <- apriori(
  Groceries,
  parameter = list(supp = 0.001, conf = 0.2),
  appearance = list(default = "rhs", lhs = "yogurt")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [5 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Same graph of the yogurt rules, rendered with the html engine
plot(RulesYoghurt, method = "graph", measure = "confidence", shading = "lift", engine = "html")
# Make sure arulesViz is installed and attached.
# requireNamespace() replaces the slow scan of the installed.packages() matrix.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)
# Show Rules Interactive
# Mine rules whose lhs is restricted to "whole milk"; every other item may only
# appear on the rhs (minimum support 0.1%, minimum confidence 20%)
RulesMilk <- apriori(
  Groceries,
  parameter = list(supp = 0.001, conf = 0.2),
  appearance = list(default = "rhs", lhs = "whole milk")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [3 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Graph of the whole-milk rules, rendered with the html engine
plot(
  RulesMilk,
  method = "graph",
  measure = "confidence",
  shading = "lift",
  engine = "html"
)
# Keep only the rules whose lift exceeds 3 and print them
rules_lift <- subset(Rules, subset = lift > 3)
inspect(rules_lift)
## lhs rhs support confidence coverage lift count
## [1] {liquor,
## red/blush wine} => {bottled beer} 0.001932 0.9048 0.002135 11.235 19
## [2] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001729 1.0000 0.001729 3.914 17
## [3] {other vegetables,
## cream cheese ,
## sugar} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [4] {pip fruit,
## butter,
## whipped/sour cream} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
## [5] {tropical fruit,
## whipped/sour cream,
## domestic eggs} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
## [6] {tropical fruit,
## whipped/sour cream,
## fruit/vegetable juice} => {other vegetables} 0.001932 0.9048 0.002135 4.676 19
## [7] {sausage,
## tropical fruit,
## root vegetables,
## yogurt} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
# Sort Rules by Lift
rules_by_lift <- sort(Rules, by = "lift", decreasing = TRUE)
# head() stops at the last rule, so this also works when fewer than six rules
# were mined, instead of indexing past the end as rules_by_lift[1:6] does.
six_rules_highest_lift <- head(rules_by_lift, n = 6)
inspect(six_rules_highest_lift)
## lhs rhs support confidence coverage lift count
## [1] {liquor,
## red/blush wine} => {bottled beer} 0.001932 0.9048 0.002135 11.235 19
## [2] {tropical fruit,
## whipped/sour cream,
## fruit/vegetable juice} => {other vegetables} 0.001932 0.9048 0.002135 4.676 19
## [3] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001729 1.0000 0.001729 3.914 17
## [4] {other vegetables,
## cream cheese ,
## sugar} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [5] {sausage,
## tropical fruit,
## root vegetables,
## yogurt} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [6] {pip fruit,
## butter,
## whipped/sour cream} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
# Sort Rules by Confidence
rules_by_confidence <- sort(Rules, by = "confidence", decreasing = TRUE)
# head() stops at the last rule, so this also works when fewer than six rules
# were mined, instead of indexing past the end as rules_by_confidence[1:6] does.
six_rules_highest_confidence <- head(rules_by_confidence, n = 6)
inspect(six_rules_highest_confidence)
## lhs rhs support confidence coverage lift count
## [1] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001729 1.0000 0.001729 3.914 17
## [2] {other vegetables,
## cream cheese ,
## sugar} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [3] {sausage,
## tropical fruit,
## root vegetables,
## yogurt} => {whole milk} 0.001525 0.9375 0.001627 3.669 15
## [4] {liquor,
## red/blush wine} => {bottled beer} 0.001932 0.9048 0.002135 11.235 19
## [5] {tropical fruit,
## whipped/sour cream,
## fruit/vegetable juice} => {other vegetables} 0.001932 0.9048 0.002135 4.676 19
## [6] {pip fruit,
## butter,
## whipped/sour cream} => {whole milk} 0.001830 0.9000 0.002034 3.522 18
# Mine again with different thresholds (minimum support 0.5%, minimum
# confidence 10%); this overwrites the Rules object used above
Rules <- apriori(Transactions, parameter = list(support = 0.005, confidence = 0.1))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.005 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 49
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [1582 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# inspect(Rules)
# Narrow the rule set before plotting it as a graph
library(arulesViz)
# Keep rules with support of at least 1% and confidence of at least 50%
filtered_rules <- subset(
  Rules,
  subset = support >= 0.01 & confidence >= 0.5
)
inspect(filtered_rules)
## lhs rhs support
## [1] {curd, yogurt} => {whole milk} 0.01007
## [2] {other vegetables, butter} => {whole milk} 0.01149
## [3] {other vegetables, domestic eggs} => {whole milk} 0.01230
## [4] {yogurt, whipped/sour cream} => {whole milk} 0.01088
## [5] {other vegetables, whipped/sour cream} => {whole milk} 0.01464
## [6] {pip fruit, other vegetables} => {whole milk} 0.01352
## [7] {citrus fruit, root vegetables} => {other vegetables} 0.01037
## [8] {tropical fruit, root vegetables} => {other vegetables} 0.01230
## [9] {tropical fruit, root vegetables} => {whole milk} 0.01200
## [10] {tropical fruit, yogurt} => {whole milk} 0.01515
## [11] {root vegetables, yogurt} => {other vegetables} 0.01291
## [12] {root vegetables, yogurt} => {whole milk} 0.01454
## [13] {root vegetables, rolls/buns} => {other vegetables} 0.01220
## [14] {root vegetables, rolls/buns} => {whole milk} 0.01271
## [15] {other vegetables, yogurt} => {whole milk} 0.02227
## confidence coverage lift count
## [1] 0.5824 0.01729 2.279 99
## [2] 0.5736 0.02003 2.245 113
## [3] 0.5525 0.02227 2.162 121
## [4] 0.5245 0.02074 2.053 107
## [5] 0.5070 0.02888 1.984 144
## [6] 0.5175 0.02613 2.025 133
## [7] 0.5862 0.01769 3.030 102
## [8] 0.5845 0.02105 3.021 121
## [9] 0.5700 0.02105 2.231 118
## [10] 0.5174 0.02928 2.025 149
## [11] 0.5000 0.02583 2.584 127
## [12] 0.5630 0.02583 2.203 143
## [13] 0.5021 0.02430 2.595 120
## [14] 0.5230 0.02430 2.047 125
## [15] 0.5129 0.04342 2.007 219
# Graph view of the filtered rules. No control list is passed: the graph method
# does not know a `type` control and only answered it by listing its available
# control parameters (layout, circular, ggraphdots, edges, nodes, nodetext,
# colors, engine, max, verbose).
plot(filtered_rules, method = "graph")
# Install (only when missing) and attach the packages used for the online
# retail analysis. requireNamespace() is the documented way to test whether a
# package is usable; scanning the installed.packages() matrix is slow.
for (pkg in c("readxl", "arules", "arulesViz")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}
rm(pkg)
# Download Training Data from URL
## InvoiceNo StockCode Description Quantity
## 1 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6
## 2 536365 71053 WHITE METAL LANTERN 6
## 3 536365 84406B CREAM CUPID HEARTS COAT HANGER 8
## 4 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6
## 5 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6
## 6 536365 22752 SET 7 BABUSHKA NESTING BOXES 2
## InvoiceDate UnitPrice CustomerID Country
## 1 1 12 2010 08:26 2.55 17850 United Kingdom
## 2 1 12 2010 08:26 3.39 17850 United Kingdom
## 3 1 12 2010 08:26 2.75 17850 United Kingdom
## 4 1 12 2010 08:26 3.39 17850 United Kingdom
## 5 1 12 2010 08:26 3.39 17850 United Kingdom
## 6 1 12 2010 08:26 7.65 17850 United Kingdom
# Convert the data to a transaction format
library(dplyr)
library(tidyr)
# NOTE(review): no visible statement creates OnlineRetail; the download/read
# step announced above appears to be missing. Confirm it is loaded beforehand.
# Remove rows with missing values, then add a TransactionID column built from
# InvoiceNo and CustomerID (joined by "_"); the two source columns are kept
OnlineRetail <- OnlineRetail %>%
  na.omit() %>%
  unite("TransactionID", InvoiceNo, CustomerID, sep = "_", remove = FALSE)
#OnlineRetail$TransactionID <- as.factor(OnlineRetail$TransactionID)
# Show the resulting column types
str(OnlineRetail)
## 'data.frame': 406829 obs. of 9 variables:
## $ TransactionID: chr "536365_17850" "536365_17850" "536365_17850" "536365_17850" ...
## $ InvoiceNo : Factor w/ 25900 levels "536365","536366",..: 1 1 1 1 1 1 1 2 2 3 ...
## $ StockCode : Factor w/ 4070 levels "10002","10080",..: 3538 2795 3045 2986 2985 1663 801 1548 1547 3306 ...
## $ Description : Factor w/ 4224 levels "Dotcomgiftshop Gift Voucher \x9c20.00",..: 4025 4033 936 1957 2978 3233 1571 1696 1693 263 ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ InvoiceDate : Factor w/ 23260 levels "1 02 2011 08:23",..: 607 607 607 607 607 607 607 608 608 609 ...
## $ UnitPrice : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : Factor w/ 38 levels "Australia","Austria",..: 36 36 36 36 36 36 36 36 36 36 ...
# Build one basket of item descriptions per invoice and coerce the baskets to
# an arules "transactions" object.
# drop = TRUE discards InvoiceNo factor levels that have no rows left (for
# example after na.omit()). Without it split() returns an empty basket for
# every unused level, which inflates the transaction count and so deflates
# every support value.
# unique() removes items repeated within one invoice, since a transaction is a
# set of items.
# NOTE: the apriori output recorded below (25900 transactions versus 22190
# distinct invoices) was produced before this change.
Transactions <- as(
  lapply(
    split(OnlineRetail$Description, OnlineRetail$InvoiceNo, drop = TRUE),
    unique
  ),
  "transactions"
)
# Load arules, installing it first if it is missing.
# requireNamespace() tests for the package itself instead of searching every
# cell of the installed.packages() matrix for the string "arules".
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)
# Mine association rules with apriori, using a minimum support of 0.001 and a
# minimum confidence of 0.8. The parameter names are written out in full
# instead of relying on partial matching of `supp` / `conf`.
Rules <- apriori(
  Transactions,
  parameter = list(support = 0.001, confidence = 0.8)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 25
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3896 item(s), 25900 transaction(s)] done [0.10s].
## sorting and recoding items ... [2355 item(s)] done [0.01s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## done [0.42s].
## writing ... [62672 rule(s)] done [0.05s].
## creating S4 object ... done [0.02s].
# Print an overview of the mined rule set (rule count, rule lengths and the
# distribution of the quality measures)
summary(Rules)
## set of 62672 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6 7 8 9 10
## 61 2855 8963 21192 19488 7357 2023 614 119
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 5.00 5.00 5.45 6.00 10.00
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.00100 Min. :0.800 Min. :0.00100 Min. : 10.3
## 1st Qu.:0.00104 1st Qu.:0.833 1st Qu.:0.00124 1st Qu.: 22.0
## Median :0.00120 Median :0.871 Median :0.00135 Median : 28.5
## Mean :0.00135 Mean :0.881 Mean :0.00153 Mean : 61.5
## 3rd Qu.:0.00143 3rd Qu.:0.929 3rd Qu.:0.00162 3rd Qu.: 55.5
## Max. :0.01533 Max. :1.000 Max. :0.01818 Max. :457.5
## count
## Min. : 26.0
## 1st Qu.: 27.0
## Median : 31.0
## Mean : 34.9
## 3rd Qu.: 37.0
## Max. :397.0
##
## mining info:
## data ntransactions support confidence
## Transactions 25900 0.001 0.8
## call
## apriori(data = Transactions, parameter = list(supp = 0.001, conf = 0.8))
# Inspect the ten rules with the highest confidence.
# head() replaces `[1:10]` so the call still works when fewer than ten rules
# were mined; it also matches the head()-based lift inspection further down.
inspect(head(sort(Rules, by = "confidence"), 10))
## lhs rhs support confidence coverage lift count
## [1] {HERB MARKER CHIVES ,
## IVORY GIANT GARDEN THERMOMETER} => {HERB MARKER THYME} 0.001004 1 0.001004 138.50 26
## [2] {HERB MARKER ROSEMARY,
## IVORY GIANT GARDEN THERMOMETER} => {HERB MARKER MINT} 0.001081 1 0.001081 134.90 28
## [3] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER THYME} => {HERB MARKER CHIVES } 0.001004 1 0.001004 155.09 26
## [4] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER MINT} => {HERB MARKER CHIVES } 0.001042 1 0.001042 155.09 27
## [5] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER ROSEMARY} => {HERB MARKER CHIVES } 0.001081 1 0.001081 155.09 28
## [6] {FELTCRAFT DOLL ROSIE,
## FELTCRAFT GIRL NICOLE KIT} => {FELTCRAFT GIRL AMELIE KIT} 0.001120 1 0.001120 99.23 29
## [7] {BLUE POLKADOT CUP,
## CHILDRENS CUTLERY RETROSPOT RED } => {RED RETROSPOT CUP} 0.001004 1 0.001004 90.56 26
## [8] {GARDENERS KNEELING PAD CUP OF TEA ,
## HERB MARKER THYME} => {HERB MARKER PARSLEY} 0.001158 1 0.001158 137.77 30
## [9] {HERB MARKER PARSLEY,
## JAM MAKING SET PRINTED} => {HERB MARKER THYME} 0.001158 1 0.001158 138.50 30
## [10] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER THYME} => {HERB MARKER PARSLEY} 0.001004 1 0.001004 137.77 26
# Load arulesViz, installing it first if it is missing.
# requireNamespace() tests for the package itself instead of searching every
# cell of the installed.packages() matrix for the string "arulesViz".
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arulesViz)
# Show the ten rules with the highest confidence.
# head() replaces `[1:10]` so the call does not fail when fewer than ten rules
# are available.
inspect(head(sort(Rules, by = "confidence"), 10))
## lhs rhs support confidence coverage lift count
## [1] {HERB MARKER CHIVES ,
## IVORY GIANT GARDEN THERMOMETER} => {HERB MARKER THYME} 0.001004 1 0.001004 138.50 26
## [2] {HERB MARKER ROSEMARY,
## IVORY GIANT GARDEN THERMOMETER} => {HERB MARKER MINT} 0.001081 1 0.001081 134.90 28
## [3] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER THYME} => {HERB MARKER CHIVES } 0.001004 1 0.001004 155.09 26
## [4] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER MINT} => {HERB MARKER CHIVES } 0.001042 1 0.001042 155.09 27
## [5] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER ROSEMARY} => {HERB MARKER CHIVES } 0.001081 1 0.001081 155.09 28
## [6] {FELTCRAFT DOLL ROSIE,
## FELTCRAFT GIRL NICOLE KIT} => {FELTCRAFT GIRL AMELIE KIT} 0.001120 1 0.001120 99.23 29
## [7] {BLUE POLKADOT CUP,
## CHILDRENS CUTLERY RETROSPOT RED } => {RED RETROSPOT CUP} 0.001004 1 0.001004 90.56 26
## [8] {GARDENERS KNEELING PAD CUP OF TEA ,
## HERB MARKER THYME} => {HERB MARKER PARSLEY} 0.001158 1 0.001158 137.77 30
## [9] {HERB MARKER PARSLEY,
## JAM MAKING SET PRINTED} => {HERB MARKER THYME} 0.001158 1 0.001158 138.50 30
## [10] {GARDENERS KNEELING PAD KEEP CALM ,
## HERB MARKER THYME} => {HERB MARKER PARSLEY} 0.001004 1 0.001004 137.77 26
# Plot Graphs
# plot(Rules)
# Grouped view of the rule set
plot(Rules, method = "grouped")
# Network graph of the rules.
# The former `control = list(type = "items")` argument was dropped: the output
# recorded below is arulesViz answering it with its list of available control
# parameters, and `type` is not one of them, so it appears to have had no
# effect on the plot.
plot(Rules, method = "graph")
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
# Scatter plot of the rules on the support and confidence measures, with the
# points shaded by lift
plot(Rules, method = "scatterplot", measure = c("support", "confidence"), shading = "lift")
# plot(Rules, method = "matrix", measure = "lift")
# Evaluate the rules: round the stored quality measures to three decimals,
# then list the ten rules with the highest lift
quality(Rules) <- round(quality(Rules), digits = 3)
Rules %>%
  sort(by = "lift") %>%
  head(10) %>%
  inspect()
## lhs rhs support confidence coverage lift count
## [1] {PARTY PIZZA DISH GREEN POLKADOT} => {PARTY PIZZA DISH PINK POLKADOT} 0.001 0.812 0.001 457.5 26
## [2] {ENAMEL PINK TEA CONTAINER} => {ENAMEL PINK COFFEE CONTAINER} 0.001 0.825 0.002 410.9 33
## [3] {CHILDRENS GARDEN GLOVES PINK,
## CHILDS GARDEN TROWEL BLUE } => {CHILDRENS GARDEN GLOVES BLUE} 0.001 0.967 0.001 385.2 29
## [4] {CHILDRENS GARDEN GLOVES PINK,
## CHILDS GARDEN TROWEL BLUE ,
## CHILDS GARDEN TROWEL PINK} => {CHILDRENS GARDEN GLOVES BLUE} 0.001 0.966 0.001 384.7 28
## [5] {CHILDRENS GARDEN GLOVES PINK,
## CHILDS GARDEN FORK BLUE } => {CHILDRENS GARDEN GLOVES BLUE} 0.001 0.963 0.001 383.7 26
## [6] {CHILDS GARDEN FORK BLUE ,
## CHILDS GARDEN FORK PINK,
## CHILDS GARDEN SPADE BLUE} => {CHILDS GARDEN SPADE PINK} 0.001 0.929 0.001 375.8 26
## [7] {FRENCH BLUE METAL DOOR SIGN 6,
## FRENCH BLUE METAL DOOR SIGN 7,
## FRENCH BLUE METAL DOOR SIGN No} => {FRENCH BLUE METAL DOOR SIGN 9} 0.001 0.900 0.001 370.0 27
## [8] {FRENCH BLUE METAL DOOR SIGN 0,
## FRENCH BLUE METAL DOOR SIGN 3,
## FRENCH BLUE METAL DOOR SIGN 5,
## FRENCH BLUE METAL DOOR SIGN 7} => {FRENCH BLUE METAL DOOR SIGN 9} 0.001 0.900 0.002 370.0 36
## [9] {FRENCH BLUE METAL DOOR SIGN 0,
## FRENCH BLUE METAL DOOR SIGN 2,
## FRENCH BLUE METAL DOOR SIGN 3,
## FRENCH BLUE METAL DOOR SIGN 5,
## FRENCH BLUE METAL DOOR SIGN 7} => {FRENCH BLUE METAL DOOR SIGN 9} 0.001 0.900 0.002 370.0 36
## [10] {FRENCH BLUE METAL DOOR SIGN 0,
## FRENCH BLUE METAL DOOR SIGN 1,
## FRENCH BLUE METAL DOOR SIGN 3,
## FRENCH BLUE METAL DOOR SIGN 5,
## FRENCH BLUE METAL DOOR SIGN 7} => {FRENCH BLUE METAL DOOR SIGN 9} 0.001 0.900 0.002 370.0 36
# Count the distinct invoice numbers present in OnlineRetail
num_transactions <- sum(!duplicated(OnlineRetail$InvoiceNo))
num_transactions
## [1] 22190