# Start from an empty global environment so only the objects loaded below
# are in play.
# NOTE(review): wiping the workspace inside a script is generally discouraged;
# kept because later code may rely on a clean session -- confirm before removing.
rm(list = ls(all.names = TRUE))
pacman::p_load(ggplot2, dplyr, heatmaply)

# Restore the saved objects; A0, X0 and Z0 are the ones referenced below.
load("data/tf0.rdata")

# Row counts of the three loaded tables. vapply() pins the result to a named
# integer vector and fails loudly if any object has no row dimension.
vapply(list(cust = A0, tid = X0, items = Z0), nrow, integer(1))
## cust tid items
## 32241 119328 817182
# Packages for the association-rule steps further down (arules supplies the
# "transactions" class, apriori(), and inspect()).
pacman::p_load(
  Matrix,
  arules,
  arulesViz
)
## [1] 119422
## [1] 23789
# Rank products by how many item rows they appear in (most frequent first).
p <- count(Z0, prod, sort = TRUE)

# Keep the 2000 most frequent products. head() returns fewer entries when the
# data has fewer than 2000 distinct products, whereas `[1:2000]` would pad the
# vector with NA.
pk <- head(p$prod, 2000)

# Restrict the item table to those products.
Z <- filter(Z0, prod %in% pk)

# One basket per transaction id: split the product column by tid and coerce the
# resulting list to an arules "transactions" object. `[[` always extracts a
# plain vector, regardless of whether Z is a data.frame or a tibble.
tr <- as(split(Z[["prod"]], Z[["tid"]]), "transactions")
tr
## transactions in sparse format with
## 107797 transactions (rows) and
## 2000 items (columns)
# Mine association rules from the baskets in `tr`: minimum support 0.0002 and
# minimum confidence 0.5. Parameter names are spelled out in full instead of
# relying on partial matching of `supp` / `conf`.
rules <- apriori(
  tr,
  parameter = list(support = 0.0002, confidence = 0.5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 2e-04 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 21
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[2000 item(s), 107797 transaction(s)] done [0.13s].
## sorting and recoding items ... [2000 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 done [0.05s].
## writing ... [1001 rule(s)] done [0.01s].
## creating S4 object ... done [0.01s].
## set of 1001 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 85 558 289 64 5
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 3.000 3.347 4.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.0002041 Min. :0.5000 Min. :0.0002319 Min. : 6.644
## 1st Qu.:0.0002319 1st Qu.:0.5571 1st Qu.:0.0003711 1st Qu.: 60.475
## Median :0.0002969 Median :0.6364 Median :0.0004824 Median : 91.249
## Mean :0.0005192 Mean :0.6517 Mean :0.0008293 Mean : 210.995
## 3rd Qu.:0.0005009 3rd Qu.:0.7358 3rd Qu.:0.0007885 3rd Qu.: 281.700
## Max. :0.0064937 Max. :0.9655 Max. :0.0096478 Max. :1131.303
## count
## Min. : 22.00
## 1st Qu.: 25.00
## Median : 32.00
## Mean : 55.97
## 3rd Qu.: 54.00
## Max. :700.00
##
## mining info:
## data ntransactions support confidence
## tr 107797 2e-04 0.5
# Narrow the mined rules to those with lift above 100 that were also observed
# in more than 100 transactions, then print the surviving rules.
rx <- subset(
  rules,
  subset = count > 100 & lift > 100
)
inspect(rx)
## lhs rhs support confidence
## [1] {4710030346110} => {4710030346097} 0.0009462230 0.5513514
## [2] {4710030346110} => {4710030346103} 0.0011132035 0.6486486
## [3] {4710030346110} => {4710030346059} 0.0010575433 0.6162162
## [4] {4716114000312} => {4716114000329} 0.0014100578 0.5527273
## [5] {4716114000329} => {4716114000312} 0.0014100578 0.5333333
## [6] {4710030346097} => {4710030346103} 0.0010946501 0.5244444
## [7] {4710030346097} => {4710030346059} 0.0012152472 0.5822222
## [8] {4719859015061} => {4713080626225} 0.0010575433 0.5066667
## [9] {4713080626225} => {4719859015061} 0.0010575433 0.5044248
## [10] {4711524000617} => {4711524000419} 0.0009462230 0.5454545
## [11] {4710030346103} => {4710030346059} 0.0015584849 0.6536965
## [12] {4710030346059} => {4710030346103} 0.0015584849 0.5121951
## [13] {4710008241119} => {4710008241218} 0.0009462230 0.5204082
## [14] {4711524000457} => {4711524000419} 0.0009554997 0.5049020
## [15] {4711524000457} => {4711524000396} 0.0010111599 0.5343137
## [16] {0076150430530} => {0076150215281} 0.0010853734 0.5176991
## [17] {4719090701051} => {4719090790000} 0.0016326985 0.5770492
## [18] {4711524000471} => {4711524000396} 0.0010853734 0.5109170
## [19] {4711524000495} => {4711524000396} 0.0011503103 0.5000000
## [20] {4711524000419} => {4711524000396} 0.0015306548 0.6790123
## [21] {4710321861209} => {4710321861186} 0.0020779799 0.6086957
## [22] {4710321871260} => {4710321861186} 0.0018553392 0.5405405
## [23] {4711524000907} => {4711524000891} 0.0014378879 0.5636364
## [24] {4711524000907} => {4711524001041} 0.0016605286 0.6509091
## [25] {0719859796124} => {0719859796117} 0.0023284507 0.6952909
## [26] {4719090790017} => {4719090790000} 0.0032561203 0.8087558
## [27] {4719090790000} => {4719090790017} 0.0032561203 0.6212389
## [28] {4711524000891} => {4711524001041} 0.0019573829 0.5994318
## [29] {4711524001041} => {4711524000891} 0.0019573829 0.5275000
## [30] {4719090701051,4719090790017} => {4719090790000} 0.0010297133 0.8473282
## [31] {4719090701051,4719090790000} => {4719090790017} 0.0010297133 0.6306818
## [32] {4710321861209,4710321871260} => {4710321861186} 0.0010204366 0.6358382
## [33] {4710321861186,4710321871260} => {4710321861209} 0.0010204366 0.5500000
## [34] {4711524000891,4711524000907} => {4711524001041} 0.0011039268 0.7677419
## [35] {4711524000907,4711524001041} => {4711524000891} 0.0011039268 0.6648045
## [36] {4711524000891,4711524001041} => {4711524000907} 0.0011039268 0.5639810
## coverage lift count
## [1] 0.001716189 264.1512 102
## [2] 0.001716189 272.0715 120
## [3] 0.001716189 202.5191 114
## [4] 0.002551091 209.0608 152
## [5] 0.002643858 209.0608 152
## [6] 0.002087257 219.9749 118
## [7] 0.002087257 191.3470 131
## [8] 0.002087257 241.6688 114
## [9] 0.002096533 241.6688 114
## [10] 0.001734742 241.9686 102
## [11] 0.002384111 214.8370 168
## [12] 0.003042756 214.8370 168
## [13] 0.001818232 177.5267 102
## [14] 0.001892446 223.9791 103
## [15] 0.001892446 155.6687 109
## [16] 0.002096533 169.1103 117
## [17] 0.002829392 110.0959 176
## [18] 0.002124363 148.8522 117
## [19] 0.002300621 145.6716 124
## [20] 0.002254237 197.8257 165
## [21] 0.003413824 124.5077 224
## [22] 0.003432378 110.5667 200
## [23] 0.002551091 172.6088 155
## [24] 0.002551091 175.4151 179
## [25] 0.003348887 132.8905 251
## [26] 0.004026086 154.3034 351
## [27] 0.005241333 154.3034 351
## [28] 0.003265397 161.5424 211
## [29] 0.003710678 161.5424 211
## [30] 0.001215247 161.6627 111
## [31] 0.001632698 156.6489 111
## [32] 0.001604868 130.0597 110
## [33] 0.001855339 161.1096 110
## [34] 0.001437888 206.9007 119
## [35] 0.001660529 203.5907 119
## [36] 0.001957383 221.0744 119