# Start from an empty workspace so only the objects loaded below are present.
# NOTE(review): clearing the global environment is intrusive when this script
# is sourced from an existing session; kept to preserve the original behavior.
rm(list = ls(all.names = TRUE))
pacman::p_load(ggplot2, dplyr, heatmaply)
# Restore the saved objects; A0, X0 and Z0 are the ones used below.
load("data/tf0.rdata")
# Row counts of the three tables. vapply() guarantees a named integer vector
# and fails loudly if any of the objects has no row dimension.
vapply(list(cust = A0, tid = X0, items = Z0), nrow, integer(1))
##   cust    tid  items 
##  32241 119328 817182


# Packages for association-rule mining.
pacman::p_load(Matrix, arules, arulesViz)
# Number of distinct transaction ids in the item-level table.
n_distinct(Z0$tid)
## [1] 119422
# Number of distinct products in the item-level table.
n_distinct(Z0$prod)
## [1] 23789
# Count rows of Z0 per product, most frequent first.
p <- count(Z0, prod, sort = TRUE)
# Keep at most the 2000 most frequent products. head() never pads with NA
# when fewer than 2000 products exist, whereas p$prod[1:2000] would.
pk <- head(p$prod, 2000)
Z <- filter(Z0, prod %in% pk)
# Group product codes by transaction id and coerce to arules transactions.
# `$` extraction yields plain vectors for both data.frames and tibbles.
tr <- as(split(Z$prod, Z$tid), "transactions")
tr
## transactions in sparse format with
##  107797 transactions (rows) and
##  2000 items (columns)
# Mine association rules from the transactions with Apriori, using a
# minimum support of 0.0002 and a minimum confidence of 0.5.
rules <- apriori(
  tr,
  parameter = list(support = 0.0002, confidence = 0.5)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5   2e-04      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 21 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[2000 item(s), 107797 transaction(s)] done [0.13s].
## sorting and recoding items ... [2000 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 done [0.05s].
## writing ... [1001 rule(s)] done [0.01s].
## creating S4 object  ... done [0.01s].
# Print an overview of the mined rule set (rule-length distribution,
# quality-measure summaries and mining info, as echoed below).
summary(rules)
## set of 1001 rules
## 
## rule length distribution (lhs + rhs):sizes
##   2   3   4   5   6 
##  85 558 289  64   5 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   3.000   3.000   3.347   4.000   6.000 
## 
## summary of quality measures:
##     support            confidence        coverage              lift         
##  Min.   :0.0002041   Min.   :0.5000   Min.   :0.0002319   Min.   :   6.644  
##  1st Qu.:0.0002319   1st Qu.:0.5571   1st Qu.:0.0003711   1st Qu.:  60.475  
##  Median :0.0002969   Median :0.6364   Median :0.0004824   Median :  91.249  
##  Mean   :0.0005192   Mean   :0.6517   Mean   :0.0008293   Mean   : 210.995  
##  3rd Qu.:0.0005009   3rd Qu.:0.7358   3rd Qu.:0.0007885   3rd Qu.: 281.700  
##  Max.   :0.0064937   Max.   :0.9655   Max.   :0.0096478   Max.   :1131.303  
##      count       
##  Min.   : 22.00  
##  1st Qu.: 25.00  
##  Median : 32.00  
##  Mean   : 55.97  
##  3rd Qu.: 54.00  
##  Max.   :700.00  
## 
## mining info:
##  data ntransactions support confidence
##    tr        107797   2e-04        0.5
# Keep only rules whose lift and absolute count both exceed 100,
# then list the selected rules with their quality measures.
rx <- subset(rules, lift > 100 & count > 100)
inspect(rx)
##      lhs                              rhs             support      confidence
## [1]  {4710030346110}               => {4710030346097} 0.0009462230 0.5513514 
## [2]  {4710030346110}               => {4710030346103} 0.0011132035 0.6486486 
## [3]  {4710030346110}               => {4710030346059} 0.0010575433 0.6162162 
## [4]  {4716114000312}               => {4716114000329} 0.0014100578 0.5527273 
## [5]  {4716114000329}               => {4716114000312} 0.0014100578 0.5333333 
## [6]  {4710030346097}               => {4710030346103} 0.0010946501 0.5244444 
## [7]  {4710030346097}               => {4710030346059} 0.0012152472 0.5822222 
## [8]  {4719859015061}               => {4713080626225} 0.0010575433 0.5066667 
## [9]  {4713080626225}               => {4719859015061} 0.0010575433 0.5044248 
## [10] {4711524000617}               => {4711524000419} 0.0009462230 0.5454545 
## [11] {4710030346103}               => {4710030346059} 0.0015584849 0.6536965 
## [12] {4710030346059}               => {4710030346103} 0.0015584849 0.5121951 
## [13] {4710008241119}               => {4710008241218} 0.0009462230 0.5204082 
## [14] {4711524000457}               => {4711524000419} 0.0009554997 0.5049020 
## [15] {4711524000457}               => {4711524000396} 0.0010111599 0.5343137 
## [16] {0076150430530}               => {0076150215281} 0.0010853734 0.5176991 
## [17] {4719090701051}               => {4719090790000} 0.0016326985 0.5770492 
## [18] {4711524000471}               => {4711524000396} 0.0010853734 0.5109170 
## [19] {4711524000495}               => {4711524000396} 0.0011503103 0.5000000 
## [20] {4711524000419}               => {4711524000396} 0.0015306548 0.6790123 
## [21] {4710321861209}               => {4710321861186} 0.0020779799 0.6086957 
## [22] {4710321871260}               => {4710321861186} 0.0018553392 0.5405405 
## [23] {4711524000907}               => {4711524000891} 0.0014378879 0.5636364 
## [24] {4711524000907}               => {4711524001041} 0.0016605286 0.6509091 
## [25] {0719859796124}               => {0719859796117} 0.0023284507 0.6952909 
## [26] {4719090790017}               => {4719090790000} 0.0032561203 0.8087558 
## [27] {4719090790000}               => {4719090790017} 0.0032561203 0.6212389 
## [28] {4711524000891}               => {4711524001041} 0.0019573829 0.5994318 
## [29] {4711524001041}               => {4711524000891} 0.0019573829 0.5275000 
## [30] {4719090701051,4719090790017} => {4719090790000} 0.0010297133 0.8473282 
## [31] {4719090701051,4719090790000} => {4719090790017} 0.0010297133 0.6306818 
## [32] {4710321861209,4710321871260} => {4710321861186} 0.0010204366 0.6358382 
## [33] {4710321861186,4710321871260} => {4710321861209} 0.0010204366 0.5500000 
## [34] {4711524000891,4711524000907} => {4711524001041} 0.0011039268 0.7677419 
## [35] {4711524000907,4711524001041} => {4711524000891} 0.0011039268 0.6648045 
## [36] {4711524000891,4711524001041} => {4711524000907} 0.0011039268 0.5639810 
##      coverage    lift     count
## [1]  0.001716189 264.1512 102  
## [2]  0.001716189 272.0715 120  
## [3]  0.001716189 202.5191 114  
## [4]  0.002551091 209.0608 152  
## [5]  0.002643858 209.0608 152  
## [6]  0.002087257 219.9749 118  
## [7]  0.002087257 191.3470 131  
## [8]  0.002087257 241.6688 114  
## [9]  0.002096533 241.6688 114  
## [10] 0.001734742 241.9686 102  
## [11] 0.002384111 214.8370 168  
## [12] 0.003042756 214.8370 168  
## [13] 0.001818232 177.5267 102  
## [14] 0.001892446 223.9791 103  
## [15] 0.001892446 155.6687 109  
## [16] 0.002096533 169.1103 117  
## [17] 0.002829392 110.0959 176  
## [18] 0.002124363 148.8522 117  
## [19] 0.002300621 145.6716 124  
## [20] 0.002254237 197.8257 165  
## [21] 0.003413824 124.5077 224  
## [22] 0.003432378 110.5667 200  
## [23] 0.002551091 172.6088 155  
## [24] 0.002551091 175.4151 179  
## [25] 0.003348887 132.8905 251  
## [26] 0.004026086 154.3034 351  
## [27] 0.005241333 154.3034 351  
## [28] 0.003265397 161.5424 211  
## [29] 0.003710678 161.5424 211  
## [30] 0.001215247 161.6627 111  
## [31] 0.001632698 156.6489 111  
## [32] 0.001604868 130.0597 110  
## [33] 0.001855339 161.1096 110  
## [34] 0.001437888 206.9007 119  
## [35] 0.001660529 203.5907 119  
## [36] 0.001957383 221.0744 119