第一組:多拉百寶店行銷計畫
行銷背景 + 1.開頭 敘述統計
各族群行銷略 + 2.各族群行銷策略
總體行銷策略 + 3.熱銷商品top5 + 4.購物籃分析 + 5.模型預測收益分析
rm(list=ls(all=TRUE))
pacman::p_load(Matrix, vcd, magrittr, readr, caTools, ggplot2, dplyr,reshape2, arules, arulesViz)
## package 'arules' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\yoyow\AppData\Local\Temp\RtmpAtYwyX\downloaded_packages
## package 'TSP' successfully unpacked and MD5 sums checked
## package 'qap' successfully unpacked and MD5 sums checked
## package 'gclus' successfully unpacked and MD5 sums checked
## package 'registry' successfully unpacked and MD5 sums checked
## package 'seriation' successfully unpacked and MD5 sums checked
## package 'visNetwork' successfully unpacked and MD5 sums checked
## package 'arulesViz' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\yoyow\AppData\Local\Temp\RtmpAtYwyX\downloaded_packages
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## [1] ""
load("data/tf0.rdata")
看一下每個年齡的購買頻率和平均消費來決定分群
A0 %>% filter(age!="a99") %>% # 濾掉沒有年齡資料的顧客('a99')
group_by(age) %>% summarise(
Group.Size = n(), # 族群人數
avg.Freq = mean(f), # 平均購買次數
avg.Revenue = sum(f*m)/sum(f) # 平均客單價
) %>%
ggplot(aes(y=avg.Freq, x=avg.Revenue)) +
geom_point(aes(col=age, size=Group.Size), alpha=0.5) +
geom_text(aes(label=age)) +
scale_size(range=c(5,25)) +
theme_bw() + theme(legend.position="none") +
ggtitle("年齡區隔特徵 (泡泡大小:族群人數)") +
ylab("平均購買次數") + xlab("平均客單價")
title
看到各群的平均消費金額還有消費頻率之後,我們去除第a99群離群值之後,把所有的年齡層分為四群。
接下來整理A0和Z0的資料框 + A0、Z0加上每筆顧客分群 + Z0轉換資料格式時間 + 算A0統計量
# 分四群
grp1 = c("a69")
grp2 = c("a49","a54","a59","a64")
grp3 = c("a24","a29")
grp4 = c("a34","a39","a44")
A0 <- A0 %>%
mutate(grp = ifelse(age %in% grp1, 1,
ifelse(age %in% grp2, 2,
ifelse(age %in% grp3, 3,
ifelse(age %in% grp4, 4, "other"))))) %>%
filter(grp != "other")
Z0 <- Z0 %>%
mutate(grp = ifelse(age %in% grp1, 1,
ifelse(age %in% grp2, 2,
ifelse(age %in% grp3, 3,
ifelse(age %in% grp4, 4, "other"))))) %>%
filter(grp != "other")
#轉換成時間
Z0$date <- as.Date(Z0$date,format = "%y-%m-%d")
#轉換成星期
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:arules':
##
## intersect, setdiff, union
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Z0$weekdays <- wday(Z0$date, label = TRUE)
Z0$weekdays <- wday(Z0$date)
Z0$weekdays <- weekdays(Z0$date)
# 算四群平均
A0_avg <-
A0 %>%
group_by(grp) %>% summarise(
Group.Size = n(), # 族群人數
avg.Freq = mean(f), # 平均購買次數
avg.Revenue = sum(f*m)/sum(f), # 平均客單價
avg.Contribute = avg.Freq*avg.Revenue*Group.Size # 平均貢獻
)
# 四群統計量平均
A0_avg #四群的平均購買頻率/客單價/平均貢獻
mean(A0$m) #四群平均每人每次花多少錢
## [1] 994.5671
# 把平均客單價長條圖畫出來
A0_avg_2 <- A0_avg %>% select(avg.Revenue,grp)
A0_avg_2 <- melt( A0_avg_2, id.vars='grp')
ggplot(A0_avg_2, aes(x=reorder(grp,-value), y=value, fill=variable)) +
geom_bar(stat='identity', position='dodge')+
labs( title= "平均客單價長條圖")
# 四群的泡泡圖
A0_avg %>%
ggplot(aes(y=avg.Freq, x=avg.Revenue)) +
geom_point(aes(col=grp, size=avg.Contribute ), alpha=0.5) + #泡泡大小是總營收貢獻
geom_text(aes(label= Group.Size)) + #裡面的數字是人數
scale_size(range=c(5,25)) +
theme_bw() + theme(legend.position="none") +
ggtitle("年齡區隔特徵 (泡泡大小:族群收益)") +
ylab("平均購買次數") + xlab("平均客單價")
我們檢視各族群的購買頻率與客單價,再算出各群的總體貢獻金額,制定行銷方案。
群 幾個人 購買頻率 客單價 平均貢獻 4 17585 3.584419 953.2083 60082628 2 7857 3.820033 810.7752 24334606 1 1363 4.269259 539.6585 3140273 3 4825 3.352746 755.8747 12227785
#算a34,a39,a44居住在不同地區顧客的「每人每次」平均客單價
Age34 <- subset(X0,age == "a34")
Age39 <- subset(X0,age == "a39")
Age44 <- subset(X0,age == "a44")
A34_44 <- rbind(Age34,Age39,Age44)
avg34_44 = A34_44 %>% group_by(area) %>% summarise(aov = mean(total))
p <- ggplot(data = avg34_44, aes(x = area, y = aov)) +
geom_bar(stat = "identity", fill = "orange") +
geom_text(aes(label=round(aov, 0)), vjust=1.6, color="black",
position = position_dodge(0.9), size=3.5) +
theme_minimal() +
labs(x="Area", y = "Average Order Value")
p
(1212+1072+952+1109)/4
## [1] 1086.25
其中居住在「z110,z114,z221及zOthers」的顧客最多。這四個地區的平均客單價為約為1082(約1000),訂定滿1000外送的服務。
左上角第一群平均購買次數高,但平均客單價低,我們定為靜香族群,行銷重點為提高平均客單價。另外此族群平均每人四個月來4.25次,每次消費約$550/次。此族群熱賣商品價格落在8-28塊,可以合理的推測常買的品項為麵包、飲品、零嘴…等日常用品,針對此族群我們推出加購價來拉高客單價。
中間綠色與藍色的客群,平均消費金額約在$600-$800之間,我們套用一樣的行銷策略,因為消費頻率偏低,消費力中等,故平均客單價與消費頻率都是我們必須提升的目標,我們推出集點抽獎的行銷策略。
位於泡泡圖中右下角,不太常來買,但消費金額算高,四個月頻率來買三次左右,每次消費金額約1000元,總收益貢獻是最高的,此群的消費目標是拉高消費頻率,我們推出限量聯名商品的行銷策略。
我們可以先看看每一群的總收益貢獻比例,發現總貢獻比例 第四群 > 第二、三群 > 第一群。
A0 %>%
group_by(grp) %>%
summarise(total_conrtibute = sum(m)) %>%
mutate(percent = round(100*total_conrtibute/sum(total_conrtibute), 1))%>%
arrange(desc(percent)) %>%
ggplot(aes(x="", y=percent, fill=grp))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0) +
ggtitle("每群總收益貢獻比例") #+
# geom_text(aes(label = percent), size=5, color = "white")
為了希望民眾購買,有些商品會賠本售出,這裡我們檢視一下各群賠本吸引客群的情況,發現每群雖然賠本比例較少,但還是有賠本售出的情形,故後續我們在購物籃分析,希望挑出熱門商品的同時,也會加入成本考量,希望售出的商品至少毛利率不為零。
Z0 %>%
mutate(loss = ifelse( price < cost, (-1*qty),
ifelse(price > cost, (qty), 0))) %>%
mutate(if_pos = ifelse( price < cost, "neg",
ifelse(price > cost, "pos", "same"))) %>%
group_by(if_pos,grp) %>%
summarise(total_qty = n()) %>%
group_by(grp) %>%
mutate(percent = round(100*total_qty/sum(total_qty), 1))%>%
arrange(desc(percent)) %>%
ggplot(aes(x="", y=percent, fill=if_pos))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0) +
ggtitle("每群總收益貢獻比例") +
facet_wrap( ~ grp)
為了瞭解各族的消費時間,我們畫出每群每個時期天數的平均消費次數。發現每個族群的消費次數在周末都是最高的,考量到有些消費族群可能是青壯年人,平時要上班或照顧小孩不方便外出,所以我們推出平日的外送服務,希望可以提高各族群的消費頻率。
另外由馬賽克圖可知年齡層為a34,a39及a44的顧客多(顯著)居住在z110,z114,z221及zOthers。
# 星期年齡與消費日期的對應
MOSA = function(formula, data) mosaic(formula, data, shade=T,
margins=c(0,1,0,0), labeling_args = list(rot_labels=c(90,0,0,0)),
gp_labels=gpar(fontsize=9), legend_args=list(fontsize=9),
gp_text=gpar(fontsize=7),labeling=labeling_residuals)
MOSA(~area+age, A0)
# 每周消費頻率熱圖
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:magrittr':
##
## extract
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
Z0_week <- Z0 %>%
group_by(grp,weekdays) %>%
summarise(buy_times = n()) %>%
spread(weekdays,buy_times)
library(pheatmap)
pheatmap(as.matrix(Z0_week[, -1]))
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <e4>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <b8>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝' in 'mbcsToSbcs': dot substituted for <80>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e4>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <ba>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <8c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <e4>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <ba>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��鈭�' in 'mbcsToSbcs': dot substituted for <94>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <e4>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <b8>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '��銝�' in 'mbcsToSbcs': dot substituted for <89>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e5>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9b>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9b>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e5>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <85>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <ad>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <98>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <97>
## Warning in strwidth(t, units = "in", cex = fontsize_col/fontsize): conversion
## failure on '����' in 'mbcsToSbcs': dot substituted for <a5>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <98>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <e6>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <9c>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <9f>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <e4>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <b8>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '��銝' in 'mbcsToSbcs': dot substituted for <80>
p.s.熱圖由上而下順序是 + 第四群 + 第二群 + 第一群 + 第三群
找出各族群經常購買的top5商品,再觀察能否對商品做促銷。
#四個族群前五名熱門商品放在一起看
Z0 %>% group_by(grp,prod) %>%
summarise(sum_qty = sum(qty)) %>%
arrange(desc(sum_qty)) %>%
top_n(5) %>%
ggplot(aes(x=prod,y=sum_qty))+
geom_col(stat = "identity")+
theme(axis.text.x = element_text(angle = 90))+
facet_wrap(~grp, ncol=1)
## Selecting by sum_qty
## Warning: Ignoring unknown parameters: stat
#1號族群購買的前五名產品
Z0 %>%
group_by(grp,prod) %>%
filter(grp==1) %>%
summarise(sum_qty = sum(qty),
cost = sum(cost),
price = sum(price)) %>%
arrange(desc(price)) %>%
top_n(5,sum_qty) %>%
mutate(cc=c(180,28,14,12,8),
pp=c(133,22,16,19,11))
#2號族群購買的前五名產品
Z0 %>%
group_by(grp,prod) %>%
filter(grp==2) %>%
summarise(sum_qty = sum(qty),
cost = sum(cost),
price = sum(price)) %>%
arrange(desc(price)) %>%
top_n(5,sum_qty) %>%
mutate(cc=c(145,28,14,12,8), #商品1價格落在133~176
pp=c(133,22,16,19,11))
#3號族群購買的前五名產品
Z0 %>%
group_by(grp,prod) %>%
filter(grp==3) %>%
summarise(sum_qty = sum(qty),
cost = sum(cost),
price = sum(price)) %>%
arrange(desc(price)) %>%
top_n(5,sum_qty) %>%
mutate(cc=c(96,59,28,14,12),
pp=c(104,90,22,16,19))
#4號族群購買的前五名產品
Z0 %>%
group_by(grp,prod) %>%
filter(grp==4) %>%
summarise(sum_qty = sum(qty),
cost = sum(cost),
price = sum(price)) %>%
arrange(desc(price)) %>%
top_n(5,sum_qty) %>%
mutate(cc=c(59,28,14,12,8),
pp=c(90,22,16,19,11))
數據上我們可以看出,三個族群的熱門商品很多都是虧本賣的 所以我們找出三個族群都賣得很好也可以賺錢的東西做為加購的品項: + 加10元購買,原價16元的指定零嘴 + 加13元購買,原價19元的指定麵包 + 加15元購買,原價22元的指定飲品
因為消費者通常會一次購買2-3件商品,可以使用購物籃分析消費者常一起買的商品,在這裡我們使用apriori
演算法,找出品項間的關聯法則(Association Rules),找出適合一起促銷的商品。
設定條件,找到會帶來高營收品項(rhs
)的關聯規則(lhs => rhs
):
support
: lhs
品項被購買的基礎機率confidence
: lhs
品項被購買時rhs
被購買的機率lift
: lhs
品項被購買時,rhs
被購買所增加機率的倍數count
: 交易筆數(交易筆數如果太少,分析就沒有實質意義)# 將毛利將商品品項作排列
tapply((Z0$price-Z0$cost), Z0$prod, sum) %>% sort(dec=T) %>% names -> TOP
# 將品項作成`transactions`物件(`tr`)
tr = as(split(Z0[,"prod"], Z0[,"tid"]), "transactions"); tr
## transactions in sparse format with
## 115042 transactions (rows) and
## 23696 items (columns)
# 找出關連法則
R <- apriori(tr, parameter=list(supp=0.0001, conf=0.25))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.25 0.1 1 none FALSE TRUE 5 1e-04 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 11
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[23696 item(s), 115042 transaction(s)] done [0.46s].
## sorting and recoding items ... [10010 item(s)] done [0.01s].
## creating transaction tree ... done [0.05s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.61s].
## writing ... [9545 rule(s)] done [0.14s].
## creating S4 object ... done [0.04s].
summary(R)
## set of 9545 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6 7 8 9
## 1132 3229 2339 1369 874 448 136 18
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 4.000 3.958 5.000 9.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.0001043 Min. :0.2500 Min. :0.0001043 Min. : 3.504
## 1st Qu.:0.0001217 1st Qu.:0.4412 1st Qu.:0.0001912 1st Qu.: 76.935
## Median :0.0001738 Median :0.6364 Median :0.0002695 Median : 278.304
## Mean :0.0002412 Mean :0.6427 Mean :0.0004721 Mean : 557.074
## 3rd Qu.:0.0002347 3rd Qu.:0.8571 3rd Qu.:0.0004259 3rd Qu.:1085.767
## Max. :0.0058500 Max. :1.0000 Max. :0.0135777 Max. :5651.186
## count
## Min. : 12.00
## 1st Qu.: 14.00
## Median : 20.00
## Mean : 27.75
## 3rd Qu.: 27.00
## Max. :673.00
##
## mining info:
## data ntransactions support confidence
## tr 115042 1e-04 0.25
# 設定條件找出高收益商品
rx = subset(R, rhs %in% TOP[1:200] & lift >= 50 & count > 100 )
df = inspect(rx)
## lhs rhs support confidence coverage lift count
## [1] {4716114000312} => {4716114000329} 0.0013038716 0.5514706 0.002364354 224.97262 150
## [2] {4716114000329} => {4716114000312} 0.0013038716 0.5319149 0.002451279 224.97262 150
## [3] {4710154015138} => {4710154015206} 0.0010170199 0.3750000 0.002712053 51.54211 117
## [4] {4710011402026} => {4710011402019} 0.0027815928 0.6736842 0.004128927 90.75173 320
## [5] {4713754987614} => {4713754987607} 0.0011560995 0.3057471 0.003781228 78.33800 133
## [6] {4713754987607} => {4713754987614} 0.0011560995 0.2962138 0.003902922 78.33800 133
## [7] {4710011401142} => {4710011406123} 0.0015211836 0.4186603 0.003633456 51.51178 175
## [8] {4710088414328} => {4710088414311} 0.0017819579 0.4691076 0.003798613 87.18428 205
## [9] {4710085172702} => {4710085172696} 0.0024686636 0.5430210 0.004546166 61.66853 284
## [10] {4710254049323} => {4710254049521} 0.0020340397 0.4293578 0.004737400 54.82151 234
## [11] {4710011409056} => {4710011406123} 0.0026077433 0.4137931 0.006302046 50.91293 300
## [12] {4710011409056} => {4710011401128} 0.0044070861 0.6993103 0.006302046 51.50452 507
## [13] {4710085120093} => {4710085172696} 0.0037725352 0.4988506 0.007562455 56.65229 434
## [14] {4710011401135} => {4710011401128} 0.0058500374 0.7578829 0.007718920 55.81841 673
## [15] {4710011401142,
## 4710011409056} => {4710011401128} 0.0011647920 0.7362637 0.001582031 54.22615 134
## [16] {4710011401135,
## 4710011401142} => {4710011406123} 0.0008866327 0.4903846 0.001808035 60.33671 102
## [17] {4710011401135,
## 4710011401142} => {4710011401128} 0.0013647190 0.7548077 0.001808035 55.59192 157
## [18] {4710011401142,
## 4710011405133} => {4710011401128} 0.0012082544 0.6847291 0.001764573 50.43060 139
## [19] {4710011401128,
## 4710011401142} => {4710011406123} 0.0010083274 0.4677419 0.002155734 57.55077 116
## [20] {4710085120093,
## 4710085172702} => {4710085172696} 0.0013734114 0.6583333 0.002086195 74.76405 158
## [21] {4710085120093,
## 4710085172702} => {4710085120628} 0.0013038716 0.6250000 0.002086195 54.55330 150
## [22] {4710085172696,
## 4710085172702} => {4710085120628} 0.0015211836 0.6161972 0.002468664 53.78494 175
## [23] {4710085120628,
## 4710085172702} => {4710085172696} 0.0015211836 0.6140351 0.002477356 69.73329 175
## [24] {4710011401135,
## 4710011409056} => {4710011406123} 0.0015907234 0.4753247 0.003346604 58.48374 183
## [25] {4710011401135,
## 4710011409056} => {4710011401128} 0.0027120530 0.8103896 0.003346604 59.68556 312
## [26] {4710011405133,
## 4710011409056} => {4710011406123} 0.0014690287 0.4956012 0.002964135 60.97856 169
## [27] {4710011405133,
## 4710011409056} => {4710011401128} 0.0022600442 0.7624633 0.002964135 56.15577 260
## [28] {4710011406123,
## 4710011409056} => {4710011401128} 0.0019731924 0.7566667 0.002607743 55.72884 227
## [29] {4710011401128,
## 4710011409056} => {4710011406123} 0.0019731924 0.4477318 0.004407086 55.08872 227
## [30] {4710085120093,
## 4710085172696} => {4710085120628} 0.0021731194 0.5760369 0.003772535 50.27954 250
## [31] {4710085120093,
## 4710085120628} => {4710085172696} 0.0021731194 0.5434783 0.003998540 61.72046 250
## [32] {4710011401135,
## 4710011405133} => {4710011406123} 0.0016428783 0.4489311 0.003659533 55.23629 189
## [33] {4710011401135,
## 4710011405133} => {4710011401128} 0.0028511326 0.7790974 0.003659533 57.38087 328
## [34] {4710011401135,
## 4710011406123} => {4710011401128} 0.0024773561 0.8096591 0.003059752 59.63175 285
## [35] {4710011401128,
## 4710011401135} => {4710011406123} 0.0024773561 0.4234770 0.005850037 52.10443 285
## [36] {4710011405133,
## 4710011406123} => {4710011401128} 0.0022339667 0.7158774 0.003120599 52.72469 257
## [37] {4710011401128,
## 4710011405133} => {4710011406123} 0.0022339667 0.4370748 0.005111177 53.77750 257
## [38] {4710011401135,
## 4710011401142,
## 4710011409056} => {4710011401128} 0.0009127101 0.8536585 0.001069175 62.87233 105
## [39] {4710085120093,
## 4710085172696,
## 4710085172702} => {4710085120628} 0.0009040177 0.6582278 0.001373411 57.45360 104
## [40] {4710085120093,
## 4710085120628,
## 4710085172702} => {4710085172696} 0.0009040177 0.6933333 0.001303872 78.73885 104
## [41] {4710011401135,
## 4710011405133,
## 4710011409056} => {4710011406123} 0.0009996349 0.5299539 0.001886268 65.20530 115
## [42] {4710011401135,
## 4710011405133,
## 4710011409056} => {4710011401128} 0.0015820309 0.8387097 0.001886268 61.77134 182
## [43] {4710011401135,
## 4710011406123,
## 4710011409056} => {4710011401128} 0.0013386415 0.8415301 0.001590723 61.97907 154
## [44] {4710011401128,
## 4710011401135,
## 4710011409056} => {4710011406123} 0.0013386415 0.4935897 0.002712053 60.73107 154
## [45] {4710011405133,
## 4710011406123,
## 4710011409056} => {4710011401128} 0.0011560995 0.7869822 0.001469029 57.96160 133
## [46] {4710011401128,
## 4710011405133,
## 4710011409056} => {4710011406123} 0.0011560995 0.5115385 0.002260044 62.93947 133
## [47] {4710011401135,
## 4710011405133,
## 4710011406123} => {4710011401128} 0.0013734114 0.8359788 0.001642878 61.57022 158
## [48] {4710011401128,
## 4710011401135,
## 4710011405133} => {4710011406123} 0.0013734114 0.4817073 0.002851133 59.26906 158
在這裡依據銷售次數(count)挑選出五種銷售數量較高的商品。
lhs rhs support confidence lift count {4710011401135} => {4710011401128} 0.005862 0.753 54.9 700 {4710011409056} => {4710011401128} 0.004446 0.700 51.0 531 {4710085120093} => {4710085172696} 0.003743 0.498 57.2 447 {4710011402026} => {4710011402019} 0.002822 0.674 90.2 337 {4710011401135,4710011405133}=> {4710011401128} 0.002839 0.772 56.3 339
title
算一下挑選出要一起行銷的產品,是否售價都大於成本,發現雖然ㄉ受價位在30-50元的低價品,但整體上毛利率都為正,不會造成店家太大的負擔。
goods = c("4710011401135","4710011401128","4710011409056","4710085120093","4710085172696","4710011402026","4710011402019","4710011405133")
Z0_basket <- Z0 %>%
filter( prod %in% goods) %>%
mutate(loss = ifelse( price < cost, (-1*qty),
ifelse(price > cost, (qty), 0))) %>%
mutate(if_pos = ifelse( price < cost, "neg",
ifelse(price > cost, "pos", "same"))) %>%
group_by(prod) %>%
summarise(
#buy_times = n(),
mean_price = mean(price),
mean_cost = mean(cost)
#sum_price = sum(price)
)
#Z0_basket_2 <- gather(Z0_basket,category,value,buy_times,mean_price,sum_price)
Z0_basket_2 <- gather(Z0_basket,category,value,mean_price,mean_cost)
ggplot(data=Z0_basket_2, aes(x=reorder(prod,value), y=value, fill=category)) +
geom_bar(stat="identity", position=position_dodge())+
scale_fill_brewer(palette="Paired")+
theme_minimal()+ theme(axis.text.x = element_text(angle = 25))
options(scipen=10)
pacman::p_load(latex2exp,Matrix,dplyr,tidyr,ggplot2,caTools)
rm(list=ls(all=TRUE))
load("data/tf4.rdata")
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}
par(mar=c(4,4,2,1),mfrow=c(1,2),cex=0.7)
curve(DP(x,m=0.20,b=15,a=20), 0, 30, lwd=2, ylim=c(0, 0.25),
main="F( x | m=0.2, b=15, a=20 )", ylab="delta P")
abline(h=seq(0,0.2,0.05),v=seq(0,30,5),col='lightgrey',lty=2)
m=0.5; b=40; a=40; x=200
dp = DP(x,m,b,a)
dp = ifelse(B$Buy+dp>1, 1-B$Buy, dp)
eR = dp*B$Rev - x
hist(eR)
m=0.5; b=40; a=40; X = seq(10,120,1)
sapply(X, function(x) {
dp = DP(x,m,b,a)
dp = ifelse(B$Buy+dp>1, 1-B$Buy, dp)
eR = dp*B$Rev - x
c(x=x, eReturn=sum(eR), N=sum(eR > 0), eReturn2=sum(eR[eR > 0]))
}) %>% t %>% data.frame %>%
gather('key','value',-x) %>%
ggplot(aes(x=x, y=value, col=key)) +
geom_hline(yintercept=0,linetype='dashed') +
geom_line(size=1.5,alpha=0.5) +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw()
我們假設所有顧客會回購的機率為0.5並放入模型做預測,發現當我們所投入的行銷工具成本為60元時,可獲得6,000,000元的報酬,如此一來即可使成本效益最大化。
此外,我們也發現,因為eReturn(對全部人做)跟eReturn2(只對>0的人做)的報酬都是六百萬,表示我們可以對所有的顧客都投入60元的行銷成本。且根據模型可以算出在對2500個人做時獲得6,000,000元的報酬,表示平均每位顧客的期望報酬為160元。
因此根據此模型的結果,我們針對下個月政府即將發行的「振興券」,搶先其他商家,先將顧客的券留住。只要在本店用振興券消費200元,即可獲得60元的折價券!