# Load (installing if necessary) the packages used throughout this notebook.
pacman::p_load(dplyr, ggplot2, car, vcd, GGally, mvtnorm)

# Read the wholesale data; the path is relative to the working directory.
W <- read.csv("data/wholesales.csv")

# Recode the Channel and Region codes as labelled factors ("Ch*", "Reg*").
W$Channel <- factor(paste0("Ch", W$Channel))
W$Region <- factor(paste0("Reg", W$Region))

# Put columns 3 to 8 on a log10 scale, then print per-column summaries.
W[3:8] <- lapply(W[3:8], log, base = 10)
summary(W)
Channel Region Fresh Milk Grocery
Ch1:298 Reg1: 77 Min. :0.477 Min. :1.74 Min. :0.477
Ch2:142 Reg2: 47 1st Qu.:3.495 1st Qu.:3.19 1st Qu.:3.333
Reg3:316 Median :3.930 Median :3.56 Median :3.677
Mean :3.792 Mean :3.53 Mean :3.666
3rd Qu.:4.229 3rd Qu.:3.86 3rd Qu.:4.028
Max. :5.050 Max. :4.87 Max. :4.968
Frozen Detergents_Paper Delicassen
Min. :1.40 Min. :0.477 Min. :0.477
1st Qu.:2.87 1st Qu.:2.409 1st Qu.:2.611
Median :3.18 Median :2.912 Median :2.985
Mean :3.17 Mean :2.947 Mean :2.895
3rd Qu.:3.55 3rd Qu.:3.594 3rd Qu.:3.260
Max. :4.78 Max. :4.611 Max. :4.681
B1a. 點狀圖 Simple Scatter Plot
# Shrink text and set the plot margins, then draw Grocery against Milk
# with base graphics.
par(cex = 0.7, mar = c(4, 4, 2, 2))
plot(x = W$Milk, y = W$Grocery)
B1b. 點狀圖+回歸線 Scatter Plot with Regression Line
# Semi-transparent scatter of Grocery against Milk, overlaid with a
# linear-model ("lm") smoother.
ggplot(data = W, mapping = aes(x = Milk, y = Grocery)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
B2. 相關係數 Correlation \[r_{xy}=\frac{Cov(x,y)}{\sigma_x \sigma_y} =\frac{\Sigma_{i=1}^n(x_i - \bar{x})(y_i - \bar{y})} {\sqrt{\Sigma_{i=1}^n(x_i - \bar{x})^2} \sqrt{\Sigma_{i=1}^n(y_i - \bar{y})^2}}\]
# Correlation coefficient between Milk and Grocery (cor()'s default method).
cor(x = W$Milk, y = W$Grocery)
[1] 0.75885
B3. 相關係數檢定 Correlation Test
# Test whether the Milk-Grocery correlation differs from zero.
cor.test(x = W$Milk, y = W$Grocery)
Pearson's product-moment correlation
data: W$Milk and W$Grocery
t = 24.4, df = 438, p-value <0.0000000000000002
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.71617 0.79588
sample estimates:
cor
0.75885
💡 : 簡單講,\(p-value\)大致可以視為「沒有關係」的機率(準確的說它並不是這樣)
💡 : 如果「沒有關係」的機率很小,我們就可以推論這關係是「顯著」的
💡 : \(p-value\): 給定虛無假設(\(H_0:r=0\))為真時,檢定統計量的絕對值(\(|t|\))大於或等於觀察值(\(24.4\))的機率
🗿 : 當\(p = 0.05\)時,對立假設(\(H_A\))為真的機率是?
💡 : 貝氏定理:\(P(A|B) = P(B|A) \cdot P(A) / P(B)\)
B4. Simulating Bi-Variate Normal Distribution
# Draw a 3x3 grid of simulated bivariate samples, one panel for each
# value of r in seq(-1, 1, 0.25), labelling every panel with its r.
par(cex = 0.7, mar = c(1, 1, 1, 1), mfrow = c(3, 3))
for (r in seq(-1, 1, 0.25)) {
  mu <- c(0, 0)                             # mean vector
  sigma <- matrix(c(1, r, r, 1), nrow = 2)  # covariance matrix
  # 500 draws from rmvnorm() with this mean and covariance, plotted in gray.
  rmvnorm(500, mu, sigma) %>% plot(col = "gray")
  # Print r in large blue bold type at the origin of the panel.
  text(0, 0, r, cex = 3, col = "blue", font = 2)
}
C1. Matrix of Correlation Coefficients
# Pairwise correlation matrix of columns 3 to 8, rounded to 3 decimals.
round(cor(W[, 3:8]), 3)
Fresh Milk Grocery Frozen Detergents_Paper Delicassen
Fresh 1.000 -0.020 -0.133 0.384 -0.156 0.255
Milk -0.020 1.000 0.759 -0.055 0.678 0.338
Grocery -0.133 0.759 1.000 -0.165 0.796 0.236
Frozen 0.384 -0.055 -0.165 1.000 -0.212 0.255
Detergents_Paper -0.156 0.678 0.796 -0.212 1.000 0.167
Delicassen 0.255 0.338 0.236 0.255 0.167 1.000
💡 : 相關性矩陣:(a)對角線等於1;(b)左下、右上對稱
C2. Matrix of Scatter Plots
# Scatter-plot matrix of columns 3 to 8, called through the car namespace.
car::scatterplotMatrix(W[, 3:8])