1. The plotting code

head(faithful)          # load data and      
  eruptions waiting
1     3.600      79
2     1.800      54
3     3.333      74
4     2.283      62
5     4.533      85
6     2.883      55
D = faithful$eruptions  # copy it to a short name
##### draw the boarder of the plot
par(cex=0.7)
plot(0,0,xlim=c(1.5,5.25),ylim=c(0,1.1),xlab="Eruption(mins)", 
     ylab="PDF/CDF", main="Distribution of Eruption Time")
abline(h=1, col='lightgray', lwd=0.25, lty=2)


##### Data Rugs
# Empirical Rug PDF, 實證(數值標記)機率密度函數
rug(D)
# Empirical CDF, 實證(數值標記)累計機率密度函數
plot(ecdf(D), cex=0, verticals=T, lwd=2, col='darkgray', add=T)


##### Histogram PDF 直方圖機率密度函數 
Bins = 20                               # no. bins
bx = seq(min(D), max(D), length=Bins+1) # break sequence 
hist(D, col="#B3FFFF7F", border="white", 
     freq=F, breaks=bx, add=T)
abline(h=0, col='lightgray', lwd=0.25)
# Histogram CDF
adj = (bx[2] - bx[1])/2
steps = stepfun(bx-adj, c(0, sapply(bx, function(b) mean(D <= b))))
plot(steps, cex=0, col='#33CC337F', lwd=3, lty=1, add=T)


##### Smooth Density PDF 平滑機率密度函數 
Adjust = 0.5                       # set bandwidth
DEN = density(D, adjust = Adjust)  # create density function
lines(DEN, col='gold', lwd=3)      

# Smooth Density CDF 畫出累計機率密度函數  
PDF = approxfun(DEN$x, DEN$y, yleft=0, yright=0)
x = seq(1,6,0.1)
y = sapply(x, function(i) integrate(PDF, -Inf, i)$value)
lines(x, y, col='red', lwd=3, lty=2)

##### Color mark the region [x1, x2] 
x1 = 3.8; x2 = 4.8
# rect(x1,-0.1,x2,1.2,col= rgb(0,1,0,alpha=0.2),border=NA)
x = seq(x1, x2, length=100)
polygon(c(x, x2, x1),  c(PDF(x), 0, 0), col="#FF99003F", border=NA)

Estimating Probability with PDF

# Calculate Probability
(integrate(PDF, x1, x2)$value)
[1] 0.4755



2. Strategic Planning

CASE-01

If you ran a Tourist Helicopter Company in Yellowstone Park. It’d be important for you to predict the eruption time of the next eruption. If your business scenario could be simplify as a game in which you can bet $30 on a consecutive period of 60 seconds, and you’d win $100 if the next eruption time fall within the period you’d chosen.
Please use the density model of bandwidth=0.5 to decide …

  • Whether to join the game or not?
  • If you join the game, which time period would you choose?
  • In the period you’d chosen, what is the expected value of the game?
pacman::p_load(dplyr, ggplot2)
x1 = seq(0,5,0.1)
p = sapply(x1, function(x) (integrate(PDF, x, x+1)$value))
data.frame(start=x1, stop=1+x1, p) %>% top_n(1, p)
  start stop      p
1   3.9  4.9 0.4766



CASE-02

The game rule has changed. Now you can place multiple bets of $5 on any 10-second periods (0~10, 10~20, 20~30 second etc.) Still you’d win $100 if the next eruption time fall within the periods you’d chosen.

  • For the maximum expected value, how would you place your bets?
  • What is the overall expected value of your bets?
x = seq(1,6,1/6)
cx = sapply(x, function(i) integrate(PDF, -Inf, i)$value)
df = data.frame(
  start = x - 1/6, stop = x, 
  prob=cx -lag(cx) 
  ) %>% 
  mutate(payoff = 100*prob - 5) 
bets = df %>% filter(payoff > 0) %>% arrange(start)
bets
  start  stop    prob payoff
1 1.667 1.833 0.06366 1.3664
2 1.833 2.000 0.08226 3.2261
3 2.000 2.167 0.06967 1.9671
4 3.833 4.000 0.05836 0.8356
5 4.000 4.167 0.07652 2.6524
6 4.167 4.333 0.08932 3.9324
7 4.333 4.500 0.09576 4.5763
8 4.500 4.667 0.08938 3.9375
9 4.667 4.833 0.06844 1.8437
nrow(bets)
[1] 9
sum(bets$payoff)
[1] 24.34



CASE 03

Let’s define Expected ROI as the ratio of the expected return versus the amount of investment (the sum of your betting money) …

  • What is the strategy to maximize the Expected ROI?
  • Is the strategy for maximal Expected ROI the same as that of maximal Expected Return?
  • Which of the two strategic objectives is better? and Why?
df = df %>% arrange(desc(payoff)) %>% mutate(
  n_bets = row_number(),
  c_invest = n_bets * 5,
  c_payoff = cumsum(payoff),
  c_ROI = c_payoff/c_invest
  ) %>% round(3)
head(df,20) %>% round(3)
   start  stop  prob payoff n_bets c_invest c_payoff  c_ROI
1  4.333 4.500 0.096  4.576      1        5    4.576  0.915
2  4.500 4.667 0.089  3.938      2       10    8.514  0.851
3  4.167 4.333 0.089  3.932      3       15   12.446  0.830
4  1.833 2.000 0.082  3.226      4       20   15.672  0.784
5  4.000 4.167 0.077  2.652      5       25   18.325  0.733
6  2.000 2.167 0.070  1.967      6       30   20.292  0.676
7  4.667 4.833 0.068  1.844      7       35   22.135  0.632
8  1.667 1.833 0.064  1.366      8       40   23.502  0.588
9  3.833 4.000 0.058  0.836      9       45   24.338  0.541
10 2.167 2.333 0.049 -0.095     10       50   24.242  0.485
11 4.833 5.000 0.041 -0.870     11       55   23.372  0.425
12 3.667 3.833 0.040 -1.005     12       60   22.367  0.373
13 2.333 2.500 0.030 -2.043     13       65   20.324  0.313
14 3.500 3.667 0.027 -2.290     14       70   18.034  0.258
15 1.500 1.667 0.027 -2.301     15       75   15.733  0.210
16 3.333 3.500 0.018 -3.183     16       80   12.550  0.157
17 5.000 5.167 0.018 -3.195     17       85    9.355  0.110
18 2.500 2.667 0.014 -3.557     18       90    5.799  0.064
19 3.167 3.333 0.010 -3.972     19       95    1.827  0.019
20 2.667 2.833 0.008 -4.213     20      100   -2.386 -0.024
ggplot(df[1:20,], aes(c_ROI, c_payoff, color=c_invest)) + 
  geom_point(size=3) + 
  geom_text(aes(label=n_bets), color='black', nudge_y=0.6, size=2.5) +
  scale_color_gradientn(colors=c('seagreen','gold','gold','orange','tomato','red')) +
  labs(title="Strategy Space",color="Investment",y="Exp.Payoff",x="Exp.ROI")


🍭 Strategy Space:
  ■ Every point is a Strategy
  ■ Each dimension is a Performance Index
  ■ The ideas of Efficiency Frontier & Reasonable Strategies

🍭 Tradeoff and Constraints:
  ■ If there’s no constraints, what is the optimal strategy?
  ■ What if you have a capital limitation at c_invest < 40
  ■ What if you have another investment opportunity with ROI = 62.5%