Research topic: Logistics between states in Brazil

Load packages and graphics

pacman::p_load(sf,sp,stringr,readxl,dplyr,leaflet,leaflet.minicharts)

# leaflet world map url
tilesURL <- "http://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}"

# Geo-Spatial Data
B = read_excel("../../data/BR.xlsx")
B$GDP = B$GDP %>%
  str_remove(",") %>% str_extract("^[0-9\\.]+") %>% as.numeric
G = st_read("../../data/BRUFE250GC_SIR.shp")[B$sfid,]
B = cbind(B, st_centroid(G) %>% st_coordinates)
names(B)[12:13] = c('lng','lat')

Import the location of customers and vendors

load("../../data/olist.rdata")
O2 = left_join(I[,c(1,4)], S[,c(1,4)]) %>% 
  left_join(O) %>% 
  left_join(C[,c(1,5)]) %>% 
  rename(from=seller_state, to=customer_state)


Quantity of goods in each state

Calculate the amount of intra, export, and import data of states in Brazil

states = sapply(unique(O2$to), function(s) c(
  Intra = sum(s == O2$from & s == O2$to, na.rm=T),
  Import = sum(s != O2$from & s == O2$to, na.rm=T),
  Export = sum(s == O2$from & s != O2$to, na.rm=T)
  )) %>% t %>% data.frame %>% 
  mutate(
    total = Intra + Import + Export,
    stCode = unique(O2$to)) %>% 
  left_join(B[,c(3,12,13)]) %>% data.frame

Logistics of 10 southeastern provinces vs. Sao Paulo (SP)

s10 = c("MG","PR","RJ","RS","SC","SP","DF","GO","ES","MS")
i10 = which(B$stCode %in% s10)
S10 = subset(states, stCode %in% s10)

Logistics between 10 southeastern provinces

mx = xtabs(~from+to, subset(O2, from%in%s10 & to%in%s10)); mx
    to
from    DF    ES    GO    MG    MS    PR    RJ    RS    SC    SP
  DF    61    22    37   108     8    33   110    23    26   336
  ES     1     9     3    52     0    16    74    21    14   119
  GO    30     5    39   100     9    16    62    14    11   141
  MG   221   206   173  1709    71   415  1329   315   288  2964
  MS     4     0     1     4     0     4     9     1     1    19
  PR   178   117    94   938    66   827  1147   696   451  3410
  RJ   114   111   127   622    43   234  1122   227   138  1518
  RS    50    29    26   219    17   149   267   327   131   762
  SC    75    54    77   483    31   324   540   331   311  1506
  SP  1599  1647  1701  8703   561  3667  9688  4194  2749 36192

The logistics between the 10 southeastern provinces accounted for 86% of Brazil

sum(mx)/nrow(O2)
[1] 0.86839

The logistics of Sao Paulo (SP) accounts for 81% of Brazil

mean(O2$from=="SP" | O2$to=="SP", na.rm=T)
[1] 0.81313


Static logistics map

Data for pie chart

S10 = S10 %>% mutate(
  lngSP= B$lng[B$stCode == "SP"],
  latSP = B$lat[B$stCode == "SP"]) %>% 
  arrange(stCode)
S10$fromSP = mx["SP",]
S10$toSP = mx[,"SP"]
S9 = subset(S10, stCode != "SP")

Data for flow chart (n > 300)

N = 300
d = O2 %>% filter(from%in%s10 & to%in%s10) %>% 
  group_by(from, to) %>% summarise(
    n = n(),
    rDelivery = mean(order_status=="delivered"),
    rDelay = mean(order_delivered_customer_date > 
                    order_estimated_delivery_date, na.rm=T)
    ) %>% 
  filter(n >= N) %>% 
  arrange(desc(rDelay)) %>% 
  left_join(B[,c(3,12,13)], by=c("from" = "stCode")) %>% 
  left_join(B[,c(3,12,13)], by=c("to" = "stCode"))
`summarise()` regrouping output by 'from' (override with `.groups` argument)
K = round(100*d$rDelay) %>% range # K
cols = colorRampPalette(c('yellow','red'))(K[2]-K[1]+1)
d$color = cols[ round(100*d$rDelay) - K[1] + 1   ]
# table(d$color)
sprintf("min. flow (order.items) = %d", N)
[1] "min. flow (order.items) = 300"
sprintf("range of delay percentage (%%) = [%d, %d]", K[1], K[2])
[1] "range of delay percentage (%) = [2, 15]"

Logistics of Sao Paulo (SP)

dx = d %>% filter(to=="SP" | from=="SP", from!=to) %>% mutate_at(
  vars(lng.x:lat.y), ~ifelse(from>to, ., .+0.2))

basemap = leaflet(width = "100%", height = "800px") %>% 
  addTiles(tilesURL) %>% 
  addPolylines(data=G[i10,], color="gray", weight=2, fillOpacity=0) 

basemap %>% 
  addFlows(
    dx$lng.x, dx$lat.x, dx$lng.y, dx$lat.y, 
    flow=dx$n, color = dx$color, opacity = 0.6, 
    maxThickness = 10) %>% 
  addMinicharts(
    S10$lng, S10$lat, type = "pie",
    chartdata = S10[,c("Intra","Import","Export")], 
    colorPalette = c("lightgray", "purple", "green"), 
    width = 100 * sqrt(S10$total) / sqrt(max(S10$total))
    )

Logistics of other nine provinces in Brazil

dx = d %>% filter(to!="SP", from!="SP", from!=to) %>% mutate_at(
  vars(lng.x:lat.y), ~ifelse(from>to, ., .-0.2) )

basemap %>% 
  addFlows(
    dx$lng.x, dx$lat.x, dx$lng.y, dx$lat.y, 
    flow=dx$n, color = dx$color, opacity = 0.6, 
    maxThickness = 10) %>% 
  addMinicharts(
    S9$lng, S9$lat, type = "pie",
    chartdata = S9[,c("Intra","Import","Export")], 
    colorPalette =  c("lightgray", "purple", "green"), 
    width = 80 * sqrt(S9$total) / sqrt(max(S9$total))
    )


Dynamic logistics map

Create time information

O2$month = format(O2$order_delivered_customer_date, "%Y-%m")
# table(O2$month)

dx = do.call(rbind, lapply(names(table(O2$month))[6:23], function(m) {
  df = S10 
  or = O2  %>% filter(month == m, from%in%s10 | to%in%s10) %>% 
  mutate_at(vars(from,to), factor, levels=sort(s10)) %>% 
  mutate(delay = order_delivered_customer_date > order_estimated_delivery_date)
  mx = xtabs(~from+to, or)
  df$Intra = diag(mx)
  df$Import = colSums(mx) - diag(mx)
  df$Export = rowSums(mx) - diag(mx)
  df$total = df$Intra + df$Import + df$Export
  df$fromSP = mx["SP",]
  df$toSP = mx[,"SP"]
  df$fromDelay = tapply(or$delay, or$to, mean, na.rm=T)
  df$toDelay = tapply(or$delay, or$from, mean, na.rm=T)
  df$month = m
  df
  }))
# summary(dx)

# coloring 
x = c(-Inf,0.05,0.10,0.15,Inf) 
cols = c("green","orange","magenta","red")
dx$fromColor = cut(dx$fromDelay, x, cols) %>% as.character
dx$toColor = cut(dx$toDelay, x, cols) %>% as.character

# lng/lat adjustment
dx2 = dx %>% 
  mutate_at(vars(lat,latSP), 
            ~ifelse(stCode%in%c("DF","GO"), ., .+0.2) ) %>% 
  mutate_at(vars(lng,lngSP), 
            ~ifelse(stCode%in%c("DF","GO"), .+0.2, .) )

Intra, import and export data of Sao Paulo (SP) in 2017-03 ~ 2018-08

basemap %>% 
  addFlows(
    dx$lngSP, dx$latSP, dx$lng, dx$lat, 
    flow=dx$fromSP, color = dx$fromColor, 
    opacity = 0.4, maxThickness = 40, time = dx$month) %>% 
  addFlows(
    dx2$lng, dx2$lat, dx2$lngSP, dx2$latSP, 
    flow=dx2$toSP, color = dx2$toColor, 
    opacity = 0.4, maxThickness = 40, time = dx2$month) %>% 
  addMinicharts(
    dx$lng, dx$lat, type = "pie", time = dx$month,
    chartdata = dx[,c("Intra","Import","Export")],
    colorPalette = c("lightgray", "purple", "green"),
    width = 100 * sqrt(dx$total) / sqrt(max(dx$total))
    )