Déplacements et COVID

Fréquences de déplacements

Lecture / recodage

Code
library(readxl)
library(dplyr)
library(ggplot2)
library(kableExtra)
COMOPOCOV = read_excel("data/COMOPOCOV_transmis Etienne et Emmanuel(1).xlsx")

# Q1 -> Q22 contexte
# Q23 -> Q34 Avant
# Q35 -> Q47 Pendant
# Q48 -> Q59 Après
# Q60 -> Q64 Futur


lab_freq = c("Jamais","Moins de 1","1 à 3 / mois",
             "1 à 3 / sem","4 à 5 / semaine","Tous les jours")

lab_mode =  c("Conducteur","Passager","TC","deux-roues",
              "La marche","Le vélo","Trottinette","Pas")

COMAVANT_FREQ = COMOPOCOV |> select(uuid,Q28bis_1:Q28bis_8) |> 
  mutate(across(Q28bis_1:Q28bis_8, ~ factor(lab_freq[.x],levels=lab_freq)))
colnames(COMAVANT_FREQ)=c("uiid",paste0("FREQMO",1:8))


COMAVANT_MODE =  COMOPOCOV |> select(uuid,Q29_1:Q29_8) |> 
  mutate(across(Q29_1:Q29_8, ~ factor(lab_mode[.x],levels=lab_mode)))
colnames(COMAVANT_MODE)=c("uiid",paste0("MODEMO",1:8))

COMAPRES_FREQ = COMOPOCOV |> select(uuid,Q48_1:Q48_8) |> 
  mutate(across(Q48_1:Q48_8, ~ factor(lab_freq[.x],levels=lab_freq)))
colnames(COMAPRES_FREQ)=c("uiid",paste0("FREQMO",1:8))

COMAPRES_MODE =  COMOPOCOV |> select(uuid,Q49_1:Q49_8) |> 
  mutate(across(Q49_1:Q49_8, ~ factor(lab_mode[.x],levels=lab_mode)))
colnames(COMAPRES_MODE)=c("uiid",paste0("MODEMO",1:8))

Xmob=bind_rows(COMAPRES_FREQ[,c(0,1,4,5,7,8)+1] |> left_join(COMAPRES_MODE[,c(0,1,4,5,7,8)+1]),
COMAVANT_FREQ[,c(0,1,4,5,7,8)+1]  |> left_join(COMAVANT_MODE[,c(0,1,4,5,7,8)+1])) 

gg_evol = COMAVANT_FREQ|> rename_all(~paste0("AVANT_",.)) |> left_join(
  COMAPRES_FREQ|> rename_all(~paste0("APRES_",.)),
  by=c("AVANT_uiid"="APRES_uiid"))

Motif travail

Code

kable(table(COMAVANT_FREQ$FREQMO1,COMAPRES_FREQ$FREQMO1))
Jamais Moins de 1 1 à 3 / mois 1 à 3 / sem 4 à 5 / semaine Tous les jours
Jamais 106 11 11 6 12 15
Moins de 1 15 17 12 7 6 5
1 à 3 / mois 12 14 25 14 16 5
1 à 3 / sem 6 10 23 77 24 5
4 à 5 / semaine 24 4 14 74 131 10
Tous les jours 39 4 8 38 52 116
Code
ggplot(gg_evol|> count(AVANT_FREQMO1,APRES_FREQMO1,name = "nm")  |> add_count(AVANT_FREQMO1,wt=nm))+
  geom_point(aes(y=AVANT_FREQMO1,x=APRES_FREQMO1,size=nm,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+
  theme_bw()

Motif Alimentaire

Code
kable(table(COMAVANT_FREQ$FREQMO4,COMAPRES_FREQ$FREQMO4))
Jamais Moins de 1 1 à 3 / mois 1 à 3 / sem 4 à 5 / semaine Tous les jours
Jamais 12 4 2 5 2 3
Moins de 1 4 14 19 4 3 1
1 à 3 / mois 8 13 155 56 12 1
1 à 3 / sem 6 14 76 332 30 8
4 à 5 / semaine 2 5 17 39 33 5
Tous les jours 1 7 9 14 19 33
Code
ggplot(gg_evol|> count(AVANT_FREQMO4,APRES_FREQMO4,name = "nm")  |> add_count(AVANT_FREQMO4,wt=nm))+
  geom_point(aes(y=AVANT_FREQMO4,x=APRES_FREQMO4,size=nm,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+
  theme_bw()

Motif Non-alimentaire

Code
kable(table(COMAVANT_FREQ$FREQMO5,COMAPRES_FREQ$FREQMO5))
Jamais Moins de 1 1 à 3 / mois 1 à 3 / sem 4 à 5 / semaine Tous les jours
Jamais 22 9 10 9 1 0
Moins de 1 5 102 62 19 4 3
1 à 3 / mois 15 46 222 59 15 5
1 à 3 / sem 7 13 73 139 26 5
4 à 5 / semaine 0 2 16 17 10 8
Tous les jours 1 9 6 8 5 15
Code
ggplot(gg_evol|> count(AVANT_FREQMO5,APRES_FREQMO5,name = "nm")  |> add_count(AVANT_FREQMO5,wt=nm))+
  geom_point(aes(y=AVANT_FREQMO5,x=APRES_FREQMO5,size=nm,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+
  theme_bw()

Motif sport loisir

Code
kable(table(COMAVANT_FREQ$FREQMO7,COMAPRES_FREQ$FREQMO7))
Jamais Moins de 1 1 à 3 / mois 1 à 3 / sem 4 à 5 / semaine Tous les jours
Jamais 117 9 14 15 3 1
Moins de 1 28 37 25 20 6 4
1 à 3 / mois 17 33 96 55 12 3
1 à 3 / sem 17 15 59 182 26 3
4 à 5 / semaine 2 1 15 42 43 8
Tous les jours 5 5 6 11 10 23
Code
ggplot(gg_evol|> count(AVANT_FREQMO7,APRES_FREQMO7,name = "nm")  |> add_count(AVANT_FREQMO7,wt=nm))+
  geom_point(aes(y=AVANT_FREQMO7,x=APRES_FREQMO7,size=nm,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+
  theme_bw()

Selection des id avec changement baisse de mobilité sur le motif travail

Code
usel = gg_evol |> filter(unclass(AVANT_FREQMO1)>unclass(APRES_FREQMO1),unclass(AVANT_FREQMO1)>3,unclass(APRES_FREQMO1)>1) |> pull(AVANT_uiid)

gg_evol_sel_mode = COMAVANT_MODE|> rename_all(~paste0("AVANT_",.)) |> left_join(
  COMAPRES_MODE|> rename_all(~paste0("APRES_",.)),
  by=c("AVANT_uiid"="APRES_uiid")) |> filter(AVANT_uiid%in%usel)


ggplot(gg_evol_sel_mode|> count(AVANT_MODEMO1,APRES_MODEMO1,name = "nm")  |> add_count(AVANT_MODEMO1,wt=nm))+
  geom_point(aes(y=AVANT_MODEMO1,x=APRES_MODEMO1,size=nm,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+
  theme_bw()

Effet rebond ?

Code
gg_evol_sel = gg_evol|> filter(AVANT_uiid%in%usel)

delta_freq_dec = gg_evol_sel |> mutate(delta_freq4=unclass(APRES_FREQMO4)-unclass(AVANT_FREQMO4)) |>
  mutate(delta_freq5=unclass(APRES_FREQMO5)-unclass(AVANT_FREQMO5)) |>
  mutate(delta_freq7=unclass(APRES_FREQMO7)-unclass(AVANT_FREQMO7)) |>
  select(delta_freq4,delta_freq5,delta_freq7) 

delta_freq_dec|> summarise_all(mean)
#> # A tibble: 1 × 3
#>   delta_freq4 delta_freq5 delta_freq7
#>         <dbl>       <dbl>       <dbl>
#> 1      -0.189     -0.0705      -0.238

ggplot(delta_freq_dec) + geom_histogram(aes(x=delta_freq4))+theme_bw()

Code
ggplot(delta_freq_dec) + geom_histogram(aes(x=delta_freq5))+theme_bw()

Code
ggplot(delta_freq_dec) + geom_histogram(aes(x=delta_freq7))+theme_bw()

Clustering

Code
#library(greed)
#sol=greed(Xmob[,-1])
Code
#plot(sol,type='marginals')
Code

# params = coef(sol)
# 
# to_long = function(varname){
#   mat=params$Thetak[[varname]]
#   data.frame(var=varname,cluster=rep(rownames(mat),ncol(mat)),values=as.vector(mat),modality=rep(colnames(mat),each=nrow(mat)))
# }
# 
# ggplot(to_long("MODEMO1"))+geom_point(aes(x=cluster,y=modality,size=values))+ggtitle("Mo1")
# ggplot(to_long("MODEMO4"))+geom_point(aes(x=cluster,y=modality,size=values))+ggtitle("Mo4")
# ggplot(to_long("MODEMO5"))+geom_point(aes(x=cluster,y=modality,size=values))+ggtitle("Mo5")
Code

# cl = clustering(cut(sol,8))
# user_cl = data.frame(uuid = Xmob$uiid,cl=cl)
# user_cl_temp = bind_cols(user_cl[1:968,] |> select(uuid,cl_apres=cl),
#  user_cl[969:nrow(user_cl),] |> select(cl_avant=cl))

# moves = table(user_cl_temp[,3:2])
# kable(moves)
Code
# gg_moves = user_cl_temp[,3:2] |> count(cl_avant,cl_apres,name = "nm")  |> add_count(cl_avant,wt=nm)
# ggplot(gg_moves)+geom_point(aes(y=cl_avant,x=cl_apres,size=nm/n,color=nm/n))+scale_color_distiller(palette="Reds",direction=1)+theme_bw()

Sur les décroissants

Code
# moves = table(user_cl_temp[user_cl_temp$uuid%in%usel,3:2])
# kable(moves)

Régression

Code
COMOPOCOV = read_excel("data/COMOPOCOV_transmis Etienne et Emmanuel(1).xlsx")

# Q1 -> Q22 contexte
# Q23 -> Q34 Avant
# Q35 -> Q47 Pendant
# Q48 -> Q59 Après
# Q60 -> Q64 Futur


lab_freq = c("Jamais","Moins de 1","1 à 3 / mois",
             "1 à 3 / sem","4 à 5 / semaine","Tous les jours")
             
             
lab_freq_p = c("--","-","=","+","++","0")

lab_mode =  c("Conducteur","Passager","TC","deux-roues",
              "La marche","Le vélo","Trottinette","Pas")

lab_age=c("18-24","25-34","35-44","45-54","55-64","65+")

old_names_pre_freq=paste0("Q28bis_",1:8)
new_names_pre_freq=paste0("FREQMOPRE",1:8)

old_names_in_freq=paste0("Q35_",1:8)
new_names_in_freq=paste0("FREQMOIN",1:8)

old_names_in_tele=paste0("Q46_",1:4)
new_names_in_tele=paste0("TELEINPER",1:4)

old_names_post_freq=paste0("Q48_",1:8)
new_names_post_freq=paste0("FREQMOPOST",1:8)

old_names_pre_mode=paste0("Q29_",1:8)
new_names_pre_mode=paste0("MODEMOPRE",1:8)

old_names_post_mode=paste0("Q49_",1:8)
new_names_post_mode=paste0("MODEMOPOST",1:8)


lab_sexe = c("femme","homme","nsp")
lab_diplo = c("nodiplo","Brevet","CAP","Bac","Bac+2","Bac+3","Bac+4","Bac+5", ">Bac+5","nsp")
lab_situ_fam = c("Seul","Seul + enf(s)","Couple","Couple + enf(s)","Autres") 
lab_situ_trav =c("TP","P80","P80_50","P50","PM50")
lab_hab = c("maison","appartement")
lab_rev = c("M500","M500_1000","M1001_2500","M2501_4000","M4001_6000","M6001_9000","P9000","Jnsp","Jnspr") 
lab_ouinon = c("Oui","Non")
lab_5lev=c("--","-","=","+","++")

COMPOCOV_CLEAN = COMOPOCOV |> 
  mutate(across(Q28bis_1:Q28bis_8, ~ factor(lab_freq[.x],levels=lab_freq))) |>
  rename_with(~ new_names_pre_freq, all_of(old_names_pre_freq)) |>
  mutate(across(Q29_1:Q29_8, ~ factor(lab_mode[.x],levels=lab_mode))) |>
  rename_with(~ new_names_pre_mode, all_of(old_names_pre_mode)) |>
  mutate(across(Q48_1:Q48_8, ~ factor(lab_freq[.x],levels=lab_freq))) |>
  rename_with(~ new_names_post_freq, all_of(old_names_post_freq)) |>
  mutate(across(Q35_1:Q35_8, ~ factor(lab_freq_p[.x],levels=lab_freq_p))) |>
  rename_with(~ new_names_in_freq, all_of(old_names_in_freq)) |>
  mutate(across(Q49_1:Q49_8, ~ factor(lab_mode[.x],levels=lab_mode))) |>
  rename_with(~ new_names_post_mode, all_of(old_names_post_mode)) |>
  mutate(across(Q46_1:Q46_4, ~ factor(lab_5lev[.x],levels=lab_5lev))) |>
  rename_with(~ new_names_in_tele, all_of(old_names_in_tele)) |>
  mutate(age= Q1) |>
  mutate(sexe= factor(lab_sexe[Q2]  ,levels=  lab_sexe)) |>  
  mutate(diplo= factor(lab_diplo[Q3]  ,levels=  lab_diplo)) |>  
  mutate(csp = factor(case_when(Q4==3 ~ "CSP",Q4<5~"CSP+",Q4 %in% 5:6 ~ "CSP-",Q4>6~"Ina"))) |>
  mutate(tp = factor(lab_situ_trav[Q5],levels=lab_situ_trav)) |>
  mutate(situ_fam = factor(lab_situ_fam[Q7],levels=lab_situ_fam)) |>
  mutate(hab=factor(lab_hab[Q12],levels=lab_hab)) |>
  mutate(rev=factor(lab_rev[Q16],levels=lab_rev)) |>
  mutate(change_sociopro=factor(lab_ouinon[Q23])) |>
  mutate(tele_pre=if_else(is.na(Q33),0,Q33)) |>
  mutate(tele_pre=if_else(tele_pre>5,5,tele_pre)) |>
  mutate(tele_post=if_else(is.na(Q53),0,Q53)) |>
  mutate(depf=factor(dep)) |>
  select(uuid,depf,age:tele_post,nb_tele_pre=Q33,nb_tele_post=Q53,FREQMOPRE1:MODEMOPRE8,FREQMOIN1:FREQMOIN8,TELEINPER1:TELEINPER4,FREQMOPOST1:MODEMOPOST8) |>
  mutate(change_tele = !(tele_pre=="Non")&(tele_post=="Oui")) |>
  mutate(change = (unclass(FREQMOPOST1)-unclass(FREQMOPRE1))<0)|> 
  mutate(change_tele=tele_post-tele_pre)

data = COMPOCOV_CLEAN |> 
  filter(csp!="Ina") |>
  select(depf:change_sociopro,FREQMOIN1,FREQMOIN4,FREQMOIN5,FREQMOIN7,TELEINPER1:TELEINPER2,change,change_tele)|> mutate(TELEINPER1=if_else(is.na(TELEINPER1),"NA",TELEINPER1))|> 
  mutate(TELEINPER2=if_else(is.na(TELEINPER2),"NA",TELEINPER2))

kable(table(data$csp))
Var1 Freq
CSP 253
CSP- 323
CSP+ 188
Ina 0
Code

kable(table(data$change,data$change_tele))
-5 -4 -3 -2 -1 0 1 2 3 4 5
FALSE 26 17 24 28 33 273 40 25 14 5 12
TRUE 8 10 7 19 17 86 39 44 21 10 6
Code
fit=glm(change ~ FREQMOIN1 + FREQMOIN4 + FREQMOIN5 + FREQMOIN7 + csp + diplo + rev+hab+TELEINPER1+TELEINPER2+change_tele ,data=data,family = binomial)
summary(fit)
#> 
#> Call:
#> glm(formula = change ~ FREQMOIN1 + FREQMOIN4 + FREQMOIN5 + FREQMOIN7 + 
#>     csp + diplo + rev + hab + TELEINPER1 + TELEINPER2 + change_tele, 
#>     family = binomial, data = data)
#> 
#> Coefficients: (1 not defined because of singularities)
#>                  Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)      0.955964   1.225037   0.780  0.43518    
#> FREQMOIN1-      -0.613538   0.261454  -2.347  0.01894 *  
#> FREQMOIN1=      -1.162432   0.270636  -4.295 1.75e-05 ***
#> FREQMOIN1+      -1.135072   0.399892  -2.838  0.00453 ** 
#> FREQMOIN1++      0.003091   0.553041   0.006  0.99554    
#> FREQMOIN10       0.375908   0.410909   0.915  0.36029    
#> FREQMOIN4-       0.263781   0.357120   0.739  0.46013    
#> FREQMOIN4=       0.356566   0.386542   0.922  0.35629    
#> FREQMOIN4+       0.181192   0.465909   0.389  0.69735    
#> FREQMOIN4++      0.495261   0.603672   0.820  0.41198    
#> FREQMOIN40      -0.145814   0.708541  -0.206  0.83695    
#> FREQMOIN5-       0.857100   0.338675   2.531  0.01138 *  
#> FREQMOIN5=       0.769210   0.390693   1.969  0.04897 *  
#> FREQMOIN5+       1.361202   0.459768   2.961  0.00307 ** 
#> FREQMOIN5++      1.496034   0.632183   2.366  0.01796 *  
#> FREQMOIN50       0.250326   0.546067   0.458  0.64665    
#> FREQMOIN7-      -0.575077   0.326687  -1.760  0.07835 .  
#> FREQMOIN7=      -0.640220   0.324463  -1.973  0.04848 *  
#> FREQMOIN7+      -0.870823   0.379746  -2.293  0.02184 *  
#> FREQMOIN7++     -1.052905   0.551954  -1.908  0.05644 .  
#> FREQMOIN70      -1.083625   0.375454  -2.886  0.00390 ** 
#> cspCSP-          0.105528   0.258619   0.408  0.68324    
#> cspCSP+          0.262907   0.254984   1.031  0.30251    
#> diploBrevet     -0.838021   1.091642  -0.768  0.44268    
#> diploCAP        -1.425941   0.902812  -1.579  0.11423    
#> diploBac        -0.931836   0.846302  -1.101  0.27087    
#> diploBac+2      -1.075344   0.844097  -1.274  0.20268    
#> diploBac+3      -0.738035   0.842987  -0.875  0.38130    
#> diploBac+4      -1.722082   0.920949  -1.870  0.06150 .  
#> diploBac+5      -0.870453   0.852017  -1.022  0.30695    
#> diplo>Bac+5     -1.815345   0.929119  -1.954  0.05072 .  
#> diplonsp        12.619193 535.412079   0.024  0.98120    
#> revM500_1000     0.263770   0.862065   0.306  0.75962    
#> revM1001_2500    0.268349   0.736067   0.365  0.71543    
#> revM2501_4000    0.253633   0.734862   0.345  0.72999    
#> revM4001_6000    0.210429   0.744630   0.283  0.77749    
#> revM6001_9000    0.502246   0.785080   0.640  0.52234    
#> revP9000         1.161826   0.834541   1.392  0.16387    
#> revJnsp          2.303039   1.647849   1.398  0.16223    
#> revJnspr         0.403519   0.916985   0.440  0.65990    
#> habappartement  -0.159148   0.178277  -0.893  0.37202    
#> TELEINPER1--     0.288838   0.508239   0.568  0.56982    
#> TELEINPER1+     -0.042715   0.364478  -0.117  0.90671    
#> TELEINPER1++    -0.103108   0.438769  -0.235  0.81421    
#> TELEINPER1=     -0.414110   0.387251  -1.069  0.28491    
#> TELEINPER1NA    -0.922339   0.409479  -2.252  0.02429 *  
#> TELEINPER2--    -1.309066   0.571771  -2.289  0.02205 *  
#> TELEINPER2+     -0.060243   0.399870  -0.151  0.88025    
#> TELEINPER2++    -0.497043   0.490151  -1.014  0.31055    
#> TELEINPER2=     -0.565227   0.419653  -1.347  0.17801    
#> TELEINPER2NA           NA         NA      NA       NA    
#> change_tele      0.187542   0.045438   4.127 3.67e-05 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 988.80  on 763  degrees of freedom
#> Residual deviance: 879.02  on 713  degrees of freedom
#> AIC: 981.02
#> 
#> Number of Fisher Scoring iterations: 12
Code


  

library(tidymodels)
library(hstats)
set.seed(123)
data_split <- initial_split(data, prop = 0.75)
train_data <- training(data_split)
test_data <- testing(data_split)

# Create a decision tree model specification
tree_spec <- decision_tree() %>%
 set_engine("rpart") %>%
 set_mode("classification")


rf_spec <- rand_forest(trees = 300) |> 
  set_mode("classification") |> 
  set_engine("ranger", num.threads = NULL, seed = 49)

# Fit the model to the training data
model <- rf_spec %>%
 fit(factor(change) ~ ., data = train_data)


# predict() gives No/Yes columns
kable(table(test_data$change,predict(model, test_data, type = "class")|>pull(.pred_class)))
FALSE TRUE
FALSE 107 15
TRUE 47 22
Code
# .pred_No .pred_Yes
#    0.981    0.0185
# We need to extract only the "Yes" probabilities
pf <- function(m, X) {
  predict(m, X, type = "prob")$.pred_TRUE
}

pf(model, head(test_data))
#> [1] 0.2932646 0.2843280 0.5075066 0.4836971 0.2666429 0.2861905


imp <- perm_importance(
  model, X = test_data, y = "change", v = colnames(test_data|>select(-change)), pred_fun = pf, loss = "logloss",normalize=TRUE)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%

plot(imp) +
  xlab("Increase in test logloss")

Code



library(patchwork)

# All PDP in one patchwork
p <- lapply(c("csp","hab","age","diplo","depf","change_tele","FREQMOIN1","FREQMOIN4","FREQMOIN5","FREQMOIN7","TELEINPER1","TELEINPER2"), function(x) plot(partial_dep(model, v = x, X = train_data, pred_fun = pf)))
wrap_plots(p) 

Code

model <- tree_spec %>%
 fit(factor(change) ~ ., data = train_data)

# Load the library
library(rpart.plot)
# Plot the decision tree
rpart.plot(model$fit, type = 4, extra = 101, under = TRUE, cex = 0.8, box.palette = "auto")

Code
data = data |> select(-change) |> mutate(change_tele=change_tele>0)  

fit=glm(change_tele ~ FREQMOIN1 + FREQMOIN4 + FREQMOIN5 + FREQMOIN7 + csp + diplo + rev+hab+TELEINPER1+TELEINPER2 ,data=data,family = binomial)
summary(fit)
#> 
#> Call:
#> glm(formula = change_tele ~ FREQMOIN1 + FREQMOIN4 + FREQMOIN5 + 
#>     FREQMOIN7 + csp + diplo + rev + hab + TELEINPER1 + TELEINPER2, 
#>     family = binomial, data = data)
#> 
#> Coefficients: (1 not defined because of singularities)
#>                  Estimate Std. Error z value Pr(>|z|)  
#> (Intercept)    -1.523e+01  5.294e+02  -0.029   0.9770  
#> FREQMOIN1-     -4.133e-03  2.674e-01  -0.015   0.9877  
#> FREQMOIN1=     -6.020e-01  2.843e-01  -2.117   0.0342 *
#> FREQMOIN1+     -3.445e-01  4.364e-01  -0.790   0.4298  
#> FREQMOIN1++    -6.618e-01  6.610e-01  -1.001   0.3168  
#> FREQMOIN10     -1.813e-01  4.362e-01  -0.416   0.6777  
#> FREQMOIN4-     -3.884e-01  3.586e-01  -1.083   0.2788  
#> FREQMOIN4=      3.108e-02  3.978e-01   0.078   0.9377  
#> FREQMOIN4+      4.934e-02  4.771e-01   0.103   0.9176  
#> FREQMOIN4++     8.408e-02  6.966e-01   0.121   0.9039  
#> FREQMOIN40      2.667e-01  7.194e-01   0.371   0.7109  
#> FREQMOIN5-     -5.844e-01  3.409e-01  -1.714   0.0865 .
#> FREQMOIN5=     -6.785e-01  3.981e-01  -1.704   0.0883 .
#> FREQMOIN5+     -1.730e-01  4.688e-01  -0.369   0.7121  
#> FREQMOIN5++    -2.253e+00  1.128e+00  -1.997   0.0458 *
#> FREQMOIN50     -1.047e+00  5.518e-01  -1.898   0.0577 .
#> FREQMOIN7-      3.010e-01  3.485e-01   0.864   0.3878  
#> FREQMOIN7=     -5.772e-02  3.532e-01  -0.163   0.8702  
#> FREQMOIN7+      1.664e-01  4.035e-01   0.412   0.6801  
#> FREQMOIN7++    -6.588e-01  6.854e-01  -0.961   0.3365  
#> FREQMOIN70      5.151e-01  3.797e-01   1.357   0.1749  
#> cspCSP-        -5.353e-01  2.727e-01  -1.963   0.0496 *
#> cspCSP+         2.507e-02  2.681e-01   0.093   0.9255  
#> diploBrevet     1.432e+01  5.294e+02   0.027   0.9784  
#> diploCAP        1.390e+01  5.294e+02   0.026   0.9791  
#> diploBac        1.345e+01  5.294e+02   0.025   0.9797  
#> diploBac+2      1.356e+01  5.294e+02   0.026   0.9796  
#> diploBac+3      1.372e+01  5.294e+02   0.026   0.9793  
#> diploBac+4      1.314e+01  5.294e+02   0.025   0.9802  
#> diploBac+5      1.339e+01  5.294e+02   0.025   0.9798  
#> diplo>Bac+5     1.359e+01  5.294e+02   0.026   0.9795  
#> diplonsp        2.957e+01  1.549e+03   0.019   0.9848  
#> revM500_1000    1.071e+00  1.215e+00   0.881   0.3782  
#> revM1001_2500   1.233e+00  1.100e+00   1.121   0.2623  
#> revM2501_4000   1.285e+00  1.098e+00   1.171   0.2418  
#> revM4001_6000   1.393e+00  1.102e+00   1.264   0.2061  
#> revM6001_9000   1.149e+00  1.138e+00   1.009   0.3129  
#> revP9000        1.219e+00  1.178e+00   1.034   0.3009  
#> revJnsp        -1.428e+01  9.041e+02  -0.016   0.9874  
#> revJnspr       -2.024e-02  1.350e+00  -0.015   0.9880  
#> habappartement  8.725e-02  1.950e-01   0.447   0.6546  
#> TELEINPER1--    4.353e-01  5.515e-01   0.789   0.4300  
#> TELEINPER1+     1.011e+00  4.045e-01   2.499   0.0125 *
#> TELEINPER1++    1.075e+00  4.675e-01   2.299   0.0215 *
#> TELEINPER1=     4.409e-01  4.230e-01   1.042   0.2972  
#> TELEINPER1NA   -2.978e-01  4.776e-01  -0.624   0.5328  
#> TELEINPER2--   -1.602e-01  5.831e-01  -0.275   0.7835  
#> TELEINPER2+    -4.540e-02  4.453e-01  -0.102   0.9188  
#> TELEINPER2++    1.692e-01  5.210e-01   0.325   0.7454  
#> TELEINPER2=     1.167e-02  4.616e-01   0.025   0.9798  
#> TELEINPER2NA           NA         NA      NA       NA  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 909.93  on 763  degrees of freedom
#> Residual deviance: 766.62  on 714  degrees of freedom
#> AIC: 866.62
#> 
#> Number of Fisher Scoring iterations: 14
Code





data_split <- initial_split(data, prop = 0.75)
train_data <- training(data_split)
test_data <- testing(data_split)

# Create a decision tree model specification
tree_spec <- decision_tree() %>%
 set_engine("rpart") %>%
 set_mode("classification")


rf_spec <- rand_forest(trees = 300) |> 
  set_mode("classification") |> 
  set_engine("ranger", num.threads = NULL, seed = 49)

# Fit the model to the training data
model <- rf_spec %>%
 fit(factor(change_tele) ~ ., data = train_data)


# predict() gives No/Yes columns
kable(table(test_data$change_tele,predict(model, test_data, type = "class")|>pull(.pred_class)))
FALSE TRUE
FALSE 133 12
TRUE 32 14
Code
# .pred_No .pred_Yes
#    0.981    0.0185
# We need to extract only the "Yes" probabilities
pf <- function(m, X) {
  predict(m, X, type = "prob")$.pred_TRUE
}

pf(model, head(test_data))
#> [1] 0.3091640 0.2728704 0.4306812 0.4036786 0.3191574 0.5409074


 imp <- perm_importance(
   model, X = test_data, y = test_data$change_tele, pred_fun = pf, loss = "logloss",normalize=TRUE)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
 
plot(imp) +
   xlab("Increase in test logloss")

Code



library(patchwork)

# All PDP in one patchwork
p <- lapply(c("csp","hab","age","diplo","depf","FREQMOIN1","FREQMOIN4","FREQMOIN5","FREQMOIN7","TELEINPER1","TELEINPER2"), function(x) plot(partial_dep(model, v = x, X = train_data, pred_fun = pf)))
wrap_plots(p) 

Code

model <- tree_spec %>%
 fit(factor(change_tele) ~ ., data = train_data)

# Load the library
library(rpart.plot)
# Plot the decision tree
rpart.plot(model$fit, type = 4, extra = 101, under = TRUE, cex = 0.8, box.palette = "auto")