library(tidymodels)
library(hstats)

set.seed(123)

# 75/25 train/test split
data_split <- initial_split(data, prop = 0.75)
train_data <- training(data_split)
test_data  <- testing(data_split)

# Single decision tree specification (rpart engine), used further below
tree_spec <- decision_tree() |>
  set_engine("rpart") |>
  set_mode("classification")

# Random forest specification: 300 trees, fixed engine-level seed so the
# ranger fit is reproducible independently of the global RNG state
rf_spec <- rand_forest(trees = 300) |>
  set_mode("classification") |>
  set_engine("ranger", num.threads = NULL, seed = 49)

# Fit the random forest to predict `change` (coerced to a factor) from
# all remaining columns
model <- rf_spec |>
  fit(factor(change) ~ ., data = train_data)

# Confusion matrix on the held-out test set; predict(type = "class")
# returns a tibble with a .pred_class column (levels FALSE/TRUE here)
kable(
  table(
    test_data$change,
    predict(model, test_data, type = "class") |> pull(.pred_class)
  )
)
FALSE
TRUE
FALSE
107
15
TRUE
47
22
Code
# predict(..., type = "prob") returns one probability column per class
# (.pred_FALSE / .pred_TRUE for this target); keep only the probability
# of the positive (TRUE) class for importance and dependence plots.
pf <- function(m, X) {
  predict(m, X, type = "prob")$.pred_TRUE
}

# Quick sanity check on a few test rows
pf(model, head(test_data))

# Permutation importance on the test set: the target is passed by column
# name and excluded from the permuted features; loss is normalized logloss
imp <- perm_importance(
  model,
  X = test_data,
  y = "change",
  v = colnames(test_data |> select(-change)),
  pred_fun = pf,
  loss = "logloss",
  normalize = TRUE
)

plot(imp) + xlab("Increase in test logloss")
Code
library(patchwork)

# Partial dependence of the RF prediction on each predictor,
# all panels combined into a single patchwork figure
pdp_vars <- c(
  "csp", "hab", "age", "diplo", "depf", "change_tele",
  "FREQMOIN1", "FREQMOIN4", "FREQMOIN5", "FREQMOIN7",
  "TELEINPER1", "TELEINPER2"
)
p <- lapply(pdp_vars, function(x) {
  plot(partial_dep(model, v = x, X = train_data, pred_fun = pf))
})
wrap_plots(p)
Code
# Refit with a single decision tree on the same target for an
# interpretable picture of the main splits (overwrites `model`)
model <- tree_spec |>
  fit(factor(change) ~ ., data = train_data)

library(rpart.plot)

# Visualize the fitted rpart object stored in the parsnip fit
rpart.plot(
  model$fit,
  type = 4, extra = 101, under = TRUE, cex = 0.8,
  box.palette = "auto"
)
# Second analysis: same pipeline, new target `change_tele`.
# NOTE(review): no set.seed() is called before this second split, so it
# depends on the RNG state left by the code above — confirm whether an
# explicit seed is wanted for independent reproducibility.
data_split <- initial_split(data, prop = 0.75)
train_data <- training(data_split)
test_data  <- testing(data_split)

# Single decision tree specification (rpart engine), used further below
tree_spec <- decision_tree() |>
  set_engine("rpart") |>
  set_mode("classification")

# Random forest specification: 300 trees, fixed engine-level seed
rf_spec <- rand_forest(trees = 300) |>
  set_mode("classification") |>
  set_engine("ranger", num.threads = NULL, seed = 49)

# Fit the random forest to predict `change_tele` from all other columns
model <- rf_spec |>
  fit(factor(change_tele) ~ ., data = train_data)

# Confusion matrix on the held-out test set (levels FALSE/TRUE)
kable(
  table(
    test_data$change_tele,
    predict(model, test_data, type = "class") |> pull(.pred_class)
  )
)
FALSE
TRUE
FALSE
133
12
TRUE
32
14
Code
# predict(..., type = "prob") returns one probability column per class
# (.pred_FALSE / .pred_TRUE for this target); keep only the probability
# of the positive (TRUE) class.
pf <- function(m, X) {
  predict(m, X, type = "prob")$.pred_TRUE
}

# Quick sanity check on a few test rows
pf(model, head(test_data))

# Permutation importance on the test set (normalized logloss).
# Fix for consistency with the first analysis: pass the target by column
# name and exclude it from the permuted features `v`. Previously the raw
# vector test_data$change_tele was supplied and `v` was omitted, so the
# target column itself was permuted too, producing a meaningless ~0
# importance bar and extra work.
imp <- perm_importance(
  model,
  X = test_data,
  y = "change_tele",
  v = colnames(test_data |> select(-change_tele)),
  pred_fun = pf,
  loss = "logloss",
  normalize = TRUE
)

plot(imp) + xlab("Increase in test logloss")
Code
library(patchwork)

# Partial dependence of the RF prediction on each predictor,
# all panels combined into a single patchwork figure
pdp_vars <- c(
  "csp", "hab", "age", "diplo", "depf",
  "FREQMOIN1", "FREQMOIN4", "FREQMOIN5", "FREQMOIN7",
  "TELEINPER1", "TELEINPER2"
)
p <- lapply(pdp_vars, function(x) {
  plot(partial_dep(model, v = x, X = train_data, pred_fun = pf))
})
wrap_plots(p)
Code
# Refit with a single decision tree on `change_tele` for an
# interpretable picture of the main splits (overwrites `model`)
model <- tree_spec |>
  fit(factor(change_tele) ~ ., data = train_data)

library(rpart.plot)

# Visualize the fitted rpart object stored in the parsnip fit
rpart.plot(
  model$fit,
  type = 4, extra = 101, under = TRUE, cex = 0.8,
  box.palette = "auto"
)