Use when creating multiple models which vary by one parameter. See example below.
Usage
map_models(
df,
params,
model_str,
engine = parsnip::set_engine(object = parsnip::logistic_reg(), engine = "glm"),
rm_raw_model = TRUE
)
Arguments
- df
Data frame.
- params
A character vector of values to substitute for `.x` in `model_str`.
- model_str
A model formula as a string, containing one variable as the placeholder `.x` (written in braces, e.g. `{.x}`, as in the example below).
- engine
A `parsnip` engine.
- rm_raw_model
If `TRUE` (the default), the raw models are removed.
Details
Ideally, aim to supply a `df` without missing data. Otherwise, check the number of observations used by each model (`nobs`, found in the `fit` column under `model_glance`) to see whether many (or any) observations were removed.
Examples
# dummy dataset
mtcars2 <- mtcars %>%
tibble::rownames_to_column(var = "make") %>%
dplyr::mutate("is_merc" = ifelse(stringr::str_detect(.data[["make"]], pattern = "^Merc"),
yes = 1,
no = 0 )) %>%
tibble::as_tibble() %>%
dplyr::mutate(dplyr::across(tidyselect::starts_with("is_"), as.factor))
# preview dummy dataset
mtcars2
#> # A tibble: 32 × 13
#> make mpg cyl disp hp drat wt qsec vs am gear carb
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 Mazda RX4 … 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 Hornet 4 D… 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 Hornet Spo… 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 Valiant 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 Duster 360 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 Merc 230 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 Merc 280 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows, and 1 more variable: is_merc <fct>
# run `map_models()` - logistic regression to predict Mercedes make
result <-
map_models(
df = mtcars2,
model_str = "is_merc ~ mpg + cyl + disp + {.x}",
params = c(GEAR = 'gear', CARB = 'carb'),
engine = parsnip::set_engine(object = parsnip::logistic_reg(), engine = "glm"),
rm_raw_model = FALSE
)
#> gear, 1 of 2
#> carb, 2 of 2
# view result
result
#> # A workflow set/tibble: 2 × 8
#> wflow_id info option result wflow_la…¹ fit model…² model…³
#> <chr> <list> <list> <list> <chr> <list> <chr> <chr>
#> 1 gear <tibble> <opts[0]> <list [0]> GEAR <named list> is_mer… classi…
#> 2 carb <tibble> <opts[0]> <list [0]> CARB <named list> is_mer… classi…
#> # … with abbreviated variable names ¹wflow_label, ²model_formula, ³model_engine
# model outputs are stored in the `fit` column
names(result$fit[[1]])
#> [1] "model_raw" "model_tidy" "model_glance"
# 'tidy' model outputs are under `model_tidy`
result$fit[[1]]$model_tidy
#> # A tibble: 5 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 9.81 8.37 1.17 0.242
#> 2 mpg -0.297 0.228 -1.31 0.191
#> 3 cyl 0.752 0.771 0.975 0.330
#> 4 disp -0.0294 0.0155 -1.89 0.0583
#> 5 gear -0.954 0.951 -1.00 0.316
# 'glance' model outputs are under `model_glance`
result$fit[[1]]$model_glance
#> # A tibble: 1 × 8
#> null.deviance df.null logLik AIC BIC deviance df.residual nobs
#> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 33.6 31 -13.3 36.7 44.0 26.7 27 32
# `model_raw` contains either the raw model under `result`, or an error message under `error`
names(result$fit[[1]]$model_raw)
#> [1] "result" "error"