Create multiple models which vary by one parameter — map_models

Use when creating multiple models which vary by one parameter. See example below.

Usage

map_models(
  df,
  params,
  model_str,
  engine = parsnip::set_engine(object = parsnip::logistic_reg(), engine = "glm"),
  rm_raw_model = TRUE
)

Arguments

df: Data frame.
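params: A character vector of values, each substituted in turn for {.x} in model_str. The vector may be named; in the example below, the names appear in the wflow_label column of the result.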
model_str: A model formula as a string, containing the placeholder {.x} where each value of params is substituted (e.g. "is_merc ~ mpg + cyl + disp + {.x}").
engine: A parsnip model specification with its engine set. Defaults to logistic regression with the "glm" engine.
rm_raw_model: If TRUE (the default), the raw models are removed.

Value

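A data frame (a workflow set tibble, as shown in the example below) with one row per value in params. Model outputs are stored in the fit list-column.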

Details

Ideally, supply a df without missing data. Otherwise, check the number of observations used by each model (nobs in model_glance, within the fit column) to see whether any observations were removed.

Examples

# dummy dataset
mtcars2 <- mtcars %>%
 tibble::rownames_to_column(var = "make") %>%
 dplyr::mutate("is_merc" = ifelse(stringr::str_detect(.data[["make"]], pattern = "^Merc"),
                                  yes = 1,
                                  no = 0 )) %>%
 tibble::as_tibble() %>%
 dplyr::mutate(dplyr::across(tidyselect::starts_with("is_"), as.factor))

# preview dummy dataset
mtcars2
#> # A tibble: 32 × 13
#>    make          mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 Mazda RX4    21       6  160    110  3.9   2.62  16.5     0     1     4     4
#>  2 Mazda RX4 …  21       6  160    110  3.9   2.88  17.0     0     1     4     4
#>  3 Datsun 710   22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
#>  4 Hornet 4 D…  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#>  5 Hornet Spo…  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
#>  6 Valiant      18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#>  7 Duster 360   14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
#>  8 Merc 240D    24.4     4  147.    62  3.69  3.19  20       1     0     4     2
#>  9 Merc 230     22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
#> 10 Merc 280     19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> # … with 22 more rows, and 1 more variable: is_merc <fct>

# run `map_models()` - logistic regression to predict Mercedes make
result <-
map_models(
  df = mtcars2,
  model_str = "is_merc ~ mpg + cyl + disp + {.x}",
  params = c(GEAR = 'gear', CARB = 'carb'),
  engine = parsnip::set_engine(object = parsnip::logistic_reg(), engine = "glm"),
  rm_raw_model = FALSE
)
#> gear, 1 of 2
#> carb, 2 of 2

# view result
result
#> # A workflow set/tibble: 2 × 8
#>   wflow_id info     option    result     wflow_la…¹ fit          model…² model…³
#>   <chr>    <list>   <list>    <list>     <chr>      <list>       <chr>   <chr>  
#> 1 gear     <tibble> <opts[0]> <list [0]> GEAR       <named list> is_mer… classi…
#> 2 carb     <tibble> <opts[0]> <list [0]> CARB       <named list> is_mer… classi…
#> # … with abbreviated variable names ¹wflow_label, ²model_formula, ³model_engine

# model outputs are stored in the `fit` column
names(result$fit[[1]])
#> [1] "model_raw"    "model_tidy"   "model_glance"

# 'tidy' model outputs are under `model_tidy`
result$fit[[1]]$model_tidy
#> # A tibble: 5 × 5
#>   term        estimate std.error statistic p.value
#>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
#> 1 (Intercept)   9.81      8.37       1.17   0.242 
#> 2 mpg          -0.297     0.228     -1.31   0.191 
#> 3 cyl           0.752     0.771      0.975  0.330 
#> 4 disp         -0.0294    0.0155    -1.89   0.0583
#> 5 gear         -0.954     0.951     -1.00   0.316 

# 'glance' model outputs are under `model_glance`
result$fit[[1]]$model_glance
#> # A tibble: 1 × 8
#>   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
#>           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
#> 1          33.6      31  -13.3  36.7  44.0     26.7          27    32

# `model_raw` contains either the raw model under `result`, or an error message under `error`
names(result$fit[[1]]$model_raw)
#> [1] "result" "error"