Using the for-loop below I can create a list of all managers above a given employee (essentially a list of an employee's manager, her manager's manager, etc.)
library(dplyr)
library(tidyr)
library(purrr)
# Create test data
ds <-
tibble(
emp_id = c("001", "002", "003", "004", "005"),
mgr_id = c("002", "004", "004", "005", NA)
)
# Hardcoded for-loop example
mgr_ids_above <- vector("list", length = 5)
id <- "001"
for (i in seq_along(mgr_ids_above)) {
mgr_ids_above[[i]] <- ds$mgr_id[ds$emp_id == id]
id <- mgr_ids_above[[i]]
}
# drop NAs
mgr_ids_above <- unlist(mgr_ids_above)
mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]
# return to list format
as.list(mgr_ids_above)
My hope is to apply this for-loop to the entire data frame and save the results in a list-column. I can successfully do this using pmap() to apply a hard-coded for-loop to my data frame, but when I try to write a generalized function, everything falls apart.
# Define custom function with hardcoded data and variable names
get_mgrs_above <- function(id, max_steps = 5){
mgr_ids_above <- vector("list", length = max_steps)
for (i in seq_along(mgr_ids_above)) {
mgr_ids_above[[i]] <- ds$mgr_id[ds$emp_id == id]
id <- mgr_ids_above[[i]]
}
# drop NAs
mgr_ids_above <- unlist(mgr_ids_above)
mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]
# return to list format
as.list(mgr_ids_above)
}
# Apply custom function
ds_mgrs_above <-
ds %>%
mutate(
ranks_above = pmap(
list(id = emp_id),
get_mgrs_above
)
)
The output of the above code is
A tibble: 5 x 3
emp_id mgr_id ranks_above
<chr> <chr> <list>
1 001 002 <list [3]>
2 002 004 <list [2]>
3 003 004 <list [2]>
4 004 005 <list [1]>
5 005 NA <list [0]>
And the content of the ranks_above list column look like
ds_mgrs_above$ranks_above[[1]]
[[1]]
[1] "002"
[[2]]
[1] "004"
[[3]]
[1] "005"
My failing function with all data and variables supplied as arguments fails with the message, "Error in mutate_impl(.data, dots) : Evaluation error: Element 1 has length 2, not 1 or 5..":
get_mgrs_above <- function(
data,
id = emp_id,
mgr_id = mgr_id,
emp_id = emp_id,
max_steps = 5){
mgr_ids_above <- vector("list", length = max_steps)
for (i in seq_along(mgr_ids_above)) {
mgr_ids_above[[i]] <- data$mgr_id[data$emp_id == id]
id <- mgr_ids_above[[i]]
}
# drop NAs
mgr_ids_above <- unlist(mgr_ids_above)
mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]
# return to list format
as.list(mgr_ids_above)
}
ds %>%
mutate(
ranks_above = pmap(
list(
data = ds,
id = emp_id,
mgr_id = mgr_id,
emp_id = emp_id,
max_steps = 5
),
get_mgrs_above
)
)
To avoid confusion, this is a post about how to write a generalizable function that will create a list column from two columns. This is one component of a larger data munging attempt on a data frame with ~15k employees.