1

Using the for-loop below I can create a list of all managers above a given employee (essentially a list of an employee's manager, her manager's manager, etc.)

library(dplyr)
library(tidyr)
library(purrr)

# Create test data 
ds <-
  tibble(
    emp_id = c("001", "002", "003", "004", "005"),
    mgr_id  = c("002", "004", "004", "005", NA)
  )

# Hardcoded for-loop example 
  mgr_ids_above <- vector("list", length = 5)
  id <- "001"

  for (i in seq_along(mgr_ids_above)) {
    mgr_ids_above[[i]] <- ds$mgr_id[ds$emp_id == id]

    id <- mgr_ids_above[[i]]
  }

  # drop NAs
  mgr_ids_above <- unlist(mgr_ids_above)
  mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]

  # return to list format
  as.list(mgr_ids_above)

My hope is to apply this for-loop to the entire data frame and save the results in a list-column. I can successfully do this using pmap() to apply a hard-coded for-loop to my data frame, but when I try to write a generalized function, everything falls apart.

# Define custom function with hardcoded data and variable names
get_mgrs_above <- function(id, max_steps = 5){

  mgr_ids_above <- vector("list", length = max_steps)

  for (i in seq_along(mgr_ids_above)) {
    mgr_ids_above[[i]] <- ds$mgr_id[ds$emp_id == id]

    id <- mgr_ids_above[[i]]
  }

  # drop NAs
  mgr_ids_above <- unlist(mgr_ids_above)
  mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]

  # return to list format
  as.list(mgr_ids_above)
}

# Apply custom function
ds_mgrs_above <-
  ds %>%
  mutate(
    ranks_above = pmap(
      list(id = emp_id),
      get_mgrs_above
    )
  )

The output of the above code is

A tibble: 5 x 3
emp_id mgr_id ranks_above
  <chr>  <chr>  <list>     
1 001    002    <list [3]> 
2 002    004    <list [2]> 
3 003    004    <list [2]> 
4 004    005    <list [1]> 
5 005    NA     <list [0]>

And the content of the ranks_above list column look like

ds_mgrs_above$ranks_above[[1]]

[[1]]
[1] "002"

[[2]]
[1] "004"

[[3]]
[1] "005"

My failing function with all data and variables supplied as arguments fails with the message, "Error in mutate_impl(.data, dots) : Evaluation error: Element 1 has length 2, not 1 or 5..":

get_mgrs_above <- function(
  data,
  id = emp_id,
  mgr_id = mgr_id,
  emp_id = emp_id,
  max_steps = 5){

  mgr_ids_above <- vector("list", length = max_steps)

  for (i in seq_along(mgr_ids_above)) {
    mgr_ids_above[[i]] <- data$mgr_id[data$emp_id == id]

    id <- mgr_ids_above[[i]]
  }

  # drop NAs
  mgr_ids_above <- unlist(mgr_ids_above)
  mgr_ids_above <- mgr_ids_above[!is.na(mgr_ids_above)]

  # return to list format
  as.list(mgr_ids_above)
}

ds %>%
  mutate(
    ranks_above = pmap(
      list(
        data = ds,
        id = emp_id,
        mgr_id = mgr_id,
        emp_id = emp_id,
        max_steps = 5
      ),
      get_mgrs_above
    )
  )

To avoid confusion, this is a post about how to write a generalizable function that will create a list column from two columns. This is one component of a larger data munging attempt on a data frame with ~15k employees.

1
  • @CalumYou, yes, I have updated with expected output examples. Commented Aug 22, 2018 at 23:23

2 Answers 2

1

The way your loop was structured was confusing me so I changed it by using while. This allows just an application of map at the end.

get_mgrs_above <- function(id, data = NULL, max_steps = 5) {

  stopifnot(!is.null(data))

  mgr_ids_above <- list()

  mgr  <- id
  iter <- 0

  while (iter < max_steps & !is.na(mgr)) {

    mgr <- data$mgr_id[data$emp_id == mgr]

    if (!is.na(mgr)) {
      mgr_ids_above <- append(mgr_ids_above, mgr)
    }

    iter <- iter + 1

  }

  return(mgr_ids_above)

}

ds$ranks_above <- map(ds$emp_id, get_mgrs_above, data = ds)
Sign up to request clarification or add additional context in comments.

Comments

1

Here's my attempt at doing what I think you want. I couldn't figure out a way to adapt your code but I hope this approach makes sense. Basically you want to get the full command chain for each employee from a single table of employee ids and their direct manager's id. Here I make that table lookup and repeatedly join it onto an input dataframe that is basically just the employee ids, a command_chain list-col that I add each additional manager to, and a current_join column that stores the id to be looked up in each iteration.

Then we can simply wrap the join_once function in a join_all which will keep calling it until we've reached the end of all the command chains (there are only NAs). I cleaned up the output to discard the NAs and print the command chains as comma-separated strings so you can see what it's done.

To some extent I don't know if this is particularly efficient because you have to join a lot of variables that might not need it (e.g. here 004 is joined three more times than necessary) but it's at least conceptually simple I think.

library(tidyverse)
lookup <- tibble(
    emp_id = c("001", "002", "003", "004", "005"),
    mgr_id = c("002", "004", "004", "005", NA)
  )

input <- lookup %>%
  select(emp_id) %>%
  mutate(command_chain = emp_id, current_join = emp_id)

join_once <- function(df) {
  df %>%
    left_join(lookup, by = c("current_join" = "emp_id")) %>%
    mutate(
      command_chain = map2(command_chain, mgr_id, ~ c(.x, .y)),
      current_join = mgr_id
    ) %>%
    select(-mgr_id)
}

join_all <- function(df) {
  output <- df
  while (!all(is.na(output$current_join))) {
    output <- join_once(output)
  }
  return(output)
}

output <- join_all(input)
output %>%
  mutate(
    command_chain = map(command_chain, ~ discard(.x, is.na)),
    cc_as_string = map_chr(command_chain, ~ str_c(.x, collapse = ","))
    ) %>%
  select(-current_join)
#> # A tibble: 5 x 3
#>   emp_id command_chain cc_as_string   
#>   <chr>  <list>        <chr>          
#> 1 001    <chr [4]>     001,002,004,005
#> 2 002    <chr [3]>     002,004,005    
#> 3 003    <chr [3]>     003,004,005    
#> 4 004    <chr [2]>     004,005        
#> 5 005    <chr [1]>     005

Created on 2018-08-22 by the reprex package (v0.2.0).

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.