0

How do I apply my function in a pipeline?

This is my df

library(tidyverse)
library(lubridate)

# Sample data: one element per record, dates written as day/month/year strings.
status <- c("exit", "start", "start", "exit", "start", "exit", "exit", "suspended", "start")
# Expected result column; it is not part of `df`.
active_date <- c("1/05/2018", "11/10/2017", "1/05/2018", "1/07/2018", "1/07/2018", "27/09/2018", "27/09/2018", "27/09/2018", "25/10/2018")
start_date <- c("11/10/2017", "11/10/2017", "1/05/2018", "1/05/2018", "1/07/2018", "1/07/2018", "1/07/2018", "27/09/2018", "27/09/2018")
exit_date <- c("1/05/2018", NA, NA, "1/07/2018", NA, "27/09/2018", "27/09/2018", NA, NA)
suspend_start_date <- c(NA, NA, NA, NA, NA, "27/09/2018", "27/09/2018", "27/09/2018", "27/09/2018")
suspend_end_date <- c(NA, NA, NA, NA, NA, NA, "25/10/2018", NA, "25/10/2018")

# Assemble the five character vectors into a tibble (column names are taken
# from the variable names), then parse columns 2 to 5 with dmy().
df <- tibble(
  status,
  start_date,
  exit_date,
  suspend_start_date,
  suspend_end_date
)
df <- mutate_at(df, 2:5, .funs = dmy)

This is my function

#' Pick the active date that corresponds to a status
#'
#' The first matching rule wins:
#' 1. `x` is `"exit"`: `exit_date`
#' 2. `x` is `"suspended"`: `suspend_start_date`
#' 3. `x` is `"start"` and `suspend_end_date` is not `NA`: `suspend_end_date`
#' 4. anything else: `start_date`
#'
#' @param x Status value(s); defaults to a `status` object found in the
#'   calling scope.
#' @param exit_date Value(s) returned for the `"exit"` status.
#' @param suspend_start_date Value(s) returned for the `"suspended"` status.
#' @param suspend_end_date Value(s) returned for a `"start"` status when not
#'   `NA`.
#' @param start_date Fallback value(s).
#' @return The result of `case_when()` over the rules above.
find_active_date <- function(x = status,
                             exit_date,
                             suspend_start_date,
                             suspend_end_date,
                             start_date) {
  is_exit <- x == "exit"
  is_suspended <- x == "suspended"
  is_resumed <- x == "start" & !is.na(suspend_end_date)

  case_when(
    is_exit ~ exit_date,
    is_suspended ~ suspend_start_date,
    is_resumed ~ suspend_end_date,
    TRUE ~ start_date
  )
}

The function works when I put in one piece of input at a time like this:

# Call the function on the first row only, naming every argument explicitly.
find_active_date(
  x = df[["status"]][1],
  exit_date = df[["exit_date"]][1],
  suspend_start_date = df[["suspend_start_date"]][1],
  suspend_end_date = df[["suspend_end_date"]][1],
  start_date = df[["start_date"]][1]
)

This is the desired output

# Attach the expected `active_date` strings to `df`, then parse them with dmy().
output_df <- as_tibble(cbind(df, active_date))
output_df <- mutate(output_df, active_date = dmy(active_date))

This is what I have tried which is not working

# NOTE: this is the failing attempt. find_active_date() takes five arguments
# (x, exit_date, suspend_start_date, suspend_end_date, start_date) but only
# four are passed here: exit_date is left out, so every later argument lands
# one position too early and start_date receives no value.
df %>%
  rowwise %>%
  mutate(active_date = find_active_date(status, 
                                        suspend_start_date, 
                                        suspend_end_date, 
                                        start_date))
0

2 Answers 2

2

We can use pmap with reduce, which keeps the Date class, so no coercion or reconversion is needed afterwards.

library(tidyverse)
# pmap() matches the elements of its input to the function's parameters by
# name, so `status` is supplied under the parameter name `x`, and only the
# five columns the function accepts are passed (this also keeps the call
# re-runnable once `active_date` has been added to `df`).
# reduce(c) then concatenates the per-row results into a single vector.
df$active_date <- df %>%
  select(
    x = status,
    exit_date,
    suspend_start_date,
    suspend_end_date,
    start_date
  ) %>%
  pmap(find_active_date) %>%
  reduce(c)
df
# A tibble: 9 x 6
#  status    start_date exit_date  suspend_start_date suspend_end_date active_date
#  <chr>     <date>     <date>     <date>             <date>           <date>     
#1 exit      2017-10-11 2018-05-01 NA                 NA               2018-05-01 
#2 start     2017-10-11 NA         NA                 NA               2017-10-11 
#3 start     2018-05-01 NA         NA                 NA               2018-05-01 
#4 exit      2018-05-01 2018-07-01 NA                 NA               2018-07-01 
#5 start     2018-07-01 NA         NA                 NA               2018-07-01 
#6 exit      2018-07-01 2018-09-27 2018-09-27         NA               2018-09-27 
#7 exit      2018-07-01 2018-09-27 2018-09-27         2018-10-25       2018-09-27 
#8 suspended 2018-09-27 NA         2018-09-27         NA               2018-09-27 
#9 start     2018-09-27 NA         2018-09-27         2018-10-25       2018-10-25 

Or using base R with Map

# Map() forwards the names of its inputs as argument names, so build the
# argument list explicitly: `status` goes in under the parameter name `x`, and
# only the five columns the function accepts are included.
map_args <- list(
  x = df$status,
  exit_date = df$exit_date,
  suspend_start_date = df$suspend_start_date,
  suspend_end_date = df$suspend_end_date,
  start_date = df$start_date
)
# The inner do.call() runs the function once per row; the outer one
# concatenates the per-row results into a single vector.
do.call(c, do.call(Map, c(f = find_active_date, map_args)))

NOTE: The first parameter of the function is named 'x'. Because pmap and Map match their inputs to parameters by name, the 'status' column has to be supplied under the name 'x'.

NOTE2: Neither solution requires any coercion to the Date class afterwards.

Sign up to request clarification or add additional context in comments.

Comments

1

Your rowwise solution works, but you were missing the exit_date argument.

library(dplyr)

# Evaluate the function one row at a time. All five arguments are passed by
# name so none can be skipped or shifted, and ungroup() drops the row-wise
# grouping again so later verbs operate on whole columns.
df %>%
  rowwise() %>%
  mutate(
    active_date = find_active_date(
      x = status,
      exit_date = exit_date,
      suspend_start_date = suspend_start_date,
      suspend_end_date = suspend_end_date,
      start_date = start_date
    )
  ) %>%
  ungroup()


# A tibble: 9 x 6
#  status    start_date exit_date  suspend_start_date suspend_end_date active_date
#  <chr>     <date>     <date>     <date>             <date>           <date>     
#1 exit      2017-10-11 2018-05-01 NA                 NA               2018-05-01 
#2 start     2017-10-11 NA         NA                 NA               2017-10-11 
#3 start     2018-05-01 NA         NA                 NA               2018-05-01 
#4 exit      2018-05-01 2018-07-01 NA                 NA               2018-07-01 
#5 start     2018-07-01 NA         NA                 NA               2018-07-01 
#6 exit      2018-07-01 2018-09-27 2018-09-27         NA               2018-09-27 
#7 exit      2018-07-01 2018-09-27 2018-09-27         2018-10-25       2018-09-27 
#8 suspended 2018-09-27 NA         2018-09-27         NA               2018-09-27 
#9 start     2018-09-27 NA         2018-09-27         2018-10-25       2018-10-25 

Another option is to use pmap_dbl from purrr, which returns the dates as numeric values that you can convert back afterwards with as.Date.

library(dplyr)
library(purrr)

# Apply the function across the five columns in parallel, collecting the
# results as doubles, then convert those numbers back to dates in a second
# step.
df %>%
  mutate(
    active_date = pmap_dbl(
      list(status, exit_date, suspend_start_date, suspend_end_date, start_date),
      find_active_date
    )
  ) %>%
  mutate(active_date = as.Date(active_date, origin = "1970-01-01"))

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.