2

Sample data here

dat <- structure(list(UserEmail = c("[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]"), State = c("NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW"), date = structure(c(14853, 14883, 14975, 14975, 15006, 
15006, 15006, 15006, 15065, 15095, 15156, 15156, 15187, 15218, 
15248, 15309, 15309, 15340, 15340, 15340, 15371, 15371, 15431, 
15706, 15706, 15765, 15857, 15918, 16010, 16191, 16222, 16252, 
16283, 16344, 16375, 16375, 16375, 16436, 16526, 16617, 16617, 
16648, 16648, 16709, 16709, 16709, 16709, 16709, 16709, 16770, 
16770, 16770, 16770, 16770, 16801, 16801, 16832, 16832, 16832, 
16832, 16861, 16861, 16861, 16861, 16861, 16861, 16861, 16861, 
16892, 16922, 16922, 16922, 16953, 16953, 16953, 16953, 16953, 
16953, 16953, 16953, 16953, 16953, 16953, 16953, 16983, 16983, 
16983, 16983, 16983, 16983, 16983, 17014, 17014, 17014, 17014, 
17014, 17014, 17014, 17014, 17014, 17045, 17045, 17045, 17045, 
17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17106, 17106, 
17106, 17106, 17106, 17136, 17136, 17136, 17167, 17167, 17167, 
17167, 17167, 17167, 17167, 17198, 17198, 17198, 17198, 17198, 
17198, 17198, 17198, 17318, 17318, 17318, 17318, 17348, 17348, 
17348, 17379, 17379, 17379, 17410, 17410, 17440, 17440, 17440, 
17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440, 
17440, 17440, 17440, 17440, 17440, 17440), class = "Date"), rhdv = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), pindone = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0)), row.names = c(NA, -200L), groups = structure(list(
    UserEmail = c("[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]"
    ), .rows = structure(list(155L, 72L, 171:172, 66L, 30L, 174L, 
        c(1L, 13L), 3L, 6L, 22L, 96L, 40L, 173L, 26L, 97L, 70L, 
        c(164L, 167L, 168L), 57:59, 2L, 25L, c(44L, 45L, 46L, 
        47L, 48L, 49L, 50L, 51L, 54L, 55L, 71L, 84L, 87L, 89L, 
        90L, 91L, 94L, 95L, 102L, 105L, 110L, 111L, 112L, 113L, 
        149L, 157L, 158L, 159L, 175L, 179L), 61L, 56L, c(15L, 
        16L, 17L, 18L, 19L, 21L), 98L, 115:147, 4L, 60L, 8L, 
        24L, 154L, 177L, 5L, 34L, 38L, 176L, 28L, 150L, 148L, 
        14L, 35:37, 183L, 12L, 7L, c(153L, 156L), 62:65, 23L, 
        c(178L, 182L), 33L, 103L, 32L, 43L, c(41L, 67L, 68L), 
        108:109, 11L, 180L, 165L, 73:83, 69L, 9L, c(181L, 190L, 
        191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 
        200L), 29L, 31L, c(85L, 86L, 88L, 93L, 101L, 104L, 106L, 
        107L, 114L, 151L, 160L, 161L, 162L, 163L, 166L, 170L), 
        184:189, 100L, 169L, 52L, 10L, 27L, 99L, 20L, 152L, c(42L, 
        53L, 92L), 39L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 75L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Question: For each dat$rhdv record within unique groupings of dat$UserEmail I would like to identify all instances within a 12 month window either side of the date of the dat$rhdv record where dat$pindone == 1. The solution I'm looking for is a new column appended to dat, dat$pindone12, which indicates 0 if the above is false or 1 if the above is true (i.e if true then there should be a dat$pindone == 1 record occurring within 12 months before or after a dat$rhdv == 1 record for that specific dat$UserEmail).

I have been through may iterations of attempted solutions without any success to date. I suspect the beginnings of the solution are

dat$pindone12 <- dat %>% group_by(UserEmail) %>% ifelse(rhdv == 1 & *condition to refer to required date range across any rows* & pindone == 1, 1, 0) 

2 Answers 2

3

It's probably easier to break it down into steps, i.e. work out the 'rhdv - 12mo' and 'rhdv + 12mo' then check whether the pindone's fall within the timeframe. Not sure if this solution will work on your actual data, but perhaps:

library(tidyverse)
library(lubridate)

dat2 <- dat %>%
  group_by(UserEmail) %>%
  mutate(Start_date = ifelse(rhdv == 1, date - years(1), NA),
         End_date = ifelse(rhdv == 1, date + years(1), NA)) %>%
  mutate(Start_date = as.Date(Start_date, origin = "1970-01-01"),
         End_date = as.Date(End_date, origin = "1970-01-01")) %>%
  fill(c(Start_date, End_date), .direction = "downup") %>%
  mutate(pindone12 = ifelse(date > Start_date &
                              date < End_date &
                              pindone == 1,
                            1, 0))
dat2 %>%
  arrange(desc(pindone12))
#> # A tibble: 200 × 8
#> # Groups:   UserEmail [75]
#>    UserEmail      State date        rhdv pindone Start_date End_date   pindone12
#>    <chr>          <chr> <date>     <dbl>   <dbl> <date>     <date>         <dbl>
#>  1 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  2 samuel.porter… NSW   2016-06-01     1       1 2015-06-01 2017-06-01         1
#>  3 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  4 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  5 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  6 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  7 benradic@gmai… NSW   2010-09-01     0       0 NA         NA                 0
#>  8 gettrapped@op… NSW   2010-10-01     0       0 NA         NA                 0
#>  9 bradley.grove… NSW   2011-01-01     0       0 NA         NA                 0
#> 10 jimt@tadaust.… NSW   2011-01-01     0       0 NA         NA                 0
#> # … with 190 more rows

Created on 2022-06-21 by the reprex package (v2.0.1)

This shows a single UserEmail with 6 pindone12 records; is this what you're expecting to see?

Sign up to request clarification or add additional context in comments.

Comments

2

jared_mamrot solution looks good. I read your directions as: within each group of UserEmail, if a record has rhdv ==1, find records within a year of that date and if any of those other records have pindone ==1, that particular record gets pindone12 = 1, otherwise 0. I think jared's solution is giving you pindone12 = 1 if pindone for that record == 1. Also, his solution is giving pindon12 = 1 for records with rhdv == 0. If that's what you want, great. Based on my understanding:

library(purrr)
library(dplyr)
# Group and nest data
dat %>% 
  group_by(UserEmail) %>% 
  nest() %>% 
  # pmap to retain UserEmail column
  purrr::pmap_dfr( function(UserEmail, data){
    data %>% 
      # tmp data to index within each group
      {. -> tmp
        lapply(1:nrow(tmp), function(i){
          # If more than one record, otherwise pindone12 is 0 because no other records with pindone == 1
          if(nrow(tmp) > 1) {
            tmp %>% 
              # For each record get other records within one year
              filter(date <= (date[i] +365) & date >= (date[i] - 365)) %>% 
              # Find other records that have pindone == 1 and that record has rhdv == 1
              dplyr::mutate(pindone12 = ifelse(any(pindone[-i] == 1 & rhdv[i] == 1), 1, 0),
                            email = UserEmail) %>% 
              # Retain that one record
              dplyr::slice(i)
          } else {
            tmp %>% 
              mutate(pindone12 = 0,
                     email = UserEmail)
          }
        })
      } %>% 
      bind_rows
  })

When I then filter by those records with pindone12 == 1

... %>% filter(pindone12 == 1)
# A tibble: 6 × 6
  State date        rhdv pindone pindone12 email                             
  <chr> <date>     <dbl>   <dbl>     <dbl> <chr>                             
1 NSW   2016-06-01     1       1         1 [email protected]…
2 NSW   2016-06-01     1       0         1 [email protected]…
3 NSW   2016-06-01     1       0         1 [email protected]…
4 NSW   2016-06-01     1       0         1 [email protected]…
5 NSW   2016-06-01     1       0         1 [email protected]…
6 NSW   2016-06-01     1       0         1 [email protected]

3 Comments

Thanks. dat$pindone12 == 1 can occur when dat$rhdv == 0 (i.e in same row of data) as long as the dat$date associated with the dat$pindone == 1 record is within 12 months (forward or backward) of an dat$rhdv == 1 record for the same dat$UserEmail.
It sounds like that's what I'm doing...? but if not you can adjust the mutate or filter.
For this particular subset of data, we should have dat$pindone12 == 1 for all instances where dat$pindone == 1 and dat$UserEmail == "samuel.porter". This is because all of the dat$UserEmail == "samuel.porter" records are submitted on the same date, 2016-06-01, and we have dat$rhdv == 1 records on this date, so therefore all dat$pindone == 1 records are within 12 months of a dat$rhdv == 1 record.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.