0

I have a dataset with an id and measurements taken at different times. Some measurements are taken at time 0 while others are taken at time 1, which results in some missing values. I want to combine the rows for time 0 and time 1, since both are baseline measurements, into a new dataset in which time starts from 1. Basically, I want to merge time 0 and time 1 for each id, but I cannot think of a way to do that. To show what my data looks like, here is some simulated data.

# Simulate a small longitudinal data set: N subjects, each with a random
# number of follow-up visits (between 2 and 6) and five standard-normal
# measurements (a-e) per visit. Two partially missing baseline rows
# (time 0 and time 1) are then added for every subject.
set.seed(234)

N <- 3
t <- sample(2:6, N, replace = TRUE) # number of follow-up visits per subject
id <- rep(seq_len(N), times = t)
n <- length(id)

# The columns are drawn in the order a, b, c, d, e.
x <- cbind(
  a = rnorm(n, 0, 1),
  b = rnorm(n, 0, 1),
  c = rnorm(n, 0, 1),
  d = rnorm(n, 0, 1),
  e = rnorm(n, 0, 1)
)

# Follow-up visits are numbered 2, 3, ..., t[i] + 1 within each subject.
# sequence(t) yields 1..t[i] for every subject, whatever the value of N.
time <- sequence(t) + 1

x1 <- cbind(id, time, x)

######### Add missing data

# Baseline rows, in column order id, time, a, b, c, d, e. At time 0 the
# measurements b and c are missing; at time 1 the measurements a and d are.
baseline <- rbind(
  c(1, 0, 0.98, NA, NA, 0.71, 0.85),
  c(1, 1, NA, 0.85, 0.62, NA, 0.85),
  c(2, 0, 0.81, NA, NA, 0.68, 0.87),
  c(2, 1, NA, 0.97, 0.83, NA, 0.85),
  c(3, 0, 0.87, NA, NA, 0.72, 0.83),
  c(3, 1, NA, 0.98, 0.71, NA, 0.86)
)
x7 <- rbind(x1, baseline)

# create a new matrix with the missing values, sorted by id and then time

newx <- x7[order(x7[, "id"], x7[, "time"]), ]

newx
       id time          a          b           c           d            e
  [1,]  1    0  0.9800000         NA          NA  0.71000000  0.850000000
  [2,]  1    1         NA  0.8500000  0.62000000          NA  0.850000000
  [3,]  1    2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
  [4,]  1    3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
  [5,]  1    4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
  [6,]  1    5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
  [7,]  1    6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
  [8,]  2    0  0.8100000         NA          NA  0.68000000  0.870000000
  [9,]  2    1         NA  0.9700000  0.83000000          NA  0.850000000
 [10,]  2    2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
 [11,]  2    3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
 [12,]  2    4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
 [13,]  2    5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
 [14,]  2    6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
 [15,]  3    0  0.8700000         NA          NA  0.72000000  0.830000000
 [16,]  3    1         NA  0.9800000  0.71000000          NA  0.860000000
 [17,]  3    2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
 [18,]  3    3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481

2 Answers 2

3

I am not sure if this is what you want, since you don't show the expected result. This uses na.locf from package zoo to roll the measurements backwards (fromLast = TRUE), filling in each NA with the value that follows it. Package dplyr supplies the grouping (group_by) and the column-wise mutate; both operate on data frames, so the matrix is converted to a data frame first.

library(dplyr)
library(zoo)
# Merge the two baseline rows (time 0 and time 1) of every id into a single
# row labelled time 1.
newx %>%
  as.data.frame() %>%
  group_by(id) %>%
  # Within each id, replace every NA with the next observed value below it
  # (next observation carried backward). na.rm = FALSE keeps every column at
  # its original length; with zoo's default (na.rm = TRUE) NAs that cannot be
  # filled are dropped, which makes mutate() fail on a group ending in NA.
  mutate(
    across(everything(), ~ na.locf(.x, fromLast = TRUE, na.rm = FALSE))
  ) %>%
  ungroup() %>%
  # The time-0 row now carries the merged baseline, so drop the time-1 row ...
  filter(time != 1) %>%
  # ... and relabel time 0 as time 1.
  mutate(time = if_else(time == 0, 1, time))

#       id  time          a          b           c           d            e
# 1      1     1  0.9800000  0.8500000  0.62000000  0.71000000  0.850000000
# 2      1     2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
# 3      1     3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
# 4      1     4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5      1     5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
# 6      1     6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
# 7      2     1  0.8100000  0.9700000  0.83000000  0.68000000  0.870000000
# 8      2     2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
# 9      2     3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
# 10     2     4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
# 11     2     5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
# 12     2     6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
# 13     3     1  0.8700000  0.9800000  0.71000000  0.72000000  0.830000000
# 14     3     2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
# 15     3     3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481
Sign up to request clarification or add additional context in comments.

2 Comments

Yes. Thanks. That does it. Now I just need to get rid of time 1 for all id and rename time 0 to time 1.
edited to filter out time 1 and rename time 0 to time 1
1

We can also use data.table

library(data.table)
library(zoo)
# Merge the two baseline rows (time 0 and time 1) of every id into a single
# row labelled time 1.
# The backward fill has to run BEFORE the time-1 rows are dropped: filtering
# first would fill the time-0 NAs from the time-2 row instead of from time 1.
# na.rm = FALSE keeps every column at its original length within each id.
as.data.table(newx)[
  , lapply(.SD, na.locf, fromLast = TRUE, na.rm = FALSE), by = id
][time != 1][time == 0, time := 1][]
# id time          a          b           c           d            e
# 1:  1    1  0.9800000  0.8500000  0.62000000  0.71000000  0.850000000
# 2:  1    2  0.7590390 -0.8716028 -0.30554099 -0.30528521  0.030963334
# 3:  1    3  0.3713058  1.1876234  0.86956546 -0.28108275  0.669563187
# 4:  1    4  0.5758514 -0.6672287 -1.06121591 -1.16458396 -0.140668367
# 5:  1    5 -0.5703207  0.5383396 -0.09635967  0.09034109  1.281077794
# 6:  1    6  0.1198567  0.4905632  0.47460932  1.01451692 -0.621039707
# 7:  2    1  0.8100000  0.9700000  0.83000000  0.68000000  0.870000000
# 8:  2    2  0.2095484 -1.0216529 -0.02671707  0.37160636  0.160315383
# 9:  2    3 -0.1481357 -0.3726091  1.10167492  1.70677625 -0.860442148
#10:  2    4  0.6433900  1.3251178 -0.26842418  0.92790039  0.318602469
#11:  2    5  1.1348350 -0.7313432  0.01035965  1.05747589 -1.829611181
#12:  2    6  0.1995994  0.7625386  0.25897152 -1.05112649 -1.121045817
#13:  3    1  0.8700000  0.9800000  0.71000000  0.72000000  0.830000000
#14:  3    2  0.2987197  0.3275333 -0.39459737  2.48875683  0.002293782
#15:  3    3 -0.3191671 -1.1440187 -0.48873668 -0.32581308 -0.289496481

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.