3

I have a large dataframe which is effectively combined output from a nested list using do.call(rbind, nested_list)

The output has the same number of rows for each list element (e.g. 5 rows per list) and I need to add a column which has a unique numeric code for each list (or group). How can I write a loop to reproduce the group column I have included in the example below, e.g. the five rows have a group value == 1, rows 6 to 10 have a group value == 2, rows 11 to 15 have a group value == 3

df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3)

   ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1   1     1    10    10    10     2    10    10    10     1
2   2     3     0    12    12     5     0    12    12     1
3   3     5     0    14    14     5     0    14    14     1
4   4     7     0    16    16     8     0    16    16     1
5   5     9    12    10    10    11    12    10    10     1
6   6    11    12    12    12    14    12    12    12     2
7   7    13    12    14    14    15    12    14    14     2
8   8    15    12    16    16    17    12    16    16     2
9   9    17     0    10    10    20     0    10    10     2
10 10    19    14    12    12    21    14    12    12     2
11 11    21    NA    14    14    22    NA    14    14     3
12 12    23    14    16    16    23    14    16    16     3
13 13    25    16    10    10    25    16    10    10     3
14 14    27    16    12    12    25    16    12    12     3
15 15    29    16    14    14    27    16    14    14     3
1
  • 2
    Do you mean df$Group <- rep(1:3, each = 5)? Commented Dec 1, 2021 at 4:03

5 Answers 5

3

You can use the ceiling function:

df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- ceiling(as.numeric(df$ID)/5)

df
#    ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
# 1   1     1    10    10    10     2    10    10    10     1
# 2   2     3     0    12    12     5     0    12    12     1
# 3   3     5     0    14    14     5     0    14    14     1
# 4   4     7     0    16    16     8     0    16    16     1
# 5   5     9    12    10    10    11    12    10    10     1
# 6   6    11    12    12    12    14    12    12    12     2
# 7   7    13    12    14    14    15    12    14    14     2
# 8   8    15    12    16    16    17    12    16    16     2
# 9   9    17     0    10    10    20     0    10    10     2
# 10 10    19    14    12    12    21    14    12    12     2
# 11 11    21    NA    14    14    22    NA    14    14     3
# 12 12    23    14    16    16    23    14    16    16     3
# 13 13    25    16    10    10    25    16    10    10     3
# 14 14    27    16    12    12    25    16    12    12     3
# 15 15    29    16    14    14    27    16    14    14     3
Sign up to request clarification or add additional context in comments.

Comments

2

Without adding an ID or rownums we can do this using nrow and knowledge of the group length.

group_len <- 5
groups <- nrow(df)/group_len
df$group <- rep(1:groups, each = group_len)

# Example: 
# rep(1:3, each = 5)
# 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

Comments

1

I'd use dplyr::mutate with dplyr::row_number:

library(dplyr)

df %>% 
   mutate(Group=ceiling(row_number() / 5))

Output:

   ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1   1     1    10    10    10     2    10    10    10     1
2   2     3     0    12    12     5     0    12    12     1
3   3     5     0    14    14     5     0    14    14     1
4   4     7     0    16    16     8     0    16    16     1
5   5     9    12    10    10    11    12    10    10     1
6   6    11    12    12    12    14    12    12    12     2
7   7    13    12    14    14    15    12    14    14     2
8   8    15    12    16    16    17    12    16    16     2
9   9    17     0    10    10    20     0    10    10     2
10 10    19    14    12    12    21    14    12    12     2
11 11    21    NA    14    14    22    NA    14    14     3
12 12    23    14    16    16    23    14    16    16     3
13 13    25    16    10    10    25    16    10    10     3
14 14    27    16    12    12    25    16    12    12     3
15 15    29    16    14    14    27    16    14    14     3

Comments

1

An option would be to combine cumsum with rep.

cumsum(rep_len(c(TRUE, rep(FALSE, 4)), nrow(df)))
#cumsum(rep_len(c(TRUE, FALSE, FALSE, FALSE, FALSE), nrow(df))) #Alternative
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

Or making use of auto repeat.

df$Group <- c(TRUE, rep(FALSE, 4))
df$Group <- cumsum(df$Group)
df$Group
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

Or create a sequence with length of nrow and make an integer division %/%.

0:(nrow(df)-1) %/% 5
#seq(0, nrow(df)-1) %/% 5    #Alternative
#(seq_len(nrow(df))-1) %/% 5 #Alternative
# [1] 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2

Or using rep:

rep(1:ceiling(nrow(df)/5), each=5, length.out=nrow(df))
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

Comments

1

You could use the cut function with labels = FALSE to return an integer to use for the group.

n_per_group <- 5
df$group <- cut(x = df$ID, breaks = nrow(df) / n_per_group, labels = FALSE)
df$group 
#[1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.