3

Here is my data set. I would like to add 5 new columns to mydata with 5 different conditions.

# Example data: four subjects (sub) observed at 3, 4, 5 and 5 time points (t),
# with one measurement (y.val) per subject and time point.
mydata <- data.frame(
  sub = rep(1:4, times = c(3, 4, 5, 5)),
  t = c(1:3, 1:4, 1:5, 1:5),
  y.val = c(
    10, 20, 13,
    5, 7, 8, 0,
    45, 17, 25, 12, 10,
    40, 0, 0, 5, 8
  )
)
mydata
   sub t y.val
1    1 1    10
2    1 2    20
3    1 3    13
4    2 1     5
5    2 2     7
6    2 3     8
7    2 4     0
8    3 1    45
9    3 2    17
10   3 3    25
11   3 4    12
12   3 5    10
13   4 1    40
14   4 2     0
15   4 3     0
16   4 4     5
17   4 5     8

I would like to add the following 5 (max of 't' column) columns as

# One indicator column per value of t: ItK is 1 on rows where t equals K and
# y.val is positive, and 0 on every other row.
mydata$It1 <- ifelse(mydata$t == 1 & mydata$y.val > 0, 1, 0)
mydata$It2 <- ifelse(mydata$t == 2 & mydata$y.val > 0, 1, 0)
mydata$It3 <- ifelse(mydata$t == 3 & mydata$y.val > 0, 1, 0)
mydata$It4 <- ifelse(mydata$t == 4 & mydata$y.val > 0, 1, 0)
mydata$It5 <- ifelse(mydata$t == 5 & mydata$y.val > 0, 1, 0)

Here is the expected outcome.

> mydata
   sub t y.val It1 It2 It3 It4 It5
1    1 1    10   1   0   0   0   0
2    1 2    20   0   1   0   0   0
3    1 3    13   0   0   1   0   0
4    2 1     5   1   0   0   0   0
5    2 2     7   0   1   0   0   0
6    2 3     8   0   0   1   0   0
7    2 4     0   0   0   0   0   0
8    3 1    45   1   0   0   0   0
9    3 2    17   0   1   0   0   0
10   3 3    25   0   0   1   0   0
11   3 4    12   0   0   0   1   0
12   3 5    10   0   0   0   0   1
13   4 1    40   1   0   0   0   0
14   4 2     0   0   0   0   0   0
15   4 3     0   0   0   0   0   0
16   4 4     5   0   0   0   1   0
17   4 5     8   0   0   0   0   1

I appreciate your help if it can be written as a function using for loop or any other technique.

5 Answers 5

4

You could use sapply/lapply

# Values of t that each get their own indicator column.
n <- seq_len(5)
# sapply() returns a logical matrix with one column per value in n; the unary
# + turns TRUE/FALSE into 1/0 before the columns are added to mydata.
mydata[paste0("It", n)] <- +sapply(
  n,
  function(x) mydata$t == x & mydata$y.val > 0
)
mydata

#   sub t y.val It1 It2 It3 It4 It5
#1    1 1    10   1   0   0   0   0
#2    1 2    20   0   1   0   0   0
#3    1 3    13   0   0   1   0   0
#4    2 1     5   1   0   0   0   0
#5    2 2     7   0   1   0   0   0
#6    2 3     8   0   0   1   0   0
#7    2 4     0   0   0   0   0   0
#8    3 1    45   1   0   0   0   0
#9    3 2    17   0   1   0   0   0
#10   3 3    25   0   0   1   0   0
#11   3 4    12   0   0   0   1   0
#12   3 5    10   0   0   0   0   1
#13   4 1    40   1   0   0   0   0
#14   4 2     0   0   0   0   0   0
#15   4 3     0   0   0   0   0   0
#16   4 4     5   0   0   0   1   0
#17   4 5     8   0   0   0   0   1

The expression mydata$t==x & mydata$y.val>0 returns a logical vector (TRUE/FALSE) with one element per row, depending on whether the condition holds. The unary + converts those logical values to 1/0 respectively (try +c(FALSE, TRUE)). This avoids the need for ifelse, i.e. ifelse(condition, 1, 0).

Sign up to request clarification or add additional context in comments.

1 Comment

@Uddin Updated the answer with some explanation.
3

Here's another approach based on multiplying a model matrix by the logical y.val > 0.

# model.matrix() with "+ 0" (no intercept) gives one 0/1 dummy column per level
# of factor(t); multiplying by the logical y.val > 0 zeroes the rows where
# y.val is not positive.
df <- cbind(
  mydata[1:3],
  model.matrix(~ factor(t) + 0, data = mydata) * (mydata$y.val > 0)
)

Which gives:

   sub t y.val factor.t.1 factor.t.2 factor.t.3 factor.t.4 factor.t.5
1    1 1    10          1          0          0          0          0
2    1 2    20          0          1          0          0          0
3    1 3    13          0          0          1          0          0
4    2 1     5          1          0          0          0          0
5    2 2     7          0          1          0          0          0
6    2 3     8          0          0          1          0          0
7    2 4     0          0          0          0          0          0
8    3 1    45          1          0          0          0          0
9    3 2    17          0          1          0          0          0
10   3 3    25          0          0          1          0          0
11   3 4    12          0          0          0          1          0
12   3 5    10          0          0          0          0          1
13   4 1    40          1          0          0          0          0
14   4 2     0          0          0          0          0          0
15   4 3     0          0          0          0          0          0
16   4 4     5          0          0          0          1          0
17   4 5     8          0          0          0          0          1

To clean up the names you can do:

# Replace the literal "factor.t." prefix (fixed = TRUE, so the dots are not
# treated as regex wildcards) with "It".
df <- setNames(df, sub("factor.t.", "It", names(df), fixed = TRUE))

Comments

3

You can use sapply to compare each t for equality against 1:5 and combine this with an & of y.val>0.

# Compare t against each of 1:5 (one logical column per value), keep only rows
# with positive y.val, and convert to 1/0 with unary +. The result is stored as
# a single matrix column `It`, which prints as It.1 ... It.5.
within(mydata, {
  It <- +(sapply(1:5, function(k) k == t) & y.val > 0)
})
#   sub t y.val It.1 It.2 It.3 It.4 It.5
#1    1 1    10    1    0    0    0    0
#2    1 2    20    0    1    0    0    0
#3    1 3    13    0    0    1    0    0
#4    2 1     5    1    0    0    0    0
#5    2 2     7    0    1    0    0    0
#6    2 3     8    0    0    1    0    0
#7    2 4     0    0    0    0    0    0
#8    3 1    45    1    0    0    0    0
#9    3 2    17    0    1    0    0    0
#10   3 3    25    0    0    1    0    0
#11   3 4    12    0    0    0    1    0
#12   3 5    10    0    0    0    0    1
#13   4 1    40    1    0    0    0    0
#14   4 2     0    0    0    0    0    0
#15   4 3     0    0    0    0    0    0
#16   4 4     5    0    0    0    1    0
#17   4 5     8    0    0    0    0    1

Comments

2

Here's a tidyverse solution, using pivot_wider:

library(tidyverse)

mydata %>%
  mutate(
    # Name of the indicator column this row contributes to.
    new_col = paste0("It", t),
    # 1 when y.val is positive, 0 otherwise.
    y_test = as.integer(y.val > 0)
  ) %>%
  pivot_wider(
    id_cols = c(sub, t, y.val),
    names_from = new_col,
    values_from = y_test,
    # Cells with no matching (row, It*) combination are filled with 0.
    values_fill = list(y_test = 0)
  )

     sub     t y.val   It1   It2   It3   It4   It5
   <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     1     1    10     1     0     0     0     0
 2     1     2    20     0     1     0     0     0
 3     1     3    13     0     0     1     0     0
 4     2     1     5     1     0     0     0     0
 5     2     2     7     0     1     0     0     0
 6     2     3     8     0     0     1     0     0
 7     2     4     0     0     0     0     0     0
 8     3     1    45     1     0     0     0     0
 9     3     2    17     0     1     0     0     0
10     3     3    25     0     0     1     0     0
11     3     4    12     0     0     0     1     0
12     3     5    10     0     0     0     0     1
13     4     1    40     1     0     0     0     0
14     4     2     0     0     0     0     0     0
15     4     3     0     0     0     0     0     0
16     4     4     5     0     0     0     1     0
17     4     5     8     0     0     0     0     1

Explanation:

  • Make two columns, new_col (new column names with "It") and y_test (y.val > 0).
  • Pivot new_col values into column names.
  • Fill in the NA values with zeros.

Comments

2

One purrr and dplyr option could be:

# For each k in 1:5, build a one-column data frame holding the indicator ItK,
# bind those columns side by side, then append the original columns.
map_dfc(
  .x = 1:5,
  .f = function(k) {
    mydata %>%
      mutate(!!paste0("It", k) := as.integer(t == k & y.val > 0)) %>%
      select(starts_with("It"))
  }
) %>%
  bind_cols(mydata)

   It1 It2 It3 It4 It5 sub t y.val
1    1   0   0   0   0   1 1    10
2    0   1   0   0   0   1 2    20
3    0   0   1   0   0   1 3    13
4    1   0   0   0   0   2 1     5
5    0   1   0   0   0   2 2     7
6    0   0   1   0   0   2 3     8
7    0   0   0   0   0   2 4     0
8    1   0   0   0   0   3 1    45
9    0   1   0   0   0   3 2    17
10   0   0   1   0   0   3 3    25
11   0   0   0   1   0   3 4    12
12   0   0   0   0   1   3 5    10
13   1   0   0   0   0   4 1    40
14   0   0   0   0   0   4 2     0
15   0   0   0   0   0   4 3     0
16   0   0   0   1   0   4 4     5
17   0   0   0   0   1   4 5     8

Or, if you want to do it dynamically according to the range of values in the t column:

# Same as the fixed 1:5 version, but the values of k run from min(t) to max(t):
# range() gives c(min, max) and `:` expands that pair into the full sequence.
map_dfc(
  .x = do.call(":", as.list(range(mydata$t))),
  .f = function(k) {
    mydata %>%
      mutate(!!paste0("It", k) := as.integer(t == k & y.val > 0)) %>%
      select(starts_with("It"))
  }
) %>%
  bind_cols(mydata)

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.