4

I have one categorical column per time point, and I want to convert them into a set of boolean columns (one per category and time point) with mutate() and across(), like this:

# Example data: one categorical column per time point (t1 to t3); some
# observations are missing.
data <- data.frame(
  category_t1 = c("A", "B", "C", "C", "A", "B"),
  category_t2 = c("A", "C", "B", "B", "B", NA),
  category_t3 = c("C", "C", NA, "B", NA, "A")
)

# Requires dplyr (and stringr for str_replace()).
# For each category level (A, B, C), add one logical indicator column per
# "category*" column: TRUE where the value equals the level, FALSE for any
# other non-missing value. A missing value matches neither case_when()
# branch, so it stays NA.
# `.names` is a glue spec that replaces the "category" prefix of the input
# column name with the level, giving A_t1, A_t2, ..., C_t3.
data %>% mutate(across(starts_with("category"), 
                       ~case_when(.x == "A" ~ TRUE, !is.na(.x) ~ FALSE),
                       .names = "{str_replace(.col, 'category', 'A')}"),
                across(starts_with("category"), 
                       ~case_when(.x == "B" ~ TRUE, !is.na(.x) ~ FALSE),
                       .names = "{str_replace(.col, 'category', 'B')}"),
                across(starts_with("category"), 
                       ~case_when(.x == "C" ~ TRUE, !is.na(.x) ~ FALSE),
                       .names = "{str_replace(.col, 'category', 'C')}"))

Which makes :

category_t1 category_t2 category_t3  A_t1  A_t2  A_t3  B_t1  B_t2  B_t3  C_t1  C_t2  C_t3
1         A           A           C  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
2         B           C           C FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE
3         C           B        <NA> FALSE FALSE    NA FALSE  TRUE    NA  TRUE FALSE    NA
4         C           B           B FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
5         A           B        <NA>  TRUE FALSE    NA FALSE  TRUE    NA FALSE FALSE    NA
6         B        <NA>           A FALSE    NA  TRUE  TRUE    NA FALSE FALSE    NA FALSE

It works, but I would like to know if there is a better way, because here I am repeating the same code 3 times instead of writing it once (and imagine if I had to repeat it 10 times...). I thought I could do it with map(), but I didn't manage to make it work. I think the problem is that the .names argument in across() cannot see the string I use in case_when().

I think maybe there is something to do with the ... argument, like:

# Sketch of the desired interface only -- this is NOT working code.
# The idea: supply the category levels once, as `mod`, through the `...`
# argument of across(), and reuse `mod` both in the case_when() condition
# and in the `.names` glue spec.
data %>% mutate(across(starts_with("category"),
                       ~case_when(.x == mod ~ TRUE, !is.na(.x) ~ FALSE),
                       mod = levels(as.factor(data$category_t1)),
                       .names = "{str_replace(.col, 'category', mod)}"))

But of course that doesn't work here. Do you know how to do that?

Thanks a lot.

2
  • 1
    Don't know bout tidyverse, I would simply do for (i in LETTERS[1:3]) data[paste0(i, "_t", 1:3)] <- data == i Commented Aug 31, 2022 at 13:45
  • 1
    Love that one @DavidArenburg! Commented Aug 31, 2022 at 14:08

4 Answers 4

2

We may use table in across

library(dplyr)
library(stringr)
library(tidyr)
# For every column, build a data frame of per-level logical indicators:
#  - table(row_number(), .x) cross-tabulates the row index against the
#    value: one row per observation, one column per level. Missing values
#    are not counted by table(), so their rows are all zero.
#  - NA^(is.na(.x)) is NA where .x is missing and 1 otherwise, so the
#    multiplication turns the rows of missing observations into NA.
#  - `> 0` converts the counts to TRUE/FALSE.
# `.names` strips the "category_" prefix from the new (data-frame) column,
# and unpack() then spreads each data-frame column into separate columns
# named <time>.<level>, e.g. t1.A.
data %>%
   mutate(across(everything(), ~ as.data.frame.matrix(table(row_number(), .x) * 
     NA^(is.na(.x)) > 0),
    .names = "{str_remove(.col, 'category_')}")) %>% 
  unpack(where(is.data.frame), names_sep = ".")

-output

# A tibble: 6 × 12
  category_t1 category_t2 category_t3 t1.A  t1.B  t1.C  t2.A  t2.B  t2.C  t3.A  t3.B  t3.C 
  <chr>       <chr>       <chr>       <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 A           A           C           TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE TRUE 
2 B           C           C           FALSE TRUE  FALSE FALSE FALSE TRUE  FALSE FALSE TRUE 
3 C           B           <NA>        FALSE FALSE TRUE  FALSE TRUE  FALSE NA    NA    NA   
4 C           B           B           FALSE FALSE TRUE  FALSE TRUE  FALSE FALSE TRUE  FALSE
5 A           B           <NA>        TRUE  FALSE FALSE FALSE TRUE  FALSE NA    NA    NA   
6 B           <NA>        A           FALSE TRUE  FALSE NA    NA    NA    TRUE  FALSE FALSE

Or use model.matrix from base R

# Recode missing values as the literal string "NA" so that they become an
# ordinary factor level instead of being dropped by model.matrix().
data1 <- replace(data, is.na(data), "NA")
# Use the same level set for every column, with "NA" listed first.
lvls <- lapply(data1, \(x) levels(factor(x, levels = c("NA", "A", "B", "C"))))
# Build the 0/1 design matrix without an intercept (`~ 0 + .`).
m1 <- model.matrix(~ 0 + ., data = data1, xlev = lvls)

# Drop the column(s) whose name contains "NA" and convert 0/1 to logical.
# NOTE: with this approach a missing input yields FALSE in every indicator
# of that column, not NA (see the printed `out`).
out <- cbind(data, m1[, -grep("NA", colnames(m1))] > 0)

-output

out
category_t1 category_t2 category_t3 category_t1A category_t1B category_t1C category_t2A category_t2B category_t2C category_t3A category_t3B category_t3C
1           A           A           C         TRUE        FALSE        FALSE         TRUE        FALSE        FALSE        FALSE        FALSE         TRUE
2           B           C           C        FALSE         TRUE        FALSE        FALSE        FALSE         TRUE        FALSE        FALSE         TRUE
3           C           B        <NA>        FALSE        FALSE         TRUE        FALSE         TRUE        FALSE        FALSE        FALSE        FALSE
4           C           B           B        FALSE        FALSE         TRUE        FALSE         TRUE        FALSE        FALSE         TRUE        FALSE
5           A           B        <NA>         TRUE        FALSE        FALSE        FALSE         TRUE        FALSE        FALSE        FALSE        FALSE
6           B        <NA>           A        FALSE         TRUE        FALSE        FALSE        FALSE        FALSE         TRUE        FALSE        FALSE
> colnames(out)
 [1] "category_t1"  "category_t2"  "category_t3" 
 [4] "category_t1A" "category_t1B" "category_t1C"
 [7] "category_t2A" "category_t2B" "category_t2C"
[10] "category_t3A" "category_t3B" "category_t3C"


Or another option with table

# For each column, table(seq_along(x), x) gives a row-by-level count table.
# Multiplying by NA^is.na(x) (NA for a missing value, 1 otherwise) turns the
# rows of missing observations into NA, and `> 0` converts counts to
# logicals. The per-column results are bound column-wise and appended to
# the original data.
cbind(data, do.call(cbind.data.frame,
  lapply(data, \(x) (table(seq_along(x), x)* NA^is.na(x)) > 0)))

-output

category_t1 category_t2 category_t3 category_t1.A category_t1.B category_t1.C category_t2.A category_t2.B category_t2.C category_t3.A category_t3.B
1           A           A           C          TRUE         FALSE         FALSE          TRUE         FALSE         FALSE         FALSE         FALSE
2           B           C           C         FALSE          TRUE         FALSE         FALSE         FALSE          TRUE         FALSE         FALSE
3           C           B        <NA>         FALSE         FALSE          TRUE         FALSE          TRUE         FALSE            NA            NA
4           C           B           B         FALSE         FALSE          TRUE         FALSE          TRUE         FALSE         FALSE          TRUE
5           A           B        <NA>          TRUE         FALSE         FALSE         FALSE          TRUE         FALSE            NA            NA
6           B        <NA>           A         FALSE          TRUE         FALSE            NA            NA            NA          TRUE         FALSE
  category_t3.C
1          TRUE
2          TRUE
3            NA
4         FALSE
5            NA
6         FALSE
Sign up to request clarification or add additional context in comments.

Comments

1

This is not a tidyverse option (although it is pipe-compatible), but it is easily done with the fastDummies package:

# Adds one dummy column per input column and level, named <column>_<level>.
# The dummies are 0/1 rather than TRUE/FALSE, and rows with a missing input
# are NA in that column's dummies (see the printed result).
fastDummies::dummy_cols(data, ignore_na = TRUE)
  category_t1 category_t2 category_t3 category_t1_A category_t1_B category_t1_C category_t2_A category_t2_B category_t2_C category_t3_A category_t3_B category_t3_C
1           A           A           C             1             0             0             1             0             0             0             0             1
2           B           C           C             0             1             0             0             0             1             0             0             1
3           C           B        <NA>             0             0             1             0             1             0            NA            NA            NA
4           C           B           B             0             0             1             0             1             0             0             1             0
5           A           B        <NA>             1             0             0             0             1             0            NA            NA            NA
6           B        <NA>           A             0             1             0            NA            NA            NA             1             0             0

Comments

1

purrr's map_dfc could match well with your current approach:

library(dplyr)
library(purrr)
library(stringr) # str_replace() is called inside the `.names` glue spec

# For each category level, compute the logical indicator columns for all
# "category*" columns, keep only those new columns (`.keep = "none"`), bind
# the per-level results column-wise, and attach them to the original data.
bind_cols(
  data,
  map_dfc(LETTERS[1:3], \(letter) {
    mutate(
      data,
      across(
        starts_with("category"),
        # TRUE when the value equals the level, FALSE for any other
        # non-missing value; missing values match no branch and stay NA.
        ~ case_when(.x == letter ~ TRUE, !is.na(.x) ~ FALSE),
        # Splice the current level into the glue spec as a string literal,
        # e.g. "{str_replace(.col, 'category', 'A')}" -> A_t1, A_t2, A_t3.
        .names = paste0("{str_replace(.col, 'category', '", letter, "')}")
      ),
      .keep = "none"
    )
  })
)

Or skip the bind_cols and use .keep = ifelse(letter == "A", "all", "none").

Output:

  category_t1 category_t2 category_t3  A_t1  A_t2  A_t3  B_t1  B_t2  B_t3  C_t1  C_t2  C_t3
1           A           A           C  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
2           B           C           C FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE
3           C           B        <NA> FALSE FALSE    NA FALSE  TRUE    NA  TRUE FALSE    NA
4           C           B           B FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
5           A           B        <NA>  TRUE FALSE    NA FALSE  TRUE    NA FALSE FALSE    NA
6           B        <NA>           A FALSE    NA  TRUE  TRUE    NA FALSE FALSE    NA FALSE

Comments

0

A base solution with nested lapply():

# For every column, compare the values against each of that column's observed
# levels; sapply() collects the comparisons into one logical matrix per
# column (one matrix column per level), and cbind() appends them to `data`.
# A missing value compares as NA against every level.
cbind(data, lapply(data, function(values) {
  lvl <- levels(factor(values))
  names(lvl) <- lvl
  sapply(lvl, function(one_level) values == one_level)
}))

  category_t1 category_t2 category_t3 category_t1.A category_t1.B category_t1.C category_t2.A category_t2.B category_t2.C category_t3.A category_t3.B category_t3.C
1           A           A           C          TRUE         FALSE         FALSE          TRUE         FALSE         FALSE         FALSE         FALSE          TRUE
2           B           C           C         FALSE          TRUE         FALSE         FALSE         FALSE          TRUE         FALSE         FALSE          TRUE
3           C           B        <NA>         FALSE         FALSE          TRUE         FALSE          TRUE         FALSE            NA            NA            NA
4           C           B           B         FALSE         FALSE          TRUE         FALSE          TRUE         FALSE         FALSE          TRUE         FALSE
5           A           B        <NA>          TRUE         FALSE         FALSE         FALSE          TRUE         FALSE            NA            NA            NA
6           B        <NA>           A         FALSE          TRUE         FALSE            NA            NA            NA          TRUE         FALSE         FALSE

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.