0
# simulate data
id=c(1:30)
house_type=sample(c("3STR","2STR","1STR","DETC,1STR","2STR,DETC","OTHERS"),20, replace=TRUE)
house=data.table(id,house_type)

# I want to create a house_type2 variable in house data.table
# In the real data, there are many other house type, with 1/2/3STR embeded in these house_typ. 
# I want to extract the STR (how many story) information from the data.

house[,story:="others"][str_detect("3STR",house_type),story:="3STR"][
  str_detect("2STR",house_type),story:="2STR"][
    str_detect("1STR",house_type),story:="1STR"]

The simulated data looks really sample; in the real data, the house_type string has more variation. I managed to do it with data.table chaining syntax. But I am asking a more elegant way to do it: define a function to extract story and apply function on the house_type column.

1 Answer 1

1

You could use regular expressions to extract the number:

library(data.table)
library(stringr)
# simulate data
set.seed(123L)
id=c(1:30)
house_type=sample(c("3STR","2STR","1STR","DETC,1STR","2STR,DETC","OTHERS"),20, replace=TRUE)
house=data.table(id,house_type)
#> Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names
#> = check.names, : Item 2 has 20 rows but longest item has 30; recycled with
#> remainder.

#OP
house[,story:="others"][str_detect("3STR",house_type),story:="3STR"][
    str_detect("2STR",house_type),story:="2STR"][
        str_detect("1STR",house_type),story:="1STR"]

## Solutions
house[, story2 := as.integer(sub("\\D*(\\d+).*", "\\1", house_type))]
#> Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
house[, story3 := fifelse(is.na(story2), "other", paste0(story2, "STR"))]

house
#>        id house_type  story story2 story3
#>     <int>     <char> <char>  <int> <char>
#>  1:     1       1STR   1STR      1   1STR
#>  2:     2     OTHERS others     NA  other
#>  3:     3       1STR   1STR      1   1STR
#>  4:     4       2STR   2STR      2   2STR
#>  5:     5       2STR   2STR      2   2STR
#>  6:     6     OTHERS others     NA  other
#>  7:     7       1STR   1STR      1   1STR
#>  8:     8  2STR,DETC others      2   2STR
#>  9:     9  DETC,1STR others      1   1STR
#> 10:    10     OTHERS others     NA  other
#> 11:    11     OTHERS others     NA  other
#> 12:    12       3STR   3STR      3   3STR
#> 13:    13       2STR   2STR      2   2STR
#> 14:    14       1STR   1STR      1   1STR
#> 15:    15  2STR,DETC others      2   2STR
#> 16:    16       1STR   1STR      1   1STR
#> 17:    17       1STR   1STR      1   1STR
#> 18:    18       3STR   3STR      3   3STR
#> 19:    19  DETC,1STR others      1   1STR
#> 20:    20       3STR   3STR      3   3STR
#> 21:    21       1STR   1STR      1   1STR
#> 22:    22     OTHERS others     NA  other
#> 23:    23       1STR   1STR      1   1STR
#> 24:    24       2STR   2STR      2   2STR
#> 25:    25       2STR   2STR      2   2STR
#> 26:    26     OTHERS others     NA  other
#> 27:    27       1STR   1STR      1   1STR
#> 28:    28  2STR,DETC others      2   2STR
#> 29:    29  DETC,1STR others      1   1STR
#> 30:    30     OTHERS others     NA  other
#>        id house_type  story story2 story3
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.