0

I am working with R.

I have a data set that looks like this...

# Example data: one Condition ("1") and 8 categories, each repeated 5 times
# (40 rows in total). Value contains two NAs in the "beach" chunk.
structure(
  list(
    Condition = rep("1", times = 40L),
    category = rep(
      c(
        "work", "people", "class", "beach",
        "park", "house", "street", "internet"
      ),
      each = 5L
    ),
    Value = c(
      7.36, 7.92, 7.66, 6.92, 4.76, # work
      2.82, 3.18, 2.1, 8.28, 7.26,  # people
      5.16, 5.72, 7.12, 7.14, 5.06, # class
      5.14, 3.34, 4.74, NA, NA,     # beach
      3.42, 3.87, 5.3, 4.26, 4.46,  # park
      5.1, 3.76, 10.4, 3.38, 4.86,  # house
      4.14, 4.24, 4.68, 5.18, 4.46, # street
      8.38, 3.92, 4.14, 4.78, 2.94  # internet
    )
  ),
  row.names = c(NA, -40L),
  class = c("tbl_df", "tbl", "data.frame")
)

So, as you can see, the words in the category column repeat themselves 5 times. Each "chunk" of five identical words is a group within condition 1. I need a random sample of 4 chunks of words, that is, a total of 20 observations in the Value column.

I expect something like this...

Condition     category     Value 
   1             people     #
   1             people     #
   1             people     #
   1             people     ...
   1             people
   1             street
   1             street
   1             street
   1             street
   1             street
   1             park
   1             park 
   1             park
   1             park
   1             park
   1             class
   1             class
   1             class
   1             class
   1             class

Any help would be great. Thanks!

2 Answers 2

1

tidyverse

set.seed(1)
library(tidyverse)
# Collapse each (Condition, category) chunk into a single row with a nested
# `data` column, draw 4 of those rows at random, then expand them again.
# slice_sample() replaces the superseded sample_n().
# NOTE(review): this draws 4 chunks overall; if there are several Conditions
# and 4 chunks are wanted per Condition, add `group_by(Condition)` before
# slice_sample() -- confirm against the intended design.
df %>%
  group_nest(Condition, category) %>%
  slice_sample(n = 4) %>%
  unnest(data)
#> # A tibble: 20 x 3
#>    Condition category Value
#>    <chr>     <chr>    <dbl>
#>  1 1         beach     5.14
#>  2 1         beach     3.34
#>  3 1         beach     4.74
#>  4 1         beach    NA   
#>  5 1         beach    NA   
#>  6 1         internet  8.38
#>  7 1         internet  3.92
#>  8 1         internet  4.14
#>  9 1         internet  4.78
#> 10 1         internet  2.94
#> 11 1         work      7.36
#> 12 1         work      7.92
#> 13 1         work      7.66
#> 14 1         work      6.92
#> 15 1         work      4.76
#> 16 1         class     5.16
#> 17 1         class     5.72
#> 18 1         class     7.12
#> 19 1         class     7.14
#> 20 1         class     5.06

Created on 2021-06-08 by the reprex package (v2.0.0)

data.table

set.seed(1)

library(data.table)
library(magrittr)
# Nest -> sample -> unnest, treating each (Condition, category) pair as one
# chunk. Note that setDT() converts `df` to a data.table by reference.
setDT(df)[, lapply(.SD, list), by = list(Condition, category)] %>%
  # Keep the nested rows whose category is among 4 randomly drawn ones.
  .[category %in% sample(category, 4)] %>%
  # Unnest per chunk. Without `by`, the 4 Condition/category keys would be
  # recycled against the 20 unlisted values, pairing values with the wrong
  # category.
  .[, lapply(.SD, unlist), by = list(Condition, category)] %>%
  .[order(Condition, category)]
#>     Condition category Value
#>  1:         1    beach  5.14
#>  2:         1    beach  3.34
#>  3:         1    beach  4.74
#>  4:         1    beach    NA
#>  5:         1    beach    NA
#>  6:         1 internet  8.38
#>  7:         1 internet  3.92
#>  8:         1 internet  4.14
#>  9:         1 internet  4.78
#> 10:         1 internet  2.94
#> 11:         1   people  2.82
#> 12:         1   people  3.18
#> 13:         1   people  2.10
#> 14:         1   people  8.28
#> 15:         1   people  7.26
#> 16:         1     work  7.36
#> 17:         1     work  7.92
#> 18:         1     work  7.66
#> 19:         1     work  6.92
#> 20:         1     work  4.76

Created on 2021-06-08 by the reprex package (v2.0.0)

Sign up to request clarification or add additional context in comments.

Comments

0

If I understand you correctly, you want

# Split the data into one data frame per category, draw 4 of those pieces at
# random, and stack the drawn pieces back into a single data frame.
dplyr::bind_rows(
  sample(split(your_data, your_data$category), 4)
)

returning

# A tibble: 20 x 3
   Condition category Value
   <chr>     <chr>    <dbl>
 1 1         house     5.1
 2 1         house     3.76
 3 1         house    10.4
 4 1         house     3.38
 5 1         house     4.86
 6 1         class     5.16
 7 1         class     5.72
 8 1         class     7.12
 9 1         class     7.14
10 1         class     5.06
11 1         internet  8.38
12 1         internet  3.92
13 1         internet  4.14
14 1         internet  4.78
15 1         internet  2.94
16 1         work      7.36
17 1         work      7.92
18 1         work      7.66
19 1         work      6.92
20 1         work      4.76

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.