I am trying to group continuous data into tertiles. I am using the function `quantile` to do this. The following is my code:
# Bin each wbc value into a labelled group and store the resulting factor in
# dd$wbc_tert.
# NOTE(review): vTert is defined outside this snippet -- presumably the tertile
# break points from quantile(); confirm it holds exactly four distinct,
# ascending breaks (three intervals), because three labels are supplied.
# cut() places every observation with the same value in the same interval, so
# tied values around a break point can make the group sizes unequal.
dd$wbc_tert <- with(
  dd,
  cut(
    wbc,
    breaks = vTert,
    # Close the lowest interval so a value equal to the first break is binned
    # rather than turned into NA. TRUE is spelled out because T is an ordinary
    # variable that can be reassigned.
    include.lowest = TRUE,
    labels = c("Low", "Medium", "High")
  )
)
Isn't it supposed to give an equal count of values in each group? I am getting different counts in the groups.
> dd %>% filter(wbc_tert == 'High') %>% select('wbc') %>% nrow()
[1] 143
> dd %>% filter(wbc_tert == 'Low') %>% select('wbc') %>% nrow()
[1] 148
> dd %>% filter(wbc_tert == 'Medium') %>% select('wbc') %>% nrow()
[1] 139
This is the dput of the values
c(10.9, 5.4, 9.1, 7.4, 6.6, 5.5, 4.4, 6.7, 7.8, 6.7, 6.6, 8.6,
8.4, 4.8, 7, 5.2, 7, 6.7, 10.4, 7.5, 8.5, 6.8, 8.5, 9.4, 4.6,
6.8, 10.2, 6.7, 4.6, 4.9, 6.7, 8.9, 5.9, 5.9, 9.9, 4.1, 8.4,
9, 7.7, 8.2, 5.7, 8.4, 7.7, 4.6, 6.5, 7.3, 4.9, 3.8, 6.2, 7.9,
5.3, 8.9, 6, 4.8, 5.9, 5.4, 8.6, 6.1, 9.5, 5.8, 6.2, 5.8, 7.9,
9.6, 6.6, 9.6, 7, 10.1, 9, 6.9, 9.1, 6.8, 8.4, 9.6, 4.4, 10.5,
7.9, 5.6, 5.1, 6.6, 6.5, 12.7, 5.3, 7.7, 4.8, 4.7, 6.1, 4.3,
6.1, 11.6, 5.9, 7.4, 5.7, 4.7, 4.8, 8.5, 5.6, 7.9, 9.1, 7.8,
5.3, 5, 8.1, 8.3, 4.7, 5.4, 7.6, 7.2, 5.7, 7.9, 7.9, 6.4, 3.8,
4.7, 6.2, 5, 7.6, 5.8, 5.4, 4.3, 6, 4.7, 6, 6.1, 5.8, 5.6, 4.7,
5, 11.5, 6.3, 4.4, 6.8, 6.6, 6.8, 6.1, 4.8, 5.4, 5.8, 5.2, 7.1,
5.4, 9.1, 6.9, 5.4, 8.5, 5.3, 7.3, 6.9, 9, 6.3, 8.4, 7.8, 5.7,
6.4, 5.3, 9.6, 6.4, 9.9, 8.9, 7.7, 6.2, 7.2, 4.6, 5.4, 4.6, 11.2,
3.1, 12.3, 5.9, 11.1, 6.2, 6.6, 4.1, 7.4, 9.4, 4.1, 6.7, 6.7,
6.1, 6.3, 5.6, NA, 3.7, 6.8, 6.7, 6.4, 7.3, 5.7, 6.7, 6.9, 5.7,
5.3, 4, 5.6, 4.8, 5.5, 6, 6.6, 3.6, 5.6, 8.9, 6.3, 5.8, 8.2,
8.6, 8.5, 5.7, 8.6, 6, 5.1, 5.7, 8.2, 5.4, 6.9, 6.9, 8.3, 9.5,
5.4, 10.2, 8.8, 7.2, 4.8, 9.8, 4.6, 6.3, 5.8, 4.9, 12.7, 7.5,
10.6, 9.3, 5.5, 10.7, 6.2, 9.3, 8.3, 7.8, 8.05, 9.57, 6.62, 6.21,
5.34, 6.11, 10.37, 4.45, 5.55, 8.05, 8.31, 5.06, 6.05, 4.76,
9.09, 9.11, 9.04, 6.99, 6.33, 9.47, 6.48, 4.46, 9.44, 6.88, 7.09,
5.75, 10.89, 6.68, 3.64, 6.55, 8.69, 5.89, 9.05, 6.38, 11.62,
9.11, 9.22, 7.97, 9.64, 12.76, 8.39, 6.57, 8.1, 7.3, 10.1, 4.7,
6.4, 7.2, 5.5, 3.7, 5.1, 9.8, 7.6, 7.7, 6, 3.9, 6.8, 5.4, 5.4,
9.7, 9, 6, 7.3, 6.3, 5.8, 8.3, 7, 4.1, 11.2, 5, 7.6, 6.5, 4.8,
8, 10.1, 7.1, 7.4, 4.3, 4, 10.12, 4.3, 7.26, 8.84, 8.44, 8.44,
8.12, 6.5, 8.58, 8.55, 8.82, 4.53, 9.51, 4.93, 4.42, 4.69, 8.69,
5.77, 3.37, 6.58, 3.72, 3.09, 7.13, 8.11, 7.2, 12.18, 6.52, 7.91,
5.69, 8.24, 7.67, 5.69, 4.85, 7.03, 4.16, 3.57, 8.1, 4.61, 5.98,
5.13, 7.68, 5.47, 5.54, 4.59, 6, 11.62, 7.38, 7.06, 8.74, 8.02,
6.73, 7.19, 6.36, 4.86, 6.55, 8.4, 7.76, 4.73, 4.8, 5.73, 8.53,
4.6, 7.96, 9.48, 6.59, 5.75, 6.61, 6.49, 7.91, 6.92, 7.14, 6.24,
12.53, 7.03, 4.73, 8.05, 7.26, 4.07, 6.7, 5.7, 7.39, 5.2, 6.61,
6.8, 6.77, 5.65, 6.08, 7.24, 6.13, 7.92, 7.37, 7.99, 3.31, 9.72,
8.71, 8.35, 5.05, 8.15, 5.1, 5.4, 8.8, 4.9, 5, 7.43, 10.3, 6.3,
9.5, 6.9, 6.7, 5.4, 7.7, 8, 6.5, 5.6, 9.7)
Can someone please help me understand what the reason could be?
> dd %>% filter(wbc_tert == '5.9-7.7') %>% select('wbc') %>% unique() %>% nrow()
[1] 56
> dd %>% filter(wbc_tert == '3.1-5.9') %>% select('wbc') %>% unique() %>% nrow()
[1] 56
> dd %>% filter(wbc_tert == '7.7-12.8') %>% select('wbc') %>% unique() %>% nrow()
[1] 78