I want the boxplot whiskers to extend to the median of the data ± 1.96 × the standard deviation (i.e., the interval that covers roughly 95% of the data when it is approximately normally distributed). I did this by computing the boxplot statistics with `aggregate` and supplying them as the minimum, lower quartile, median, etc. How can I make the box width vary so that it is proportional to the square root of the number of observations in each category (as ggplot does with `varwidth = TRUE`)? Everything I have tried so far (setting `weight`, `width`) changes the width of all of the categories equally. Thank you.
# Reproducible example data: 1000 uniform residuals spread unevenly over six
# categories, so that the groups contain different numbers of observations.
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# caller's global environment is a surprising side effect. Run the script in a
# fresh R session if a clean state is needed.
library(ggplot2)
set.seed(1)
residuals <- runif(n = 1000, min = -3, max = 3)
# Ten-label pattern: A x3, B x2, C x1, D x1, E x2, F x1.
category <- c("A", "A", "A", "B", "B", "C", "D", "E", "E", "F")
# Repeat the pattern explicitly up to the length of `residuals` rather than
# relying on data.frame() silently recycling the shorter vector.
df1 <- data.frame(
  category = rep_len(category, length(residuals)),
  residuals = residuals
)
# Per-category boxplot statistics. aggregate() stores the vector returned by
# the summary function as a matrix column named `residuals`, whose columns are,
# in order: lower whisker, lower quartile, median, upper quartile,
# upper whisker, sqrt(number of observations).
boxplot_stats <- aggregate(
  residuals ~ category,
  data = df1,
  FUN = function(values) {
    centre <- median(values)
    # Whiskers sit 1.96 standard deviations either side of the median.
    half_span <- 1.96 * sd(values)
    quartiles <- quantile(values, c(0.25, 0.75))
    c(
      centre - half_span,
      quartiles[1],
      centre,
      quartiles[2],
      centre + half_span,
      sqrt(length(values))
    )
  }
)
# Draw the precomputed statistics as boxplots (stat = "identity"), one box per
# category. Columns 1-5 of the `residuals` matrix supply the whiskers, box
# edges and median; column 6 holds sqrt(number of observations).
# The box width is mapped per category from column 6 and scaled so that the
# largest group is drawn 0.9 wide and every other box is narrower in
# proportion to sqrt(n). Setting `width` outside aes() would apply a single
# value to all boxes.
# NOTE(review): some ggplot2 versions may warn about an unknown `width`
# aesthetic for geom_boxplot; the mapping is expected to be applied regardless
# - confirm with the installed version.
custom_boxplot <- ggplot(boxplot_stats, aes(x = category)) +
  geom_boxplot(
    aes(
      ymin = residuals[, 1],
      lower = residuals[, 2],
      middle = residuals[, 3],
      upper = residuals[, 4],
      ymax = residuals[, 5],
      width = 0.9 * residuals[, 6] / max(residuals[, 6])
    ),
    stat = "identity",
    color = "black",
    fill = "lightblue"
  ) +
  labs(title = "boxplot", x = "Category", y = "Residuals") +
  theme_bw()
print(custom_boxplot)

