0

Hi, I want to draw error bars on a bar plot. The problem is that my data frame is in a wide format, so I can't work out how to plot it. The data frame is shown below:

# Example data for the question, apparently pasted from dput() output.
# The class vector marks it as a grouped tibble ("grouped_df") with 7 rows,
# grouped by Insured_Age_Group. For each of Deposit and Insurance it holds a
# mean, n, sd and se column.
# NOTE(review): the expression is not assigned to a name here; the answer's
# code refers to this data as `df`.
structure(list(Insured_Age_Group = c(1, 2, 3, 4, 5, 6, 7), Policy_Status = c("Issuance", 
"Issuance", "Issuance", "Issuance", "Issuance", "Issuance", "Issuance"
), Deposit_mean = c(3859543.73892798, 4013324.11384503, 3970469.37408863, 
4405204.3601121, 4379252.01763646, 3816234.23370925, 3342252.39385489
), Deposit_n = c(31046L, 20039L, 20399L, 48677L, 30045L, 13947L, 
3157L), Deposit_sd = c(2816342.35213949, 3016203.31909278, 3292567.51598225, 
4345771.64693777, 4260381.02418456, 4748349.50958046, 4033440.60986956
), se_Deposit = c(31328.4343156912, 41761.74740604, 45184.1713046368, 
38606.556913894, 48174.6323355127, 78805.8303265365, 140700.113248691
), Insurance_mean = c(1962975.48419977, 2003323.06714903, 2665058.97077804, 
3033051.58298144, 3579542.94373979, 4338039.6868955, 4806849.35326484
), Insurance_n = c(31046L, 20039L, 20399L, 48677L, 30045L, 13947L, 
3157L), Insurance_sd = c(1187550.43329336, 1065410.12671512, 
1840293.78284101, 2248320.36787743, 2642040.82537531, 3128969.83541335, 
3030600.81901732), se_Insurance = c(13210.075727384, 14751.455352518, 
25254.5009726065, 19973.4167588603, 29875.1085068105, 51929.8475078389, 
105717.653906674)), row.names = c(NA, -7L), groups = structure(list(
    Insured_Age_Group = c(1, 2, 3, 4, 5, 6, 7), .rows = structure(list(
        1L, 2L, 3L, 4L, 5L, 6L, 7L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 7L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

  Insured_Age_Group Policy_Status Deposit_mean Deposit_n Deposit_sd se_Deposit Insurance_mean Insurance_n Insurance_sd se_Insurance
              <dbl> <chr>                <dbl>     <int>      <dbl>      <dbl>          <dbl>       <int>        <dbl>        <dbl>
1                 1 Issuance          3859544.     31046   2816342.     31328.       1962975.       31046     1187550.       13210.
2                 2 Issuance          4013324.     20039   3016203.     41762.       2003323.       20039     1065410.       14751.
3                 3 Issuance          3970469.     20399   3292568.     45184.       2665059.       20399     1840294.       25255.
4                 4 Issuance          4405204.     48677   4345772.     38607.       3033052.       48677     2248320.       19973.
5                 5 Issuance          4379252.     30045   4260381.     48175.       3579543.       30045     2642041.       29875.
6                 6 Issuance          3816234.     13947   4748350.     78806.       4338040.       13947     3128970.       51930.
7                 7 Issuance          3342252.      3157   4033441.    140700.       4806849.        3157     3030601.      105718.

As can be seen, for each value of Deposit_mean and Insurance_mean I have calculated the corresponding standard error (se_Deposit and se_Insurance). I have drawn the bar plot shown below for the mean values:

(Image: graph without error bars.) I know how to add error bars using geom_errorbar. However, I am not sure how to attach the corresponding se value to each of these bars, because the data are in a wide format. So basically, I need to convert the wide format to a long format in such a way that each calculated Deposit_mean and Insurance_mean is paired with its corresponding standard error.

Any help or suggestion?

1 Answer 1

2

I think I would reshape the data by pivoting to long format, then pivoting back to a different wide format:

library(dplyr)
library(tidyr)
library(ggplot2)

# Reshape the wide summary table so that each row describes one
# (age group, type) pair with its own mean, n, sd and se columns.
df2 <- df %>%
  # Turn the "se_" prefix into a "_se" suffix so every measurement column
  # follows the same "<type>_<metric>" naming and can be split on "_".
  rename(Deposit_se = se_Deposit, Insurance_se = se_Insurance) %>%
  # Stack every column except the first two, splitting each column name
  # into its type and metric parts.
  pivot_longer(
    cols = -c(1:2),
    names_to = c("type", "metric"),
    names_sep = "_"
  ) %>%
  # Spread the metrics back out, one column per metric.
  pivot_wider(values_from = value, names_from = metric)

This gives you data in the following format:

df2
#> # A tibble: 14 x 7
#> # Groups:   Insured_Age_Group [7]
#>    Insured_Age_Group Policy_Status type          mean     n       sd      se
#>                <dbl> <chr>         <chr>        <dbl> <dbl>    <dbl>   <dbl>
#>  1                 1 Issuance      Deposit   3859544. 31046 2816342.  31328.
#>  2                 1 Issuance      Insurance 1962975. 31046 1187550.  13210.
#>  3                 2 Issuance      Deposit   4013324. 20039 3016203.  41762.
#>  4                 2 Issuance      Insurance 2003323. 20039 1065410.  14751.
#>  5                 3 Issuance      Deposit   3970469. 20399 3292568.  45184.
#>  6                 3 Issuance      Insurance 2665059. 20399 1840294.  25255.
#>  7                 4 Issuance      Deposit   4405204. 48677 4345772.  38607.
#>  8                 4 Issuance      Insurance 3033052. 48677 2248320.  19973.
#>  9                 5 Issuance      Deposit   4379252. 30045 4260381.  48175.
#> 10                 5 Issuance      Insurance 3579543. 30045 2642041.  29875.
#> 11                 6 Issuance      Deposit   3816234. 13947 4748350.  78806.
#> 12                 6 Issuance      Insurance 4338040. 13947 3128970.  51930.
#> 13                 7 Issuance      Deposit   3342252.  3157 4033441. 140700.
#> 14                 7 Issuance      Insurance 4806849.  3157 3030601. 105718.

You can then add your error bars and whichever stylistic tweaks you desire:

# Dodged bar chart of the mean premium per age group and type, with error
# bars and the value printed inside each bar.
# `linewidth` is used for the error-bar thickness: ggplot2 3.4.0 deprecated
# `size` for line-based geoms, so `size = 1` now raises a deprecation warning.
ggplot(df2, aes(factor(Insured_Age_Group), mean, fill = type)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.6) +
  # mean +/- 1.96 * se is the normal-approximation 95% interval. The dodge
  # width is the same as in geom_col so each error bar sits on its own bar.
  geom_errorbar(aes(ymin = mean - 1.96 * se, ymax = mean + 1.96 * se),
                width = 0.4,
                position = position_dodge(width = 0.8), linewidth = 1) +
  # Label each bar with its mean formatted as dollars, rotated 90 degrees
  # and placed at half the bar height.
  geom_text(aes(label = scales::dollar(mean), y = mean / 2), hjust = 0.5,
            angle = 90, position = position_dodge(width = 0.8)) +
  scale_y_continuous(labels = scales::dollar) +
  scale_fill_brewer(palette = "Greens") +
  labs(x = "Insured Age Group",
       y = "Premium value",
       title = paste("Mean value for Deposit and Insurance Annual Premium",
                     "for Issuance Group", sep = "\n")) +
  theme_bw() +
  # Drop the panel border but keep axis lines; centre and enlarge the title.
  theme(panel.border = element_blank(),
        axis.line = element_line(),
        plot.title = element_text(size = 18, hjust = 0.5))

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.