I'm trying to plot a bar chart with an overlaid line graph measured on a second y-axis.
I know this isn't necessarily the way you're supposed to do this, but this is what the customer wants.
I've got most of the way there but I'm having trouble finishing it off.
Here's the dataframe I'm working with:
age var1 var2 count
1 18 0.6553120 0.05554554 100.00
2 19 0.4869745 0.65048383 50.00
3 20 0.2142351 0.98546390 25.00
4 21 0.3888707 0.04171436 12.50
5 22 0.2707270 0.13346492 6.25
6 23 0.7637131 0.21246432 3.12
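I then reshape it into long format (shown here after the count adjustment applied below, which is why the counts are half of the originals):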
age count composite_col new_val
1 18 50.000 var1 0.65531200
2 19 25.000 var1 0.48697446
3 20 12.500 var1 0.21423509
4 21 6.250 var1 0.38887071
5 22 3.125 var1 0.27072698
6 23 1.560 var1 0.76371308
7 18 50.000 var2 0.05554554
8 19 25.000 var2 0.65048383
9 20 12.500 var2 0.98546390
10 21 6.250 var2 0.04171436
11 22 3.125 var2 0.13346492
12 23 1.560 var2 0.21246432
# Every age occurs once per series in the long data, so divide its count by
# the number of series; stacking the rows then adds back up to the original.
df4 <- df4 %>%
  mutate(count = count / length(unique(composite_col)))
and then plot it with this monstrosity:
# Bars (counts, primary axis) with one line per series (prices, secondary axis).
# The line values are multiplied by 100 to share the count axis, and the
# secondary axis divides by 100 to label them in their original units.
# Note: df4 holds one row per age *and* series, and geom_col() stacks rows that
# share an x value by default, so each age gets one bar segment per series.
ggplot(df4) +
  geom_col(
    aes(x = age, y = count),
    width = 1.0,
    fill = "red",
    colour = "black"
  ) +
  geom_line(aes(x = age, y = new_val * 100, colour = composite_col)) +
  scale_x_continuous(breaks = seq(18, 23, by = 1), expand = c(0, 0)) +
  scale_y_continuous(
    "count",
    expand = c(0, 0),
    sec.axis = sec_axis(~ . / 100, name = "price", breaks = seq(0, 2, by = 0.1))
  ) +
  theme_classic()
Unfortunately, it plots the counts for each age bin in a stacked fashion (one bar segment per variable) rather than as a single bar per age.
Full code here:
# Reproducible example: a bar chart of counts per age with two price series
# overlaid as lines and read off a secondary y-axis.
library(tidyr)
library(ggplot2)

set.seed(1911)

# Wide data: one row per age, holding both price series and the count.
df2 <- data.frame(
  age = c(18, 19, 20, 21, 22, 23),
  var1 = runif(6, 0, 1),
  var2 = runif(6, 0, 1),
  count = c(100, 50, 25, 12.5, 6.25, 3.12)
)
df2

# Long data: one row per age and series, used only for the lines.
df4 <- pivot_longer(
  df2,
  cols = c(var1, var2),
  names_to = "composite_col",
  values_to = "new_val"
)
df4

# The bars are drawn from the wide data (a single row per age). geom_col()
# stacks rows that share an x value by default, so feeding it the long data
# would draw one segment per series; using df2 avoids that and makes any
# rescaling of `count` unnecessary.
# The lines are multiplied by 100 to share the count axis; the secondary axis
# divides by the same factor, so the two constants must stay in sync.
ggplot() +
  theme_classic() +
  geom_col(
    data = df2,
    aes(x = age, y = count),
    width = 1.0,
    fill = "red",
    colour = "black"
  ) +
  geom_line(
    data = df4,
    aes(x = age, y = new_val * 100, colour = composite_col)
  ) +
  scale_x_continuous(breaks = seq(18, 23, by = 1), expand = c(0, 0)) +
  scale_y_continuous(
    "count",
    expand = c(0, 0),
    sec.axis = sec_axis(~ . / 100, name = "price", breaks = seq(0, 2, by = 0.1))
  )


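Answer: set the position of the bar layer explicitly, i.e. use `geom_col(..., position='dodge')`, so that the bars are no longer stacked.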