I have the following data frame of data about US counties grouped by their income decile and who won the 2016 election:
# A tibble: 1,188 x 5
# Groups: day_month_year, deciles_income [270]
day_month_year deciles_income winner2016 key mean_spend_cases
<date> <int> <chr> <chr> <dbl>
1 2020-01-12 1 Donald Trump mean_spend_all 0.00108
2 2020-01-12 1 Hillary Clinton mean_spend_all 0.0196
3 2020-01-12 2 Donald Trump mean_spend_all -0.000334
4 2020-01-12 2 Hillary Clinton mean_spend_all 0.00664
5 2020-01-12 3 Donald Trump mean_spend_all 0.00807
6 2020-01-12 3 Hillary Clinton mean_spend_all 0.0257
7 2020-01-12 4 Donald Trump mean_spend_all -0.00491
8 2020-01-12 4 Hillary Clinton mean_spend_all -0.0119
9 2020-01-12 5 Donald Trump mean_spend_all 0.000497
10 2020-01-12 5 Hillary Clinton mean_spend_all 0.00001
# … with 1,178 more rows
The key column holds the two melted variables, mean_spend_all and new_case_rate_07da. I am trying to create a plot with time on the x-axis that shows two lines for the development of new cases (one per 2016 winner, Trump or Clinton, each in a different color) and points for the change in spending (again colored by the winner2016 column).
I am then making a facet wrap so that I have ten graphs based on the income of the counties' residents. Finally, I would like to display a line of best fit for the change in spending for which I am using the stat_smooth() function.
Ideally, the graph would look similar to this but with added lines for the case rate. This is my current attempt:
# Attempted plot: spending change as points and case rate as lines over time,
# colored by 2016 winner and faceted by income decile.
# NOTE(review): every layer below inherits the full `afc` data passed here, so
# each aesthetic must supply one value per row of that data (or a single value).
# Mapping bare column names (e.g. `x = day_month_year`) instead of `afc$...`
# is the usual ggplot2 idiom.
ggplot(data = group_by(afc, winner2016),
aes(x = afc$day_month_year)) +
# Spending series, scaled by 100.
# NOTE(review): filter() keeps only the rows with this key, so the y vector is
# shorter than the layer data -- consistent with the reported error
# "Aesthetics must be either length 1 or the same as the data (1188): y".
# Consider filtering in this layer's `data` argument and mapping
# `y = mean_spend_cases * 100` instead.
geom_point(aes(color = winner2016, y = filter(afc, key == "mean_spend_all")$mean_spend_cases *100)) +
# Case-rate series; same length mismatch as the point layer above.
geom_line(aes(color = winner2016, y = filter(afc, key == "new_case_rate_07da")$mean_spend_cases)) +
# NOTE(review): facet_wrap() receives a column vector extracted from afc rather
# than a formula or vars(); presumably `~ deciles_income` is intended -- confirm.
facet_wrap(afc$deciles_income)+
# NOTE(review): the x aesthetic is day_month_year, but the x label below
# describes the income decile -- verify the label text.
labs(title = "Change in spending for counties grouped by decile of income",
x = "Decile of a County by income",
y = "Change in consumer spending relative to January 14")+
# NOTE(review): no y aesthetic is mapped here or in ggplot(); the smoother
# presumably needs the spending series as y -- confirm.
stat_smooth(aes(color = (afc$winner2016))) +
# Limit the y axis to the range -30 to 15.
scale_y_continuous(limits = c(-30,15))
However, I get the error "Aesthetics must be either length 1 or the same as the data (1188): y", which I assume is caused by using filter() inside aes().
This is a 30-row sample of the data (output of dput()):
structure(list(day_month_year = structure(c(18301, 18434, 18406,
18301, 18287, 18406, 18350, 18399, 18329, 18308, 18343, 18413,
18308, 18434, 18280, 18273, 18371, 18434, 18273, 18448, 18287,
18434, 18350, 18343, 18427, 18273, 18399, 18273, 18294, 18427
), tzone = "Europe/Prague", class = "Date"), deciles_income = c(9L,
5L, 4L, 6L, 8L, 8L, 2L, 10L, 8L, 2L, 1L, 4L, 8L, 2L, 7L, 6L,
5L, 9L, 8L, 3L, 5L, 8L, 8L, 8L, 9L, 7L, 9L, 6L, 9L, 8L), winner2016 = c("Hillary Clinton",
"Hillary Clinton", "Hillary Clinton", "Donald Trump", "Donald Trump",
"Hillary Clinton", "Donald Trump", "Donald Trump", "Hillary Clinton",
"Donald Trump", "Donald Trump", "Donald Trump", "Donald Trump",
"Hillary Clinton", "Hillary Clinton", "Hillary Clinton", "Hillary Clinton",
"Hillary Clinton", "Hillary Clinton", "Hillary Clinton", "Donald Trump",
"Donald Trump", "Hillary Clinton", "Donald Trump", NA, "Donald Trump",
"Donald Trump", "Donald Trump", NA, "Hillary Clinton"), key = c("new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"mean_spend_all", "new_case_rate_07da", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all", "new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all", "new_case_rate_07da", "new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all"), mean_spend_cases = c(NA, 7.15300714285714,
-0.0640216666666667, 0, 0.0156585338983051, 4.90477891156463,
1.04001215805471, 4.98906868131868, NA, -0.0116506382978723,
-0.0940805, 3.22004958592133, 0.0157676779661017, 10.4577329192547,
NA, -0.0137643636363636, 3.87815714285714, 5.65400529100529,
NA, 0.00507125, 0.0140480451612903, 5.29207102502018, 3.33591666666667,
0.280013559322034, 0.0406, NA, 4.06433752775722, NA, 0.00533333333333333,
-0.109501666666667)), row.names = c(NA, -30L), groups = structure(list(
day_month_year = structure(c(18273, 18273, 18273, 18280,
18287, 18287, 18294, 18301, 18301, 18308, 18308, 18329, 18343,
18343, 18350, 18350, 18371, 18399, 18399, 18406, 18406, 18413,
18427, 18427, 18434, 18434, 18434, 18434, 18448), tzone = "Europe/Prague", class = "Date"),
deciles_income = c(6L, 7L, 8L, 7L, 5L, 8L, 9L, 6L, 9L, 2L,
8L, 8L, 1L, 8L, 2L, 8L, 5L, 9L, 10L, 4L, 8L, 4L, 8L, 9L,
2L, 5L, 8L, 9L, 3L), .rows = structure(list(c(16L, 28L),
26L, 19L, 15L, 21L, 5L, 29L, 4L, 1L, 10L, 13L, 9L, 11L,
24L, 7L, 23L, 17L, 27L, 8L, 3L, 6L, 12L, 30L, 25L, 14L,
2L, 22L, 18L, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 29L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
How would you approach the problem?



Could you please run dput(afc[sample(nrow(afc),30),]) and paste the output in your question so that we can help you?