I have a dataframe and a list of dataframes that I want to use to make multiple boxplots. The dataframe contains chemistry information for multiple analytes, with results from multiple labs.
The list of dataframes stores the results of summary stats: mean, median and so forth. This information is stored separately because outliers were removed before calculating the stats.
The boxplots I want to produce will display all values.
A sample of my dataframe is below:
# Sample of the lab results as dput() output: 48 rows, one per
# determination (Determination_No 1-6) per lab (LAB.ID), with one numeric
# column per analyte (Fe, SiO2, Al2O3).
structure(list(Determination_No = c(1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), LAB.ID = c(2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
10L, 10L, 10L, 10L, 10L, 10L, 12L, 12L, 12L, 12L, 12L, 12L),
Fe = c(55.94, 55.7, 56.59, 56.5, 55.98, 55.93, 56.83, 56.54,
56.18, 56.5, 56.51, 56.34, 56.39, 56.43, 56.53, 56.31, 56.47,
56.35, 56.32, 56.29, 56.31, 56.32, 56.39, 56.32, 56.48, 56.4,
56.54, 56.43, 56.73, 56.62, 56.382, 56.258, 56.442, 56.258,
56.532, 56.264, 56.3, 56.5, 56.2, 56.5, 56.7, 56.5, 56.11,
56.46, 56.1, 56.35, 56.36, 56.37), SiO2 = c(7.63, 7.65, 7.73,
7.67, 7.67, 7.67, 7.84, 7.69, 7.59, 7.77, 7.74, 7.64, 7.67,
7.74, 7.62, 7.81, 7.66, 7.8, 7.91, 7.84, 7.96, 7.87, 7.84,
7.92, 7.77, 7.83, 7.76, 7.78, 7.65, 7.74, 7.936, 7.685, 7.863,
7.838, 7.828, 7.767, 7.872684992, 7.851291827, 7.872684992,
7.722932832, 7.680146501, 7.615967003, 7.64, 7.71, 7.71,
7.65, 7.82, 7.68), Al2O3 = c(2.01, 2.02, 2.03, 2.01, 2.02,
2, 2.01, 2.01, 2, 2.02, 2.02, 2.03, 2, 2.03, 1.99, 2.01,
2.01, 2.01, 2.02, 2.02, 2.05, 2.03, 2.02, 2.03, 1.88, 1.9,
1.89, 1.88, 1.88, 1.87, 2.053, 2.044, 2.041, 2.038, 2.008,
2.02, 2.002830415, 2.021725042, 2.021725042, 1.983935789,
2.002830415, 2.021725042, 2.09, 2.05, 1.96, 2.09, 2.06, 2.02
)), row.names = c(NA, -48L), class = "data.frame")
An example of my list of dataframes is below. It was mostly put together for display purposes; however, I have realised that it holds some of the info I need for my charts.
# Per-analyte summary statistics, keyed by analyte name (Fe, SiO2, Al2O3).
# Each element is a 7 x 10 character matrix:
#   - rows are the statistics (min, max, median, mean, SD, CV, deviation
#     from the certified mean), named LabMinSummary ... LabDevMean.Summary;
#   - the first (unnamed) column holds the display label for each row;
#   - columns "2" .. "12" are named after the LAB.ID values;
#   - the last (unnamed) column appears to hold the all-lab figures.
# All values are stored as text (including " NA" / " NaN" placeholders), so
# they must be converted with as.numeric() before being used in arithmetic.
df.summary<-
list(Fe = structure(c("Min", "Max", "Median", "Mean", "Std Dev",
"Coeff. Variation", "Dev. From Cert Mean", " NA", " NA",
" NA", " NaN", " NA", " NA", " NaN", "56.18", "56.83",
"56.50", "56.48", "0.218", "0.39", " 0.13", "56.31", "56.53",
"56.41", "56.41", "0.080", "0.14", " 0.01", "56.29", "56.39",
"56.32", "56.33", "0.034", "0.06", "-0.15", "56.40", "56.73",
"56.51", "56.53", "0.125", "0.22", " 0.22", "56.26", "56.53",
"56.32", "56.36", "0.116", "0.20", "-0.09", "56.20", "56.70",
"56.50", "56.45", "0.176", "0.31", " 0.08", "56.10", "56.46",
"56.36", "56.29", "0.150", "0.27", "-0.21", "56.10", "56.83",
"56.41", "56.41", "0.153", "0.27", ""), .Dim = c(7L, 10L), .Dimnames = list(
c("LabMinSummary", "LabMaxSummary", "LabMedianSummary", "LabMeanSummary",
"lab.SDSummary", "cv.summmary", "LabDevMean.Summary"), c("",
"2", "3", "4", "5", "7", "8", "10", "12", ""))), SiO2 = structure(c("Min",
"Max", "Median", "Mean", "Std Dev", "Coeff. Variation", "Dev. From Cert Mean",
"7.63", "7.73", "7.67", "7.67", "0.033", "0.44", "-1.09", "7.59",
"7.84", "7.72", "7.71", "0.091", "1.18", "-0.55", "7.62", "7.81",
"7.70", "7.72", "0.079", "1.02", "-0.48", "7.84", "7.96", "7.89",
"7.89", "0.048", "0.61", " 1.75", "7.65", "7.83", "7.76", "7.76",
"0.060", "0.77", " 0.01", "7.68", "7.94", "7.83", "7.82", "0.086",
"1.10", " 0.84", "7.62", "7.87", "7.79", "7.77", "0.111", "1.43",
" 0.19", "7.64", "7.82", "7.70", "7.70", "0.065", "0.84", "-0.68",
"7.59", "7.96", "7.74", "7.74", "0.097", "1.25", ""), .Dim = c(7L,
10L), .Dimnames = list(c("LabMinSummary", "LabMaxSummary", "LabMedianSummary",
"LabMeanSummary", "lab.SDSummary", "cv.summmary", "LabDevMean.Summary"
), c("", "2", "3", "4", "5", "7", "8", "10", "12", ""))), Al2O3 = structure(c("Min",
"Max", "Median", "Mean", "Std Dev", "Coeff. Variation", "Dev. From Cert Mean",
"2.00", "2.03", "2.01", "2.01", "0.010", "0.52", "-0.16", "2.00",
"2.03", "2.01", "2.01", "0.010", "0.52", "-0.16", "1.99", "2.03",
"2.01", "2.01", "0.013", "0.66", "-0.49", "2.02", "2.05", "2.02",
"2.03", "0.012", "0.58", " 0.50", " NA", " NA", " NA", " NaN",
" NA", " NA", " NaN", "2.01", "2.05", "2.04", "2.03", "0.017",
"0.82", " 0.78", "1.98", "2.02", "2.01", "2.01", "0.015", "0.77",
"-0.45", " NA", " NA", " NA", " NaN", " NA", " NA", " NaN",
"1.98", "2.05", "2.01", "2.01", "0.016", "0.77", ""), .Dim = c(7L,
10L), .Dimnames = list(c("LabMinSummary", "LabMaxSummary", "LabMedianSummary",
"LabMeanSummary", "lab.SDSummary", "cv.summmary", "LabDevMean.Summary"
), c("", "2", "3", "4", "5", "7", "8", "10", "12", ""))))
For a single analyte, the code below produces what I want: one boxplot per lab.
# Box plot of the Fe results per lab, with dashed reference lines for the
# accepted mean (blue) and the mean +/- 3 SD limits (firebrick), both read
# from df.summary.

# Keep the labs on the x axis in their order of first appearance in the data.
Plotlaborder <- unique(df$LAB.ID)

# The summary matrices store text, so convert the two values once up front.
# They are looked up by row name in the last column of the Fe matrix (the
# same cells as [4, 10] and [5, 10]).
fe_summary <- df.summary[["Fe"]]
fe_mean <- as.numeric(fe_summary["LabMeanSummary", ncol(fe_summary)])
fe_sd <- as.numeric(fe_summary["lab.SDSummary", ncol(fe_summary)])

df %>%
  mutate(LAB.ID = factor(LAB.ID, levels = Plotlaborder)) %>%
  # Map y to the Fe column of the piped data (not an external vector) so the
  # values always stay aligned with LAB.ID.
  ggplot(mapping = aes(x = LAB.ID, y = Fe, color = LAB.ID)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(outlier.shape = NA) +
  # Constants are passed as yintercept directly rather than through aes(),
  # so each reference line is drawn once instead of once per data row.
  geom_hline(
    yintercept = fe_mean, # accepted mean
    linetype = "dashed", color = "blue", size = 0.75
  ) +
  geom_hline(
    yintercept = fe_mean + c(-3, 3) * fe_sd, # mean -/+ 3 SD
    linetype = "dashed", color = "firebrick", size = 0.75
  ) +
  ggtitle("Fe Box Plot") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab(label = "Lab No") +
  ylab("Fe values %")
I want to use either lapply or Map to generate the same chart for each analyte. I want to pass the mean and sd values from my list of dataframes (df.summary) to create my geom_hline values, and assign the analyte name to the title and the ylab.
What is the best way forward?

