0

I have a dataframe and a list of dataframes that I want to use to make multiple boxplots. The dataframe contains chemistry information for multiple analytes, with results from multiple labs.

The list of dataframes stores the results of summary stats (mean, median and so forth). This information is stored separately because outliers were removed before calculating the stats.

The boxplots I want to produce will display all values.

A sample of my dataframe is below:

# Sample of the measurement data (looks like dput() output): 48 rows, one per
# (LAB.ID, Determination_No) pair -- 8 labs x 6 determinations -- with one
# numeric column per analyte (Fe, SiO2, Al2O3).
# NOTE: the expression is not assigned to a name here; presumably it is the
# object called `df` in the code further down.
structure(list(Determination_No = c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), LAB.ID = c(2L, 2L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 
5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 
10L, 10L, 10L, 10L, 10L, 10L, 12L, 12L, 12L, 12L, 12L, 12L), 
    Fe = c(55.94, 55.7, 56.59, 56.5, 55.98, 55.93, 56.83, 56.54, 
    56.18, 56.5, 56.51, 56.34, 56.39, 56.43, 56.53, 56.31, 56.47, 
    56.35, 56.32, 56.29, 56.31, 56.32, 56.39, 56.32, 56.48, 56.4, 
    56.54, 56.43, 56.73, 56.62, 56.382, 56.258, 56.442, 56.258, 
    56.532, 56.264, 56.3, 56.5, 56.2, 56.5, 56.7, 56.5, 56.11, 
    56.46, 56.1, 56.35, 56.36, 56.37), SiO2 = c(7.63, 7.65, 7.73, 
    7.67, 7.67, 7.67, 7.84, 7.69, 7.59, 7.77, 7.74, 7.64, 7.67, 
    7.74, 7.62, 7.81, 7.66, 7.8, 7.91, 7.84, 7.96, 7.87, 7.84, 
    7.92, 7.77, 7.83, 7.76, 7.78, 7.65, 7.74, 7.936, 7.685, 7.863, 
    7.838, 7.828, 7.767, 7.872684992, 7.851291827, 7.872684992, 
    7.722932832, 7.680146501, 7.615967003, 7.64, 7.71, 7.71, 
    7.65, 7.82, 7.68), Al2O3 = c(2.01, 2.02, 2.03, 2.01, 2.02, 
    2, 2.01, 2.01, 2, 2.02, 2.02, 2.03, 2, 2.03, 1.99, 2.01, 
    2.01, 2.01, 2.02, 2.02, 2.05, 2.03, 2.02, 2.03, 1.88, 1.9, 
    1.89, 1.88, 1.88, 1.87, 2.053, 2.044, 2.041, 2.038, 2.008, 
    2.02, 2.002830415, 2.021725042, 2.021725042, 1.983935789, 
    2.002830415, 2.021725042, 2.09, 2.05, 1.96, 2.09, 2.06, 2.02
    )), row.names = c(NA, -48L), class = "data.frame")


An example of my list of dataframes is below. It was mostly put together for display purposes; however, I have realised that it holds some of the info I need for my charts.

# Named list with one 7 x 10 character matrix per analyte (Fe, SiO2, Al2O3).
# Rows:    Min, Max, Median, Mean, Std Dev, Coeff. Variation,
#          Dev. From Cert Mean (in that order; see the row names below).
# Columns: 1     = row label,
#          2-9   = labs 2, 3, 4, 5, 7, 8, 10, 12,
#          10    = unnamed final column (presumably the all-lab summary --
#                  confirm against the code that built this object).
# Every cell is a string, so wrap lookups in as.numeric(); padded cells such
# as "   NA" / "  NaN" mark labs with no usable value for that analyte.
# The plotting code reads [[4, 10]] (Mean) and [[5, 10]] (Std Dev).
df.summary<-
list(Fe = structure(c("Min", "Max", "Median", "Mean", "Std Dev", 
"Coeff. Variation", "Dev. From Cert Mean", "   NA", "   NA", 
"   NA", "  NaN", "   NA", "  NA", "  NaN", "56.18", "56.83", 
"56.50", "56.48", "0.218", "0.39", " 0.13", "56.31", "56.53", 
"56.41", "56.41", "0.080", "0.14", " 0.01", "56.29", "56.39", 
"56.32", "56.33", "0.034", "0.06", "-0.15", "56.40", "56.73", 
"56.51", "56.53", "0.125", "0.22", " 0.22", "56.26", "56.53", 
"56.32", "56.36", "0.116", "0.20", "-0.09", "56.20", "56.70", 
"56.50", "56.45", "0.176", "0.31", " 0.08", "56.10", "56.46", 
"56.36", "56.29", "0.150", "0.27", "-0.21", "56.10", "56.83", 
"56.41", "56.41", "0.153", "0.27", ""), .Dim = c(7L, 10L), .Dimnames = list(
    c("LabMinSummary", "LabMaxSummary", "LabMedianSummary", "LabMeanSummary", 
    "lab.SDSummary", "cv.summmary", "LabDevMean.Summary"), c("", 
    "2", "3", "4", "5", "7", "8", "10", "12", ""))), SiO2 = structure(c("Min", 
"Max", "Median", "Mean", "Std Dev", "Coeff. Variation", "Dev. From Cert Mean", 
"7.63", "7.73", "7.67", "7.67", "0.033", "0.44", "-1.09", "7.59", 
"7.84", "7.72", "7.71", "0.091", "1.18", "-0.55", "7.62", "7.81", 
"7.70", "7.72", "0.079", "1.02", "-0.48", "7.84", "7.96", "7.89", 
"7.89", "0.048", "0.61", " 1.75", "7.65", "7.83", "7.76", "7.76", 
"0.060", "0.77", " 0.01", "7.68", "7.94", "7.83", "7.82", "0.086", 
"1.10", " 0.84", "7.62", "7.87", "7.79", "7.77", "0.111", "1.43", 
" 0.19", "7.64", "7.82", "7.70", "7.70", "0.065", "0.84", "-0.68", 
"7.59", "7.96", "7.74", "7.74", "0.097", "1.25", ""), .Dim = c(7L, 
10L), .Dimnames = list(c("LabMinSummary", "LabMaxSummary", "LabMedianSummary", 
"LabMeanSummary", "lab.SDSummary", "cv.summmary", "LabDevMean.Summary"
), c("", "2", "3", "4", "5", "7", "8", "10", "12", ""))), Al2O3 = structure(c("Min", 
"Max", "Median", "Mean", "Std Dev", "Coeff. Variation", "Dev. From Cert Mean", 
"2.00", "2.03", "2.01", "2.01", "0.010", "0.52", "-0.16", "2.00", 
"2.03", "2.01", "2.01", "0.010", "0.52", "-0.16", "1.99", "2.03", 
"2.01", "2.01", "0.013", "0.66", "-0.49", "2.02", "2.05", "2.02", 
"2.03", "0.012", "0.58", " 0.50", "  NA", "  NA", "  NA", " NaN", 
"   NA", "  NA", "  NaN", "2.01", "2.05", "2.04", "2.03", "0.017", 
"0.82", " 0.78", "1.98", "2.02", "2.01", "2.01", "0.015", "0.77", 
"-0.45", "  NA", "  NA", "  NA", " NaN", "   NA", "  NA", "  NaN", 
"1.98", "2.05", "2.01", "2.01", "0.016", "0.77", ""), .Dim = c(7L, 
10L), .Dimnames = list(c("LabMinSummary", "LabMaxSummary", "LabMedianSummary", 
"LabMeanSummary", "lab.SDSummary", "cv.summmary", "LabDevMean.Summary"
), c("", "2", "3", "4", "5", "7", "8", "10", "12", ""))))

For a single analyte, the code below produces the chart I want, with one box per lab.

# Box plot of Fe by lab, with dashed reference lines for the accepted mean
# (blue) and the mean +/- 3 standard deviations (red) taken from df.summary.

# Keep the labs in the order in which they first appear in the data.
Plotlaborder <- unique(df$LAB.ID)

# df.summary stores strings: row 4 is the mean, row 5 the standard deviation,
# and column 10 is the final summary column. Look the values up once.
fe_mean <- as.numeric(df.summary[["Fe"]][[4, 10]])
fe_sd <- as.numeric(df.summary[["Fe"]][[5, 10]])

df %>%
  mutate(LAB.ID = factor(LAB.ID, levels = Plotlaborder)) %>%
  # Map the column by name so it comes from the data being piped in;
  # `df2$Fe` pulled the values from a different object. `outlier.shape` is a
  # geom_boxplot() argument and had no effect when given to ggplot().
  ggplot(mapping = aes(x = LAB.ID, y = Fe, color = LAB.ID)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(outlier.shape = NA) +
  # Constant reference lines are passed as parameters rather than through
  # aes(), so each line is drawn exactly once.
  geom_hline(
    yintercept = fe_mean, # accepted mean
    linetype = "dashed", color = "blue", size = 0.75
  ) +
  geom_hline(
    yintercept = fe_mean + c(-3, 3) * fe_sd, # mean -/+ 3 SD
    linetype = "dashed", color = "firebrick", size = 0.75
  ) +
  ggtitle("Fe Box Plot") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab(label = "Lab No") +
  ylab("Fe values %")

I want to use either lapply or Map to generate the same chart for each analyte. I want to pass the mean and sd values from my list of dataframes (df.summary) to create my geom_hline values, and assign the analyte name to the title and the ylab.

What is the best way forward?

Boxplot for 1 analyte

2 Answers 2

2

I think a simple solution would be to wrap your code in a function and pass it each name of df.summary as the argument:

library(tidyverse)

# Keep the labs in the order in which they first appear in the data.
Plotlaborder <- unique(df$LAB.ID)
mycompound <- names(df.summary)
df <- df %>%
  mutate(LAB.ID = factor(LAB.ID, levels = Plotlaborder))

#' Draw the per-lab box plot for one analyte and save it as a PNG.
#'
#' Reads the global objects `df` (measurements) and `df.summary` (list of
#' summary matrices). The file is written to the working directory as
#' "boxplot_<compound>.png".
#'
#' @param compound Name of one analyte; must be a column of `df` and an
#'   element of `df.summary`.
#' @return The name of the PNG file, invisibly.
myplot <- function(compound) {
  stopifnot(
    is.character(compound), length(compound) == 1L,
    compound %in% names(df.summary), compound %in% names(df)
  )

  # Summary values are stored as strings: row 4 is the mean, row 5 the
  # standard deviation, and column 10 is the final summary column.
  stats <- df.summary[[compound]]
  accepted_mean <- as.numeric(stats[[4, 10]])
  accepted_sd <- as.numeric(stats[[5, 10]])

  p <- ggplot(
    df,
    mapping = aes(x = LAB.ID, y = .data[[compound]], color = LAB.ID)
  ) +
    stat_boxplot(geom = "errorbar") +
    geom_boxplot(outlier.shape = NA) +
    # Constant reference lines are passed as parameters rather than through
    # aes(), so each line is drawn exactly once.
    geom_hline(
      yintercept = accepted_mean, # accepted mean
      linetype = "dashed", color = "blue", size = 0.75
    ) +
    geom_hline(
      yintercept = accepted_mean + c(-3, 3) * accepted_sd, # mean -/+ 3 SD
      linetype = "dashed", color = "firebrick", size = 0.75
    ) +
    ggtitle(paste0(compound, " Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Lab No") +
    ylab(paste0(compound, " values %"))

  out_file <- paste0("boxplot_", compound, ".png")
  png(filename = out_file, width = 480, height = 480, units = "px")
  # Close the device even if printing the plot fails, so no device is leaked.
  on.exit(dev.off(), add = TRUE)
  print(p)

  invisible(out_file)
}


# Called for its side effect (one PNG per analyte); hide the returned list.
invisible(lapply(mycompound, myplot))

This will save one plot per element of df.summary (i.e. length(df.summary) files) in your working directory.

list.files(pattern = "png")
[1] "boxplot_Al2O3.png" "boxplot_Fe.png"    "boxplot_SiO2.png"

an example of the output produced

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

1

You can use imap on df.summary :

library(tidyverse)

# Keep the labs in the order in which they first appear in the data.
Plotlaborder <- unique(df$LAB.ID)
df <- df %>% mutate(LAB.ID = factor(LAB.ID, levels = Plotlaborder))

# imap() hands each summary matrix to the function together with its list
# name (the analyte), and returns a list of ggplot objects named by analyte.
list_plot <- imap(df.summary, function(stats, analyte) {
  # Summary values are stored as strings: row 4 is the mean, row 5 the
  # standard deviation, and column 10 is the final summary column.
  accepted_mean <- as.numeric(stats[[4, 10]])
  accepted_sd <- as.numeric(stats[[5, 10]])

  ggplot(
    df,
    mapping = aes(x = LAB.ID, y = .data[[analyte]], color = LAB.ID)
  ) +
    stat_boxplot(geom = "errorbar") +
    geom_boxplot(outlier.shape = NA) +
    # Constant reference lines are passed as parameters rather than through
    # aes(), so each line is drawn exactly once.
    geom_hline(
      yintercept = accepted_mean, # accepted mean
      linetype = "dashed", color = "blue", size = 0.75
    ) +
    geom_hline(
      yintercept = accepted_mean + c(-3, 3) * accepted_sd, # mean -/+ 3 SD
      linetype = "dashed", color = "firebrick", size = 0.75
    ) +
    ggtitle(paste0(analyte, " Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Lab No") +
    # Leading space so the label reads "Fe values %", not "Fevalues %".
    ylab(paste0(analyte, " values %"))
})

This generates a list of plots in list_plot; individual plots can be accessed via list_plot[[1]], list_plot[[2]], etc.

1 Comment

thanks once again your solution works well as well

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.