2

I am using ggplot to plot gas production over time for each gas well.


# Gas production curve: GasProd_MCF against DaysOn, one coloured line per
# WellID, using the manual palette `cols`. Both axes start at 0 and end at the
# largest observed value; the legend is hidden.
GAS_PRODUCTION_CURVE <- RawdataTest %>%
  ggplot(mapping = aes(x = DaysOn, y = GasProd_MCF, color = WellID)) +
  geom_line(size = 0.5) +
  theme_bw() +
  scale_color_manual(values = cols) +
  # The argument is `labels`; spelling it out avoids partial argument matching.
  scale_y_continuous(labels = comma) +
  # na.rm = TRUE keeps the axis limits finite when the data contains NA.
  coord_cartesian(
    xlim = c(0, max(RawdataTest$DaysOn, na.rm = TRUE)),
    ylim = c(0, max(RawdataTest$GasProd_MCF, na.rm = TRUE))
  ) +
  theme(legend.position = "none") +
  xlab("Days On") +
  ylab("Gas Rate [MCF]")

This gets me the plot that I want (note: this is just a subset of the data). However, I want to have the well data graphed, but color by the variable "RSOperator". In other words, I want all wells with the same RSOperator to be the same color. That way users can distinguish the difference in well performance between wells. Is there a way to adjust my code to accomplish this?

3
  • Check this out sape.inf.usi.ch/quick-reference/ggplot2/colour Commented Nov 21, 2019 at 13:30
  • But you're already using color for color=WellID Commented Nov 21, 2019 at 13:35
  • If I eliminate color=WellID, how do I keep the well level data graphed but color by RSOperator? Commented Nov 21, 2019 at 13:38

2 Answers 2

1

I simulate some data that hopefully looks like yours, and you can see how to get the same color for a common RSOperator.

# Simulated production data: two operators ("a" and "b") with three wells
# each and ten observations (DaysOn 1..10) per well. Operator "b" wells
# produce at twice the base rate; rnorm() adds noise, so values differ per run.
RawdataTest <- data.frame(
  DaysOn = rep(1:10, 6),
  GasProd_MCF = c(rep(1:10, 3), rep(2 * (1:10), 3)) + rnorm(60, 3, 1),
  WellID = rep(1:3, each = 10, times = 2),
  RSOperator = rep(letters[1:2], each = 30)
)
# WellID repeats across operators, so combine operator and well into a
# unique identifier for each line of the plot (e.g. "a1", "b3").
RawdataTest <- RawdataTest %>%
  mutate(uniq_id = paste0(RSOperator, WellID))

# Map every line id (uniq_id) to a colour that depends only on its RSOperator.
# MAPPING holds one row per (RSOperator, uniq_id) pair.
MAPPING <- RawdataTest %>% distinct(RSOperator, uniq_id)

# RS_COLS: one colour per operator, named by operator. Start from the 9-colour
# "Set1" palette and interpolate extra colours only when there are more
# operators than palette entries (otherwise the surplus would become NA).
RS_COLS <- brewer.pal(9, "Set1")
if (n_distinct(MAPPING$RSOperator) > length(RS_COLS)) {
  RS_COLS <- grDevices::colorRampPalette(RS_COLS)(n_distinct(MAPPING$RSOperator))
}
RS_COLS <- RS_COLS[seq_len(n_distinct(MAPPING$RSOperator))]
names(RS_COLS) <- unique(as.character(MAPPING$RSOperator))

# PLOT_COLS: one colour per uniq_id, named by uniq_id. as.character() forces a
# lookup by name; indexing with a factor would use its integer codes instead.
PLOT_COLS <- RS_COLS[as.character(MAPPING$RSOperator)]
names(PLOT_COLS) <- MAPPING$uniq_id

 # One line per uniq_id; each line's colour is looked up by name in PLOT_COLS.
 ggplot(
   data = RawdataTest,
   mapping = aes(x = DaysOn, y = GasProd_MCF, colour = uniq_id)
 ) +
   scale_colour_manual(values = PLOT_COLS) +
   geom_line(size = 0.5) +
   theme_bw()

enter image description here

However, you can see that it's hard to distinguish a1 from a2, and so on. You might want to combine colour with linetype, but once you have a lot of lines the plot quickly becomes unreadable:

# One linetype per line id, named by uniq_id. Linetypes 1-3 are cycled over
# however many ids MAPPING contains, instead of assuming exactly six.
LINETYPE <- rep_len(1:3, nrow(MAPPING))
names(LINETYPE) <- MAPPING$uniq_id

 # Same plot, with uniq_id mapped to both colour and linetype; the manual
 # scales look up each id by name in PLOT_COLS and LINETYPE.
 ggplot(
   data = RawdataTest,
   mapping = aes(
     x = DaysOn,
     y = GasProd_MCF,
     colour = uniq_id,
     linetype = uniq_id
   )
 ) +
   scale_linetype_manual(values = LINETYPE) +
   scale_colour_manual(values = PLOT_COLS) +
   geom_line(size = 0.5) +
   theme_bw()

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

0

From what I learned from StupidWolf's answer, we just need to input a named vector into:

# Template: NAMED_VECTOR stands for the named vector of colours to use.
ggplot(
  data = RawdataTest,
  mapping = aes(x = DaysOn, y = GasProd_MCF, colour = uniq_id)
) +
  scale_colour_manual(values = NAMED_VECTOR) +
  geom_line(size = 0.5) +
  theme_bw()

Where each element is a color and its name is a value of the column that identifies each line of the plot. The same applies to linetypes.

I took the liberty of creating a function that is heavily inspired by what StupidWolf did in his answer, but which makes more use of the %>% operator for clarity and has some added functionality (such as allowing a color to be specified for each value). I must admit that the function started out much shorter.

#' Plot style mapper
#'
#' Generates a named vector that can be passed to ggplot2 manual scales
#' (e.g. `scale_color_manual(values = ...)`). The names are the values of the
#' `id` column and each element is the style selected by the value that id has
#' in the `variable` column, so ids sharing a `variable` value share a style.
#' Implemented with base R only.
#'
#' @param df Data frame containing the `id` and `variable` columns.
#' @param id Single string: name of the column identifying each plotted line.
#' @param variable Single string: name of the column used for styling.
#' @param styles Vector of styles (colours, linetypes, ...). Without `values`,
#'   styles are assigned to the distinct values of `variable` in order of first
#'   appearance; a warning is raised when there are fewer styles than distinct
#'   values, and the styles are then recycled.
#' @param values Optional vector of `variable` values to style explicitly.
#'   `styles[i]` is applied to `values[i]`; `styles` is truncated or recycled to
#'   the length of `values`.
#' @param other_styles Only used with `values`: styles for the `variable`
#'   values not listed in `values`, assigned in order of first appearance and
#'   recycled if needed. Entries that also occur in `styles` are dropped.
#' @return A vector of styles named by `id`. With `values`, ids whose value is
#'   listed come first, followed by the remaining ids. Ids left without any
#'   style are `NA` (a warning is raised).
ggplot_style_mapper <- function(df, id, variable, styles,
                                values = NULL, other_styles = NULL) {
  stopifnot(
    is.character(id), length(id) == 1L,
    is.character(variable), length(variable) == 1L,
    length(styles) > 0L
  )
  missing_cols <- setdiff(c(id, variable), names(df))
  if (length(missing_cols) > 0L) {
    stop(
      "column(s) not found in df: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Distinct values of the styling variable, in order of first appearance.
  var_levels <- unique(df[[variable]])

  if (is.null(values)) {
    if (length(var_levels) > length(styles)) {
      warning("style vector is shorter than unique id-variables")
    }
    level_styles <- rep(styles, length.out = length(var_levels))
    unmatched <- rep(FALSE, length(var_levels))
  } else {
    # Pair styles with values by position, so the result does not depend on
    # the order in which the values happen to occur in the data.
    value_styles <- rep(styles, length.out = length(values))
    level_pos <- match(var_levels, values)
    level_styles <- value_styles[level_pos]
    unmatched <- is.na(level_pos)

    # Styles already taken by an explicit value are not reused for the rest.
    other_styles <- other_styles[!other_styles %in% value_styles]
    if (any(unmatched)) {
      if (length(other_styles) == 0L) {
        warning("Either other_styles necessary but not specified, or other_styles %in% styles")
      } else {
        # One style per remaining value (not per id), so that ids sharing a
        # value always share a style.
        level_styles[unmatched] <- rep(other_styles, length.out = sum(unmatched))
      }
    }
  }

  # One entry per distinct (id, variable) combination.
  pairs <- unique(df[c(id, variable)])
  pos <- match(pairs[[variable]], var_levels)
  named_vector <- stats::setNames(level_styles[pos], pairs[[id]])

  # Explicitly styled ids first, then the rest (previous ordering is kept).
  is_other <- unmatched[pos]
  c(named_vector[!is_other], named_vector[is_other])
}
    

The function allows us to more easily define the named vector with the styling before creating the graph:

# Build the named style vectors first; the graph is drawn afterwards.
# Colours are driven by RSOperator, linetypes by WellID.
named_colors <- ggplot_style_mapper(
  RawdataTest,
  id = "uniq_id",
  variable = "RSOperator",
  styles = RColorBrewer::brewer.pal(n = 9, name = "Set1")
)
named_linetype <- ggplot_style_mapper(
  RawdataTest,
  id = "uniq_id",
  variable = "WellID",
  styles = c(1, 2, 3)
)

# Graph: uniq_id drives both colour and linetype; the manual scales take the
# named vectors named_colors and named_linetype.
ggplot(
  data = RawdataTest,
  mapping = aes(
    x = DaysOn,
    y = GasProd_MCF,
    colour = uniq_id,
    linetype = uniq_id
  )
) +
  scale_linetype_manual(values = named_linetype) +
  scale_colour_manual(values = named_colors) +
  geom_line(size = 0.5) +
  theme_bw()

We could also do it in one step:

# Alternative: build both style vectors inline, inside the scale calls
# (shorter but messier).
ggplot(
  data = RawdataTest,
  mapping = aes(
    x = DaysOn,
    y = GasProd_MCF,
    colour = uniq_id,
    linetype = uniq_id
  )
) +
  geom_line(size = 0.5) +
  scale_colour_manual(
    values = ggplot_style_mapper(
      RawdataTest,
      id = "uniq_id",
      variable = "RSOperator",
      styles = RColorBrewer::brewer.pal(n = 9, name = "Set1")
    )
  ) +
  scale_linetype_manual(
    values = ggplot_style_mapper(
      RawdataTest,
      id = "uniq_id",
      variable = "WellID",
      styles = c(1, 2, 3)
    )
  ) +
  theme_bw()

The extra functionality I added is that the function lets us specify the styling of each specific value of the relevant variable. It also lets us specify styling for just a few values and group the rest under one style. Finally, we can specify styling for one or a few values and assign a palette of styles to the rest.

# Example 1: an explicit style for every value of the variable
named_colors <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "RSOperator",
  values = c("a", "b"), styles = c("blue", "red")
)
named_linetype <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "WellID",
  values = c(1, 2, 3), styles = c(3, 2, 1)
)

# Example 2: explicit styles for a few values, one shared style for the rest
named_colors <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "RSOperator",
  values = "a", styles = "blue", other_styles = "black"
)
named_linetype <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "WellID",
  values = c(1, 2), styles = 3, other_styles = 6
)

# Example 3: explicit styles for a few values, a palette of styles for the rest
named_colors <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "RSOperator",
  values = "a", styles = "blue", other_styles = "black"
)
named_linetype <- ggplot_style_mapper(
  df = RawdataTest, id = "uniq_id", variable = "WellID",
  values = 1, styles = 3, other_styles = c(6, 7)
)

Hope this helps somebody!

P.S.: the data used above

pacman::p_load(RColorBrewer, tidyverse)

# Create the same data frame as in StupidWolf's answer: two operators ("a" and
# "b") with three wells each and ten observations (DaysOn 1..10) per well.
# rnorm() adds noise, so the production values differ between runs.
RawdataTest <- data.frame(
  DaysOn = rep(1:10, 6),
  GasProd_MCF = c(rep(1:10, 3), rep(2 * (1:10), 3)) + rnorm(60, 3, 1),
  WellID = rep(1:3, each = 10, times = 2),
  RSOperator = rep(letters[1:2], each = 30)
)
# WellID repeats across operators, so combine operator and well into a
# unique identifier for each line of the plot (e.g. "a1", "b3").
RawdataTest <- RawdataTest %>%
  mutate(uniq_id = paste0(RSOperator, WellID))

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.