1

I have my data stored in p, which can be found below.

I have four specific categories for a group of tumor patients. Three of the groups correspond to the tumor stage and are stored as p$WHO.Grade=1,2,3. The last group is all tumor patients combined.

I am producing a specific plot consisting of multiple boxplots demonstrating the distribution of a continuous covariate (p$ki67pro) in the four groups described as above and in relation to the event of recurrence (p$recurrence==0 for no and p$recurrence==1 for yes).

As it turns out, there are no events for p$WHO.Grade==3, which means that I want my plot to look exactly like this (manipulated in Photoshop):

enter image description here

However, I get the picture below when I use the following script:

library(ggplot2)
library(dplyr)    
p %>%
  bind_rows(mutate(p, WHO.Grade = 4)) %>%  # extra copy of every row = pooled group
  mutate(WHO.Grade = factor(WHO.Grade),
         recurrence = factor(recurrence)) %>%
  ggplot(aes(x = WHO.Grade, y = ki67pro,
             fill = recurrence, colour = recurrence)) +
  # Boxplot outliers are made invisible (alpha 0); the geom_point layer below
  # draws the individual observations instead.
  geom_boxplot(
    outlier.alpha = 0,
    position = position_dodge(width = 0.78, preserve = "single")
  ) +
  geom_point(
    size = 3,
    shape = 21,
    position = position_jitterdodge()
  ) +
  scale_x_discrete(
    name = "",
    label = c("WHO-I", "WHO-II", "WHO-III", "All")
  ) +
  scale_y_continuous(
    name = "x",
    breaks = seq(0, 30, 5),
    limits = c(0, 30)
  ) +
  # One pale fill and one stronger outline colour per recurrence status.
  scale_fill_manual(
    values = c("#edf1f9", "#fcebeb"),
    name = "",
    labels = c("", "")
  ) +
  scale_colour_manual(
    values = c("#1C73C2", "red"),
    name = "",
    labels = c("", "")
  ) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

enter image description here

It seems like p$WHO.Grade==All is automatically inserted into the p$WHO.Grade==3 slot, which should be left blank.

Therefore, my question is: how can I graphically insert a blank space at p$WHO.Grade==3 given my script above?

# Example data for the tumor patients, as a data.frame with three integer
# columns:
#   WHO.Grade  - grade group; only the values 1 and 2 occur in this sample
#                (there are no rows with grade 3)
#   ki67pro    - the covariate plotted on the y axis
#   recurrence - 0 = no recurrence, 1 = recurrence
# Row names are the original, non-consecutive row indices.
p <- structure(list(WHO.Grade = c(1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
                                  1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
                                  1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
                                  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
                                  1L, 1L, 1L, 1L, 1L, 1L), ki67pro = c(1L, 12L, 3L, 3L, 5L, 3L, 
                                                                       25L, 7L, 4L, 5L, 12L, 3L, 15L, 4L, 5L, 7L, 8L, 3L, 12L, 10L, 
                                                                       4L, 10L, 7L, 3L, 2L, 3L, 7L, 4L, 7L, 10L, 4L, 5L, 5L, 3L, 5L, 
                                                                       2L, 5L, 3L, 3L, 3L, 4L, 4L, 3L, 2L, 5L, 1L, 5L, 2L, 3L, 1L, 2L, 
                                                                       3L, 3L, 5L, 4L, 20L, 5L, 0L, 4L, 3L, 0L, 3L, 4L, 1L, 2L, 20L, 
                                                                       2L, 3L, 5L, 4L, 8L, 1L, 4L, 5L, 4L, 3L, 6L, 12L, 3L, 4L, 4L, 
                                                                       2L, 5L, 3L, 3L, 3L, 2L, 5L, 4L, 2L, 3L, 4L, 3L, 3L, 2L, 2L, 4L, 
                                                                       7L, 4L, 3L, 4L, 2L, 3L, 6L, 2L, 3L, 10L, 5L, 10L, 3L, 10L, 3L, 
                                                                       4L, 5L, 2L, 4L, 3L, 4L, 4L, 4L, 5L, 3L, 12L, 5L, 4L, 3L, 2L, 
                                                                       4L, 3L, 4L, 2L, 1L, 6L, 1L, 4L, 12L, 3L, 4L, 3L, 2L, 6L, 5L, 
                                                                       4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, 4L, 1L, 3L, 3L, 4L, 0L, 3L
                                  ), recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
                                                    0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 
                                                    1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                                    1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 
                                                    1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
                                                    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                                    0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                                    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                                    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                                    0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
                                                    0L, 0L, 0L)), class = "data.frame", row.names = c(1L, 2L, 3L, 
                                                                                                      9L, 10L, 11L, 13L, 14L, 15L, 16L, 18L, 19L, 20L, 21L, 22L, 23L, 
                                                                                                      24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 
                                                                                                      37L, 38L, 39L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 
                                                                                                      52L, 53L, 54L, 55L, 57L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 
                                                                                                      67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 
                                                                                                      80L, 81L, 82L, 83L, 84L, 85L, 87L, 89L, 90L, 91L, 92L, 93L, 94L, 
                                                                                                      96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 
                                                                                                      107L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 
                                                                                                      119L, 120L, 121L, 123L, 124L, 125L, 126L, 127L, 128L, 130L, 131L, 
                                                                                                      132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 
                                                                                                      143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 
                                                                                                      154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 
                                                                                                      165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L
                                                    ))
0

2 Answers 2

2

The simplest way is to adjust your WHO.Grade factor to include all 4 levels - c("WHO-I","WHO-II","WHO-III","All"). Here's the first adjustment, on line 3 (the mutate() call):

p %>%
  bind_rows(mutate(p, WHO.Grade = 4)) %>%
  mutate(WHO.Grade = factor(WHO.Grade,
                            # Listing 1:4 keeps grade 3 as a level even if
                            # no rows carry it.
                            levels = 1:4,
                            labels = c("WHO-I", "WHO-II", "WHO-III", "All")),
         recurrence = factor(recurrence))

Now that we've named our factors, we can modify the scale_x_discrete() call to remove the label and add drop = FALSE:

  # The axis labels now come from the factor's own labels, so no `label`
  # argument is passed here. drop = FALSE keeps unused factor levels on the
  # axis.
  scale_x_discrete(
    name = "",
    drop = FALSE
  )

Putting everything together we get:

# Full pipeline: add the pooled group as grade 4, declare all four grade
# levels (so the unused WHO-III level exists), then plot ki67pro per grade
# split by recurrence status.
p %>%
  bind_rows(mutate(p, WHO.Grade = 4)) %>%
  as_tibble() %>%
  mutate(
    WHO.Grade = factor(
      WHO.Grade,
      levels = 1:4,
      labels = c("WHO-I", "WHO-II", "WHO-III", "All")
    ),
    recurrence = factor(recurrence)
  ) %>%
  ggplot(aes(x = WHO.Grade, y = ki67pro,
             fill = recurrence, colour = recurrence)) +
  # Boxplot outliers are made invisible (alpha 0); the geom_point layer below
  # draws the individual observations instead.
  geom_boxplot(
    outlier.alpha = 0,
    position = position_dodge(width = 0.78, preserve = "single")
  ) +
  geom_point(
    size = 3,
    shape = 21,
    position = position_jitterdodge()
  ) +
  # No explicit labels: they are taken from the factor. drop = FALSE keeps
  # the unused WHO-III level on the axis.
  scale_x_discrete(
    name = "",
    drop = FALSE
  ) +
  scale_y_continuous(
    name = "x",
    breaks = seq(0, 30, 5),
    limits = c(0, 30)
  ) +
  scale_fill_manual(
    values = c("#edf1f9", "#fcebeb"),
    name = "",
    labels = c("", "")
  ) +
  scale_colour_manual(
    values = c("#1C73C2", "red"),
    name = "",
    labels = c("", "")
  ) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

ggplot2 preserve factor

Sign up to request clarification or add additional context in comments.

Comments

1

I could not reproduce the exact result shown in your Photoshop image, but you can get the following image:

enter image description here

It is what you want, except that all the entries of "WHO-III" are zeros.

The code that generates it is:

library(ggplot2)
library(dplyr)  

# Pad the data with two extra copies of the original rows: one relabelled as
# grade 3 (a placeholder so that the WHO-III position exists on the x axis)
# and one relabelled as grade 4 (the pooled "All" group).
# NOTE: this overwrites `p`. Re-create the original `p` before running the
# step again, otherwise the already padded rows are duplicated once more.
p <- p %>%
  bind_rows(p %>% mutate(WHO.Grade = 3)) %>%
  bind_rows(p %>% mutate(WHO.Grade = 4))

# Set the placeholder values to zero. The column is addressed by name rather
# than by position so the step does not depend on the column order of `p`.
p[p$WHO.Grade == 3, "ki67pro"] <- 0

# Plot ki67pro per WHO grade, split by recurrence status. The four axis
# labels are assigned positionally, so `p` is expected to contain rows for
# all four WHO.Grade values (1 to 4).
p %>%
  mutate(
    WHO.Grade = factor(WHO.Grade),
    recurrence = factor(recurrence)
  ) %>%
  ggplot(aes(WHO.Grade, ki67pro,
             fill = recurrence, colour = recurrence)) +
  # Boxplot outliers are made invisible (alpha 0); the geom_point layer below
  # draws the individual observations instead.
  geom_boxplot(
    outlier.alpha = 0,
    position = position_dodge(width = 0.78, preserve = "single")
  ) +
  geom_point(
    size = 3,
    shape = 21,
    position = position_jitterdodge()
  ) +
  scale_x_discrete(
    name = "",
    # Full argument name `labels` (not the abbreviation `label`), so the call
    # does not rely on partial argument matching.
    labels = c("WHO-I", "WHO-II", "WHO-III", "All"),
    drop = FALSE
  ) +
  scale_y_continuous(
    name = "x",
    breaks = seq(0, 30, 5),
    limits = c(0, 30)
  ) +
  scale_fill_manual(
    values = c("#edf1f9", "#fcebeb"),
    name = "",
    labels = c("", "")
  ) +
  scale_colour_manual(
    values = c("#1C73C2", "red"),
    name = "",
    labels = c("", "")
  ) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

Hope this helps

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.