0

Hello, I have a data frame such as:

tab

  X       molecule gene start_gene end_gene start_scaff end_scaff   strand direction COL1 COL2
1 7  scaffold_1254   G7       6708    11967           1     20072 backward        -1   10   20
2 5  scaffold_7638   G5       9567    10665           1     15336 backward        -1   18    1
3 4  scaffold_7638   G4       3456     4479           1     15336  forward         1   18    1
4 2 scaffold_15158   G2      10105    10609           1     13487 backward        -1    5    9
5 6  scaffold_8315   G6       2760     3849           1     10827  forward         1   25    7
6 3  scaffold_7180   G3       9814    10132           1     10155 backward        -1   21    9
7 1 scaffold_74038   G1       1476     2010           1      2010  forward         1    8   34

So far, with this code:

# One row per scaffold: a grey bar spanning the scaffold and a dark arrow
# spanning the gene. Layers without their own `data` use `tab` from ggplot().
ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Scaffold backbone.
  geom_segment(size = 3, colour = "grey80") +
  # Gene arrow: runs start -> end when direction == 1, end -> start otherwise.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene names, anchored at the gene start and nudged along y.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  theme_minimal() +
  # Use the molecule factor levels in reverse order on the y axis.
  scale_y_discrete(limits = rev(levels(tab$molecule)))

I managed to get this plot: enter image description here

I wondered whether there is a way to add a column right next to the geom_segment plot with the COL1 and COL2 values, and to color the text inside each box depending on a threshold: green for values > 10, red for values <= 10,

and get something like

enter image description here

dput(tab)

# Output of dput(tab): a 7-row data.frame. `molecule`, `gene` and `strand` are
# factors; the remaining columns are integer vectors. The expression is not
# assigned here -- to reproduce the example, assign it: tab <- structure(...).
structure(list(X = c(7L, 5L, 4L, 2L, 6L, 3L, 1L), molecule = structure(c(1L, 
5L, 5L, 2L, 6L, 3L, 4L), .Label = c("scaffold_1254", "scaffold_15158", 
"scaffold_7180", "scaffold_74038", "scaffold_7638", "scaffold_8315"
), class = "factor"), gene = structure(c(7L, 5L, 4L, 2L, 6L, 
3L, 1L), .Label = c("G1", "G2", "G3", "G4", "G5", "G6", "G7"), class = "factor"), 
    start_gene = c(6708L, 9567L, 3456L, 10105L, 2760L, 9814L, 
    1476L), end_gene = c(11967L, 10665L, 4479L, 10609L, 3849L, 
    10132L, 2010L), start_scaff = c(1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), end_scaff = c(20072L, 15336L, 15336L, 13487L, 10827L, 
    10155L, 2010L), strand = structure(c(1L, 1L, 2L, 1L, 2L, 
    1L, 2L), .Label = c("backward", "forward"), class = "factor"), 
    direction = c(-1L, -1L, 1L, -1L, 1L, -1L, 1L), COL1 = c(10L, 
    18L, 18L, 5L, 25L, 21L, 8L), COL2 = c(20L, 1L, 1L, 9L, 7L, 
    9L, 34L)), class = "data.frame", row.names = c(NA, -7L))
3

1 Answer 1

1
# Reshape COL1/COL2 into long format (one row per molecule/column pair) and
# attach, for every value, its text colour and the x position where it is drawn.
col_data <- tab %>% 
  select(molecule, COL1, COL2) %>%
  pivot_longer(cols = contains("COL")) %>%
  mutate(
    # Requested threshold: values > 10 are green, values <= 10 are red, so a
    # value of exactly 10 must be red (hence `<=`, not `<`).
    color = ifelse(value <= 10, "darkred", "darkgreen"),
    # Both text columns sit to the right of the longest scaffold.
    x = ifelse(name == "COL1", max(tab$end_scaff) * 1.075, max(tab$end_scaff) * 1.2)
  )

# Header labels for the two text columns: the distinct x positions used in
# `col_data`, in ascending order, paired with "COL1" and "COL2".
header_data <- data.frame(
  x = sort(unique(col_data$x)),
  label = c("COL1", "COL2")
)

# Scaffold/gene plot with the COL1/COL2 values printed as two text columns
# at the x positions stored in `col_data` and `header_data`.
ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Scaffold backbone.
  geom_segment(size = 3, colour = "grey80") +
  # Gene arrow: runs start -> end when direction == 1, end -> start otherwise.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene names, anchored at the gene start and nudged along y.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  # COL1/COL2 values; the plot-level xend/yend mapping is not inherited.
  geom_text(
    aes(x = x, y = molecule, label = value, color = color),
    data = col_data,
    inherit.aes = FALSE,
    fontface = "bold"
  ) +
  # Column headers, placed at y = Inf and justified inward.
  geom_text(
    aes(x = x, y = c(Inf, Inf), label = label),
    data = header_data,
    inherit.aes = FALSE,
    fontface = "bold",
    vjust = "inward"
  ) +
  # Use the molecule factor levels in reverse order on the y axis.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  # The `color` column already holds colour names; use them as they are.
  scale_color_identity() +
  theme_minimal()

gives:

enter image description here

You can add:

# Compute pretty breaks for the axis limits, then keep only those that do not
# exceed the largest `end_scaff` value in `tab`.
scale_x_continuous(
  breaks = function(limits) {
    candidates <- scales::pretty_breaks(4)(limits)
    candidates[candidates <= max(tab$end_scaff)]
  }
)

to remove the x-axis labels that extend beyond the longest scaffold:

enter image description here

Using patchwork you can create 2 plots and then glue them:

# Left panel: one row per scaffold, with a grey bar spanning the scaffold and
# a dark arrow spanning the gene. Layers without their own `data` use `tab`.
p1 <- ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Scaffold backbone.
  geom_segment(size = 3, colour = "grey80") +
  # Gene arrow: runs start -> end when direction == 1, end -> start otherwise.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene names, anchored at the gene start and nudged along y.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  theme_minimal() +
  # Use the molecule factor levels in reverse order on the y axis.
  scale_y_discrete(limits = rev(levels(tab$molecule)))


# Long-format COL1/COL2 values for the right-hand text panel: one row per
# molecule/column pair, with its text colour and a two-level factor `x`
# ("0" for COL1, "1" for COL2) used as the discrete x position.
col_data <- tab %>% 
  select(molecule, COL1, COL2) %>%
  pivot_longer(cols = contains("COL")) %>%
  mutate(
    # Requested threshold: values > 10 are green, values <= 10 are red, so a
    # value of exactly 10 must be red (hence `<=`, not `<`).
    color = ifelse(value <= 10, "darkred", "darkgreen"),
    x = ifelse(name == "COL1", 0, 1) %>% factor()
  )

# Right panel: the COL1/COL2 values as coloured text, one row per molecule,
# with the column names drawn as headers.
p2 <- ggplot(col_data, aes(x = x, y = molecule)) +
  # Values, coloured by the colour names stored in `color`.
  geom_text(aes(label = value, color = color), size = 5, fontface = "bold") +
  # Column headers, placed at y = Inf and justified inward.
  geom_text(
    aes(x = x, y = c(Inf, Inf), label = label),
    data = data.frame(label = c("COL1", "COL2"), x = factor(c(0, 1))),
    inherit.aes = FALSE,
    size = 6,
    fontface = "bold",
    vjust = "inward"
  ) +
  labs(x = NULL) +
  scale_color_identity() +
  # Same reversed level order as used for the y axis of the left panel.
  scale_y_discrete(limits = rev(levels(col_data$molecule))) +
  theme_void() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

# Put the two panels side by side with relative widths 3 and 1.
p1 + p2 + plot_layout(widths = c(3, 1))

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.