R语言 将文本添加到ggplot(将差异表达基因的数量添加到MA图)

k4aesqcs  于 2023-04-18  发布在  其他
关注(0)|答案(1)|浏览(140)

在MA图函数中,将显著表达基因的数量添加到图中。MA图显示两组之间差异表达的基因及其倍数变化和平均表达。我试图将差异表达基因的总数添加到图的右上方。与其“硬编码”x和y坐标,不如使用相对定位。
也许这些数字应该被添加到df cotaing数据被绘制,而不是一个新的?任何建议,我将非常感谢!

示例数据

set.seed(47)
gene_creator <- paste("gene",1:100,sep="")
genes = sample(gene_creator,8)

dex_A <- data.frame(
  gene = genes,
  group = "group_A",
  logFC = sample(c(-5:5), replace=T, size=8),
  adj.P.Val = sample(c(0.01,1), replace=T, size=8),
  AveExpr = sample(c(30:36), replace=T, size=8)
)

dex_B <- data.frame(
  gene = genes,
  group = "group_B",
  logFC = sample(c(-5:5), replace=T, size=8),
  adj.P.Val = sample(c(0.01,1), replace=T, size=8),
  AveExpr = sample(c(30:36), replace=T, size=8)
)

dex_df <- rbind(dex_A, dex_B)

解决方案(不工作)

library('tidyverse')
library('ggrepel')

## find min max genes
minmax <- bind_rows(dex_df %>%
                          filter(adj.P.Val < 0.05) %>%
                          group_by(group) %>%
                          slice_max(logFC, n = 3) %>%
                          mutate(rank = "top5"),
                        dex_df %>%
                          filter(adj.P.Val < 0.05) %>%
                          group_by(group) %>%
                          slice_min(logFC, n = 3) %>%
                          mutate(rank = "min5"))

## get numbers
numbers <- dex_df %>%
  group_by(group) %>%
  summarize(
    n_up = sum(adj.P.Val <= 0.05 & logFC > 0.5),
    n_down = -sum(adj.P.Val <= 0.05 & logFC < 0.5)
  ) %>%
  pivot_longer(-group, names_to = "direction", values_to = "n")

# colors significance
colors_sig <- c("#999999", "#0072B2", "#D55E00")
names(colors_sig) <- c("NA", "Up", "Down")


# make MA plot
make_plot_MA <- function(dex_df, minmax, numbers, comp){
  # get Min Max for comp
  MINmax <- minmax %>% filter(group == comp)
  # numbers of difEx
  numb4text <- numbers %>% filter(group == comp)
  # plot
  dex_df %>%
    mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %>%
    mutate(col =  ifelse(dex_df$adj.P.Val > 0.05, "NS", ifelse(logFC > 0, 'Up', 'Down'))) %>%
    filter(group == comp) %>%
    #filter(adj.P.Val < 0.05) %>%
    ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
    geom_point()+ 
    geom_hline(yintercept = 0) +
    xlab ("Log2 mean expression") +
    ylab ("Log2 fold change") +
    geom_label_repel(color = 'black', min.segment.length = 0) +
    geom_point(data = . %>% filter(gene %in% minmax$gene), color = "black") +
    ggtitle(comp) +
    scale_color_manual(values = colors_sig) +
    geom_text(data=numb4text, aes(x=34, y=6, label=n))
  
}

make_plot_MA(dex_df, MinMax, numbers, 'group_A')
ar7v8xwq

ar7v8xwq1#

make_plot_MA <- function(dex_df, minmax, numbers, comp){
  # get Min Max for comp
  MINmax <- minmax %>% filter(group == comp)
  # numbers of difEx
  numb4text <- numbers %>% filter(group == comp)
  label.up=paste("up:", numb4text$n[1])
  label.down=paste("down:", numb4text$n[2])
  # plot
  dex_df %>%
    mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %>%
    mutate(col =  ifelse(dex_df$adj.P.Val > 0.05, "NS", ifelse(logFC > 0, 'Up', 'Down'))) %>%
    filter(group == comp) %>%
    #filter(adj.P.Val < 0.05) %>%
    ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
    geom_point()+ 
    geom_hline(yintercept = 0) +
    xlab ("Log2 mean expression") +
    ylab ("Log2 fold change") +
    geom_label_repel(color = 'black', min.segment.length = 0) +
    geom_point(data = . %>% filter(gene %in% minmax$gene), color = "black") +
    ggtitle(comp) +
    scale_color_manual(values = colors_sig) +
    geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC), label=label.up))+
    geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC)-1, label=label.down))
  
  
}

make_plot_MA(dex_df, minmax, numbers, 'group_A')

相关问题