如何使用ggpubr::stat_compare_means()仅绘制一个组与基础均值之间的一个选定p值

xlpyo6sf  于 2023-02-26  发布在  其他
关注(0)|答案(1)|浏览(372)

我已经将数据框中8个组的平均值分别与基础均值(所有组的总体平均值)进行了比较。对于用ggplot2绘制的箱线图,我只想添加样本6相对于基础均值的显著性p值(因为只有样本6的平均值明显高于其他样本)。如何用ggplot2只选择并显示样本6相对于基线的p值?如果你有其他建议,那就太好了。我附上了我想要得到的箱线图。这是我的数据框:

# Example data (dput() output): a data frame with 80 rows and three columns:
#   X       - row index, 1:80
#   Values  - numeric measurement
#   Samples - group label, "sample1" .. "sample8", 10 consecutive rows each
# NOTE(review): the plotting code further down refers to this object as
# `Data2`; this expression is not assigned here, so assign it to that name
# before running the plots.
structure(list(X = 1:80, Values = c(0.630595738859822, 0.602792596576932, 
0.717008661687034, 0.682665239025856, 0.793946879180833, 0.881948645398259, 
0.625114600712147, 0.437617156442347, 0.745624594554559, 0.894202100943608, 
0.563490328640227, 0.438643112591669, 0.634989790753718, 0.419198849946639, 
0.635117536091368, 0.326358188291684, 0.549155626314892, 0.670890309309185, 
0.496113525284973, 0.621044760371873, 0.719504647704111, 0.639863189313384, 
0.535720139062473, 0.669721827058233, 0.650912293714071, 0.696457414671846, 
0.858706200486706, 0.731304436277949, 0.858640625829461, 0.620944056367355, 
0.707824409068981, 0.735697338021026, 0.617587152354055, 0.827159777309146, 
0.746617151207609, 0.756958937191696, 0.667851068680371, 0.751767118706959, 
0.514272623762152, 0.574995342576934, 0.565052649366379, 0.680859397561326, 
0.610337344835454, 0.525126720395167, 0.598829305776582, 0.665333671717166, 
0.741661361938011, 0.675284838360854, 0.566029505622414, 0.766213446491105, 
1.15708210114631, 0.890211146977994, 1.25307838058234, 0.968181676151837, 
1.00095211090357, 2.8420291708288, 1.90093888157347, 1.06396479241609, 
1.56573878519669, 0.775059529464859, 0.419162723989262, 0.550280097449462, 
0.644290507715508, 0.576790241978692, 0.758599327639705, 0.684331966752555, 
0.40462575314429, 0.814338126516362, 0.587363556965946, 0.560989067834604, 
0.796437227052055, 0.713486371171068, 0.60447141342468, 0.560626244287651, 
0.598806081197721, 0.642049867384496, 0.521445328537076, 0.699890870949464, 
0.845524358473877, 0.469453181792765), Samples = c("sample1", 
"sample1", "sample1", "sample1", "sample1", "sample1", "sample1", 
"sample1", "sample1", "sample1", "sample2", "sample2", "sample2", 
"sample2", "sample2", "sample2", "sample2", "sample2", "sample2", 
"sample2", "sample3", "sample3", "sample3", "sample3", "sample3", 
"sample3", "sample3", "sample3", "sample3", "sample3", "sample4", 
"sample4", "sample4", "sample4", "sample4", "sample4", "sample4", 
"sample4", "sample4", "sample4", "sample5", "sample5", "sample5", 
"sample5", "sample5", "sample5", "sample5", "sample5", "sample5", 
"sample5", "sample6", "sample6", "sample6", "sample6", "sample6", 
"sample6", "sample6", "sample6", "sample6", "sample6", "sample7", 
"sample7", "sample7", "sample7", "sample7", "sample7", "sample7", 
"sample7", "sample7", "sample7", "sample8", "sample8", "sample8", 
"sample8", "sample8", "sample8", "sample8", "sample8", "sample8", 
"sample8")), class = "data.frame", row.names = c(NA, -80L))

这是我的代码:

library(ggplot2)
library(ggpubr)

# Box plot of `Values` per sample, with the overall ("base") mean drawn as a
# dashed horizontal line and a t-test significance label for every group
# compared against all data (ref.group = ".all.").
# Expects `Data2`: a data frame with a numeric `Values` column and a
# `Samples` grouping column.
p <- ggplot(Data2, aes(Samples, Values)) +
  geom_boxplot(aes(col = Samples, fill = Samples), alpha = 0.2) +
  # The base mean must come from the same data frame that is plotted;
  # the original referenced an unrelated, undefined object here.
  geom_hline(yintercept = mean(Data2$Values), linetype = 2) +
  stat_compare_means(
    label = "p.signif", method = "t.test",
    ref.group = ".all.", hide.ns = FALSE
  ) +
  labs(
    x = "Samples", y = "Values",
    title = "Test 1",
    subtitle = "",
    caption = "Assay"
  ) +
  ylim(0, 5) +
  # theme_bw() is a complete theme: it must be applied BEFORE the theme()
  # tweaks, otherwise it resets the rotated x-axis labels.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    legend.position = "none"
  )
p

zpgglvta

zpgglvta1#

这不是一个统计问答网站,虽然我可以告诉你如何在图上打印一个p值,但你需要谨慎对待你的统计方法。为了使统计检验有意义,你应该将样本6与其余样本进行比较,而不是与包含样本6在内的全部样本进行比较。在这里使用t检验有些问题:样本6的方差比其余数据的方差大一个数量级,并且呈正偏态,因此Mann-Whitney检验可能更合适。此外,你还需要针对多重假设检验校正p值——挑选单个样本加以特别关注,实际上隐含了多次检验(每个样本一次)。
你可以这样做:

# Mann-Whitney (Wilcoxon rank-sum) test of sample6 against all other samples.
# Expects `Data2`: a data frame with a numeric `Values` column and a
# `Samples` grouping column.
sample6_p <- wilcox.test(
  Values ~ sample6,
  data = within(Data2, sample6 <- Samples == "sample6")
)$p.value

# Bonferroni correction: singling out one sample implies one test per sample,
# so the number of tests is the number of distinct samples (not a hard-coded
# constant). p.adjust() also caps the adjusted p-value at 1.
sample6_p_adj <- p.adjust(
  sample6_p,
  method = "bonferroni",
  n = length(unique(Data2$Samples))
)

# Box plot per sample with the overall mean as a dashed line, per-group
# significance labels versus all data, and the adjusted sample6 p-value
# printed above the sample6 box.
ggplot(Data2, aes(Samples, Values)) +
  geom_boxplot(aes(col = Samples, fill = Samples), alpha = 0.2) +
  geom_hline(yintercept = mean(Data2$Values), linetype = 2) +
  stat_compare_means(
    label = "p.signif", method = "t.test",
    ref.group = ".all.", hide.ns = FALSE
  ) +
  annotate(
    "text",
    x = "sample6", y = 4.5,
    label = scales::pvalue(sample6_p_adj, add_p = TRUE)
  ) +
  labs(
    x = "Samples", y = "Values",
    title = "Test 1",
    subtitle = "",
    caption = "Assay"
  ) +
  ylim(0, 5) +
  # theme_bw() is a complete theme: apply it BEFORE the theme() tweaks,
  # otherwise it resets the rotated x-axis labels.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    legend.position = "none"
  )

相关问题