我已经将数据框中8个组各自的平均值与基础均值(所有组的总体平均值)进行了比较。在ggplot2绘制的箱线图上,我只想标注样本6相对于基础均值的显著性p值(因为只有样本6的平均值明显高于其他样本)。如何只选取并显示样本6相对于基线的p值?我希望用ggplot2实现,如果您有其他建议也非常欢迎。我附上了想要得到的箱线图。这是我的数据框:
# Example data (dput() output): 80 observations in 8 samples of 10 rows each.
# Columns: X (row index), Values (numeric measurement), Samples (group label).
# Bound to `Data2` because that is the name the plotting code reads from.
Data2 <- structure(list(X = 1:80, Values = c(
0.630595738859822, 0.602792596576932,
0.717008661687034, 0.682665239025856, 0.793946879180833, 0.881948645398259,
0.625114600712147, 0.437617156442347, 0.745624594554559, 0.894202100943608,
0.563490328640227, 0.438643112591669, 0.634989790753718, 0.419198849946639,
0.635117536091368, 0.326358188291684, 0.549155626314892, 0.670890309309185,
0.496113525284973, 0.621044760371873, 0.719504647704111, 0.639863189313384,
0.535720139062473, 0.669721827058233, 0.650912293714071, 0.696457414671846,
0.858706200486706, 0.731304436277949, 0.858640625829461, 0.620944056367355,
0.707824409068981, 0.735697338021026, 0.617587152354055, 0.827159777309146,
0.746617151207609, 0.756958937191696, 0.667851068680371, 0.751767118706959,
0.514272623762152, 0.574995342576934, 0.565052649366379, 0.680859397561326,
0.610337344835454, 0.525126720395167, 0.598829305776582, 0.665333671717166,
0.741661361938011, 0.675284838360854, 0.566029505622414, 0.766213446491105,
1.15708210114631, 0.890211146977994, 1.25307838058234, 0.968181676151837,
1.00095211090357, 2.8420291708288, 1.90093888157347, 1.06396479241609,
1.56573878519669, 0.775059529464859, 0.419162723989262, 0.550280097449462,
0.644290507715508, 0.576790241978692, 0.758599327639705, 0.684331966752555,
0.40462575314429, 0.814338126516362, 0.587363556965946, 0.560989067834604,
0.796437227052055, 0.713486371171068, 0.60447141342468, 0.560626244287651,
0.598806081197721, 0.642049867384496, 0.521445328537076, 0.699890870949464,
0.845524358473877, 0.469453181792765), Samples = c("sample1",
"sample1", "sample1", "sample1", "sample1", "sample1", "sample1",
"sample1", "sample1", "sample1", "sample2", "sample2", "sample2",
"sample2", "sample2", "sample2", "sample2", "sample2", "sample2",
"sample2", "sample3", "sample3", "sample3", "sample3", "sample3",
"sample3", "sample3", "sample3", "sample3", "sample3", "sample4",
"sample4", "sample4", "sample4", "sample4", "sample4", "sample4",
"sample4", "sample4", "sample4", "sample5", "sample5", "sample5",
"sample5", "sample5", "sample5", "sample5", "sample5", "sample5",
"sample5", "sample6", "sample6", "sample6", "sample6", "sample6",
"sample6", "sample6", "sample6", "sample6", "sample6", "sample7",
"sample7", "sample7", "sample7", "sample7", "sample7", "sample7",
"sample7", "sample7", "sample7", "sample8", "sample8", "sample8",
"sample8", "sample8", "sample8", "sample8", "sample8", "sample8",
"sample8")), class = "data.frame", row.names = c(NA, -80L))
这是我的代码:
library(ggplot2)
library(ggpubr)
# Box plot of Values for each sample, with a dashed reference line at the
# overall mean and a significance label per sample from a t-test against
# the pooled data (ref.group = ".all.").
p <- ggplot(Data2, aes(Samples, Values)) +
  geom_boxplot(aes(col = Samples, fill = Samples), alpha = 0.2) +
  # Baseline: mean of all plotted values, taken from the same data frame
  # that is drawn so the line and the boxes cannot drift apart.
  geom_hline(yintercept = mean(Data2$Values), linetype = 2) +
  # hide.ns = FALSE keeps the "ns" label on non-significant samples.
  stat_compare_means(
    label = "p.signif", method = "t.test",
    ref.group = ".all.", hide.ns = FALSE
  ) +
  labs(
    x = "Samples", y = "Values",
    title = "Test 1",
    subtitle = "",
    caption = "Assay"
  ) +
  ylim(0, 5) +
  # The complete theme must come first: adding theme_bw() after theme()
  # replaces the whole theme and would discard the tweaks below.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    legend.position = "none"
  )
p
1条答案
按热度按时间zpgglvta1#
这里不是统计问答网站,虽然我可以告诉你如何在图上打印p值,但你需要谨慎对待自己的统计方法。要使统计检验有意义,你应该将样本6与样本6以外的其他样本进行比较,而不是与包含样本6在内的全部样本比较。在这里使用t检验也有些问题:样本6的方差比其余数据的方差大一个数量级,并且呈正偏态,因此Mann-Whitney检验可能更合适。此外,你需要针对多重假设检验校正p值——挑出某一个样本予以特别关注,本身就隐含了多重检验(每个样本各一次)。
你可以这样做: