在R中,将组间比较的p值添加到分组条形图的最佳方法是什么

z31licg0  于 2023-05-04  发布在  其他
关注(0)|答案(1)|浏览(198)

请考虑以下MWE。
编辑:感谢非常好的评论,很明显,在我非常简化的MWE中没有足够的数据来进行组间的统计比较。由于这个缺点,我重写了MWE:

library(dplyr)
library(ggplot2)
library(tidyr)

# Create random data
set.seed(123456789)
n_participants <- 100
participant_id <- 1:n_participants
group <- sample(c("A", "B"), size = n_participants, replace = TRUE)
gene1 <- sample(0:1, size = n_participants, replace = TRUE)
gene2 <- sample(0:1, size = n_participants, replace = TRUE)
gene3 <- sample(0:1, size = n_participants, replace = TRUE)
gene4 <- sample(0:1, size = n_participants, replace = TRUE)
gene5 <- sample(0:1, size = n_participants, replace = TRUE)
data <- data.frame(participant_id, group, gene1, gene2, gene3, gene4, gene5)

head(data)
#>   participant_id group gene1 gene2 gene3 gene4 gene5
#> 1              1     B     1     0     1     0     1
#> 2              2     B     0     0     1     1     0
#> 3              3     A     1     1     0     1     0
#> 4              4     B     0     0     1     1     0
#> 5              5     A     0     1     1     1     0
#> 6              6     B     0     0     1     1     0

# Run chisq.tests to compare the categorical variables gene1:gene5 between groups "A" and "B"
data_long <- data %>% 
  pivot_longer(cols = gene1:gene5, names_to = "gene", values_to = "value")

test_results <- data_long %>% 
  group_by(gene) %>% 
  summarize(p_value = chisq.test(table(group, value))$p.value)

test_results
#> # A tibble: 5 × 2
#>   gene  p_value
#>   <chr>   <dbl>
#> 1 gene1   0.394
#> 2 gene2   0.103
#> 3 gene3   0.463
#> 4 gene4   0.437
#> 5 gene5   0.423

# For plotting, create probability table
prob_table <- data %>%
  group_by(group) %>%
  summarise(gene1 = mean(gene1, na.rm = TRUE) * 100,
            gene2 = mean(gene2, na.rm = TRUE) * 100,
            gene3 = mean(gene3, na.rm = TRUE) * 100,
            gene4 = mean(gene4, na.rm = TRUE) * 100,
            gene5 = mean(gene5, na.rm = TRUE) * 100)

# Thereafter convert to long form data for ggplot
data_to_ggplot <- pivot_longer(prob_table, gene1:gene5,
                               names_to = "gene", values_to = "Percentage")

# Plot data
ggplot(data_to_ggplot, aes(fill = group, x = gene, y = Percentage)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  ylim(0, 100) +
  labs(x = "", y = "Percentage") +
  ggtitle("Prevalence of certain genes in groups A and B") +
  theme_classic() +   
  theme(plot.title = element_text(hjust = 0.5),
        axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

创建于2023-04-29带有reprex v2.0.2
为了得到下面的图像,你建议用什么方法来添加对象test_results的p值?geom_signif可以在这种情况下使用吗?

ql3eal8s

ql3eal8s1#

是的,你肯定可以用ggsignif包来做
你可以在下面找到如何将geom_signif参数添加到图中

library(dplyr)
library(ggplot2)
library(tidyr)
library(ggsignif)
# Plot data
ggplot(data_to_ggplot, aes(fill = group, x = gene, y = Percentage)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  ylim(0, 100) +
  labs(x = "", y = "Percentage") +
  ggtitle("Prevalence of certain genes in groups A and B") +
  theme_classic() +   
  theme(plot.title = element_text(hjust = 0.5),
        axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))+
  geom_signif(
    y_position = rep(65,5), xmin = 1:5-0.2, xmax = 1:5+0.2,
    annotation = round(test_results$p_value,3),tip_length = 0.4)

创建于2023-04-29带有reprex v2.0.2

相关问题