R语言 根据ggplot中的Tukey重要性字母匹配箱线图和标签颜色

sd2nnvve  于 2023-01-06  发布在  其他
关注(0)|答案(1)|浏览(124)

我尝试根据ggplot2或ggboxplot中Tukey的重要性字母匹配箱线图和标签颜色
我不知道如何做到这一点自动或在一个更优雅的方式使用地形。例如颜色。
我已经手动完成,只是为了显示什么是我想要的图与箱线图和标签的颜色相同的Tukey的重要性字母:

我的意思是,让“a”,“B”等等的箱形图都有相同的颜色,箱形图和字母都是一样的,就像这样,但是使用ggplot https://r-graph-gallery.com/84-tukey-test_files/figure-html/unnamed-chunk-3-1.png
你的帮助将不胜感激
下面是根据这篇文章的公认答案编写的脚本:Is there a function to add AOV post-hoc testing results to ggplot2 boxplot?

library(plyr)
library(ggplot2)
library(multcompView)

set.seed(0)
lev <- gl(3, 10)
y <- c(rnorm(10), rnorm(10) + 0.1, rnorm(10) + 3)
d <- data.frame(lev=lev, y=y)

a <- aov(y~lev, data=d)
tHSD <- TukeyHSD(a, ordered = FALSE, conf.level = 0.95)

generate_label_df <- function(HSD, flev){
  # Extract labels and factor levels from Tukey post-hoc 
  Tukey.levels <- HSD[[flev]][,4]
  Tukey.labels <- multcompLetters(Tukey.levels)['Letters']
  plot.labels <- names(Tukey.labels[['Letters']])
  
  # Get highest quantile for Tukey's 5 number summary and add a bit of space to buffer between    
  # upper quantile and label placement
  boxplot.df <- ddply(d, flev, function (x) max(fivenum(x$y)) + 0.2)
  
  # Create a data frame out of the factor levels and Tukey's homogenous group letters
  plot.levels <- data.frame(plot.labels, labels = Tukey.labels[['Letters']],
                            stringsAsFactors = FALSE)
  
  # Merge it with the labels
  labels.df <- merge(plot.levels, boxplot.df, by.x = 'plot.labels', by.y = flev, sort = FALSE)
  
  return(labels.df)
}

#Generate ggplot

ggplot(d, aes(x=lev, y=y)) + geom_boxplot(fill = c("green", "green", "orange")) +
   geom_text(data = generate_label_df(tHSD, 'lev'), colour = c("green","orange", "green"), aes(x = plot.labels, y = V1, label = labels )) +
   scale_colour_manual(values=c("green", "green", "orange"))
mklgxw1f

mklgxw1f1#

这对你有用吗?在下面找到我的评论。

library(plyr)
library(ggplot2)
library(multcompView)

set.seed(0)
lev <- gl(3, 10)
y <- c(rnorm(10), rnorm(10) + 0.1, rnorm(10) + 3)
d <- data.frame(lev=lev, y=y)

a <- aov(y~lev, data=d)
tHSD <- TukeyHSD(a, ordered = FALSE, conf.level = 0.95)

generate_label_df <- function(HSD, flev){
  # Extract labels and factor levels from Tukey post-hoc 
  Tukey.levels <- HSD[[flev]][,4]
  Tukey.labels <- multcompLetters(Tukey.levels)['Letters']
  plot.labels <- names(Tukey.labels[['Letters']])
  
  # Get highest quantile for Tukey's 5 number summary and add a bit of space to buffer between    
  # upper quantile and label placement
  boxplot.df <- ddply(d, flev, function (x) max(fivenum(x$y)) + 0.2)
  
  # Create a data frame out of the factor levels and Tukey's homogenous group letters
  plot.levels <- data.frame(plot.labels, labels = Tukey.labels[['Letters']],
                            stringsAsFactors = FALSE)
  
  # Merge it with the labels
  labels.df <- merge(plot.levels, boxplot.df, by.x = 'plot.labels', by.y = flev, sort = FALSE)
  
  return(labels.df)
}

#############################
### new stuff starts here ###
#############################

label_df <- generate_label_df(tHSD, 'lev')
label_df$lev <- label_df$plot.labels

#Generate ggplot
lev_cols <- c("1" = "green", "2" = "green", "3" = "orange")

ggplot(d, aes(x = lev, y = y)) + 
  geom_boxplot(aes(fill = lev)) +
  geom_text(
    data = label_df,
    aes(
      x = plot.labels, 
      y = V1, 
      label = labels, 
      color = lev
    )
  ) +
  scale_color_manual(values = lev_cols) +
  scale_fill_manual(values = lev_cols)

创建于2022年10月14日,使用reprex v2.0.2
正如你所看到的,你可以告诉不同的geoms_在他们的aes()(!),他们应该根据,例如lev列的颜色.完成后,你可以定义lev中的哪个级别应该有什么颜色,通过命名向量c("Levelname1" = "Colorname1", ...),我们在这里有lev_cols,并提供给scale_color_manual().
在这个特定的例子中,它稍微复杂一些,因为对于geom_boxplot(),我们实际上需要不同的fill,而对于geom_text(),我们需要不同的color,因此我们需要scale_color_manual()scale_fill_manual()。此外,您提供给geom_text()的数据没有名为lev的列,但实际上我只是想让它简单些。

奖金

仅供参考,您可能会发现以下替代方法来获得紧凑的字母显示,以及替代方法来绘制有趣的结果。

# extra -------------------------------------------------------------------
library(tidyverse)
library(emmeans)
library(multcomp)
library(multcompView)

set.seed(0)
lev <- gl(3, 10)
y <- c(rnorm(10), rnorm(10) + 0.1, rnorm(10) + 3)
d <- data.frame(lev = lev, y = y)

# This also gets you the letters ------------------------------------------
# fit model
model <- lm(y ~ lev, data = d)

# get (adjusted) y means per group
model_means <- emmeans(object = model,
                       specs = "lev")

# add letters to each mean
model_means_cld <- cld(object = model_means,
                       adjust = "Tukey",
                       Letters = letters,
                       alpha = 0.05)
#> Note: adjust = "tukey" was changed to "sidak"
#> because "tukey" is only appropriate for one set of pairwise comparisons
# show output
model_means_cld
#>  lev emmean    SE df lower.CL upper.CL .group
#>  2   -0.262 0.283 27   -0.982    0.457  a    
#>  1    0.359 0.283 27   -0.361    1.079  a    
#>  3    3.069 0.283 27    2.350    3.789   b   
#> 
#> Confidence level used: 0.95 
#> Conf-level adjustment: sidak method for 3 estimates 
#> P value adjustment: tukey method for comparing a family of 3 estimates 
#> significance level used: alpha = 0.05 
#> NOTE: If two or more means share the same grouping letter,
#>       then we cannot show them to be different.
#>       But we also did not show them to be the same.

# You may also like this plot ---------------------------------------------
ggplot() +
  # general layout
  theme_classic() +
  theme(plot.caption = ggtext::element_textbox_simple()) +
  # black data points
  geom_point(
    data = d,
    aes(y = y, x = lev),
    shape = 16,
    alpha = 0.5,
    position = position_nudge(x = -0.2)
  ) +
  # black boxplot
  geom_boxplot(
    data = d,
    aes(y = y, x = lev),
    width = 0.05,
    outlier.shape = NA,
    position = position_nudge(x = -0.1)
  ) +
  # red mean value
  geom_point(
    data = model_means_cld,
    aes(y = emmean, x = lev),
    size = 2,
    color = "red"
  ) +
  # red mean errorbar
  geom_errorbar(
    data = model_means_cld,
    aes(ymin = lower.CL, ymax = upper.CL, x = lev),
    width = 0.05,
    color = "red"
  ) +
  # red letters
  geom_text(
    data = model_means_cld,
    aes(
      y = emmean,
      x = lev,
      label = str_trim(.group)
    ),
    position = position_nudge(x = 0.1),
    hjust = 0,
    color = "red"
  ) +
  # caption
  labs(
    caption = "Black dots represent raw data. Red dots and error bars represent (estimated marginal) means ± 95% confidence interval per group. Means not sharing any letter are significantly different by the Tukey-test at the 5% level of significance."
  )

创建于2022年10月14日,使用reprex v2.0.2

相关问题