R语言 如何使用ggplot在条形图中绘制箱线图?

holgip5t  于 2023-03-15  发布在  其他
关注(0)|答案(1)|浏览(223)

我正在尝试重现IPCC《海洋和冰冻圈特别报告》中的一幅条形图(见下图。请注意,图中没有y轴和x轴标签,因为它截取自该报告中一幅更大的图,见图2.1:https://www.ipcc.ch/srocc/chapter/chapter-2/ )。水平粗线表示数据的中位数,灰色框的上下边界分别表示第75和第25百分位数。因此,可以说灰色框是一种箱线图。我的目标是把我的数据的这种“箱线图”添加到我的图中。

我已经能够复现背对背条形图(见下图)。

**但是,我很难将箱线图集成到这个条形图中。**我尝试使用geom_boxplot()(下面的代码示例)执行此操作,但它产生了一个错误:

geom_boxplot()中的错误:!计算美学映射(aesthetics)时出现问题。ℹ 错误发生在第1个图层。由FUN()中的错误导致:!未找到对象“Umgebung”
我的另一个想法是先计算所需的百分位数(第25百分位数、中位数和第75百分位数),然后使用geom_vline()把它们添加到图中,但这样我无法画出原图中的灰色方框。所以我更愿意在图中使用箱线图,或者用其他方法创建灰色方框。不幸的是,我已经没有什么思路了,始终无法让它工作,所以想请教是否有人知道如何在ggplot中做到这一点?
这是我用的数据(plotting_df2是背对背条形图的数据,df_ZDI是我想添加的箱线图的数据)和当前背对背条形图的代码。请注意,抛出上述错误的geom_boxplot()调用在此代码示例中已被注释掉。另外,df_ZDI中创建了一个全部用零填充的列“Area_perc”,目的是在把箱线图添加到背对背条形图之后使其居中。
我很高兴得到任何帮助:)

library("dplyr")
library("ggplot2")

# Data for the back-to-back bar chart (looks like dput() output of a grouped
# tibble).
# 44 rows: 22 elevation bands (Height_range, 200 m wide, stored as text) for
# each of the two Umgebung classes, "Glacial" and "Periglacial".
# Area_perc is mapped to bar length in the plot below; the Glacial values
# carry a negative sign and the Periglacial values are non-negative, so the
# two classes extend to opposite sides of zero.
# The object is still grouped by Height_range and Umgebung (one row per group).
plotting_df2 <- structure(list(Height_range = c("2600 - <2800", "2800 - <3000", 
    "3000 - <3200", "3200 - <3400", "3400 - <3600", "3600 - <3800", 
    "3800 - <4000", "4000 - <4200", "4200 - <4400", "4400 - <4600", 
    "4600 - <4800", "4800 - <5000", "5000 - <5200", "5200 - <5400", 
    "5400 - <5600", "5600 - <5800", "5800 - <6000", "6000 - <6200", 
    "6200 - <6400", "6400 - <6600", "6600 - <6800", "6800 - <7000", 
    "2600 - <2800", "2800 - <3000", "3000 - <3200", "3200 - <3400", 
    "3400 - <3600", "3600 - <3800", "3800 - <4000", "4000 - <4200", 
    "4200 - <4400", "4400 - <4600", "4600 - <4800", "4800 - <5000", 
    "5000 - <5200", "5200 - <5400", "5400 - <5600", "5600 - <5800", 
    "5800 - <6000", "6000 - <6200", "6200 - <6400", "6400 - <6600", 
    "6600 - <6800", "6800 - <7000"), Area_perc = c(0, -0.00813093730353531, 
    -0.260883801237534, -1.34685081373276, -4.12725403762506, -5.8860804561115, 
    -6.69221176709734, -7.43112895264885, -8.77252668268104, -12.0759025431697, 
    -14.691106543883, -13.4787204972044, -9.6733444626552, -7.30287193712887, 
    -3.61505367574966, -2.20305798709396, -1.17985012540267, -0.739452756272111, 
    -0.260689048248227, -0.179416191398369, -0.0569287331865788, 
    -0.0185380501695873, 0, 0.0210832065597772, 0.371033084959052, 
    1.31854295448614, 4.72853608089054, 10.711542546146, 22.9105680160703, 
    29.9335291171623, 20.4001968810962, 7.655966743397, 1.64776231937217, 
    0.279744368079868, 0.0214946817807394, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), Umgebung = c("Glacial", "Glacial", "Glacial", "Glacial", 
    "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", 
    "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", 
    "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", "Glacial", 
    "Periglacial", "Periglacial", "Periglacial", "Periglacial", "Periglacial", 
    "Periglacial", "Periglacial", "Periglacial", "Periglacial", "Periglacial", 
    "Periglacial", "Periglacial", "Periglacial", "Periglacial", "Periglacial", 
    "Periglacial", "Periglacial", "Periglacial", "Periglacial", "Periglacial", 
    "Periglacial", "Periglacial")), class = c("grouped_df", "tbl_df", 
    "tbl", "data.frame"), row.names = c(NA, -44L), groups = structure(list(
        Height_range = c("2600 - <2800", "2600 - <2800", "2800 - <3000", 
        "2800 - <3000", "3000 - <3200", "3000 - <3200", "3200 - <3400", 
        "3200 - <3400", "3400 - <3600", "3400 - <3600", "3600 - <3800", 
        "3600 - <3800", "3800 - <4000", "3800 - <4000", "4000 - <4200", 
        "4000 - <4200", "4200 - <4400", "4200 - <4400", "4400 - <4600", 
        "4400 - <4600", "4600 - <4800", "4600 - <4800", "4800 - <5000", 
        "4800 - <5000", "5000 - <5200", "5000 - <5200", "5200 - <5400", 
        "5200 - <5400", "5400 - <5600", "5400 - <5600", "5600 - <5800", 
        "5600 - <5800", "5800 - <6000", "5800 - <6000", "6000 - <6200", 
        "6000 - <6200", "6200 - <6400", "6200 - <6400", "6400 - <6600", 
        "6400 - <6600", "6600 - <6800", "6600 - <6800", "6800 - <7000", 
        "6800 - <7000"), Umgebung = c("Glacial", "Periglacial", "Glacial", 
        "Periglacial", "Glacial", "Periglacial", "Glacial", "Periglacial", 
        "Glacial", "Periglacial", "Glacial", "Periglacial", "Glacial", 
        "Periglacial", "Glacial", "Periglacial", "Glacial", "Periglacial", 
        "Glacial", "Periglacial", "Glacial", "Periglacial", "Glacial", 
        "Periglacial", "Glacial", "Periglacial", "Glacial", "Periglacial", 
        "Glacial", "Periglacial", "Glacial", "Periglacial", "Glacial", 
        "Periglacial", "Glacial", "Periglacial", "Glacial", "Periglacial", 
        "Glacial", "Periglacial", "Glacial", "Periglacial", "Glacial", 
        "Periglacial"), .rows = structure(list(1L, 23L, 2L, 24L, 
            3L, 25L, 4L, 26L, 5L, 27L, 6L, 28L, 7L, 29L, 8L, 30L, 
            9L, 31L, 10L, 32L, 11L, 33L, 12L, 34L, 13L, 35L, 14L, 
            36L, 15L, 37L, 16L, 38L, 17L, 39L, 18L, 40L, 19L, 41L, 
            20L, 42L, 21L, 43L, 22L, 44L), ptype = integer(0), class = c("vctrs_list_of", 
        "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
    ), row.names = c(NA, -44L), .drop = TRUE))

# Data for the boxplot that should be overlaid on the bar chart: a plain
# data.frame with 55 rows.
#   elev_ZDI  - numeric values whose distribution the box should summarise
#               (presumably elevations in metres - TODO confirm units).
#   Area_perc - a factor with the single level "0", identical in every row;
#               it carries no information and appears to exist only to give
#               the box a fixed position at zero.
df_ZDI <- structure(list(elev_ZDI = c(3307.20835232205, 1331.87925720215, 
222.621368887689, 836.756324132284, 2066.40147484673, 3066.89243265788, 
1407.60896335178, 491.074270799425, 1518.27969767253, 1872.66952904595, 
2764.18468865289, 834.062467659844, 429.105528672536, 1711.29469570584, 
1818.18913269043, 2923.67825486925, 1310.91319944594, 405.303607556555, 
1356.16757982042, 1656.92714335124, 3176.3096818712, 1785.96018914117, 
323.876850085788, 742.874162546794, 1899.74567159017, 3011.39425354004, 
1974.31221652561, 317.509443208906, 439.012686824799, 2080.99008568658, 
3245.08500908746, 2500.59694722493, 642.457545979818, 674.900856865777, 
2560.82551439073, 3122.98277079264, 2156.09715440538, 621.882653978136, 
556.954903009203, 2213.49671427409, 3209.67580227322, 1722.6931822035, 
482.353815809886, 909.885089916653, 2000.93998396132, 2909.65182291667, 
1283.94950095283, 563.962105886141, 1682.45457678901, 2186.01452416314, 
2192.81169128418, 991.659460300869, 897.015666135152, 1805.44834476047, 
2088.38637356228), Area_perc = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), levels = "0", class = "factor")), class = "data.frame", row.names = c(NA, 
-55L))

#code for back-to-back barchart

# Back-to-back bar chart: one bar per elevation band and Umgebung class.
# The bands sit on the discrete x axis, which coord_flip() turns into the
# vertical axis of the drawn figure.
plot <- ggplot(
  plotting_df2,
  aes(x = Height_range, y = Area_perc, group = Umgebung, fill = Umgebung)
) +
  geom_col(width = 0.75) +
  # Attempted boxplot layer; left commented out because it raises the error
  # described in the question.
  #geom_boxplot(data = df_ZDI, aes(x = elev_ZDI, y = Area_perc)) +
  # --- scales ---
  scale_x_discrete() +
  # symmetric axis around zero; abs() hides the minus sign on the tick
  # labels of the negative side
  scale_y_continuous(
    breaks = seq(-35, 35, 10),
    labels = abs(seq(-35, 35, 10)),
    limits = c(-35, 35),
    expand = c(0, 0)
  ) +
  # bar colours
  scale_fill_manual(
    values = c("#AD355D", "#440A68"),
    name = "",
    breaks = c("Periglacial", "Glacial"),
    labels = c("Periglacial", "Glacial")
  ) +
  coord_flip() +
  # --- labels, legend and theme ---
  labs(x = "Elevation [m. a.s.l.]", y = expression("Area [%]"), title = "") +
  # show the legend entries in reverse order
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    # push the y-axis title further away from the axis labels
    axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0)),
    panel.grid.minor = element_blank()
  )
plot
xfb7svmp

xfb7svmp1#

您的代码中的第一个问题是geom_boxplot继承了全局的group和fill美学映射(aesthetics),而df_ZDI中并没有对应的Umgebung列;这可以通过添加inherit.aes = FALSE来修复。
但是,即使在修复之后,您的x轴仍是离散刻度,而在geom_boxplot中您希望把一个连续变量映射(map)到x上。要解决此问题,一个选项是把高度范围转换为数值,并通过geom_rect绘制条形。为此,我使用tidyr::separate_wider_regex(需要tidyr >= 1.3.0)将范围拆分为lower和upper两列。
注:第二个选择当然是把灰框也画成长方形,再画一段中线。

library(ggplot2)
library(tidyr)
library(dplyr)

# Split the textual band label (e.g. "2600 - <2800") into its two numbers:
# `lower` takes the first run of digits and `upper` the second; the unnamed
# middle pattern consumes the separator between them and is discarded.
# cols_remove = FALSE keeps the original Height_range column.
plotting_df2 <- tidyr::separate_wider_regex(
  plotting_df2,
  cols = Height_range,
  patterns = c(lower = "\\d+", ".*?", upper = "\\d+"),
  cols_remove = FALSE
)
# The captured pieces are character strings; convert them to numbers so they
# can be placed on a continuous axis.
plotting_df2 <- mutate(plotting_df2, across(c(lower, upper), as.numeric))

# Back-to-back bar chart on continuous axes with a boxplot of df_ZDI on top.
# The bars are drawn as rectangles (x = area share, y = elevation) so that
# the elevation axis is continuous and can be shared with the boxplot.
# Expects plotting_df2 to already contain the numeric `lower` / `upper`
# columns derived from Height_range.
plotting_df2 |>
  # `group`, not `ygroup`: the latter is not a ggplot2 aesthetic
  ggplot(aes(group = Umgebung, fill = Umgebung)) +
  # one rectangle per band and class, reaching from 0 to Area_perc; the
  # +/- 10 inset leaves a gap between vertically adjacent bands
  geom_rect(aes(ymin = lower + 10, ymax = upper - 10, xmin = 0, xmax = Area_perc)) +
  # inherit.aes = FALSE because df_ZDI has no Umgebung column, so the global
  # group/fill mapping cannot be evaluated for this layer. No x is mapped,
  # which places the box at x = 0; width is in x-axis (area %) units.
  geom_boxplot(data = df_ZDI, aes(y = elev_ZDI), inherit.aes = FALSE, width = 20) +
  # symmetric axis around zero; abs() hides the minus sign on the tick
  # labels of the negative side
  scale_x_continuous(
    breaks = seq(-35, 35, 10),
    labels = abs(seq(-35, 35, 10)),
    limits = c(-35, 35), expand = c(0, 0)
  ) +
  # there is no coord_flip() here: x carries the area share and y the elevation
  labs(x = "Area [%]", y = "Elevation [m. a.s.l.]", title = "") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0)), # move y-axis title further away from axis labels
    panel.grid.minor = element_blank()
  ) +
  guides(fill = guide_legend(reverse = TRUE)) + # reverse order of items in legend
  scale_fill_manual(
    values = c("#AD355D", "#440A68"), # change colors of bars
    name = "",
    breaks = c("Periglacial", "Glacial"),
    labels = c("Periglacial", "Glacial")
  )

相关问题