R语言 如何在ggplot中突出显示y轴的非零起点?

nhjlsmyf  于 2022-12-25  发布在  其他
关注(0)|答案(2)|浏览(124)

我尝试在ggplot中尽可能好地复制gapminder中的生育率图:

我有:

fertility <- read.csv("https://raw.githubusercontent.com/MarcoKuehne/marcokuehne.github.io/main/data/Gapminder/tfr-by-gapminder-v12-20171212.csv", sep = ";")

# manipulate
library(tidyverse)
fertility <- fertility %>% 
  select(!c(geo.name, geo, indicator)) %>% 
  rownames_to_column %>% 
  gather(var, value, -rowname) %>% 
  spread(rowname, value) %>% 
  rename(year = var, fert = `1`) %>% 
  slice_head(n = 301) 

fertility[,1:2] <- sapply(fertility[,1:2],FUN=as.numeric)

fertility1 <- fertility[1:217,]
fertility2 <- fertility[218:301,]

# visualize 
ggplot(data = fertility, aes(x=year, y=fert)) + 
  geom_point(aes(x=year[1], y=fert[1]), size = 4) + 
  geom_text(aes(x=year[1], y=fert[1], label = year[1]), vjust = 2.5) +
  geom_point(aes(x=year[165], y=fert[165]), size = 4) + 
  geom_text(aes(x=year[165], y=fert[165], label = year[165]), vjust = -3, hjust = -0.5) +
  geom_text(aes(x=year[165], y=fert[165], label = "5 births"), vjust = -1.5, hjust = -0.2) +
  #geom_point(aes(x=year[217], y=fert[217]), size = 4) + 
  geom_text(aes(x=year[217], y=fert[217], label = year[217]), vjust = -3) +
  geom_text(aes(x=year[217], y=fert[217], label = "2.5 births"), vjust = -1.5) +
  geom_line(data = fertility1, size=1.3, arrow=arrow(length=unit(0.30,"cm"), ends="last", type = "closed")) +
  geom_line(data = fertility2, size=1.3,linetype="dashed") +
  labs(title = "Average Number Of Babies Per Woman From 1800 to Today",
       caption = "Source: Gapminder based on UN-Pop", y="", x="") +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
       panel.grid.minor.x = element_blank())

我对geom_line()的线宽有一些问题。我正在寻找geom_line()geom_smooth()之间的折衷方案,以使线图更平滑一点。
但我现在最担心的是y轴的断裂,当y轴不是从零开始的时候,我想在gapminder的图表中强调一下。

yr9zkbsy

yr9zkbsy1#

根据@罗兰的建议,您可以使用loess曲线来平滑线条。但是,我建议不要尝试使用geom_smooth,而是在ggplot之外进行计算,因为我们还需要点和标签的平滑值,并且我们还需要“两条”线的相同值。
对于轴中断,一个简单的方法是使用两个annotate,一个放置segment,一个添加填充的“间隙”,我使用ribbon。来修复限制并设置clip="off"。还请注意,我通过geom_h/vline添加了轴线,以便注解层可以放置在轴的顶部。
最后,我稍微调整了数据管理代码,使用数据集放置标签和点,而不是使用vjust,我直接通过y位置移动标签。

编辑最后,我添加了一些额外的造型。

# manipulate
library(tidyverse)
library(showtext)
library(hrbrthemes)

font_add_google("lato", "Lato", regular.wt = 900)

fertility <- fertility %>%
  select(!c(geo.name, geo, indicator)) %>%
  mutate(
    across(-indicator.name, as.character),
    across(-indicator.name, ~ readr::parse_number(.x, locale = locale(decimal_mark = ",")))
  ) %>%
  pivot_longer(-indicator.name, names_to = "year", values_to = "fert", names_prefix = "X") %>%
  mutate(year = as.numeric(year)) |>
  mutate(fert_smooth = predict(loess(fert ~ year, span = .05)))

fertility_to_high <- fertility |>
  filter(year %in% c(1800, 1964, 2016)) |>
  mutate(
    label = if_else(!year == 1800, paste0(year, "<br>**", round(fert, 1), " births**"), as.character(year)),
    hjust = if_else(year == 2016, 0, .5)
  )

ggplot(data = fertility, aes(x = year, y = fert_smooth)) +
  geom_point(data = subset(fertility_to_high, year == 1964), size = 10, shape = 21, fill = NA, color = "black") +
  geom_point(data = subset(fertility_to_high, year != 2016), size = 3) +
  ggtext::geom_richtext(data = fertility_to_high, aes(y = fert + .15, label = label, hjust = hjust), 
                        vjust = 0, label.colour = NA, family = font_rc, lineheight = 1.2) +
  geom_line(linewidth = 1.3, linetype = "dashed") +
  geom_line(data = ~ subset(.x, year <= 2016), linewidth = 1.3, arrow = arrow(length = unit(0.30, "cm"), ends = "last", type = "closed")) +
  geom_hline(yintercept = 1) +
  geom_vline(xintercept = 1785) +
  annotate(geom = "ribbon", x = c(1780, 1790) - .55, ymin = c(1.3, 1.5), ymax = c(1.5, 1.7), fill = "white") +
  annotate(
    geom = "segment",
    x = c(1780, 1780) - .5, xend = c(1790, 1790) - .5,
    y = c(1.3, 1.5), yend = c(1.5, 1.7), linewidth = 1
  ) +
  scale_y_continuous(breaks = 1:6, labels = c(0, 2:6), expand = c(0, .0, .05, 0)) +
  scale_x_continuous(expand = c(0.05, 0, 0, 0)) +
  labs(
    title = toupper("Average Number Of Babies Per Woman From 1800 to Today"),
    caption = "Source: Gapminder based on UN-Pop", y = "", x = ""
  ) +
  coord_cartesian(clip = "off", ylim = c(1, 6), xlim = c(1800, NA)) +
  theme_bw(base_family = font_rc, base_size = 12) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    plot.title = element_text(family = "lato"),
    plot.title.position = "plot",
    plot.caption = element_text(family = "lato", color = "grey40"),
    axis.text.x = element_text(hjust = c(rep(.5, 3), 1))
  )

mklgxw1f

mklgxw1f2#

关键是要加上:

scale_y_continuous(limits = c(0, 6.2), expand = c(0, 0))

它告诉R将y的范围限制在0和6.2之间,并且在这两个值之外不显示间隙。下面的示例代码可能对你有用,运行后,你会得到:

fertility <- read.csv("https://raw.githubusercontent.com/MarcoKuehne/marcokuehne.github.io/main/data/Gapminder/tfr-by-gapminder-v12-20171212.csv", sep = ";")

# manipulate
library(tidyverse)
fertility <- fertility |>  
  select(!c(geo.name, geo, indicator)) |> 
  t() |> 
  as.data.frame() |> 
  rownames_to_column() |> 
  slice(-1) |> 
  as_tibble() |> 
  rename(c(year = rowname, fert = V1)) |> 
  mutate(year = str_remove(year, "X"),
         year = as.Date(ISOdate(year, 1, 1)),
         fert = str_replace(fert, ",", "."),
         fert = as.numeric(fert),
         fert_1 = case_when(year <= as.Date("2017-01-01") ~ fert, 
                            TRUE ~ as.numeric(NA)),
         fert_2 = case_when(year >= as.Date("2017-01-01") ~ fert, 
                            TRUE ~ as.numeric(NA)),
         arr_data = case_when((year > as.Date("2014-01-01") & year < "2018-01-01") ~ fert, 
                              TRUE ~ as.numeric(NA))) 
  

fertility |> 
  ggplot(aes(x = year, 
             y = fert)) +
  geom_smooth(aes(x = year, 
                  y = fert_1, 
                  group = 1),
              span = 0.11,
              se = FALSE,
              colour = "black",
              size = 1.5) +
  
  geom_line(aes(x = year, 
                y = arr_data),
            arrow = arrow(length=unit(0.4,"cm"), 
                          ends="last", 
                          type = "closed"), 
            size = 3) +
  
  geom_line(aes(x = year, 
                y = fert_2, 
                group = 1),
            linetype = 2,
            size = 1.5) +
  
  
  geom_point(aes(x = year[1], 
                 y = fert[1]),
             size = 5) +
  
  annotate(geom = "text", 
           x = fertility$year[1], 
           y = fertility$fert[1], 
           label = "1800",
           size = 4,
           vjust = -1.2) +
  
  geom_point(aes(x = fertility$year[166], 
                 y = fertility$fert[166]),
             shape = 1,
             size = 12,
             colour = "grey50") +
  
  annotate(geom = "text", 
           x = fertility$year[166], 
           y = fertility$fert[166], 
           label = "1965",
           size = 4,
           vjust = -4.2) +
  
  annotate(geom = "text", 
           x = fertility$year[166], 
           y = fertility$fert[166], 
           label = "5 births",
           size = 5,
           fontface = "bold",
           vjust = -2) +
  
  geom_point(aes(x = fertility$year[166], 
                 y = fertility$fert[166]),
             shape = 1,
             size = 12,
             colour = "grey50") +
  
  annotate(geom = "text", 
           x = fertility$year[166], 
           y = fertility$fert[166], 
           label = "1965",
           size = 4,
           vjust = -4.2) +
  
  annotate(geom = "text", 
           x = fertility$year[166], 
           y = fertility$fert[166], 
           label = "5 births",
           size = 5,
           fontface = "bold",
           vjust = -2) +
  
  annotate(geom = "text", 
           x = fertility$year[218], 
           y = fertility$fert[218], 
           label = "2017",
           size = 4,
           vjust = -4.2,
           hjust = 0) +
  
  annotate(geom = "text", 
           x = fertility$year[218], 
           y = fertility$fert[218], 
           label = "2.5 births",
           size = 5,
           fontface = "bold",
           vjust = -2,
           hjust = 0) +
  
  
  theme_bw() +
  scale_x_date(expand = expansion(mult = c(0.02, 0))) +
  
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    
    axis.text.x = element_text(size = 10),
    
    
    axis.line.x = element_line(),
    axis.line.y = element_line(),
    
    plot.caption = element_text(colour = "grey50"),
    plot.title = element_text(size = 16,
                              hjust = 0.5,
                              face = "bold"),
    
    
    plot.margin = margin(r = 18,
                         t = 5,
                         l = 5,
                         b = 2)
  ) +
  
  labs(
    title = "AVERAGE NUMBER OF BABIES PER WOMAN FROM 1800 TO TODAY",
    x = element_blank(),
    y = element_blank(),
    caption = "Source:Gapminder[7] based on UN-Pop[3]"
  )

相关问题