我已经成功地使用R的ggplot + geom_sankey在ggsankey tutorial之后创建了一个Sankey镜像。然而，我遵循了这篇文章（How to skip nodes with NA value in ggsankey?）来解决数据中的“NA”。
但是，我想：
1.正确对齐每个单独的节点列（以下称为“早期”，“最新”和“当前”），以便节点框按时间顺序彼此相对，从左到右流动;
1.去掉图中出现的空方格;
1.使第一列节点的节点（例如，“Earlier”）的大小与其他节点的大小相似。

可复制示例

devtools::install_github("davidsjoberg/ggsankey")
library(ggsankey); library(ggplot2)

字符串

创建一个df

Years <- data.frame(Earlier = c(rep(2012, 2), paste(2013), paste(2014), rep(2015, 2), rep(2018, 2), rep(2022, 2), rep(NA, 31)),
           Latest = c(rep(2023, 4), rep(2022, 6), rep(2021, 10), rep(2020, 3), rep(2019, 6), rep(2018, 3), rep(2017, 3), rep(2013, 4), rep(NA, 2)),
           Current = c(rep(2023, 10), rep(2022, 12), rep(2021, 11), rep(2020, 1), rep(NA, 7)))

型

Shuffle

set.seed(123)
Years[sample(1:nrow(Years)), ]

df_stack <- Years %>% make_long(Earlier, Latest, Current)
head(df_stack)

型

图形化

ggplot(df_stack, aes(x = x, 
                       next_x = next_x,
                       node = node,
                       next_node = next_node, 
                       fill = factor(node), 
                       label = node,
                       color = factor(node))) + 
  geom_sankey(flow.alpha = 0.5, node.color = 1, 
              smooth = 6, width = 0.2,) +  #width = width of nodes
  geom_sankey_label(size = 3.5, color = 1, fill = "white") +
  scale_fill_viridis_d(direction = -1, option = "turbo") + 
  scale_colour_viridis_d(direction = -1, option = "turbo") +
  theme_sankey(base_size = 15) +
  theme(legend.position = "none") + xlab('')

型
这会产生下图。我在这张图上也指出了第2点和第3点。x1c 0d1x
对于第1点（上图）-我想按时间顺序排列年份，以便更容易解释。这里是一个非常粗略的草图，其中节点应该相互尊重。它应该看起来像上图，但它的顺序和间距的节点，我得到了这个遗憾的图像。

额外信息：sessionInfo（）R version 4.3.0（2023-04-21）平台：aarch 64-apple-darwin 20（64-bit）运行环境：macOS Ventura 13.6
版本：ggsankey_0.0.99999
任何帮助航行这个泥潭将不胜感激。

空白框来自列df_stack$node中的缺失值。您可以通过过滤NA来删除这些框。

library(ggsankey)
library(ggplot2)
library(dplyr)

Years <- data.frame(Earlier = c(rep(2012, 2), 2013, 2014, rep(2015, 2), rep(2018, 2), rep(2022, 2), rep(NA, 31)),
                    Latest = c(rep(2023, 4), rep(2022, 6), rep(2021, 10), rep(2020, 3), rep(2019, 6), rep(2018, 3), rep(2017, 3), rep(2013, 4), rep(NA, 2)),
                    Current = c(rep(2023, 10), rep(2022, 12), rep(2021, 11), rep(2020, 1), rep(NA, 7)))

df_stack <- Years %>% 
  make_long(Earlier, Latest, Current) %>% 
  filter(!is.na(node))

# plot
ggplot(df_stack, aes(x = x, 
                     next_x = next_x,
                     node = node,
                     next_node = next_node, 
                     fill = factor(node), 
                     label = node,
                     color = factor(node))) + 
  geom_sankey(flow.alpha = 0.5, node.color = 1, 
              smooth = 6, width = 0.2,) +  
  geom_sankey_label(size = 3.5, color = 1, fill = "white") +
  scale_fill_viridis_d(direction = -1, option = "turbo") + 
  scale_colour_viridis_d(direction = -1, option = "turbo") +
  theme_sankey(base_size = 15) +
  theme(legend.position = "none") + xlab('')

字符串
x1c 0d1x的数据
创建于2023-11-10使用reprex v2.0.2

编辑

我无法真正找到明确的解决方案，你的问题，对齐节点平行的年份，但我有一些建议，以调整或节点之间的空间。
1.在geom_sankey和geom_sankey_label中使用space参数。这种方法增加了所有列中节点之间的间距，但不仅仅是第一列。请注意，space的值对于geom_sankey和geom_sankey_label必须相同

ggplot(df_stack, aes(x = x, 
                     next_x = next_x,
                     node = node,
                     next_node = next_node, 
                     fill = factor(node), 
                     label = node,
                     color = factor(node))) + 
  geom_sankey(flow.alpha = 0.5, node.color = 1, 
              smooth = 6, width = 0.2, 
              space = 15 # add spacing
              ) +  
  geom_sankey_label(size = 3.5, color = 1, fill = "white", 
                    space = 15 # add spacing
                    ) +
  scale_fill_viridis_d(direction = -1, option = "turbo") + 
  scale_colour_viridis_d(direction = -1, option = "turbo") +
  theme_sankey(base_size = 15) +
  theme(legend.position = "none") + xlab('')

型

创建于2023-11-10使用reprex v2.0.2
1.为节点指定权重。根据文档（?make_long），您可以为图中的连接指定权重。您可以为第一列Earlier中的节点给予更多权重;这将相应地更改大小和间距。但是，请记住，修改后的节点之间的链接不再表示原始频率。

df_stack <- Years |> 
  mutate(weights = if_else(is.na(Earlier), 1, 3)) |> 
  make_long(Earlier, Latest, Current, value = weights) |> 
  filter(!is.na(node))

ggplot(df_stack, aes(x = x, 
                     next_x = next_x,
                     node = node,
                     next_node = next_node, 
                     fill = factor(node), 
                     label = node,
                     color = factor(node),
                     value = value)) + 
  geom_sankey(flow.alpha = 0.5, node.color = 1,
              smooth = 6, width = 0.2) +  
  geom_sankey_label(size = 3.5, color = 1, fill = "white") +
  scale_fill_viridis_d(direction = -1, option = "turbo") + 
  scale_colour_viridis_d(direction = -1, option = "turbo") +
  theme_sankey(base_size = 15) +
  theme(legend.position = "none") + xlab('')

型

创建于2023-11-10使用reprex v2.0.2

替代解决方案

如果你愿意学习不同的包，还有一个使用networkD3::sankeyNetwork的替代解决方案。输出是一个交互式图表，你可以在其中移动/重新组织单个节点。

library(networkD3)
library(dplyr)

# define links and nodes
# get counts and add column numbers
df1 <- Years |> 
  count(Earlier, Latest) |> 
  na.omit() |> 
  rename(source = Earlier, target = Latest) |> 
  mutate(source = paste0(source, "_1"),
         target = paste0(target, "_2"))

df2 <- Years |> 
  count(Latest, Current) |> 
  na.omit() |> 
  rename(source = Latest, target = Current) |> 
  mutate(source = paste0(source, "_2"),
         target = paste0(target, "_3"))

links <- bind_rows(df1, df2)

# nodes
nodes <- data.frame(id = unique(c(links$source, links$target)), stringsAsFactors = FALSE) |> 
  mutate(name = gsub("_\\d$", "", id))

# add source and target ids
links <- links |> 
  mutate(source_id = match(source, nodes$id) - 1,
         target_id = match(target, nodes$id) - 1)

# plot
sn <- sankeyNetwork(Links = links, Nodes = nodes, Source = "source_id",
              Target = "target_id", Value = "n", NodeID = "name")

# add x-axis labels
js_string <-
  '
   function(el) {

    var cols_x = this.sankey.nodes()
      .map(d => d.x).filter((v, i, a) => a.indexOf(v) === i)
      .sort(function(a, b){return a - b});

    var labels = ["Earliest", "Latest", "Current" ]

    cols_x.forEach((d, i) => {
      d3.select(el).select("svg")
        .append("text")
        .attr("x", d)
        .attr("y", 12)
        .attr("text-anchor", "start")
        .text(labels[i]);
    })
   }
  '
sn <- htmlwidgets::onRender(sn, js_string)
sn

型

R中的geom_sankey：间距和对齐节点

创建一个df

Shuffle

图形化

1条答案

相关问题

热门标签

最新问答