R中 Dataframe 列表的元素中位数

osh3o9ms  于 2023-02-06  发布在  其他
关注(0)|答案(4)|浏览(121)

我有几个列表,每个列表包含许多数据框。我想对各列表中位置相同的数据框逐元素计算中位数,即计算 list1[[1]][1,1]、list2[[1]][1,1]、list3[[1]][1,1] 三者的中位数,其余元素以此类推。每个数据框都有许多列,但下面是一些只有两列的示例数据:

# Reproducible toy input: three lists, each holding two 4-row data frames
# with integer columns `a` and `b`. The draws happen in the same order as
# writing every data.frame() call out by hand.
set.seed(1)

list1 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)

list2 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)

list3 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)

这是预期结果:

[[1]]
 a b
 7 4
 9 9
 7 3
 4 6

[[2]]
 a b
 5 7
 8 6
 2 6
 5 2

有什么建议吗?

4bbkushb

4bbkushb1#

使用purrr

library(purrr)
# Gather the input lists so they can be transposed together.
lsts <- list(list1, list2, list3)

# Outer transpose: group the data frames that sit at the same position in
# each list. Inner transpose: group the same column across those data
# frames, then take the median of each row.
map(transpose(lsts), function(same_position) {
  map_dfc(transpose(same_position), function(same_column) {
    apply(list2DF(same_column), 1, median)
  })
})

[[1]]
# A tibble: 4 × 2
      a     b
  <int> <int>
1     7     4
2     9     9
3     7     3
4     4     6

[[2]]
# A tibble: 4 × 2
      a     b
  <int> <int>
1     5     7
2     8     6
3     2     6
4     5     2

在Base R中,假设它们都具有相同的结构:

# Base R element-wise median. Assumes every data frame has the same shape
# and every list has the same length.
# Array axes: rows, columns, data frame position, source list.
dims <- c(dim(list1[[1]]), length(list1), length(lsts))
# Take the median over the last axis (across the source lists); `d` is
# rows x columns x data frame position.
d <- apply(array(unlist(lsts), dims), seq_len(length(dims) - 1L), median)
# Split along the data-frame axis so each element is one rows x columns
# matrix. Permuting the axes before splitting would split by column and
# mix values from different data frames.
asplit(d, 3)

[[1]]
     [,1] [,2]
[1,]    7    4
[2,]    9    9
[3,]    7    3
[4,]    4    6

[[2]]
     [,1] [,2]
[1,]    5    7
[2,]    8    6
[3,]    2    6
[4,]    5    2
ckocjqey

ckocjqey2#

下面是另一个(base R)选项:

# Walk list1, list2 and list3 in parallel; each call receives the data
# frames that share a position.
Map(function(...) {
  # Row-wise median over the same column taken from every data frame.
  median_by_row <- function(...) {
    apply(do.call(cbind, list(...)), 1, median)
  }
  # mapply() iterates over the columns of the data frames in parallel and
  # simplifies the per-column results into a matrix.
  medians <- mapply(FUN = median_by_row, ...)
  data.frame(medians)
}, list1, list2, list3)
# [[1]]
#   a b
# 1 7 4
# 2 9 9
# 3 7 3
# 4 4 6
# [[2]]
#   a b
# 1 5 7
# 2 8 6
# 3 2 6
# 4 5 2

当然不漂亮,但功能齐全。
相关的dplyr选项:

# Element-wise median across list1, list2 and list3 (needs dplyr attached).
list(list1, list2, list3) |>
  # Stack the data frames of each list; id1 = position of the data frame.
  lapply(bind_rows, .id = "id1") |>
  # Stack the lists; id2 = which list the row came from.
  bind_rows(.id = "id2") |>
  # bind_rows() stores ids as character; convert id1 so the 10th data frame
  # sorts after the 9th instead of right after the 1st.
  mutate(id1 = as.integer(id1)) |>
  group_by(id1, id2) |>
  # Row position inside each original data frame.
  mutate(rn = row_number()) |>
  group_by(id1, rn) |>
  # Median across the lists for each (data frame, row) pair.
  summarize(across(c(a, b), median), .groups = "drop") |>
  select(-rn) |>
  # One tibble per data frame position, returned as a list.
  group_nest(id1) |>
  pull(data)
# [[1]]
# # A tibble: 4 × 2
#       a     b
#   <int> <int>
# 1     7     4
# 2     9     9
# 3     7     3
# 4     4     6
# [[2]]
# # A tibble: 4 × 2
#       a     b
#   <int> <int>
# 1     5     7
# 2     8     6
# 3     2     6
# 4     5     2
9avjhtql

9avjhtql3#

当然不是最有效的解决方案,但tidyverse的一个选项可能是:

# Collect list1, list2, ... from the workspace. The pattern is anchored so
# that unrelated objects whose names merely contain "list" are not picked up.
map_dfr(mget(ls(pattern = "^list[0-9]+$")),
        function(list_of_lists) {
          # Tag every row with its row position and with the position of its
          # data frame inside the list.
          imap(list_of_lists, function(lists, lists_id) {
            lists %>%
              mutate(rowid = row_number(),
                     lists_id = lists_id)
          })
        }) %>%
  group_by(rowid, lists_id) %>%
  # "drop_last" is summarise()'s default here; stating it silences the
  # regrouping message and leaves the result grouped by rowid as before.
  summarise(across(c(a, b), median), .groups = "drop_last")

  rowid lists_id     a     b
  <int>    <int> <int> <int>
1     1        1    10     3
2     1        2     8     1
3     2        1     5     4
4     2        2     9     8
5     3        1     6     6
6     3        2     6     3
7     4        1     3     2
8     4        2     4     6

如果目标是返回列表:

# Collect list1, list2, ... from the workspace. The pattern is anchored so
# that unrelated objects whose names merely contain "list" are not picked up.
map_dfr(mget(ls(pattern = "^list[0-9]+$")),
        function(list_of_lists) {
          # Tag every row with its row position and with the position of its
          # data frame inside the list.
          imap(list_of_lists, function(lists, lists_id) {
            lists %>%
              mutate(rowid = row_number(),
                     lists_id = lists_id)
          })
        }) %>%
  group_by(rowid, lists_id) %>%
  # Drop all grouping right away instead of a separate ungroup() step.
  summarise(across(c(a, b), median), .groups = "drop") %>%
  # One tibble per data frame position.
  group_split(lists_id)

[[1]]
# A tibble: 4 × 4
  rowid lists_id     a     b
  <int>    <int> <int> <int>
1     1        1    10     3
2     2        1     5     4
3     3        1     6     6
4     4        1     3     2

[[2]]
# A tibble: 4 × 4
  rowid lists_id     a     b
  <int>    <int> <int> <int>
1     1        2     8     1
2     2        2     9     8
3     3        2     6     3
4     4        2     4     6
y3bcpkx1

y3bcpkx14#

下面是tidyverse解决方案的初稿(我相信它还可以改进):

library(tidyverse)

# Draft tidyverse solution. The sizes are hard-coded for the sample data:
# 3 lists, 8 rows per list (2 data frames x 4 rows).
bind_rows(list1, list2, list3) %>%
  # x = index of the source list (1, 2, 3).
  mutate(x = rep(1:3, each = 8, length.out = n())) %>%
  # NOTE(review): grouping by x before pivoting on x looks redundant; kept
  # as in the original draft.
  group_by(x) %>%
  # One list-column per (column, source list) pair: a_1 ... b_3.
  pivot_wider(names_from = x,
              values_from = c(a, b),
              values_fn = list) %>%
  # unnest() without `cols` is deprecated and warns; select the columns
  # explicitly.
  unnest(cols = everything()) %>%
  rowwise() %>%
  transmute(a = median(c(a_1, a_2, a_3)),
            b = median(c(b_1, b_2, b_3))) %>%
  ungroup() %>%
  # Cut the stacked rows back into blocks of 4, one per data frame.
  group_by(x = as.integer(gl(n(), 4, n()))) %>%
  group_split() %>%
  map(., ~ (.x %>% select(-x)))
[[1]]
# A tibble: 4 × 2
      a     b
  <int> <int>
1     7     4
2     9     9
3     7     3
4     4     6

[[2]]
# A tibble: 4 × 2
      a     b
  <int> <int>
1     5     7
2     8     6
3     2     6
4     5     2

相关问题