R语言 各行中x最小值的平均值

mwngjboj  于 2023-02-01  发布在  其他
关注(0)|答案(5)|浏览(169)

我有一个数据集,看起来像

df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))

我需要为每一行找到最低的四个条目,然后计算这四个条目的平均值。
我试过了

df %>% rowwise() %>% mutate(Min = min(A, B, C, D, E))

但我很难找到一个方法,为第二个最低的入口,第三个最低的入口等。
由于我有36列条目,总是需要与4个最低行条目的工作,我试图与百分位数(4/36 = 1/9 ...),但这似乎是复杂的方式做。
有没有更简单的方法让我忽略?

qkf9rpyu

qkf9rpyu1#

您可以使用c_across,对值进行排序,选择其中的前4个值,并获得它们的平均值。

library(dplyr)

df <- df %>% 
  rowwise() %>% 
  mutate(Average_4 = mean(head(sort(c_across()), 4)))

df

#       A      B      C       D       E   Average_4
#     <dbl>  <dbl>  <dbl>   <dbl>   <dbl>     <dbl>
# 1 -0.560   1.22  -1.07   0.426  -0.695    -0.474 
# 2 -0.230   0.360 -0.218 -0.295  -0.208    -0.238 
# 3  1.56    0.401 -1.03   0.895  -1.27     -0.249 
# 4  0.0705  0.111 -0.729  0.878   2.17      0.0826
# 5  0.129  -0.556 -0.625  0.822   1.21     -0.0575
# 6  1.72    1.79  -1.69   0.689  -1.12     -0.102 
# 7  0.461   0.498  0.838  0.554  -0.403     0.277 
# 8 -1.27   -1.97   0.153 -0.0619 -0.467    -0.940 
# 9 -0.687   0.701 -1.14  -0.306   0.780    -0.357 
#10 -0.446  -0.473  1.25  -0.380  -0.0834   -0.346

或者是R -

df$Average_4 <- apply(df, 1, function(x) mean(head(sort(x), 4)))

数据

set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
zfciruhq

zfciruhq2#

使用Base R,

df$New <-  colMeans(apply(df,1,sort)[1:4,])
df

给出,

A          B          C           D           E         New
1  -0.56047565  1.2240818 -1.0678237  0.42646422 -0.69470698 -0.47413553
2  -0.23017749  0.3598138 -0.2179749 -0.29507148 -0.20791728 -0.23778529
3   1.55870831  0.4007715 -1.0260044  0.89512566 -1.26539635 -0.24887592
4   0.07050839  0.1106827 -0.7288912  0.87813349  2.16895597  0.08260834
5   0.12928774 -0.5558411 -0.6250393  0.82158108  1.20796200 -0.05750290
6   1.71506499  1.7869131 -1.6866933  0.68864025 -1.12310858 -0.10152416
7   0.46091621  0.4978505  0.8377870  0.55391765 -0.40288484  0.27744988
8  -1.26506123 -1.9666172  0.1533731 -0.06191171 -0.46665535 -0.94006136
9  -0.68685285  0.7013559 -1.1381369 -0.30596266  0.77996512 -0.35739914
10 -0.44566197 -0.4727914  1.2538149 -0.38047100 -0.08336907 -0.34557336

数据:

set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))
xghobddn

xghobddn3#

如果您首先获取长格式的数据,这类事情往往会更容易。

library(tidyr)
library(dplyr) 
df %>% 
    pivot_longer(everything()) %>% 
    group_by(name) %>% 
    top_n(-4) %>%
    group_by(name) %>%
    summarise(mean_val = mean(value))
# A tibble: 5 x 2
  name  mean_val
  <chr>    <dbl>
1 A       -0.620
2 B       -1.87 
3 C       -1.91 
4 D       -1.17 
5 E       -0.36
cl25kdpy

cl25kdpy4#

使用排序函数并将第二个最小值作为子集

df %>% rowwise() %>% mutate(Min = sort(c(A, B, C, D, E), FALSE)[1],
                            Min2 = sort(c(A, B, C, D, E), FALSE)[2])

# A tibble: 10 x 7
# Rowwise: 
        A      B      C      D      E    Min   Min2
    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1  0.252 -1.69  -0.612 -1.54   1.50  -1.69  -1.54 
 2  0.642  1.25  -0.372 -1.32  -0.289 -1.32  -0.372
 3 -0.347 -1.77  -0.515 -0.676  1.92  -1.77  -0.676
 4 -0.515  0.223 -0.342 -0.367  0.519 -0.515 -0.367
 5  0.398 -0.561  0.684 -0.514  0.477 -0.561 -0.514
 6  0.954 -0.508  0.798 -0.959  0.303 -0.959 -0.508
 7  0.260 -0.939 -0.800 -0.122  0.821 -0.939 -0.800
 8 -2.05   1.64   0.230  0.666  0.393 -2.05   0.230
 9  0.288 -0.653  1.69   1.24  -1.44  -1.44  -0.653
10  0.362  0.839 -0.768  1.18  -0.869 -0.869 -0.768

如果要查找四个最小值的平均值:

df %>% rowwise() %>% mutate(Average_Min = mean(sort(c(A, B, C, D, E), FALSE)[1:4]))

# A tibble: 10 x 6
# Rowwise: 
         A       B      C      D       E Average_Min
     <dbl>   <dbl>  <dbl>  <dbl>   <dbl>       <dbl>
 1 -2.99    0.279   0.999 -0.131  0.128      -0.678 
 2 -0.0505 -0.0756 -1.09  -0.417 -0.225      -0.452 
 3  1.30    0.123   0.854  0.652  0.670       0.575 
 4 -1.35   -1.03   -0.627 -1.65   0.538      -1.16  
 5 -0.950   0.0897  0.421 -0.677 -0.0553     -0.398 
 6  1.69    1.32   -0.396 -1.31   0.502       0.0307
 7  0.244  -0.308  -0.390 -0.405 -0.640      -0.436 
 8 -1.39    1.48    0.384  1.36   1.80        0.458 
 9  0.887  -0.470   1.66  -0.661  0.999       0.189 
10  0.0282 -0.866  -1.13  -0.915  0.878      -0.720
zzlelutf

zzlelutf5#

我们可以使用pmap来遍历行sortvectorc(...)),使用head获取前“n”个值,然后使用mean返回,以在mutate中创建新列“Average_4

library(purrr)
library(dplyr)
df %>% 
   mutate(Average_4 = pmap_dbl(cur_data(), ~ mean(head(sort(c(...)), 4))))
             A          B          C           D           E   Average_4
1  -0.56047565  1.2240818 -1.0678237  0.42646422 -0.69470698 -0.47413553
2  -0.23017749  0.3598138 -0.2179749 -0.29507148 -0.20791728 -0.23778529
3   1.55870831  0.4007715 -1.0260044  0.89512566 -1.26539635 -0.24887592
4   0.07050839  0.1106827 -0.7288912  0.87813349  2.16895597  0.08260834
5   0.12928774 -0.5558411 -0.6250393  0.82158108  1.20796200 -0.05750290
6   1.71506499  1.7869131 -1.6866933  0.68864025 -1.12310858 -0.10152416
7   0.46091621  0.4978505  0.8377870  0.55391765 -0.40288484  0.27744988
8  -1.26506123 -1.9666172  0.1533731 -0.06191171 -0.46665535 -0.94006136
9  -0.68685285  0.7013559 -1.1381369 -0.30596266  0.77996512 -0.35739914
10 -0.44566197 -0.4727914  1.2538149 -0.38047100 -0.08336907 -0.34557336

或者,使用collapse中的fmean/dapply可以更快地完成此操作

library(collapse)
df$Average_4 <-  dapply(df, MARGIN = 1, FUN = \(x) fmean(ss(sort(x), 1:4)))
df$Average_4
#[1] -0.47413553 -0.23778529 -0.24887592  0.08260834 -0.05750290 -0.10152416  0.27744988 -0.94006136 -0.35739914 -0.34557336

数据

set.seed(123)
df <- data.frame(A=rnorm(10), B=rnorm(10), C=rnorm(10), D=rnorm(10), E=rnorm(10))

相关问题