R语言:将每两行的值相加,并把结果重复存入另一列

ff29svar  于 2022-12-06  发布在  其他
关注(0)|答案(7)|浏览(365)

我有一个这样的数据框

# Example input: one numeric column whose values will be summed pair-wise.
x1 <- c(0, 1, 1, 1, 1, 0)

df <- data.frame(x1 = x1)

我想添加一个新列:把每两行的值相加,并把这个和同时填入对应的这两行。

在这里你可以看到前两行的和是1,并且在新列的前两行中给出(x2)。接下来,第三和第四行的和在新列的第三和第四行中给出。有人能帮忙吗?

knsnq2tg

knsnq2tg1#

您可以使用整除(向下取整除法,`%/%`)来定义分组,然后直接计算每组的和:

library(dplyr)

# Label consecutive row pairs by integer division (rows 1-2 -> 0,
# rows 3-4 -> 1, ...), write each pair's sum onto both of its rows, then
# drop the helper label again.
df %>%
  group_by(group = (seq_len(n()) - 1) %/% 2) %>%
  mutate(x2 = sum(x1)) %>%
  ungroup() %>%
  select(-group)
# # A tibble: 6 × 2
#      x1    x2
#   <dbl> <dbl>
# 1     0     1
# 2     1     1
# 3     1     2
# 4     1     2
# 5     1     1
# 6     0     1
72qzrwbm

72qzrwbm2#

这里使用dplyr创建一个辅助列作为分组依据

library(dplyr)

# Example input.
x1 <- c(0, 1, 1, 1, 1, 0)

df <- data.frame(x1)

# Pair index 1, 1, 2, 2, ... with exactly one label per row.
# ceiling() + length.out keep this valid when the number of rows is odd:
# the last row then forms a group of its own instead of receiving NA.
len_df <- nrow(df)
aux <- rep(seq_len(ceiling(len_df / 2)), each = 2, length.out = len_df)

# Group the rows by the precomputed pair index `aux`, broadcast each group's
# sum of x1 into x2, and remove the helper column from the result.
df %>%
  group_by(aux = aux) %>%
  mutate(x2 = sum(x1)) %>%
  ungroup() %>%
  select(-aux)

# A tibble: 6 x 2
     x1    x2
  <dbl> <dbl>
1     0     1
2     1     1
3     1     2
4     1     2
5     1     1
6     0     1
w7t8yxp5

w7t8yxp53#

使用gl为每2行创建一个索引,并在分组后求和

library(dplyr)
# gl(n(), 2, n()) yields the labels 1, 1, 2, 2, ... truncated to the number
# of rows; rows sharing a label are summed and the helper column is dropped.
df <- df %>%
  mutate(grp = as.integer(gl(n(), 2, n()))) %>%
  group_by(grp) %>%
  mutate(x2 = sum(x1)) %>%
  ungroup() %>%
  select(-grp)
  • 输出
df
# A tibble: 6 × 2
     x1    x2
  <dbl> <dbl>
1     0     1
2     1     1
3     1     2
4     1     2
5     1     1
6     0     1

或使用collapse/data.table

library(data.table)
library(collapse)
# Convert df to a data.table in place, then add x2 by reference: a grouped
# sum over consecutive row pairs, written back onto every row of its group.
setDT(df)
df[, x2 := fsum(x1, g = rep(.I, each = 2L, length.out = .N), TRA = "replace_fill")]
  • 输出
> df
      x1    x2
   <num> <num>
1:     0     1
2:     1     1
3:     1     2
4:     1     2
5:     1     1
6:     0     1
x759pob2

x759pob24#

您可以使用 ave + ceiling(两者都是 base R 自带的函数)

# Sum each consecutive pair of rows and repeat the sum on both rows.
# FUN = sum replaces the "pair mean * 2" trick so that a trailing unpaired
# row (odd number of rows) keeps its own value instead of being doubled.
transform(df, x2 = ave(x1, ceiling(seq_along(x1) / 2), FUN = sum))
  x1 x2
1  0  1
2  1  1
3  1  2
4  1  2
5  1  1
6  0  1
kcwpcxri

kcwpcxri5#

这种方法不需要创建中间变量。
它把数据按每两行一组拆分,对每组求和,然后把每个和重复两次写入新列。

# Example input.
df <- data.frame(x1 = c(0, 1, 1, 1, 1, 0))

# Split x1 into consecutive pairs, sum each pair, and repeat every sum onto
# the rows it came from.
# - vapply() returns a plain numeric vector; lapply() would turn x2 into a
#   list column.
# - The pair index is derived from nrow(df) instead of a hard-coded 1:3.
# - length.out trims the repeat when the number of rows is odd.
df$x2 <- rep(
  unname(
    vapply(split(df$x1, ceiling(seq_len(nrow(df)) / 2)), sum, numeric(1))
  ),
  each = 2,
  length.out = nrow(df)
)

#  x1 x2
#1  0  1
#2  1  1
#3  1  2
#4  1  2
#5  1  1
#6  0  1
jucafojl

jucafojl6#

在Base R中,您可以执行以下操作:

# Pair-wise sums via ave() over a gl() grouping factor (1, 1, 2, 2, ...).
# ceiling() and the explicit `length` give exactly one label per row, so an
# odd number of rows no longer produces a factor of the wrong length.
transform(
  df,
  x2 = ave(x1, gl(ceiling(nrow(df) / 2), 2, length = nrow(df)), FUN = sum)
)

  x1 x2
1  0  1
2  1  1
3  1  2
4  1  2
5  1  1
6  0  1
z3yyvxxp

z3yyvxxp7#

下面再给出几种方案,并附上基准测试(benchmark)结果。

# Benchmark input: 10,000 random 0/1 values. `replace` is spelled out as a
# named logical instead of passing a positional numeric 1.
x1 <- sample(0:1, 1e4, replace = TRUE)

# Time five ways of computing, for every element of x1, the sum of the
# consecutive pair it belongs to. The expressions are left exactly as posted
# because they are what is being measured.
# NOTE(review): several variants (matrix, recycle, onyambu) appear to rely on
# length(x1) being even - confirm before reusing them on odd-length input.
microbenchmark::microbenchmark(
  # Reshape into a 2-row matrix, take column sums, repeat each sum twice.
  matrix = rep(colSums(matrix(x1, 2)), each = 2),
  # The recycled offsets c(1, -1) turn indices 1, 2, 3, 4, ... into
  # 2, 1, 4, 3, ..., so each element is added to its pair partner.
  recycle = x1 + x1[seq(x1) + c(1, -1)],
  # Cumulative sum sampled at every second position; diff() then gives the
  # sum of each pair, which is repeated twice.
  cumsum = rep(diff(cumsum(c(0, x1))[seq(1, length(x1) + 1, 2)]), each = 2),
  # ave() with its default FUN (mean) per pair, multiplied by 2.
  Thomas = ave(x1, ceiling(seq_along(x1)/2))*2,
  # ave() with FUN = sum over a gl() pair factor.
  onyambu = ave(x1, gl(length(x1)/2, 2), FUN = sum),
  # Ask microbenchmark to verify that all expressions give equal results.
  check = "equal"
)
#> Unit: microseconds
#>     expr       min         lq        mean     median         uq       max neval
#>   matrix    65.001    69.6510    79.27203    78.4510    82.1510   148.501   100
#>  recycle    95.001   100.6505   108.65003   107.5510   110.6010   176.901   100
#>   cumsum   137.201   148.9010   169.61090   166.5505   177.7015   340.002   100
#>   Thomas 24645.401 25297.2010 26450.46994 25963.3515 27463.2010 31803.101   100
#>  onyambu  3774.902  3935.7510  4444.36500  4094.3520  4336.1505 11070.301   100

data.table用于大数据:

library(data.table)
library(collapse)

# Large benchmark input: one million random 0/1 values in a one-column data
# frame. `replace` is spelled out as a named logical instead of a positional
# numeric 1.
x1 <- sample(0:1, 1e6, replace = TRUE)
df <- data.frame(x1)

# Time three data.table variants that add the pair-sum column x2 to df with
# `:=`. The expressions are left exactly as posted because they are what is
# being measured.
# NOTE(review): every expression returns the same `df`, modified by
# reference, so check = "identical" may be comparing an object with itself
# rather than validating the three results against each other - confirm.
microbenchmark::microbenchmark(
  # 2-row matrix, column sums, each sum repeated twice.
  matrix = setDT(df)[, x2 := rep(colSums(matrix(x1, 2)), each = 2)],
  # .I + c(1, -1) indexes each row's pair partner (2, 1, 4, 3, ...).
  recycle = setDT(df)[, x2 := x1 + x1[.I + c(1, -1)]],
  # Grouped sum over the pair index 1, 1, 2, 2, ... via collapse::fsum.
  # NOTE(review): TRA = 1 presumably selects the transformation that writes
  # the group statistic back onto every row - confirm in the collapse docs.
  akrun = setDT(df)[, x2 := fsum(x1, g = rep(.I, each = 2, length.out = .N), TRA = 1)],
  check = "identical"
)
#> Unit: milliseconds
#>     expr       min        lq     mean    median       uq     max neval
#>   matrix  8.053302  8.937301 10.64786  9.376551 12.51890 17.2037   100
#>  recycle 12.117101 12.965950 16.57696 14.003151 17.09805 56.4729   100
#>    akrun 10.071701 10.611051 14.42578 11.291601 14.79090 55.1141   100

相关问题