如何知道R中后面几行的值是否相同?

uyto3xhc  于 2022-12-25  发布在  其他
关注(0)|答案(5)|浏览(132)

我有这样的数据:
| Break | Start | Stop |
| ------ | ------ | ------ |
| 1 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 0 | 0 |
| 1 | 0 | 0 |
| 1 | 0 | 1 |
| 0 | 0 | 0 |
| 0 | 0 | 0 |
| 0 | 0 | 0 |
| 0 | 0 | 0 |
我想创建一个新列:仅当 Break == 1 且连续出现五个 1 时,才在该列中标记 Start 和 Stop,如下所示:
| Break | Start | Stop | NewCol |
| ------ | ------ | ------ | ------ |
| 1 | 1 | 0 | Start |
| 1 | 0 | 0 | |
| 1 | 0 | 0 | |
| 1 | 0 | 0 | |
| 1 | 0 | 1 | Stop |
| 0 | 0 | 0 | |
| 0 | 0 | 0 | |
| 0 | 0 | 0 | |
| 0 | 0 | 0 | |

nuypyhwy

nuypyhwy1#

我在数据中又添加了一些行

# Example data: 31 rows of integer 0/1 flags.
#   Break: six 1s, seven 0s, then eighteen 1s.
#   Start: 1 at rows 1, 7, 14 and 25.
#   Stop:  1 at rows 5, 12, 17 and 31.
df <- data.frame(
  Break = rep(c(1L, 0L, 1L), times = c(6L, 7L, 18L)),
  Start = replace(integer(31), c(1, 7, 14, 25), 1L),
  Stop = replace(integer(31), c(5, 12, 17, 31), 1L)
)

使用 rle() 和 for 循环:

# Encode every row as "A" (Start), "B" (Stop), "AB" (both) or "" (neither),
# then run-length encode so each stretch of identical codes becomes one run.
tmp <- rle(paste0(
  ifelse(df$Start == 1, "A", ""),
  ifelse(df$Stop == 1, "B", "")
))

# Only interior runs have both a predecessor and a successor. seq_len() gives
# an empty sequence when there are fewer than three runs, whereas
# seq(2, n - 1) would count backwards and index outside the runs.
n_runs <- length(tmp$lengths)
for (i in seq_len(max(n_runs - 2L, 0L)) + 1L) {
  # Pattern wanted: a Start run, at least three unmarked rows, a Stop run.
  if (tmp$values[i - 1] == "A" && tmp$values[i] == "" &&
    tmp$lengths[i] >= 3 && tmp$values[i + 1] == "B") {
    tmp$values[i - 1] <- "Start"
    tmp$values[i + 1] <- "Stop"
  }
}

# Expand the runs back to one label per row, then blank every row that is not
# a confirmed Start/Stop with Break == 1.
df$NewCol <- rep(tmp$values, tmp$lengths)
df$NewCol[!(df$NewCol %in% c("Start", "Stop") & df$Break == 1)] <- ""

结果是

Break Start Stop NewCol
1      1     1    0  Start
2      1     0    0       
3      1     0    0       
4      1     0    0       
5      1     0    1   Stop
6      1     0    0       
7      0     1    0       
8      0     0    0       
9      0     0    0       
10     0     0    0       
11     0     0    0       
12     0     0    1       
13     0     0    0       
14     1     1    0       
15     1     0    0       
16     1     0    0       
17     1     0    1       
18     1     0    0       
19     1     0    0       
20     1     0    0       
21     1     0    0       
22     1     0    0       
23     1     0    0       
24     1     0    0       
25     1     1    0  Start
26     1     0    0       
27     1     0    0       
28     1     0    0       
29     1     0    0       
30     1     0    0       
31     1     0    1   Stop
jvlzgdj9

jvlzgdj92#

试试这个:

# Example data: two runs of five 1s separated by zeros.
df <- data.frame(Break = c(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0))
df$Start <- 0
df$Stop <- 0

# Number of consecutive Break == 1 rows that make up one Start..Stop group.
run_len <- 5L

# Run-length encode the Break flag so groups are counted inside each
# uninterrupted run of 1s. Comparing the whole data frame against a vector of
# 1s only finds cells equal to 1; it does not detect consecutive rows, so a
# short run would shift every later marker.
runs <- rle(df$Break %in% 1)
pos_in_run <- sequence(runs$lengths) # 1, 2, 3, ... restarting in every run
run_size <- rep(runs$lengths, runs$lengths)
is_one <- rep(runs$values, runs$lengths)

# Keep only rows that belong to a complete group of run_len rows; leftover
# rows at the end of a run get neither marker.
in_full_group <- is_one & pos_in_run <= (run_size %/% run_len) * run_len

df$Start[in_full_group & pos_in_run %% run_len == 1L] <- 1
df$Stop[in_full_group & pos_in_run %% run_len == 0L] <- 1
14ifxucb

14ifxucb3#

据我理解,Start 和 Stop 两列可以忽略?无论如何,此解决方案并不需要它们,如有需要可以自行添加。我扩展了示例数据,以展示连续 1 的个数多于或少于 5 时的边界情况。

library(dplyr)

# Label groups of five consecutive Break == 1 rows: "Start" on the first row
# of a group and "Stop" on the fifth. The result is printed, not assigned.
df %>%
  # A new group begins wherever Break differs from the previous row, so each
  # group is one uninterrupted run of identical Break values.
  group_by(grp = cumsum(lag(Break, default = TRUE) != Break)) %>%
  mutate(
    # Within a run of 1s this is the row position modulo 5:
    # 1 = first row of a group of five, 0 = fifth row.
    is = cumsum(Break) %% 5,
    newcol = case_when(
      is == 0 & Break == 1 ~ "Stop",
      # Only a Start when the row four ahead is still in the same run; a
      # missing lead() value is NA, which case_when() treats as no match.
      is == 1 & Break == 1 &
        lead(is, 4) == 0 & lead(Break, 4) == 1 ~ "Start",
      TRUE ~ ""
    ),
    # Drop the helper column again.
    is = NULL
  ) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 18 × 2
   Break newcol 
   <dbl> <chr>  
 1     1 "Start"
 2     1 ""     
 3     1 ""     
 4     1 ""     
 5     1 "Stop" 
 6     0 ""     
 7     0 ""     
 8     1 "Start"
 9     1 ""     
10     1 ""     
11     1 ""     
12     1 "Stop" 
13     1 "Start"
14     1 ""     
15     1 ""     
16     1 ""     
17     1 "Stop" 
18     1 ""
数据
# Example data: five 1s, two 0s, then eleven 1s (18 rows, stored as doubles).
df <- data.frame(Break = rep(c(1, 0, 1), times = c(5, 2, 11)))
ccgok5k5

ccgok5k54#

这也可以是一种解决方案:

# install.packages("data.table")
library(data.table)
    # Example data: three runs of Break == 1, each five rows long.
    df <- structure(list(
      Break = c(1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L),
      Start = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L),
      Stop = c(0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)),
      class = "data.frame", row.names = c(NA, -21L))
    dt <- as.data.table(df)

    # Number the batches of at most five consecutive Break == 1 rows. Run ids
    # are computed on the full table first so a batch never straddles a gap of
    # Break == 0 rows; chunking all Break == 1 rows together would glue the
    # tail of one run to the head of the next.
    dt[, run := rleid(Break)]
    dt[Break == 1, dbatch := rleid(run, (rowid(run) - 1L) %/% 5L)]
    dt[, run := NULL]

    # A batch qualifies when it holds five Break == 1 rows, opens with Start
    # and closes with Stop; only its Start/Stop rows are labelled. Rows with
    # Break == 0 share the NA batch and always fall through to the default.
    dt[, NewCol := {
      complete <- sum(Break) == 5 & Start[1] == 1 & Stop[5] == 1
      fcase(
        complete & Start == 1, "Start",
        complete & Stop == 1, "Stop",
        default = ""
      )
    }, by = dbatch]

    Break Start Stop dbatch NewCol
 1:     1     1    0      1  Start
 2:     1     0    0      1       
 3:     1     0    0      1       
 4:     1     0    0      1       
 5:     1     0    1      1   Stop
 6:     0     0    0     NA       
 7:     0     0    0     NA       
 8:     0     0    0     NA       
 9:     0     0    0     NA       
10:     1     1    0      2       
11:     1     0    0      2       
12:     1     0    0      2       
13:     1     0    1      2       
14:     1     0    0      2       
15:     0     0    0     NA       
16:     0     0    0     NA       
17:     1     1    0      3  Start
18:     1     0    0      3       
19:     1     0    0      3       
20:     1     0    0      3       
21:     1     0    1      3   Stop
    Break Start Stop dbatch NewCol
dz6r00yl

dz6r00yl5#

使用rle

library(purrr)
library(dplyr)

# Mark every run of at least five consecutive Break == 1 rows with "Start" on
# its first row and "Stop" on its last; all other rows get "".
df |>
  mutate(
    NewCol = with(
      rle(Break),
      # One character vector per run, as long as the run itself.
      map2(lengths, values, \(len, val) {
        # isTRUE() keeps an NA run from making the && condition error.
        if (len >= 5 && isTRUE(val == 1)) {
          c("Start", rep("", len - 2), "Stop")
        } else {
          rep("", len)
        }
      }) |>
        # Concatenate into a plain character vector. flatten() would return a
        # list and turn NewCol into a list-column.
        unlist() |>
        as.character()
    )
  )

##>   Break Start Stop NewCol
##> 1     1     1    0  Start
##> 2     1     0    0       
##> 3     1     0    0       
##> 4     1     0    0       
##> 5     1     0    1   Stop
##> 6     0     0    0       
##> 7     0     0    0       
##> 8     0     0    0       
##> 9     0     0    0

相关问题