R -基于跨多列的二进制数据添加新列

niwlg2el  于 2023-03-05  发布在  其他
关注(0)|答案(3)|浏览(155)

我无法给我的数据框添加额外的列。我已经查阅了很多 Stack Overflow 上的帖子,以下只是其中一部分:"Adding a new column in a matrix in R"、"adding new column to data frame in R"、"new column not added to dataframe in R"、"R: complete a dataset with a new column added"、"R: add a new column to dataframes from a function"。
我需要新增一列,用来标明每一行在任意一个病毒检测列中是否出现阳性(即 "1")。
我想计算概率;据我了解,后续的计算需要用到这一列,所以如果可以的话请帮帮我!
样本数据

Filovirus (MOD) PCR   :    Phlebo (Sanchez-Seco) PCR
0                          0         
0                          1            
0                          0            
0                          0        
0                          0         
0                          0        
0                          0       
0                          0         
0                          0        
0                          0   

species code  forest site
<fctr>  <dbl> <fctr>
SM      1     UMNP-mangabey
SM      1     UMNP-mangabey
RC      9     UMNP-hondohondoc
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod
BWC     9     UMNP-hondohondod

我目前最接近的做法是用 base R 找出哪些行含有阳性值。
我参照了这里(here)的解决方案,但还没能让它在我的数据上运行成功。

# Asker's attempt (reported as not working).
# Find every cell of `data` equal to 1; arr.ind=T asks which() for
# (row, col) index pairs instead of flat positions.
tmp=which(data==1,arr.ind=T)    
# Reorder the index pairs by their row number.
tmp=tmp[order(tmp[,"row"]),]
# NOTE(review): this indexes the two labels by the matching *column* position
# and yields one element per matching cell, not one per row of `data`, so the
# result will generally not line up with the rows it is assigned to.
c("positive","negative")[tmp[,"col"]] -> data$new

如有任何建议,不胜感激。
数据输出

# Sample data (presumably dput() output): a 10-row tibble (class "tbl_df")
# with 29 PCR result columns stored as character strings, followed by the
# `species` factor, the numeric `code` column and the `forestsite` factor
# (32 columns in total).
structure(list(`Filovirus (MOD) PCR` = c("0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0"), `Filovirus (A) PCR` = c("0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0"), `Filovirus (B) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Filo C PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Filovirus (D) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Coronavirus   (Quan) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Coronavirus (Watanabe) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Paramyxo  (Tong)  PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Flavivirus Moureau PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Flavivirus  Sanchez-seco PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Arena Lozano 1 PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Retrovirus Courgnard PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Simian Foamy Goldberg (Pol) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Simian Foamy Goldberg (LTR Region) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Influenza (Anthony) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Influenza (Liang) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Rhabdo (CII) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Enterovirus CII I PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Enterovirus CII-II PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Alphav   (Sanchez-Seco) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Lyssavirus (Vasquez-Moron) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Seadornavirus (CII) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Hantavirus (Raboni) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Hantavirus (Klempa) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Nipah (Wacharapleusadee) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Henipa (Feldman) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Bunya S (Briese) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Bunya L (Briese) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), `Phlebo (Sanchez-Seco) PCR` = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0"), species = structure(c(3L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("SM", "SY", "BWC", 
"YB", "RC"), class = "factor"), code = c(2, 5, 5, 5, 5, 5, 5, 
5, 5, 5), forestsite = structure(c(3L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L), .Label = c("Magombera1", "Magombera2", "NDUFR", 
"Ndundulu1", "Ndundulu2", "Ndundulu3", "Nyumbanitu", "UMNP-campsite3", 
"UMNP-hondohondoa", "UMNP-hondohondob", "UMNP-hondohondoc", "UMNP-hondohondod", 
"UMNP-hondohondoe", "UMNP-HQ", "MamaGoti", "UMNP-mangabey", "UMNP-njokamoni", 
"UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3", "Sonjo", "SonjoRoad"
), class = "factor")), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))
ovfsdjhp

ovfsdjhp1#

    • **更新:**您的 0 和 1 是字符类型。先用 type.convert(as.is = TRUE) 将其转换为数字,代码即可正常运行:
library(dplyr)

# Convert the character "0"/"1" columns to numbers, then label each row
# "positive" when the sum over its PCR columns is greater than zero and
# "negative" otherwise.
df %>%
  type.convert(as.is = TRUE) %>%
  mutate(
    new_column = if_else(
      # `.` is the converted data frame supplied by the magrittr pipe.
      rowSums(select(., contains("PCR"))) > 0,
      "positive",
      "negative"
    )
  )
Filovirus (…¹ Filov…² Filov…³ Filo …⁴ Filov…⁵ Coron…⁶ Coron…⁷ Param…⁸ Flavi…⁹ Flavi…˟ Arena…˟ Retro…˟ Simia…˟ Simia…˟ Influ…˟
           <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
 1             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 2             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 3             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 4             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 5             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 6             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 7             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 8             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 9             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
10             0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
# … with 18 more variables: `Influenza (Liang) PCR` <int>, `Rhabdo (CII) PCR` <int>, `Enterovirus CII I PCR` <int>,
#   `Enterovirus CII-II PCR` <int>, `Alphav   (Sanchez-Seco) PCR` <int>, `Lyssavirus (Vasquez-Moron) PCR` <int>,
#   `Seadornavirus (CII) PCR` <int>, `Hantavirus (Raboni) PCR` <int>, `Hantavirus (Klempa) PCR` <int>,
#   `Nipah (Wacharapleusadee) PCR` <int>, `Henipa (Feldman) PCR` <int>, `Bunya S (Briese) PCR` <int>,
#   `Bunya L (Briese) PCR` <int>, `Phlebo (Sanchez-Seco) PCR` <int>, species <chr>, code <int>, forestsite <chr>,
#   new_column <chr>, and abbreviated variable names ¹​`Filovirus (MOD) PCR`, ²​`Filovirus (A) PCR`, ³​`Filovirus (B) PCR`,
#   ⁴​`Filo C PCR`, ⁵​`Filovirus (D) PCR`, ⁶​`Coronavirus   (Quan) PCR`, ⁷​`Coronavirus (Watanabe) PCR`, …
# ℹ Use `colnames()` to see all variable names
    • **第一个答案:**对应的 dplyr 写法如下(数据摘自 @langtang,非常感谢):

一个二个一个一个

hpxqektj

hpxqektj2#

更新:针对字符型列以及新的 32 列示例

# Flag each row as "positive" when any PCR result column is non-zero.
# The PCR columns are selected by name rather than by position: in the
# 32-column example the last PCR column sits at position 29, so dropping
# columns 29:32 would silently leave it out of the row sums.
pcr_cols <- grepl("PCR", names(df), fixed = TRUE)
# The results are stored as character, so convert each column to numeric
# on its own; as.character() first also keeps factor columns from being
# turned into their integer level codes.
pcr_values <- as.data.frame(
  lapply(df[pcr_cols], \(x) as.numeric(as.character(x)))
)
df["new"] <- ifelse(rowSums(pcr_values) > 0, "positive", "negative")

原始答案(假设为数字列):

您可以简单地执行以下操作:

# Sum every column except the first three per row and label the row
# "positive" when that sum exceeds zero, "negative" otherwise.
df["new"] <- ifelse(
  rowSums(df[, -seq_len(3)]) > 0,
  "positive",
  "negative"
)

输出:

species code      forest_site Filovirus (MOD) PCR Phlebo (Sanchez-Seco) PCR      new
1       SM    1    UMNP-mangabey                   0                         0 negative
2       SM    1    UMNP-mangabey                   0                         1 positive
3       RC    9 UMNP-hondohondoc                   0                         0 negative
4      BWC    9 UMNP-hondohondod                   0                         0 negative
5      BWC    9 UMNP-hondohondod                   0                         0 negative
6      BWC    9 UMNP-hondohondod                   0                         0 negative
7      BWC    9 UMNP-hondohondod                   0                         0 negative
8      BWC    9 UMNP-hondohondod                   0                         0 negative
9      BWC    9 UMNP-hondohondod                   0                         0 negative
10     BWC    9 UMNP-hondohondod                   0                         0 negative

输入:

# Example input: a 10-row base data.frame with three descriptive columns
# (species, code, forest_site) followed by two numeric PCR result columns;
# only row 2 has a 1, in `Phlebo (Sanchez-Seco) PCR`.
structure(list(species = c("SM", "SM", "RC", "BWC", "BWC", "BWC", 
"BWC", "BWC", "BWC", "BWC"), code = c(1L, 1L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L), forest_site = c("UMNP-mangabey", "UMNP-mangabey", 
"UMNP-hondohondoc", "UMNP-hondohondod", "UMNP-hondohondod", "UMNP-hondohondod", 
"UMNP-hondohondod", "UMNP-hondohondod", "UMNP-hondohondod", "UMNP-hondohondod"
), `Filovirus (MOD) PCR` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Phlebo (Sanchez-Seco) PCR` = c(0, 
1, 0, 0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA, 
-10L))
xesrikrc

xesrikrc3#

另一个选项是if_any

library(dplyr)

# Label each row "positive" when at least one PCR column holds a value above
# zero, and "negative" otherwise.
df1 %>%
  # PCR results may be stored as character "0"/"1"; convert them to numbers.
  type.convert(as.is = TRUE) %>%
  mutate(
    new_column = if_else(
      # An explicit predicate guarantees a logical result per row instead of
      # relying on implicit numeric-to-logical coercion and `+ 1` indexing.
      if_any(contains("PCR"), \(x) x > 0),
      "positive",
      "negative"
    )
  )
  • 输出
species code      forest_site Filovirus (MOD) PCR Phlebo (Sanchez-Seco) PCR new_column
1       SM    1    UMNP-mangabey                   0                         0   negative
2       SM    1    UMNP-mangabey                   0                         1   positive
3       RC    9 UMNP-hondohondoc                   0                         0   negative
4      BWC    9 UMNP-hondohondod                   0                         0   negative
5      BWC    9 UMNP-hondohondod                   0                         0   negative
6      BWC    9 UMNP-hondohondod                   0                         0   negative
7      BWC    9 UMNP-hondohondod                   0                         0   negative
8      BWC    9 UMNP-hondohondod                   0                         0   negative
9      BWC    9 UMNP-hondohondod                   0                         0   negative
10     BWC    9 UMNP-hondohondod                   0                         0   negative

相关问题