如何在R中将变量重新编码到新列中

bxfogqkk 于 2023-03-27 发布在其他

关注(0)|答案(4)|浏览(117)

我在使用R。
我有以下数据集，并希望创建一个新列Medication1_coded，其中列“Medication1”中所有的“paracetamol”（扑热息痛）和“ibuprofen”（布洛芬）值显示为组1，所有的“opiate”（阿片类）和“ketamine”（氯胺酮）值显示为组2，其余值显示为组3。然后，我需要对列名中包含“Medication”的10列逐一重复此操作，每次各创建一个对应的新列。

# Example data: an id column, three medication columns holding drug names,
# and one non-medication column (`other`).
df <- data.frame(
  id = sprintf("ID%d", seq_len(4)),
  Medication1 = c("paracetamol", "ibuprofen", "opiate", "sertraline"),
  Medication2 = c("Lipitor", "ketamine", "zoloft", "xanax"),
  Medication3 = c("ibuprofen", "paracetamol", "Zocor", "Zestril"),
  other = head(LETTERS, 4)
)

非常感谢各位热心的朋友 <3

来源：https://stackoverflow.com/questions/75805641/how-to-code-a-variable-in-a-new-column-in-r

4条答案

按热度按时间

sbdsn5lh1#

您可以使用 reframe 配合 across 来处理各列，本例中即列名 contains("Medication") 的那些列：

library(dplyr)

# Recode every column whose name contains "Medication" into a group number:
# ibuprofen/paracetamol -> 1, opiate/ketamine -> 2, anything else -> 3.
# `.names` appends "_coded" to each source column name. reframe() returns
# only these new columns; use mutate() with the same across() call to keep
# the original columns alongside them.
df |>
  reframe(
    across(
      contains("Medication"),
      \(med) case_match(
        med,
        c("ibuprofen", "paracetamol") ~ 1,
        c("opiate", "ketamine") ~ 2,
        .default = 3
      ),
      .names = "{.col}_coded"
    )
  )

Medication1_coded Medication2_coded Medication3_coded
1                 1                 3                 1
2                 1                 2                 1
3                 2                 3                 3
4                 3                 3                 3

赞(0）回复(0）举报 2023-03-27

nnsrf1az2#

Base R（基础R）：

# Named lookup vector: drug name -> group code.
Codes <- c(paracetamol = 1, ibuprofen = 1, opiate=2, ketamine = 2)
# Logical mask marking the columns whose name starts with "Medication".
idx <- startsWith(names(df), 'Medication')
# unlist() flattens the selected columns into one character vector; indexing
# Codes by it yields the group code, or NA for any drug not in the table.
# pmin(..., 3, na.rm = TRUE) turns those NAs into 3 (it would also cap any
# code above 3). The result overwrites the Medication columns in place; no
# separate *_coded columns are created.
df[idx] <- pmin(Codes[unlist(df[idx])], 3, na.rm = TRUE)
# Print the recoded data frame.
df

   id Medication1 Medication2 Medication3 other
1 ID1           1           3           1     A
2 ID2           1           2           1     B
3 ID3           2           3           3     C
4 ID4           3           3           3     D

另一种方式：

# Same recoding via a wide -> long -> wide round trip. Expects the named
# lookup vector `Codes` to exist already.
# Stack every column between the first and the last one into a single
# `Medication` column; sep = "" lets reshape() split names such as
# "Medication1" into the stem "Medication" and the index 1.
a <- reshape(
  df,
  varying = seq(2, ncol(df) - 1),
  direction = "long",
  sep = ""
)
# Unknown drugs index to NA, which pmin() with na.rm = TRUE replaces by 3.
a$Medication <- pmin(Codes[a$Medication], 3, na.rm = TRUE)
# Called without further arguments, reshape() undoes the previous reshape.
reshape(a)

       id other Medication1 Medication2 Medication3
ID1.1 ID1     A           1           3           1
ID2.1 ID2     B           1           2           1
ID3.1 ID3     C           2           3           3
ID4.1 ID4     D           3           3           3

如果要使用原生管道（|>）：

# Pipe form of the wide -> long -> wide round trip. Expects the named lookup
# vector `Codes` to exist already.
df |>
  reshape(
    varying = seq(2, ncol(df) - 1),
    direction = "long",
    sep = ""
  ) |>
  within({
    # Unknown drugs index to NA, which pmin() replaces by 3.
    Medication <- pmin(Codes[Medication], 3, na.rm = TRUE)
  }) |>
  reshape()

       id other Medication1 Medication2 Medication3
ID1.1 ID1     A           1           3           1
ID2.1 ID2     B           1           2           1
ID3.1 ID3     C           2           3           3
ID4.1 ID4     D           3           3           3

赞(0）回复(0）举报 2023-03-27

ztmd8pv53#

首先用 pivot_longer 将数据转换为长格式，然后用 left_join 按药物名称匹配对应的代码，把未匹配到的名称编码为 3，最后用 pivot_wider 还原为原来的宽格式。

library(dplyr)
library(tidyr)

# Lookup table: group code (`code`) for each known drug name (`nm`).
Recode <- data.frame(
  code = c(1, 1, 2, 2),
  nm = c("paracetamol", "ibuprofen", "opiate", "ketamine")
)

# Long format -> attach codes by drug name -> back to wide format.
# pivot_longer(-id) also stacks the `other` column, so its rows are passed
# through unchanged by the ifelse() below. Because `value` mixes those
# strings with the numeric codes, every resulting column is character.
df %>%
  pivot_longer(-id) %>%
  left_join(Recode, by = join_by(value == nm)) %>%
  mutate(
    # Drugs without a match in Recode fall into group 3.
    code = replace(code, is.na(code), 3),
    value = ifelse(name == "other", value, code),
    # Drop the helper column before widening.
    code = NULL
  ) %>%
  pivot_wider(names_from = name, values_from = value)
# A tibble: 4 × 5
  id    Medication1 Medication2 Medication3 other
  <chr> <chr>       <chr>       <chr>       <chr>
1 ID1   1           3           1           A    
2 ID2   1           2           1           B    
3 ID3   2           3           3           C    
4 ID4   3           3           3           D

数据

# dput()-style definition of the example data frame: an id column, three
# medication columns and one non-medication column, four rows each.
df <- structure(
  list(
    id = c("ID1", "ID2", "ID3", "ID4"),
    Medication1 = c("paracetamol", "ibuprofen", "opiate", "sertraline"),
    Medication2 = c("Lipitor", "ketamine", "zoloft", "xanax"),
    Medication3 = c("ibuprofen", "paracetamol", "Zocor", "Zestril"),
    other = c("A", "B", "C", "D")
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

赞(0）回复(0）举报 2023-03-27

cgyqldqp4#

Base R（基础R）
方法一：

# Method 1: look each drug up in a named vector, then fill the misses.
codes <- c(paracetamol = 1, ibuprofen = 1, opiate = 2, ketamine = 2)
# Catch-all group: one above the highest explicit code.
fallback <- max(codes) + 1 # 3
med_cols <- grep("^Medication\\d+$", names(df), value = TRUE)
for (v in med_cols) {
  drug <- df[[v]]
  # match() gives NA for drugs missing from `codes`, so those stay NA here.
  coded <- codes[match(drug, names(codes))]
  # Only unknown drugs get the fallback; missing inputs remain NA.
  # (ifelse() would work here as well.)
  coded[is.na(coded) & !is.na(drug)] <- fallback
  df[[paste0(v, "_coded")]] <- coded
}

方法二：

# Method 2: grow the lookup vector so that every drug seen in the data has
# an entry, then index it directly.
codes <- c(paracetamol = 1, ibuprofen = 1, opiate = 2, ketamine = 2)
for (v in grep("^Medication\\d+$", names(df), value = TRUE)) {
  # Drugs in this column that have no code yet go to the catch-all group 3.
  unseen <- setdiff(unique(df[[v]]), names(codes))
  codes[unseen] <- 3
  df[[paste0(v, "_coded")]] <- codes[df[[v]]]
}

方法三：

# Method 3: groups are given as a list of drug-name vectors; a drug's code is
# the position of its group in the list, and any other non-missing drug gets
# the next number after the last group.
Codes <- list(c("paracetamol", "ibuprofen"), c("opiate", "ketamine"))
for (v in grep("^Medication\\d+$", names(df), value = TRUE)) {
  newv <- paste0(v, "_coded")
  # Seed with a numeric NA so the new column is always double, whatever
  # mix of groups the data happens to contain.
  df[[newv]] <- NA_real_
  for (j in seq_along(Codes)) {
    df[df[[v]] %in% Codes[[j]], newv] <- j
  }
  # Catch-all, applied once after all groups are assigned: rows still
  # uncoded whose drug is not missing. Missing drugs stay NA.
  df[is.na(df[[newv]]) & !is.na(df[[v]]), newv] <- length(Codes) + 1 # 3
}

结果：

> df
   id Medication1 Medication2 Medication3 other Medication1_coded
1 ID1 paracetamol     Lipitor   ibuprofen     A                 1
2 ID2   ibuprofen    ketamine paracetamol     B                 1
3 ID3      opiate      zoloft       Zocor     C                 2
4 ID4  sertraline       xanax     Zestril     D                 3
  Medication2_coded Medication3_coded
1                 3                 1
2                 2                 1
3                 3                 3
4                 3                 3

赞(0）回复(0）举报 2023-03-27

我来回答

如何在R中的新列中编写变量

4条答案

数据

相关问题

热门标签

最新问答