我可以在Rstudio中使用循环高效地创建这个 Dataframe 吗?

wz3gfoph  于 2022-12-25  发布在  其他
关注(0)|答案(1)|浏览(135)

我想优化我的代码。我有很多重复,我相信这可以用一个循环来完成。

# "ECG only" row. For the baseline column (cardinv_ecg1) and the latest column
# (cardinv_ecg2): the sum, the count of the first two tabulated values, and the
# mean expressed as a percentage.
ciecg <- data.frame(
  baseline_cardinv_n = sum(d[["cardinv_ecg1"]], na.rm = TRUE),
  baseline_total_n = table(d[["cardinv_ecg1"]])[1] +
    table(d[["cardinv_ecg1"]])[2],
  baseline_pct = mean(d[["cardinv_ecg1"]], na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d[["cardinv_ecg2"]], na.rm = TRUE),
  latest_total_n = table(d[["cardinv_ecg2"]])[1] +
    table(d[["cardinv_ecg2"]])[2],
  latest_pct = mean(d[["cardinv_ecg2"]], na.rm = TRUE) * 100
)
rownames(ciecg) <- "ECG only"
ciecg

# "ECG + HOL" row. Same six summaries, taken from the combined columns
# cardinv_ecg1_hol1 (baseline) and cardinv_ecg2_hol2 (latest).
ciecghol <- data.frame(
  baseline_cardinv_n = sum(d[["cardinv_ecg1_hol1"]], na.rm = TRUE),
  baseline_total_n = table(d[["cardinv_ecg1_hol1"]])[1] +
    table(d[["cardinv_ecg1_hol1"]])[2],
  baseline_pct = mean(d[["cardinv_ecg1_hol1"]], na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d[["cardinv_ecg2_hol2"]], na.rm = TRUE),
  latest_total_n = table(d[["cardinv_ecg2_hol2"]])[1] +
    table(d[["cardinv_ecg2_hol2"]])[2],
  latest_pct = mean(d[["cardinv_ecg2_hol2"]], na.rm = TRUE) * 100
)
rownames(ciecghol) <- "ECG + HOL"
ciecghol

# "ECG + HOL + TTE" row. Same six summaries, taken from the combined columns
# cardinv_ecg1_hol1_ec1 (baseline) and cardinv_ecg2_hol2_ec2 (latest).
ciecgholec <- data.frame(
  baseline_cardinv_n = sum(d[["cardinv_ecg1_hol1_ec1"]], na.rm = TRUE),
  baseline_total_n = table(d[["cardinv_ecg1_hol1_ec1"]])[1] +
    table(d[["cardinv_ecg1_hol1_ec1"]])[2],
  baseline_pct = mean(d[["cardinv_ecg1_hol1_ec1"]], na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d[["cardinv_ecg2_hol2_ec2"]], na.rm = TRUE),
  latest_total_n = table(d[["cardinv_ecg2_hol2_ec2"]])[1] +
    table(d[["cardinv_ecg2_hol2_ec2"]])[2],
  latest_pct = mean(d[["cardinv_ecg2_hol2_ec2"]], na.rm = TRUE) * 100
)
rownames(ciecgholec) <- "ECG + HOL + TTE"
ciecgholec

# Stack the three one-row summaries and round every value to one decimal place.
round(rbind(ciecg, ciecghol, ciecgholec), digits = 1)

如果我在控制台中打印最后一行,我会得到:

> round(rbind(ciecg,ciecghol,ciecgholec),1)
                baseline_cardinv_n baseline_total_n baseline_pct latest_cardinv_n latest_total_n latest_pct
ECG only                        47              194         24.2               83            169       49.1
ECG + HOL                       61              183         33.3               59            120       49.2
ECG + HOL + TTE                 73              180         40.6               65            113       57.5

我想用一个循环来缩短我的代码,下面是我的尝试(为了简化问题,我先只尝试了类似 ciecg 的单项检查情况):

# Attempt to build the summary in a loop, one column per exam type.
# NOTE(review): this block is kept as written because it reproduces the error
# quoted below. It fails for two independent reasons:
#   1. `d$exam_1[i]` asks `d` for a column literally named "exam_1"; `$` does
#      not use the value stored in the variable `exam_1`. Looking a column up
#      by a name held in a variable needs `d[[exam_1[i]]]`.
#   2. `ci_exam` is created with 3 rows, but each iteration assigns a
#      6-element vector as a new column.
ci_exam <- data.frame(matrix(nrow = 3,ncol = 0))
# Baseline column names, latest column names, and the label for each exam.
exam_1 <- c("cardinv_ecg1","cardinv_hol1","cardinv_ec1")
exam_2 <- c("cardinv_ecg2","cardinv_hol2","cardinv_ec2")
exam_name <- c("ECG only","HOL only","EC only")
for (i in 1:3) {
  # Six values per exam: sum, tabulated count and mean * 100, first for the
  # baseline column and then for the latest column.
  ci_exam[exam_name[i]] <- c(sum(d$exam_1[i],na.rm=T),
                             table(d$exam_1[i])[1]+table(d$exam_1[i])[2],
                             mean(d$exam_1[i],na.rm=T)*100,
                             sum(d$exam_2[i],na.rm=T),
                             table(d$exam_2[i])[1]+table(d$exam_2[i])[2],
                             mean(d$exam_2[i],na.rm=T)*100)
}

但给了我这个错误

Error in `[<-.data.frame`(`*tmp*`, exam_name[i], value = c(0, NA, NA,  : 
  erstatning har 6 rækker, data har 3
In addition: Warning messages:
1: In mean.default(d$exam_1[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA
2: In mean.default(d$exam_2[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA

尝试创建一个循环。我希望在一个循环中创建整个 Dataframe(把 ciecg、ciecghol、ciecgholec 全部合并到一个循环中,得到与命令 round(rbind(ciecg,ciecghol,ciecgholec),1) 相同的结果)。

33qvvth1

33qvvth11#

步骤1:将重复代码转换为函数

#' Summarise one exam combination as a one-row data frame
#'
#' For a baseline column and a latest column, reports the sum of the column,
#' the number of non-missing observations, and the mean expressed as a
#' percentage.
#'
#' The totals count non-missing values directly instead of adding the first
#' two cells of `table()`. The `table()` form returns `NA` whenever a column
#' holds only one distinct value (for example all zeros). Counting non-missing
#' values also matches the denominator used by `mean(..., na.rm = TRUE)`.
#' NOTE(review): this assumes the columns are 0/1 (or logical) indicators,
#' which is what the percentages imply -- confirm against the data.
#'
#' @param x Name of the baseline column, as a string.
#' @param y Name of the latest column, as a string.
#' @param z Row name to give the resulting one-row data frame.
#' @param data Data frame holding the columns. Defaults to the object `d`
#'   found in the calling workspace, so existing three-argument calls behave
#'   as before.
#' @return A one-row data frame with columns `baseline_cardinv_n`,
#'   `baseline_total_n`, `baseline_pct`, `latest_cardinv_n`,
#'   `latest_total_n` and `latest_pct`, and row name `z`.
do_thing <- function(x, y, z, data = d) {
  # Fail early with a readable message instead of an obscure downstream error.
  missing_cols <- setdiff(c(x, y), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  baseline <- data[[x]]
  latest <- data[[y]]

  this_ <- data.frame(
    baseline_cardinv_n = sum(baseline, na.rm = TRUE),
    baseline_total_n = sum(!is.na(baseline)),
    baseline_pct = mean(baseline, na.rm = TRUE) * 100,
    latest_cardinv_n = sum(latest, na.rm = TRUE),
    latest_total_n = sum(!is.na(latest)),
    latest_pct = mean(latest, na.rm = TRUE) * 100
  )
  rownames(this_) <- z
  this_
}
# Example: build the "ECG only" row from its baseline and latest columns.
ciecg <- do_thing(x = "cardinv_ecg1", y = "cardinv_ecg2", z = "ECG only")

步骤2:迭代参数并调用函数

# Note: to line up with the first set of examples in the question, the column
# names below are the combined ones (e.g. "cardinv_ecg1_hol1"), not the
# single-exam names used in the loop attempt.
# Each row pairs a baseline column (exam_1) with its latest counterpart
# (exam_2) and the row label to use for that combination.
params <- data.frame(
  exam_1 = c("cardinv_ecg1", "cardinv_ecg1_hol1", "cardinv_ecg1_hol1_ec1"),
  exam_2 = c("cardinv_ecg2", "cardinv_ecg2_hol2", "cardinv_ecg2_hol2_ec2"),
  exam_name = c("ECG only", "ECG + HOL", "ECG + HOL + TTE")
)

library(dplyr)

# Call do_thing() once per row of `params`, storing each result in the
# list-column `res`.
results_from_params <- params |>
  rowwise() |>
  mutate(res = list(do_thing(x = exam_1, y = exam_2, z = exam_name)))
# Bare name: auto-prints the intermediate table, as the parenthesised
# assignment did.
results_from_params

# Stack the per-row results into a single data frame.
bind_rows(results_from_params$res)

相关问题