R语言 我如何将这个cv.glmnet Package 成一个函数?

46qrfjad  于 2023-04-03  发布在  其他
关注(0)|答案(1)|浏览(168)

这是基本的代码,我需要运行相当多的时间,鉴于我的基因集。

x <-as.matrix(pca_matrix1)
y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)


set.seed(02042019) ## for reproducibility    
ix <- sample(1:nrow(x), 0.7*nrow(x)) 
y_tr <- y[ix]
x_tr <- x[ix,]
y_te <- y[-ix]
x_te <- x[-ix,]

#library(glmnet)
cvfit_tr <- cv.glmnet(x_tr,y_tr,family="cox",nfolds = 10, alpha = 1)
plot(cvfit_tr)



co_mat <- as.matrix(coef(cvfit_tr,s="lambda.min"))
co_mat[co_mat[,1]!=0,]


## get predictions x^Tbeta
preds <- predict(cvfit_tr,x_te,s="lambda.min",type="link")
preds
## split into low, medium, high risk groups
#library(ggplot2)
levs <- cut_number(preds,2)
fit <- survfit(y_te~levs)
out <- survdiff(y_te~levs)
out
broom::glance(out)$p.value

我的一个尝试是创建函数

lasso_surv <- function(exprs, surv, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
  

  x <- as.matrix(exprs)
  y <- Surv(surv$OS_MONTHS, surv$Status)
  
  cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, type.measure = "cox")
  
  best.lambda <- cv.fit$lambda.min
  fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)
  coef <- as.matrix(fit$beta[, drop = FALSE])
  
  lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)
  return(lasso.result)
}

当我运行这个的时候我得到这个错误

lasso_result <- lasso_surv(as.matrix(x_te), y_te)
Error in surv$OS_MONTHS : $ operator is invalid for atomic vectors

这里我已经创建了surv对象

y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)

我敢肯定,有一些非常基本的错误,在代码中,我无法找出它
任何建议或帮助将不胜感激

已更新

代码由itIsNaz建议

lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
  
  y <- Surv(OS_MONTHS, Status) 
  x <- as.matrix(exprs)     
  cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")     
  best.lambda <- cv.fit$lambda.min   
  fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)  
  coef <- as.matrix(fit$beta[, drop = FALSE])      
  lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)  
  return(lasso.result) 
  
}

resullt <- lasso_surv(x,surv_obj1$OS_MONTHS,surv_obj1$Status)

错误

Error in Ops.Surv(x, w) : Invalid operation on a survival time

我修改了type.measure到family=考克斯

更新我的生存数据

dput(surv_obj1)
structure(list(OS_MONTHS = c(5.3, 95.5, 25.8, 7.1, 21.5, 43.4, 
7.7, 81.9, 32.6, 44.4, 11.3, 32.3, 99.9, 15.4, 20.5, 6.3, 6.6, 
8.1, 1.2, 19, 11.5, 55.4, 73, 11.2, 10.2, 52.6, 57.3, 4.6, 7.5, 
30.5, 6.3, 45.8, 69, 46.8, 22.6, 95.6, 10.5, 11.8, 28.4, 26.3, 
43.5, 4.5, 52.7, 3.9, 30.6, 61.2, 47.5, 0.6, 45.3, 12.2, 2.4, 
9.3, 8.1, 59.3, 19.2, 26.3, 41.4, 36.9, 0.5, 8.2, 7.4, 36.1, 
1.4, 5.5, 0.3, 88.3, 0.2, 0.5, 0.8, 8.4, 4.2, 29.7, 0.8, 67.7, 
18.1, 5.7, 0.1, 5.7, 13.8, 6.6, 48.3, 56.3, 27.1, 24.4, 5.6, 
53.9, 4, 26, 10.2, 34, 35.2, 47, 24.1, 42.1, 76.2, 7, 77.3, 32.7, 
16.4, 9.3, 17, 7.9, 24.8, 0.3, 22.3, 27, 83.5, 34.3, 3.1, 7.5, 
6.6, 5.2, 83.3, 33.5, 16.3, 2.3, 75.8, 42.1, 30, 7.7, 0.7, 20.5, 
1, 1.3, 11, 73.6, 40.3, 1.3, 1.6, 27.4, 1.9, 29.4, 18.5, 59, 
118.1, 8, 2.4, 47.2, 37.1, 26.8, 86.6, 46.5, 27.7, 71.3, 10.7, 
4.6), Status = c(1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 
1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 1L, 1L)), row.names = c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 69L, 70L, 71L, 
72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 
98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 
109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 
120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 
142L, 143L, 144L, 145L, 146L, 147L), class = "data.frame")

以及匹配生存维度的表达式虚拟 Dataframe

genes <- paste0("Gene", 1:30)

# Create a matrix with 146 rows and 30 columns
exprs <- matrix(rnorm(146*30), nrow = 146, ncol = 30)

# Set the column names to be the gene names
colnames(exprs) <- genes
p4tfgftt

p4tfgftt1#

library(glmnet)
library(survival)

# Function for Lasso regularization on survival data
lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
  
  y <- Surv(OS_MONTHS, Status) 
  x <- as.matrix(exprs)     
  cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")     
  best.lambda <- cv.fit$lambda.min   
  fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda, family = "cox")  
  coef <- as.matrix(fit$beta[, drop = FALSE])      
  lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)  
  return(lasso.result) 
  
}

# Generate simulated survival data
set.seed(123)
n <- 100
p <- 50
x <- matrix(rnorm(n*p), n, p)
beta <- c(runif(20, 0.5, 1), rep(0, p-20))
colnames(x) <- paste0("gene", 1:p)
y <- c(rep(1, 50), rep(0, 50))
t <- rexp(n, exp(x %*% beta))

# Create survival object
surv_obj1 <- data.frame(OS_MONTHS = t, Status = y)

# Run lasso_surv on survival object
res <- lasso_surv(x, surv_obj1$OS_MONTHS, surv_obj1$Status, nfolds = 10, alpha = 1)

print(res)

相关问题