循环运行多个数据集并将每个输出保存在R中

wtzytmuj 于 2022-12-06 发布在其他

关注(0)|答案(3)|浏览(183)

我们尝试对八个不同的数据集运行一个循环，然后以标准化格式保存输出。
Dataframe 称为df1、df2、df3等。
我不能共享数据（here is a sample of the data），但每个数据集都是df1的子集--因此它始终具有相同的列。
df1将显示为：

age   wt   sex
10    200  F
15    250  F
20    300  F
12    200  M
13    250  M
25    300  M

对于不同的条件，子集化的df s将是例如df2<-df1%>%filter(sex=="F")、df3<-df1%>%filter(sex=="M")等。
下面是一个小的代码示例，我们希望为每个 Dataframe 运行该代码。

nls.mon <- nls(wt~A*(1-exp(k*(t0-age))), 
    data=df1,
    start = list(A=253.6,k=.03348,t0=32.02158))

aad_mon_est <- data.frame(tidy(nls.mon)) 
mon_A_est  <- as.numeric(aad_mon_est[1, "estimate"])
mon_k_est  <- as.numeric(aad_mon_est[2, "estimate"])
mon_t0_est <- as.numeric(aad_mon_est[3, "estimate"])

nls.von <- nls(wt ~A*(1-(1/3)*exp(k*(t0-age)))^3, 
    data=df1,
    start=list(A=253.6,k=.03348,t0=32.02158))

aad_von_est <- data.frame(tidy(nls.von))
von_A_est  <- as.numeric(aad_von_est[1, "estimate"])
von_k_est  <- as.numeric(aad_von_est[2, "estimate"])
von_t0_est <- as.numeric(aad_von_est[3, "estimate"])

是否有办法让循环遍历每个 Dataframe （df1、df2、df3等），然后保存aad_arc_B_est、aad_arc_k_est和aad_arc_mx_est？
我们希望输出如下所示：

dataframe  model     A_est   k_est  t0_est
df1        nls.mon   250     10     0.14   
df1        nls.von   350     12     0.13   
df2        nls.mon   150     11     0.15   
df2        nls.von   240     14     0.16
df3        nls.mon   220     11     0.11   
df3        nls.von   450     15     0.10

我们正在考虑使用一个索引--类似于for (i in dataframe)，让它遍历每个 Dataframe ，
dataframe[i,] <- row_i在？后面追加每行
但是，也许有更好的办法呢？

来源：https://stackoverflow.com/questions/71754675/loop-to-run-through-multiple-datasets-and-save-each-output-in-r

3条答案

按热度按时间

a5g8bdjr1#

您是否考虑过将代码更改为函数？您可以将所有data. frame存储在一个命名列表中，然后对列表中的每个data.frame应用函数，然后收集结果。

library(tidyverse)
library(broom)

# changing your code to a function
my_function <- function(.df){
  
  nls.mon <- nls(wt~A*(1-exp(k*(t0-age))), 
                 data=.df,
                 start = list(A=253.6,k=.03348,t0=32.02158))
  
  nls.von <- nls(wt ~A*(1-(1/3)*exp(k*(t0-age)))^3, 
                 data=.df,
                 start= list(A=253.6,k=.03348,t0=32.02158))
  
  # I slightly edited this part of your code
  df <- bind_rows(
    tidy(nls.mon) %>% mutate(model = "nls.mon"),
    tidy(nls.von) %>% mutate(model = "nls.von")
  ) %>%
    select(model, term, estimate) %>%
    pivot_wider(names_from = "term", values_from = "estimate")

  return(df)
}

# reading in data, the path to the data needs to be changed
df1 <- read_csv(r"{C:\Users\novot\Downloads\sample.csv}") %>%
  select(-1)

df2 <- df1 %>%
  filter(sex == "M")

# using map to apply the created function to each member of the list
df_out <- list("df1" = df1, "df2" = df2) %>%
  map(
    ~my_function(.x)
  ) %>%
  bind_rows(.id = "dataframe")

df_out
#> # A tibble: 4 x 5
#>   dataframe model       A      k    t0
#>   <chr>     <chr>   <dbl>  <dbl> <dbl>
#> 1 df1       nls.mon  248. 0.0135  2.09
#> 2 df1       nls.von  246. 0.0222 32.9 
#> 3 df2       nls.mon  248. 0.0135  2.09
#> 4 df2       nls.von  246. 0.0222 32.9

赞(0）回复(0）举报 2022-12-06

zaqlnxep2#

您可以将代码 Package 在函数中，然后将该函数应用于 Dataframe 列表

f <- function(n,dfs) {
  nls.aad_arc <- nls(wt~ B*atan(k*(age - mx)) + my, data=dfs[[n]],start = list(B=120, k=.02, mx=30, my= 82.06)) 
  aad_arc_est <- data.frame(tidy(nls.aad_arc)) 
  data.frame(
    dataframe = n,
    aad_arc_B_est  <- as.numeric(aad_arc_est[1, "estimate"]), 
    aad_arc_k_est  <- as.numeric(aad_arc_est[2, "estimate"]),
    aad_arc_mx_est <- as.numeric(aad_arc_est[3, "estimate"])  
  )
}

dfs = list('df1'=df1, 'df2'=df2,'df3' = df3)
do.call(rbind, lapply(names(dfs), function(x) f(x,dfs)))

赞(0）回复(0）举报 2022-12-06

nnsrf1az3#

最适合我们的解决方案是这个（它有点长，但也很灵活/用户友好）。
我们不仅对初始参数（A，t0，k）进行了计算，而且还可以应用于反正切函数之外的其他方程。

#Dataframe and variable names 
dfs      <- list(aad, fad, mad, fnm, fma, mnm, mma) #List of dataframes for analysis 
df_names <- list('aad', 'fad', 'mad', 'fnm', 'fma', 'mnm', 'mma') #list of dataframe names
p_names  <- list('df', 'mod', 'A_val', 'A_val_se', 'B_val', 'B_val_se', 'k_val', 'k_val_se', 't0_val', 't0_val_se', 'm_val', 'm_val_se', 'mx_val', 'mx_val_se', 'my_val', 'my_val_se', 'ran_A_max', 'ran_A_min', 'ran_B_max', 'ran_B_min', 'ran_k_max', 'ran_k_min', 'ran_t0_max', 'ran_t0_min', 'ran_m_max', 'ran_m_min', 'ran_mx_max', 'ran_mx_min', 'ran_my_max', 'ran_my_min') #List of parameters and values
m_names  <- list('arc') #List of model names 
df_num   <- length(dfs) #Number of dataframes
var_num  <- length(p_names) #Number of variables in our table  
mod_num  <- length(m_names) #Number of growth models

#nlme direct output variable names 
arc_names <- list("arc_aad", "arc_fad", "arc_mad", "arc_fnm", "arc_fma", "arc_mnm", "arc_mma")   #list of variable names for nlme.arc outputs

#Creates blank dataframe 
s_pmod <- data.frame(matrix(ncol = var_num, nrow = df_num*mod_num)) #Sets dataframe size from number of models, parameters, and datasets  
colnames(s_pmod) <- p_names #Sets column names 
s_pmod #View blank dataframe 

#Creates dataframe for nlme.arc outputs
nlme_arc.out  <- data.frame(matrix(ncol = df_num, nrow = 1)) #Sets dataframe size from number of models, parameters, and datasets  
colnames(s_pmod) <- p_names #Sets column names 

#Runs each model over all dataframes and appends values into s_pmod    
for (i in 1:df_num) {    
#print(dfs[[i]]) #View full dataframes  

#Arctangent     
#nls fit for determining best initial guess for nlme model 
nls.arc <- nls(wt~ B*atan(k*(age - mx)) + my, #Function
    data = dfs[[i]], #Input dataframe 
    start = list(B = 120, k = .02, mx = 30, my= 82.2)) #Initial guesses  

arc_est <- data.frame(tidy(nls.arc)) #Converts nls output into dataframe
arc_A_est  <- as.numeric(arc_est[1, "estimate"]) #Extracts parameters from dataframe for initial guesses
arc_k_est  <- as.numeric(arc_est[2, "estimate"])
arc_mx_est <- as.numeric(arc_est[3, "estimate"])
arc_my_est <- as.numeric(arc_est[4, "estimate"]) 

#non linear least squares fit
nlme.arc <- nlme(wt ~ (B*atan(k*(age - mx)) + my), #Function
    data=dfs[[i]], #Input dataset 
    fixed=B+k+mx+my~1, #Fixed effects
    random=list(squirrel_id = pdDiag(B+k+mx+my~1)), #pdDiag specifies random affects are uncorrilated  
    start=c(B=arc_A_est, k=arc_k_est, mx=arc_mx_est, my=arc_my_est), #Initial guesses from nls calcuations
    na.action=na.omit, #Omit any NA values
    control=nlmeControl(maxIter=200, pnlsMaxIter=10, msMaxIter=100)) #Maximum number of itterations before determined divergent

#Assign nlme.arc outputs to a unique variable name
assign(arc_names[[i]], nlme.arc)   

#Extracts and define parameters and from nlme output and appends into s_pmod
arc_out <- data.frame(tidy(nlme.arc)) #Creates df with nlme model outputs
s_pmod[(1+mod_num*(i-1)), 'B_val']  <- as.numeric(arc_out[1, "estimate"]) #Extracts individual model outputs as numerics 
s_pmod[(1+mod_num*(i-1)), 'k_val']  <- as.numeric(arc_out[2, "estimate"])  
s_pmod[(1+mod_num*(i-1)), 'mx_val'] <- as.numeric(arc_out[3, "estimate"])
s_pmod[(1+mod_num*(i-1)), 'my_val'] <- as.numeric(arc_out[4, "estimate"])
s_pmod[(1+mod_num*(i-1)), 'A_val']  <- ((as.numeric(arc_out[1, "estimate"])*pi)/2)+ as.numeric(arc_out[4, "estimate"]) #Calculates and appends upper asymptote for arctangent

#Appends parameter standard error measurements into s_pmod
arc_out <- data.frame(tidy(nlme.arc)) #Creates df with nlme model outputs
s_pmod[(1+mod_num*(i-1)), 'B_val_se']  <- as.numeric(arc_out[1, "std.error"]) #Extracts parameter standard error and appends into s_pmod  
s_pmod[(1+mod_num*(i-1)), 'k_val_se']  <- as.numeric(arc_out[2, "std.error"])  
s_pmod[(1+mod_num*(i-1)), 'mx_val_se'] <- as.numeric(arc_out[3, "std.error"])
s_pmod[(1+mod_num*(i-1)), 'my_val_se'] <- as.numeric(arc_out[4, "std.error"])
s_pmod[(1+mod_num*(i-1)), 'A_val_se']  <- (as.numeric(arc_out[1, "std.error"])*pi)/2 #Calculates and appends standard error of parameter A

#Extracts random effects and appends values into s_pmod 
arc_ran <- ranef(nlme.arc) #Extracts all random effects in the dataframe 
s_pmod[(1+mod_num*(i-1)), 'ran_B_max']  <- max(arc_ran$B) #Extracts and appends maximum random effect for parameter
s_pmod[(1+mod_num*(i-1)), 'ran_B_min']  <- min(arc_ran$B) #Extracts and appends minimum random effect for parameter 
s_pmod[(1+mod_num*(i-1)), 'ran_k_max']  <- max(arc_ran$k)
s_pmod[(1+mod_num*(i-1)), 'ran_k_min']  <- min(arc_ran$k)
s_pmod[(1+mod_num*(i-1)), 'ran_mx_max'] <- max(arc_ran$mx)
s_pmod[(1+mod_num*(i-1)), 'ran_mx_min'] <- min(arc_ran$mx)
s_pmod[(1+mod_num*(i-1)), 'ran_my_max'] <- max(arc_ran$my)
s_pmod[(1+mod_num*(i-1)), 'ran_my_min'] <- min(arc_ran$my)

s_pmod[(1+mod_num*(i-1)), 'df']         <- df_names[i] #Appends dataframe name

}#End of loop

输出如下：

s_pmod %>% filter(mod=="arc")
   df mod    A_val  A_val_se    B_val  B_val_se      k_val     k_val_se t0_val t0_val_se m_val m_val_se   mx_val mx_val_se   my_val my_val_se ran_A_max ran_A_min ran_B_max ran_B_min   ran_k_max
1 aad arc 253.9916 0.7015467 107.1339 0.4466185 0.02322817 0.0001367976     NA        NA    NA       NA 38.51732 0.2957660 85.70604 0.5693539        NA        NA  33.14024 -31.18713 0.005225140
2 fad arc 260.7412 1.1965906 144.9240 0.7617732 0.01653624 0.0001315679     NA        NA    NA       NA 13.69431 0.4264706 33.09517 1.0042012        NA        NA  39.99318 -30.58696 0.011562276
3 mad arc 259.7367 0.8902014 105.3544 0.5667198 0.02458354 0.0002119215     NA        NA    NA       NA 42.28176 0.3671864 94.24638 0.7103262        NA        NA  19.11504 -31.07500 0.005268441
4 fnm arc 250.3786 1.1041547 105.6588 0.7029267 0.02316940 0.0002086655     NA        NA    NA       NA 38.23024 0.4760433 84.41015 0.9010855        NA        NA  22.95082 -20.57937 0.003980173
5 fma arc 260.0539 2.0168075 148.5924 1.2839396 0.01517521 0.0002634139     NA        NA    NA       NA 10.76892 0.7676714 26.64548 1.7216544        NA        NA  40.69058 -28.85547 0.011894187
6 mnm arc 260.5745 1.0445654 104.6731 0.6649910 0.02487617 0.0002508612     NA        NA    NA       NA 43.30369 0.4131035 96.15442 0.7882765        NA        NA  18.67611 -33.10133 0.005059236
7 mma arc 258.7071 1.6936251 107.2850 1.0781952 0.02405637 0.0003971414     NA        NA    NA       NA 40.17088 0.7339977 90.18416 1.4159323        NA        NA  19.59260 -18.45392 0.005446648
     ran_k_min ran_t0_max ran_t0_min ran_m_max ran_m_min      ran_mx_max       ran_mx_min      ran_my_max      ran_my_min
1 -0.007378940         NA         NA        NA        NA 0.0000105015210 -0.0000150388369 21.817420360305 -19.49415109139
2 -0.012402311         NA         NA        NA        NA 0.0000024081791 -0.0000027762636  0.000001706597  -0.00000156346
3 -0.009515597         NA         NA        NA        NA 1.6232147679678 -2.1681922266485 24.362426128753 -24.18797605980
4 -0.004196356         NA         NA        NA        NA 0.0000244818069 -0.0000307554261 15.925214720426 -13.92832902269
5 -0.011169350         NA         NA        NA        NA 0.0000007861126 -0.0000006581591  0.000124799561  -0.00012778252
6 -0.009676215         NA         NA        NA        NA 5.8551715194943 -6.5436633258442 18.513325143935 -22.03287338292
7 -0.005949639         NA         NA        NA        NA 0.0000019910474 -0.0000021254555 20.710967774449 -13.82431195267

赞(0）回复(0）举报 2022-12-06

我来回答

循环运行多个数据集并将每个输出保存在R中

3条答案

相关问题

热门标签

最新问答