向量化多个for循环

我有一个求和，我可以使用四个for循环计算，但我想知道是否可以简化，也许使用一个向量化函数，以减少计算时间。kronecker(x, x)），或者可能是使用outer的东西？
总和为：

其中，E 是1 - 9范围内的整数样本空间。i 和 j 索引也是1 - 9范围内的整数。因此，f、***g***和***h***都是维数为9 x9的矩阵。
h***矩阵是固定的，我有它，但是我模拟了***g***很多次，然后我选择了一个最小化另一个函数的矩阵，问题是，1000次模拟，太少了，大约需要1秒，我真的想尝试100万次，但是这么多次会花很长时间。
我在函数中有for循环：

sim <- function(y, nreps, h) {

  G <- vector("list", nreps)     # list containing random values from Dirichlet distribution
  F <- vector("list", nreps)     # list containing the f matrices
  M <- vector("numeric", nreps)  # vector to store the results

  require(gtools)

  for(n in 1:nreps) {
    f <- matrix(0, nrow=9, ncol=9)        # initialize f
    g <- gtools::rdirichlet(9, rep(1,9))  # simulate g
    
    for(i in 1:9) {
      for(j in 1:9) {
        for(k in 1:9) {
          for(l in 1:9) {
            f[i,j] <- f[i,j] + h[i,k] * h[j,l] * g[k,l]  # summation (see above)
          }
        }
      }
    }
    F[[n]] <- f  # store f matrix
    G[[n]] <- g  # store g matrix
    M[n] <- sum((y - f)^2) # sum of squared differences between y and f
  }
  m <- which.min(M)  # which M is the minimum?
  return(list(g=G[[m]], m=M[m]))
}

我调用这个函数

sim(y=f.y1, nreps=1000, h=x)

数据如下：

> dput(f.y1)
structure(c(0.0182002022244692, 0.0121334681496461, 0.0101112234580384, 
0, 0, 0, 0, 0, 0, 0.0485338725985844, 0.0940343781597573, 0.112234580384226, 
0.0434782608695652, 0.00910010111223458, 0.00101112234580384, 
0, 0, 0, 0.0333670374115268, 0.110212335692619, 0.132457027300303, 
0.0808897876643074, 0.0222446916076845, 0.0070778564206269, 0.00101112234580384, 
0, 0, 0.0070778564206269, 0.0202224469160768, 0.0596562184024267, 
0.0616784630940344, 0.0262891809908999, 0.0070778564206269, 0, 
0, 0, 0.00202224469160768, 0.00505561172901921, 0.0151668351870576, 
0.0182002022244692, 0.0111223458038423, 0.00404448938321537, 
0, 0, 0, 0.00202224469160768, 0.00404448938321537, 0.00505561172901921, 
0.00505561172901921, 0.00202224469160768, 0.00202224469160768, 
0, 0, 0, 0, 0.00202224469160768, 0.00202224469160768, 0.00202224469160768, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0), class = "table", dim = c(9L, 9L), dimnames = structure(list(
    c("0", "1", "2", "3", "4", "5", "6", "7", "8"), c("0", "1", 
    "2", "3", "4", "5", "6", "7", "8")), names = c("", "")))

> dput(x)
structure(c(0.61, 0.16, 0.03, 0.005, 0, 0, 0, 0, 0, 0.32, 0.61, 
0.16, 0.03, 0.005, 0, 0, 0, 0, 0.06, 0.16, 0.61, 0.16, 0.03, 
0.005, 0, 0, 0, 0.01, 0.06, 0.16, 0.61, 0.16, 0.03, 0.01, 0, 
0, 0, 0.01, 0.03, 0.16, 0.61, 0.16, 0.03, 0.01, 0, 0, 0, 0.01, 
0.03, 0.16, 0.61, 0.16, 0.06, 0.01, 0, 0, 0, 0.005, 0.03, 0.16, 
0.61, 0.16, 0.06, 0, 0, 0, 0, 0.005, 0.03, 0.16, 0.61, 0.32, 
0, 0, 0, 0, 0, 0.005, 0.03, 0.16, 0.61), dim = c(9L, 9L))

您还需要为rdirichlet函数加载gtools包。

library(gtools)

幸运的是，这个特殊的例子只是“简单的”矩阵乘法，所以可以很容易地向量化为：

sim1 <- function(y, nreps, h) {

  G <- vector("list", nreps)     # list containing random values from Dirichlet distribution
  F <- vector("list", nreps)     # list containing the f matrices
  M <- vector("numeric", nreps)  # vector to store the results

  require(gtools)

  for(n in 1:nreps) {
    g <- gtools::rdirichlet(9, rep(1,9))  # simulate g
    f <- h %*% g %*% t(h)
    
    F[[n]] <- f  # store f matrix
    G[[n]] <- g  # store g matrix
    M[n] <- sum((y - f)^2) # sum of squared differences between y and f
  }
  m <- which.min(M)  # which M is the minimum?
  return(list(g=G[[m]], m=M[m]))
}

运行函数进行比较

#Original version
set.seed(0)
system.time(a <- sim(y=f.y1, nreps=1000, h=x))
#   user  system elapsed 
#   0.97    0.03    1.00 

#revised version
set.seed(0)
system.time(b <- sim1(y=f.y1, nreps=1000, h=x))
#   user  system elapsed 
#   0.01    0.00    0.02 

#Check they give the same answer
all.equal(a, b)  
#[1] TRUE

向量化多个for循环

1条答案

相关问题

热门标签

最新问答