R语言 如何快速完成大矩阵与向量的逐元乘法?

c90pui9n  于 2023-01-22  发布在  其他
关注(0)|答案(4)|浏览(167)

我想做一个向量与一个非常大的二进制矩阵的每一行的元素乘法。我的向量的长度等于我的矩阵的列数。我已经实现了使用循环如下,但它非常慢。有人知道解决方案,以加快它?

A <- c()
M # Binary matric
W <- matrix(0, nrow=nrow(M), ncol=ncol(M))
W <- data.frame(W)
for (i in 1:nrow(W)) {
    W[i,] <- M[i,] * A
}
hfwmuf9z

hfwmuf9z1#

这个公认的答案肯定不是R上最快的方法。这个方法要快得多,我不知道它是不是最快的方法:

M * outer(rep.int(1L, nrow(M)), v)

还要注意,collapse::TRA提供了针对矩阵和 Dataframe 两者的基于C/C++的解决方案(其另外支持分组扫描操作)。

library(microbenchmark)
library(collapse)

all_obj_equal(t(t(M) * v), M * outer(rep.int(1L, nrow(M)), v), TRA(M, v, "*"))
[1] TRUE

microbenchmark(t(t(M) * v), M * outer(rep.int(1L, nrow(M)), v), TRA(M, v, "*"), times = 100, unit = "relative")
Unit: relative
                                 expr       min        lq      mean    median        uq      max neval cld
                          t(t(M) * v) 16.256563 12.703469 10.771698 12.113859 10.148008 8.365288   100   c
   M * outer(rep.int(1L, nrow(M)), v)  1.097177  1.449713  1.375522  1.426085  1.273911 1.785404   100  b 
                       TRA(M, v, "*")  1.000000  1.000000  1.000000  1.000000  1.000000 1.000000   100 a

(Note:M,此处为尺寸4915 x 4915的Eora 26 ICIO Matrix 2015,v为1/输出)

2022年更新

我发现tcrossprod()通常比outer()快一点,特别是对于较小的数据,因为它是.Internal,而outer有一些R开销。关于包的解决方案,collapse 现在引入了操作符%r*%等来有效地做到这一点。还有setop函数和运算符%+=%等,通过引用进行数学运算,因此这里对生成的数据进行了新的基准测试。示出了对于该特定问题,通过引用的C级数学比有效基数R快一个数量级或量级。

library(collapse) # 1.8+
library(Rfast)
library(microbenchmark)
M <- matrix(rnorm(1e6), 1000) # 1000 x 1000 matrix
v <- rnorm(1000)              # 1000 x 1 vector

# Testing numeric equality of methods
all_obj_equal(t(t(M) * v),
              M %r*% v, 
              M * tcrossprod(rep.int(1L, nrow(M)), v), 
              M * outer(rep.int(1L, nrow(M)), v), 
              eachrow(M, v, "*"), 
              setop(M, "*", v, rowwise = TRUE))
#> [1] TRUE

# Undo modification by reference
setop(M, "/", v, rowwise = TRUE)

# Benchmark
microbenchmark(collapse_ref = setop(M, "*", v, rowwise = TRUE),
               collapse_ref_rev = setop(M, "/", v, rowwise = TRUE), # Need to reverse operation by reference each iteration
               collapse_op = M %r*% v, 
               tcrossprod = M * tcrossprod(rep.int(1L, nrow(M)), v), 
               outer = M * outer(rep.int(1L, nrow(M)), v), 
               Rfast = eachrow(M, v, "*"), times = 10000)
#> Unit: microseconds
#>              expr      min        lq      mean   median        uq        max neval
#>      collapse_ref  167.731  185.8530  310.1048  212.175  317.3605  16423.616 10000
#>  collapse_ref_rev  188.272  205.4510  323.1454  231.527  326.1960   7483.115 10000
#>       collapse_op  183.926  925.7595 1794.6258 1157.738 1666.4040  59253.815 10000
#>        tcrossprod 1107.779 1959.0005 3118.5997 2324.700 3302.3040 182811.579 10000
#>             outer 1112.904 1981.9195 3096.3271 2345.200 3298.9215  90762.766 10000
#>             Rfast  178.432  925.1445 1886.5011 1151.710 1703.3655 103405.239 10000

reprex package(v2.0.1)于2022年8月17日创建

jtw3ybtb

jtw3ybtb2#

使用向量循环,因为矩阵是用列填充的,所以需要转置:

t(t(M) * v)
weylhg0b

weylhg0b3#

您可以使用Rfast::eachrow,它的速度相当快。

M<-Rfast::matrnorm(1000,1000)
v=rnorm(1000)

Rfast2::benchmark(Rfast::eachrow(M,v,"*"),M * outer(rep.int(1L, nrow(M)), v),M * Rfast::Outer(v, rep.int(1, nrow(M))),times = 100)
   milliseconds 
                                            min      mean      max
Rfast::eachrow(M, v, "*")                2.4790  4.440305  12.3601
M * outer(rep.int(1L, nrow(M)), v)       5.7612 25.952871 807.1748
M * Rfast::Outer(v, rep.int(1, nrow(M))) 3.4185  7.525743 204.3348
xpcnnkqh

xpcnnkqh4#

其他一些 base 选项可能是。

sweep(M, 2, v, `*`)
M * v[col(M)]
M * rep(v, each=nrow(M))
M * matrix(v, nrow(M), ncol(M), TRUE)
  • base* 选项的基准。
METHODS <- alist(
rep    = M * rep(v, each=nrow(M)),
tt     = t(t(M) * v),
sweep  = sweep(M, 2, v, `*`),
col    = M * v[col(M)],
matrix = M * matrix(v, nrow(M), ncol(M), TRUE),
outer  = M * outer(rep.int(1L, nrow(M)), v),
tcrosp = M * tcrossprod(rep.int(1L, nrow(M)), v)  )

set.seed(42)
M <- matrix(rnorm(1e6), 1000)
v <- M[1,]
bench::mark(exprs = METHODS)
#  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
#1 rep        15.86ms 15.96ms      61.7    7.63MB     15.4    24     6      389ms
#2 tt          5.02ms  5.45ms     173.    15.26MB     78.5    55    25      319ms
#3 sweep       5.62ms  5.93ms     155.    15.29MB     71.3    48    22      309ms
#4 col            4ms  4.24ms     214.    11.45MB     75.5    71    25      331ms
#5 matrix       2.9ms  3.15ms     306.     7.63MB     53.3   115    20      376ms
#6 outer       1.67ms  2.65ms     373.     7.67MB     66.8   145    26      389ms
#7 tcrosp      1.67ms  2.64ms     369.     7.64MB     66.3   139    25      377ms

set.seed(42)
M <- matrix(rnorm(1e6), 100000)
v <- M[1,]
bench::mark(exprs = METHODS)
#  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
#1 rep        15.88ms 15.92ms      62.7    7.63MB     12.1    26     5      415ms
#2 tt          5.39ms  6.07ms     166.    15.26MB     75.1    53    24      319ms
#3 sweep       6.08ms  6.32ms     156.    15.26MB     78.1    44    22      282ms
#4 col         3.25ms  3.32ms     298.    11.44MB     95.3    97    31      325ms
#5 matrix      2.25ms  2.29ms     433.     7.63MB     79.0   159    29      367ms
#6 outer       1.82ms  1.89ms     520.     8.77MB    118.    177    40      340ms
#7 tcrosp      1.81ms  1.88ms     495.     8.77MB    106.    173    37      349ms

set.seed(42)
M <- matrix(rnorm(1e6), 10)
v <- M[1,]
bench::mark(exprs = METHODS)
#  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
#1 rep        15.88ms 15.94ms      62.4    7.63MB     12.0    26     5      417ms
#2 tt          5.39ms  6.08ms     164.    15.26MB     74.4    53    24      322ms
#3 sweep       4.12ms  4.22ms     223.    15.26MB     76.3    73    25      328ms
#4 col         4.02ms  4.17ms     234.    11.44MB     49.1    86    18      367ms
#5 matrix      4.11ms  4.39ms     227.     7.63MB     28.0    97    12      428ms
#6 outer       2.02ms  2.11ms     423.     7.63MB     59.0   172    24      407ms
#7 tcrosp      2.03ms  2.12ms     419.     7.63MB     56.5   163    22      389ms

即使对于1e6矩阵,使用的时间也是一些ms。如果经常执行此操作,则会产生影响。

相关问题