#include <iostream>
#include <chrono>
#include <Eigen/Dense>
// Micro-benchmark: times three ways of raising every coefficient of a
// 300 x 50 Eigen array to the fourth power and prints the average time per
// evaluation, in microseconds, for each variant:
//   1. m.square().square()
//   2. m * m * m * m
//   3. m.pow(4)
// Each section keeps its iteration count in a single named constant so the
// loop bound and the divisor used for the average cannot get out of sync.
int main() {
    // 300 x 50 array built from the 15000 evenly spaced values 0 .. 14999.
    Eigen::ArrayXXd m = Eigen::ArrayXd::LinSpaced(300 * 50, 0, 300 * 50 - 1).reshaped(300, 50);

    // Variant 1: two chained coefficient-wise squares.
    {
        constexpr int kRuns = 100000;
        decltype(m) result; // destination for every evaluation; intended to keep the loop from being eliminated
        const auto start = std::chrono::steady_clock::now();
        for (int i = 0; i < kRuns; ++i)
        {
            result = m.square().square();
        }
        const auto end = std::chrono::steady_clock::now();
        const auto total_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        std::cout << "Per run(microsecond)=" << static_cast<double>(total_us) / kRuns << '\n';
    }

    // Variant 2: three coefficient-wise multiplications.
    {
        constexpr int kRuns = 100000;
        decltype(m) result; // destination for every evaluation; intended to keep the loop from being eliminated
        const auto start = std::chrono::steady_clock::now();
        for (int i = 0; i < kRuns; ++i)
        {
            result = m * m * m * m;
        }
        const auto end = std::chrono::steady_clock::now();
        const auto total_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        std::cout << "Per run(microsecond)=" << static_cast<double>(total_us) / kRuns << '\n';
    }

    // Variant 3: generic coefficient-wise power. Uses ten times fewer
    // iterations than the other two variants.
    {
        constexpr int kRuns = 10000;
        decltype(m) result; // destination for every evaluation; intended to keep the loop from being eliminated
        const auto start = std::chrono::steady_clock::now();
        for (int i = 0; i < kRuns; ++i)
        {
            result = m.pow(4);
        }
        const auto end = std::chrono::steady_clock::now();
        const auto total_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        std::cout << "Per run(microsecond)=" << static_cast<double>(total_us) / kRuns << '\n';
    }

    return 0;
}
m.square().square();
Per run(microsecond)=17.9101
m * m * m * m;
Per run(microsecond)=10.3267
m.pow(4);
Per run(microsecond)=431.636
2条答案
按热度按时间wkftcu5l1#
经过一整天的调试,我意识到这是我代码中的瓶颈。文档中说明 `a.pow()` 没有 SIMD 实现。不管什么原因,实际上在我的机器上,对于 300 x 50 的 Eigen::ArrayXXd,`a * a * a * a` 似乎更快。
vc6uscn92#
对于 C++17 的 `if constexpr`,这是可能的,否则就不可能了,所以目前,`a.pow(x)` 等价于为每个 `i` 调用 `std::pow(a[i], x)`。