import numpy as np
import xarray as xr
# Micro-benchmark: element-wise addition of two 1000x1000 arrays,
# once on plain NumPy arrays and once on xarray DataArrays.
# Local import so this section runs on its own in a plain Python script.
import timeit

# Create two large NumPy arrays. They keep their own names so that the
# NumPy timing below really operates on ndarrays; previously `a` and `b`
# were rebound to DataArrays before the "NumPy" timing ran.
a_np = np.random.rand(1000, 1000)
b_np = np.random.rand(1000, 1000)
# Compute the sum using NumPy.
c_np = a_np + b_np
# Wrap the same data in xarray DataArrays so both timings add identical values.
a = xr.DataArray(a_np, dims=['x', 'y'])
b = xr.DataArray(b_np, dims=['x', 'y'])
# Compute the sum using xarray.
c = a + b
# Time both additions with the timeit module. The former `%timeit` lines
# are IPython magics and are not valid syntax outside IPython/Jupyter.
n_loops = 100
xarray_add_s = timeit.timeit(lambda: a + b, number=n_loops) / n_loops
numpy_add_s = timeit.timeit(lambda: np.add(a_np, b_np), number=n_loops) / n_loops
print(f"xarray add: {xarray_add_s * 1e3:.2f} ms per loop")
print(f"NumPy add: {numpy_add_s * 1e3:.2f} ms per loop")
# performance comparison using memory_profiler and timeit
import timeit
from memory_profiler import memory_usage
# define the function for the xarray approach
def xarray_approach():
    """Compute monthly means of ``temperature.nc`` with xarray.

    Opens ``temperature.nc``, resamples along the ``time`` dimension with
    frequency ``'1M'``, averages each bin, and writes the result to
    ``monthly_mean_temperature.nc``.

    Returns:
        None. The result is written to disk.
    """
    # The context manager closes the input file even if resampling or
    # writing raises; the original left the dataset open.
    with xr.open_dataset('temperature.nc') as ds:
        ds_monthly = ds.resample(time='1M').mean(dim='time')
        ds_monthly.to_netcdf('monthly_mean_temperature.nc')
# define the function for the numpy approach
def numpy_approach():
    """Compute 30-step block means of ``temperature.nc`` with NumPy.

    Reads the ``temperature`` variable (indexed as time, lat, lon), averages
    every 30 consecutive time steps, and writes the result to
    ``monthly_mean_temperature.nc`` as a ``temperature`` variable with an
    unlimited ``time`` dimension.

    Note that fixed 30-step blocks only approximate calendar months, so the
    output is not guaranteed to match a calendar-aware monthly resample.

    Returns:
        None. The result is written to disk.

    Raises:
        ValueError: If the number of time steps is not a multiple of 30
            (raised by the reshape).
    """
    # Function-scope import: the body uses ``nc`` but no import of it is
    # visible in this file. The calls below (Dataset, createDimension,
    # createVariable) match the netCDF4 package API.
    import netCDF4 as nc

    # Slicing with [:] loads the data into memory, so the input file can be
    # closed before the computation starts.
    with nc.Dataset('temperature.nc', 'r') as f:
        t = f.variables['temperature'][:]

    # Split the time axis into blocks of 30 while keeping the spatial axes,
    # then average within each block. The result has shape
    # (n_blocks, lat, lon), matching the output variable's dimensions.
    # (The previous target shape (-1, 30, 12) discarded the lat/lon axes.)
    t_monthly = np.mean(np.reshape(t, (-1, 30) + t.shape[1:]), axis=1)

    # The context manager guarantees the output file is flushed and closed
    # even if writing fails part-way.
    with nc.Dataset('monthly_mean_temperature.nc', 'w') as g:
        g.createDimension('time', None)
        g.createDimension('lat', t.shape[1])
        g.createDimension('lon', t.shape[2])
        t_var = g.createVariable('temperature', 'f4', ('time', 'lat', 'lon'))
        t_var[:] = t_monthly
# Measure the memory usage and computation time of the xarray approach.
# memory_usage() and timeit.timeit() each call the function, so it runs
# twice and the output file is written twice.
xarray_memory_usage = memory_usage(xarray_approach)
xarray_time = timeit.timeit(xarray_approach, number=1)
# Measure the memory usage and computation time of the numpy approach.
# This overwrites the output file produced by the xarray run above.
numpy_memory_usage = memory_usage(numpy_approach)
numpy_time = timeit.timeit(numpy_approach, number=1)
# Print the results: peak sampled memory and single-run wall time.
print(f"Memory usage: xarray={max(xarray_memory_usage):.2f} MB, numpy={max(numpy_memory_usage):.2f} MB")
# The closing quote and parenthesis were missing on this line.
print(f"Computation time: xarray={xarray_time:.2f} s, numpy={numpy_time:.2f} s")
1条答案
按热度按时间5jvtdoz21#
当比较xarray和NumPy的性能时,需要注意的是,xarray是构建在NumPy之上的,并且继承了NumPy的许多性能特征。通常,对于涉及大型数组的简单数值计算,NumPy可能更快、更高效;而xarray更适合处理带标签的数组或多维数组,尤其是数据存在缺失或不完整的更复杂任务。
在我的机器上,xarray版本每个循环大约需要8.46毫秒,而NumPy版本每个循环大约需要1.4毫秒。这表明NumPy在涉及大型数组的简单数值运算方面比xarray更快,更有效。
但是,需要注意的是,xarray提供了NumPy所不具备的额外功能,例如对带标签的数组和缺失数据的支持。如果您的任务涉及处理带标签或不完整的数据,xarray可能是更好的选择,尽管它的性能稍慢。