我有一个数据框形式的列,其中包含一些数字的比率。在该df列上,我想使用df.apply()方法应用hurst函数。
我不知道这个错误是df.apply
还是hurst_function
,考虑一下使用df.apply方法计算col上的hurst指数的代码:
import hurst
def hurst_function(df_col_slice):
display(df_col_slice)
return hurst.compute_Hc(df_col_slice)
def func(df_col):
results = round(df_col.rolling(101).apply(hurst_function)[100:],1)
return results
func(df_col)
我得到错误:
Input In [73], in func(df_col)
---> 32 results = round(df_col.rolling(101).apply(hurst_function)[100:],1)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:1843, in Rolling.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
1822 @doc(
1823 template_header,
1824 create_section_header("Parameters"),
(...)
1841 kwargs: dict[str, Any] | None = None,
1842 ):
-> 1843 return super().apply(
1844 func,
1845 raw=raw,
1846 engine=engine,
1847 engine_kwargs=engine_kwargs,
1848 args=args,
1849 kwargs=kwargs,
1850 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:1315, in RollingAndExpandingMixin.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
1312 else:
1313 raise ValueError("engine must be either 'numba' or 'cython'")
-> 1315 return self._apply(
1316 apply_func,
1317 numba_cache_key=numba_cache_key,
1318 numba_args=numba_args,
1319 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:590, in BaseWindow._apply(self, func, name, numba_cache_key, numba_args, **kwargs)
587 return result
589 if self.method == "single":
--> 590 return self._apply_blockwise(homogeneous_func, name)
591 else:
592 return self._apply_tablewise(homogeneous_func, name)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:442, in BaseWindow._apply_blockwise(self, homogeneous_func, name)
437 """
438 Apply the given function to the DataFrame broken down into homogeneous
439 sub-frames.
440 """
441 if self._selected_obj.ndim == 1:
--> 442 return self._apply_series(homogeneous_func, name)
444 obj = self._create_data(self._selected_obj)
445 if name == "count":
446 # GH 12541: Special case for count where we support date-like types
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:431, in BaseWindow._apply_series(self, homogeneous_func, name)
428 except (TypeError, NotImplementedError) as err:
429 raise DataError("No numeric types to aggregate") from err
--> 431 result = homogeneous_func(values)
432 return obj._constructor(result, index=obj.index, name=obj.name)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:582, in BaseWindow._apply.<locals>.homogeneous_func(values)
579 return func(x, start, end, min_periods, *numba_args)
581 with np.errstate(all="ignore"):
--> 582 result = calc(values)
584 if numba_cache_key is not None:
585 NUMBA_FUNC_CACHE[numba_cache_key] = func
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:579, in BaseWindow._apply.<locals>.homogeneous_func.<locals>.calc(x)
571 start, end = window_indexer.get_window_bounds(
572 num_values=len(x),
573 min_periods=min_periods,
574 center=self.center,
575 closed=self.closed,
576 )
577 self._check_window_bounds(start, end, len(x))
--> 579 return func(x, start, end, min_periods, *numba_args)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\window\rolling.py:1342, in RollingAndExpandingMixin._generate_cython_apply_func.<locals>.apply_func(values, begin, end, min_periods, raw)
1339 if not raw:
1340 # GH 45912
1341 values = Series(values, index=self._on)
-> 1342 return window_func(values, begin, end, min_periods)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\_libs\window\aggregations.pyx:1315, in pandas._libs.window.aggregations.roll_apply()
TypeError: must be real number, not tuple
我能做些什么来解决这个问题?
编辑:display(df_col_slice)
给出以下输出:
0 0.282043
1 0.103355
2 0.537766
3 0.491976
4 0.535050
...
96 0.022696
97 0.438995
98 -0.131486
99 0.248250
100 1.246463
Length: 101, dtype: float64
1条答案
按热度按时间uelo1irk1#
hurst.compute_Hc
函数返回3个值的元组:其中
H
是赫斯特指数,c
-是某个常数。但是,
pandas._libs.window.aggregations.roll_apply()
期望它的参数(函数)返回一个单一的(标量),这是滚动窗口的缩减结果。这就是
hurst_function
函数需要从vals
返回某个值的原因。