我已经构建了一个函数来创建一个数据框,根据输入,该数据框包含一列或两列。输入数据框具有从Excel导入的日期时间列。在导入过程中,我使用它将列设置为日期时间:
df['Date Raised'] = pd.to_datetime(df['Date Raised'],
dayfirst=True,
format='"%Y-%m-%d"')
然后我将 Dataframe 送入这个函数:
def data_award_by_grade(df, x=None):
df = df.copy()
df = df[["Amount Awarded", "Nominee Grade", "Date Raised"]]
target_date = pd.Timestamp("2023-04-01")
after_target_date = df[df['Date Raised'] > target_date]
if x is not None and x > 0:
df_one = after_target_date.groupby(["Nominee Grade"]).mean().astype(int)
df_one = df_one.rename(columns={'Amount Awarded': 'Total Average'})
x_months_after_date = target_date + pd.DateOffset(days=x * 30)
df_two = after_target_date[after_target_date['Date Raised'] <= x_months_after_date]
df_two = df_two.groupby(["Nominee Grade"]).mean().astype(int)
df_two = df_two.rename(columns={'Amount Awarded': f'Average Across {x} Month(s)'})
result_df = df_one.add(df_two, fill_value=0).replace(np.nan, 0).astype(int)
else:
result_df = after_target_date.groupby(["Nominee Grade"]).mean().astype(int)
result_df = result_df.rename(columns={'Amount Awarded': 'Total Average'})
return result_df
award_by_grade = data_award_by_grade(raw_data, 6)
award_by_grade
每当我运行它时,它都会返回错误:
TypeError: Converting from datetime64[ns] to int32 is not supported. Do obj.astype('int64').astype(dtype) instead
完整错误:
TypeError Traceback (most recent call last)
<ipython-input-30-462a3025057a> in <module>
22 return result_df
23
---> 24 MQD_award_by_grade = data_award_by_grade(MQD_raw_data, 6)
25
26 MQD_award_by_grade
<ipython-input-30-462a3025057a> in data_award_by_grade(df, x)
7
8 if x is not None and x > 0:
----> 9 df_one = after_target_date.groupby(["Nominee Grade"]).mean().astype(int)
10 df_one = df_one.rename(columns={'Amount Awarded': 'Total Average'})
11
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
6322 else:
6323 # else, only a single dtype is given
-> 6324 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6325 return self._constructor(new_data).__finalize__(self, method="astype")
6326
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
449 copy = False
450
--> 451 return self.apply(
452 "astype",
453 dtype=dtype,
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
350 applied = b.apply(f, **kwargs)
351 else:
--> 352 applied = getattr(b, f)(**kwargs)
353 result_blocks = extend_blocks(applied, result_blocks)
354
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, using_cow)
509 values = self.values
510
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
512
513 new_values = maybe_coerce_values(new_values)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\astype.py in astype_array_safe(values, dtype, copy, errors)
240
241 try:
--> 242 new_values = astype_array(values, dtype, copy=copy)
243 except (ValueError, TypeError):
244 # e.g. _astype_nansafe can fail on object-dtype of strings
~\Anaconda3\lib\site-packages\pandas\core\dtypes\astype.py in astype_array(values, dtype, copy)
182 if not isinstance(values, np.ndarray):
183 # i.e. ExtensionArray
--> 184 values = values.astype(dtype, copy=copy)
185
186 else:
~\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in astype(self, dtype, copy)
699 elif is_period_dtype(dtype):
700 return self.to_period(freq=dtype.freq)
--> 701 return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
702
703 # -----------------------------------------------------------------
~\Anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py in astype(self, dtype, copy)
470 values = self.asi8
471 if dtype != np.int64:
--> 472 raise TypeError(
473 f"Converting from {self.dtype} to {dtype} is not supported. "
474 "Do obj.astype('int64').astype(dtype) instead"
1条答案
按热度按时间7rtdyuoh1#
正如@FObersteiner向您建议的那样,使用
.astype('int64')
将datetime列转换为数字。您正在32位平台上工作,这就是引发此异常的原因。但是,如果在此之后应用
pd.DateOffset
,将日期时间转换为int(或float)意味着什么?您可以尝试以下选项之一: