def notnull_index(col: str) -> list:
    """Return the index labels of the rows where ``df[col]`` is not null.

    Reads the module-level DataFrame ``df``.

    Args:
        col: Name of the column of ``df`` to inspect.

    Returns:
        Index labels, in row order, of the rows whose value in ``col`` is
        not NaN/None.
    """
    # ``notna()`` already yields a boolean mask, so no ``== True`` comparison
    # is needed; indexing ``df.index`` directly avoids copying the frame.
    mask = df[col].notna().to_numpy()
    return list(df.index[mask])
def cumsum(col: str) -> pd.DataFrame:
    """Add a forward-filled running total of ``df[col]`` as ``cumsum_<col>``.

    Operates on the module-level DataFrame ``df`` in place: NaN entries of
    ``col`` are skipped when accumulating, and each NaN row receives the most
    recent running total. Rows before the first non-NaN value stay NaN.

    Args:
        col: Name of the column of ``df`` to accumulate.

    Returns:
        The same module-level ``df``, now carrying the ``cumsum_<col>`` column.
    """
    # Series.cumsum skips NaN by default (skipna=True) and leaves NaN at those
    # positions, so one ffill reproduces the "carry the last total" step.
    # Assigning the finished Series back avoids the chained
    # ``df[...].fillna(..., inplace=True)`` call, which may act on a temporary
    # copy and leave ``df`` unchanged.
    df[f"cumsum_{col}"] = df[col].cumsum().ffill()
    return df
1条答案
按热度按时间bjg7j2ky1#
假设你的 DataFrame 名为 `df`,这应该可以工作,但 `fillna(0)` 要好得多 ;)