pandas Python数据转换--EDA

vuktfyat  于 2023-01-28  发布在  Python
关注(0)|答案(1)|浏览(111)

尝试将数据从

  • lm-代表上个月

希望这是有意义的,我怎么有它

import pandas as pd
df = pd.read_excel('data.xlsx') #reading data
output = []
grouped = df.groupby('txn_id')
for txn_id, group in grouped:
avg_amt = group['avg_amount'].iloc[-1]
min_amt = group['min_amount'].iloc[-1]
lm_avg = group['avg_amount'].iloc[-6:-1]
min_amt_list = group['min_amount'].iloc[-6:-1]
output.append([txn_id, *lm_avg, min_amt, *min_amt_list])

result_df = pd.DataFrame(output, columns=['txn_id', 'lm_avg', 'lm_avg-1', 'lm_avg-2', 'lm_avg-3', 'lm_avg-4', 'lm_avg-5', 'min_am', 'min_amt-1', 'min_amt-2', 'min_amt-3', 'min_amt-4', 'min_amt-5'])#getting multiple crows for 1 txn_id which is not expected
egmofgnx

egmofgnx1#

使用pivot_table

# Rename columns before reshaping your dataframe with pivot_table
cols = df[::-1].groupby('TXN_ID').cumcount().astype(str)
out = (df.rename(columns={'AVG_Amount': 'lm_avg', 'MIN_AMOUNT': 'min_amnt'})
         .pivot_table(index='TXN_ID', values=['lm_avg', 'min_amnt'], columns=cols))

# Flat columns name
out.columns = ['-'.join(i) if i[1] != '0' else i[0] for i in out.columns.to_flat_index()]

# Reset index
out = out.reset_index()

输出:

>>> out
   TXN_ID  lm_avg  lm_avg-1  lm_avg-2  lm_avg-3  lm_avg-4  lm_avg-5  min_amnt  min_amnt-1  min_amnt-2  min_amnt-3  min_amnt-4  min_amnt-5
0       1     578       688       589       877       556        78       400          31          20         500         300          30
1       2     578       688       589       877       556        78       400          31          20           0           0          90

相关问题