python 按键值分组嵌套字典

ef1yzkbh  于 2023-04-04  发布在  Python
关注(0)|答案(2)|浏览(114)

如何按键值对嵌套字典进行分组?例如,我的字典如下所示:

{'comb_indx': {0: '3925220EE', 1: '3925220EE', 2: '3925220EE', 3: '66478EE', 4: '66478EE', 5: '66478EE', 6: '42300EE', 7: '42300EE', 8: '42300EE'}, 'country': {0: 'EE', 1: 'EE', 2: 'EE', 3: 'EE', 4: 'EE', 5: 'EE', 6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT', 3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN', 6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}}

我想把这个字典转换成类似下面的形式,也就是按 'comb_indx' 中的值进行分组:

{
{ 'comb_indx': '3925220EE', 'country': {0: 'EE', 1: 'EE', 2: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT'}},
{ 'comb_indx': '66478EE', 'country': {3: 'EE', 4: 'EE', 5: 'EE'}, 'type': {3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN'}},
{ 'comb_indx': '42300EE', 'country': {6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}},
}

基本上,按'comb_indx'中的值分组。
我试过所有类似的问题,但总是出错。

e7arh2l6

e7arh2l61#

您可以使用defaultdict按'comb_indx'值对项目进行分组,然后将defaultdict转换回字典列表:

from collections import defaultdict

data = {'comb_indx': {0: '3925220EE', 1: '3925220EE', 2: '3925220EE', 3: '66478EE', 4: '66478EE', 5: '66478EE', 6: '42300EE', 7: '42300EE', 8: '42300EE'}, 'country': {0: 'EE', 1: 'EE', 2: 'EE', 3: 'EE', 4: 'EE', 5: 'EE', 6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT', 3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN', 6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}}

# Column whose values define the groups; it appears as a single scalar in
# each entry of `result`.
group_by = 'comb_indx'

# group by 'comb_indx': for every row, append the value of each *other*
# column to the list kept for that row's group.
grouped = defaultdict(lambda: defaultdict(list))
for i, comb_indx in data[group_by].items():
    # Touch the group first so it exists even if there are no other columns.
    columns = grouped[comb_indx]
    for k, v in data.items():
        # Skip the grouping column itself: collecting it too would make the
        # update() call further down replace the scalar group key with a
        # list of repeated keys.
        if k != group_by:
            columns[k].append(v[i])

# convert defaultdict to list of dicts, one per distinct 'comb_indx' value,
# in order of first appearance
result = []
for k, v in grouped.items():
    new_dict = {group_by: k}
    new_dict.update(v)
    result.append(new_dict)

输出:

[
    {'comb_indx': '3925220EE', 'country': ['EE', 'EE', 'EE'], 'type': ['CREDIT_ACCOUNT', 'CREDIT_ACCOUNT', 'CREDIT_ACCOUNT']},
    {'comb_indx': '66478EE', 'country': ['EE', 'EE', 'EE'], 'type': ['SMALL_LOAN', 'SMALL_LOAN', 'SMALL_LOAN']},
    {'comb_indx': '42300EE', 'country': ['EE', 'EE', 'EE'], 'type': ['SMALL_LOAN', 'SMALL_LOAN', 'SMALL_LOAN']}
]
bxjv4tth

bxjv4tth2#

我会使用pandas.DataFrame.groupby

import pandas as pd
from IPython.display import display

# Source mapping: column name -> {row label -> value}.
d = {'comb_indx': {0: '3925220EE', 1: '3925220EE', 2: '3925220EE', 3: '66478EE', 4: '66478EE', 5: '66478EE', 6: '42300EE', 7: '42300EE', 8: '42300EE'}, 'country': {0: 'EE', 1: 'EE', 2: 'EE', 3: 'EE', 4: 'EE', 5: 'EE', 6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT', 3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN', 6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}}
df = pd.DataFrame(d)

# Split the rows on their 'comb_indx' value, then for each group show the
# group key, the sub-frame itself, and the sub-frame converted to a dict.
by_comb_indx = df.groupby('comb_indx')
for key, frame in by_comb_indx:
    print(key)
    display(frame)
    as_dict = frame.to_dict()  # if you want a dict
    print(as_dict)

如果不能使用pandas,那么用一个简单的循环怎么样?

from collections import defaultdict

d = {'comb_indx': {0: '3925220EE', 1: '3925220EE', 2: '3925220EE', 3: '66478EE', 4: '66478EE', 5: '66478EE', 6: '42300EE', 7: '42300EE', 8: '42300EE'}, 'country': {0: 'EE', 1: 'EE', 2: 'EE', 3: 'EE', 4: 'EE', 5: 'EE', 6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT', 3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN', 6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}}
group_by = 'comb_indx'

# One pass over the rows: each row's values are filed under the group named
# by its `group_by` value, keeping the original row keys inside every column.
# Dicts preserve insertion order, so groups come out in order of first
# appearance (iterating a set of the unique values would give an order that
# can change from run to run).
buckets = defaultdict(lambda: defaultdict(dict))
for row, key in d[group_by].items():
    for col, values in d.items():
        buckets[key][col][row] = values[row]

# Plain dicts, one per group: {column: {row key: value}}.
grouped = [dict(group) for group in buckets.values()]

for _d in grouped:
    print(_d)

{'comb_indx': {3: '66478EE', 4: '66478EE', 5: '66478EE'}, 'country': {3: 'EE', 4: 'EE', 5: 'EE'}, 'type': {3: 'SMALL_LOAN', 4: 'SMALL_LOAN', 5: 'SMALL_LOAN'}}
{'comb_indx': {6: '42300EE', 7: '42300EE', 8: '42300EE'}, 'country': {6: 'EE', 7: 'EE', 8: 'EE'}, 'type': {6: 'SMALL_LOAN', 7: 'SMALL_LOAN', 8: 'SMALL_LOAN'}}
{'comb_indx': {0: '3925220EE', 1: '3925220EE', 2: '3925220EE'}, 'country': {0: 'EE', 1: 'EE', 2: 'EE'}, 'type': {0: 'CREDIT_ACCOUNT', 1: 'CREDIT_ACCOUNT', 2: 'CREDIT_ACCOUNT'}}

相关问题