我试图将嵌套的JSON文件与列表和字典转换为 Dataframe 。到目前为止,我已经写了这段代码:
for result in rawData['results']:
openfda_data = result.get('openfda', {})
# Collect 'openfda' data
openfda_entries = {
key: [value for value in values] if isinstance(values, list) else [values]
for key, values in openfda_data.items()
}
# Flatten the 'submissions' data
for submission in result.get('submissions', []):
flattened_entry = {
'application_number': result['application_number'],
'sponsor_name': result['sponsor_name'],
'submission_type': submission.get('submission_type', None),
'submission_number': submission.get('submission_number', None),
'submission_status': submission.get('submission_status', None),
'submission_status_date': submission.get('submission_status_date', None),
'submission_class_code': submission.get('submission_class_code', None),
'submission_class_code_description': submission.get('submission_class_code_description', None)
}
# Add 'openfda' data
for key, values in openfda_entries.items():
flattened_entry[f'openfda_{key}'] = values # Store all values in a list
flattened_data.append(flattened_entry)
# Flatten the 'products' data
for product in result.get('products', []):
flattened_product = {
'application_number': result['application_number'],
'sponsor_name': result['sponsor_name'],
'product_number': product.get('product_number', None),
'reference_drug': product.get('reference_drug', None),
'brand_name': product.get('brand_name', None),
'active_ingredient_name': product['active_ingredients'][0].get('name', None),
'active_ingredient_strength': product['active_ingredients'][0].get('strength', None),
'reference_standard': product.get('reference_standard', None),
'dosage_form': product.get('dosage_form', None),
'route': product.get('route', None),
'marketing_status': product.get('marketing_status', None)
}
flattened_data.append(flattened_product)
字符串
这段代码能够flaten大部分的数据,以重新命名
问题在于openfda值。来自openfda条目的值在结果 Dataframe 中的列表中,如下所示:
[“首选关节炎8小时”、“醋氨酚- APAP 8小时”、“醋氨酚”、"关节炎疼痛缓解剂“、”首选关节炎疼痛缓解剂“、”醋氨酚- APAP关节炎“、”关节炎疼痛缓解剂“]
我还想把这些值变平,这样这个值也在列表之外。
"results": [
{
"submissions": [
{
"submission_type": "ORIG",
"submission_number": "1",
"submission_status": "AP",
"submission_status_date": "20021021"
},
{
"submission_type": "SUPPL",
"submission_number": "6",
"submission_status": "AP",
"submission_status_date": "20110610",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
}
],
"application_number": "ANDA076177",
"sponsor_name": "DR REDDYS LABS SA",
"openfda": {
"application_number": [
"ANDA076177"
],
"brand_name": [
"CAMILA"
],
"generic_name": [
"NORETHINDRONE"
],
"manufacturer_name": [
"Mayne Pharma Inc."
],
"product_ndc": [
"51862-884"
],
"product_type": [
"HUMAN PRESCRIPTION DRUG"
],
"route": [
"ORAL"
],
"substance_name": [
"NORETHINDRONE"
],
"rxcui": [
"198042",
"748961",
"748962"
],
"spl_id": [
"022c6de4-4589-4d4c-8ac4-977c7e3ef568"
],
"spl_set_id": [
"a786be85-49ba-4369-b510-7dccc10f7f18"
],
"package_ndc": [
"51862-884-01",
"51862-884-03"
],
"nui": [
"M0447349",
"N0000175602"
],
"pharm_class_cs": [
"Progesterone Congeners [CS]"
],
"pharm_class_epc": [
"Progestin [EPC]"
],
"unii": [
"T18F433X4S"
]
},
"products": [
{
"product_number": "001",
"reference_drug": "No",
"brand_name": "CAMILA",
"active_ingredients": [
{
"name": "NORETHINDRONE",
"strength": "0.35MG"
}
],
"reference_standard": "No",
"dosage_form": "TABLET",
"route": "ORAL-28",
"marketing_status": "Prescription",
"te_code": "AB1"
}
]
},
{
"submissions": [
{
"submission_type": "SUPPL",
"submission_number": "37",
"submission_status": "AP",
"submission_status_date": "20200707",
"review_priority": "STANDARD",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
},
{
"submission_type": "SUPPL",
"submission_number": "31",
"submission_status": "AP",
"submission_status_date": "20130910",
"review_priority": "STANDARD",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
},
{
"submission_type": "SUPPL",
"submission_number": "13",
"submission_status": "AP",
"submission_status_date": "20080129",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
},
{
"submission_type": "SUPPL",
"submission_number": "5",
"submission_status": "AP",
"submission_status_date": "20040817",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
},
{
"submission_type": "SUPPL",
"submission_number": "10",
"submission_status": "AP",
"submission_status_date": "20070110",
"submission_class_code": "LABELING",
"submission_class_code_description": "Labeling"
}
],
"application_number": "ANDA076194",
"sponsor_name": "WATSON LABS",
"openfda": {
"application_number": [
"ANDA076194"
],
"brand_name": [
"LISINOPRIL AND HYDROCHLOROTHIAZIDE"
],
"generic_name": [
"LISINOPRIL AND HYDROCHLOROTHIAZIDE"
],
"manufacturer_name": [
"Actavis Pharma, Inc."
],
"product_ndc": [
"0591-0860",
"0591-0861",
"0591-0862"
],
"product_type": [
"HUMAN PRESCRIPTION DRUG"
],
"route": [
"ORAL"
],
"substance_name": [
"HYDROCHLOROTHIAZIDE",
"LISINOPRIL"
],
"rxcui": [
"197885",
"197886",
"197887"
],
"spl_id": [
"ff5e5610-775f-48a9-a410-e8d8b547b958"
],
"spl_set_id": [
"ab5e9ae4-e575-4a63-99f3-2bde36bdd508"
],
"package_ndc": [
"0591-0860-01",
"0591-0860-05",
"0591-0861-01",
"0591-0861-05",
"0591-0862-01",
"0591-0862-05"
],
"nui": [
"N0000175359",
"N0000175419",
"M0471776"
],
"pharm_class_pe": [
"Increased Diuresis [PE]"
],
"pharm_class_epc": [
"Thiazide Diuretic [EPC]"
],
"pharm_class_cs": [
"Thiazides [CS]"
],
"unii": [
"E7199S1YWR",
"0J48LPH2TH"
]
},
"products": [
{
"product_number": "003",
"reference_drug": "No",
"brand_name": "LISINOPRIL AND HYDROCHLOROTHIAZIDE",
"active_ingredients": [
{
"name": "HYDROCHLOROTHIAZIDE",
"strength": "12.5MG"
},
{
"name": "LISINOPRIL",
"strength": "10MG"
}
],
"reference_standard": "No",
"dosage_form": "TABLET",
"route": "ORAL",
"marketing_status": "Prescription",
"te_code": "AB"
},
{
"product_number": "001",
"reference_drug": "No",
"brand_name": "LISINOPRIL AND HYDROCHLOROTHIAZIDE",
"active_ingredients": [
{
"name": "HYDROCHLOROTHIAZIDE",
"strength": "12.5MG"
},
{
"name": "LISINOPRIL",
"strength": "20MG"
}
],
"reference_standard": "No",
"dosage_form": "TABLET",
"route": "ORAL",
"marketing_status": "Prescription",
"te_code": "AB"
},
{
"product_number": "002",
"reference_drug": "No",
"brand_name": "LISINOPRIL AND HYDROCHLOROTHIAZIDE",
"active_ingredients": [
{
"name": "HYDROCHLOROTHIAZIDE",
"strength": "25MG"
},
{
"name": "LISINOPRIL",
"strength": "20MG"
}
],
"reference_standard": "No",
"dosage_form": "TABLET",
"route": "ORAL",
"marketing_status": "Prescription",
"te_code": "AB"
}
]
}
]
型
我有办法做到吗?
1条答案
按热度按时间hmtdttj41#
我没有看到你提供的任何预期输出。但你可以使用这样的东西:
字符串
退出:
型
如果数据文件太大,我们可以尝试拆分框架:
型
或者我们可以把它添加到你写的代码中(我认为这是最快的方法):
型