我有多个JSON对象的DataFrame:JSON DataFrame。我想删除.
之前的所有内容或所有.
之前的内容,只保留最后一个.
之后的字符串。我该如何为此编写Python脚本?我想使用split函数,但不确定如何为DataFrame JSON对象实现它,因为我得到了ValueError: Length mismatch: Expected axis has 75 elements, new values have 2 elements
。以下是我的剧本:
import pandas as pd
import os
from openpyxl.workbook import Workbook
import csv
from pathlib import Path
from pandas.io.json import json_normalize
import json
from datetime import datetime
from datetime import date
from datetime import timedelta
import psycopg2
from psycopg2 import OperationalError
# Import our files
import pg # Various functions to interface with the Postgres servers
from db_creds import * # The DB server and user creds
#try:
# Connect to an existing database
connection = pg.create_connection(sourceDB_setting[3], sourceDB_setting[5], sourceDB_setting[6], sourceDB_setting[1], sourceDB_setting[2])
#Create a cursor to perform database operations
cursor = connection.cursor()
cursor.execute("SELECT application_data, id FROM ssap_applications LIMIT 2;")
results = cursor.fetchall()
if os.path.exists('App_data3.csv'):
os.remove('App_data3.csv')
for row in results:
jrec, app_id = row
# Process each row here
#print(jrec)
jrec = json.loads(jrec)
normal_json = pd.json_normalize(jrec)
normal_json.columns = normal_json.columns.str.split('.')[-1]
print(normal_json.columns)
# save to csv
normal_json.to_csv('App_data3.csv', header=True, index=False, mode='a', encoding='utf-8')
cursor.close()
FYI,这是我的DataFrame JSON对象:
Index(['singleStreamlinedApplication.applicationType',
'singleStreamlinedApplication.ApplicationType',
'singleStreamlinedApplication.applicationStatus',
'singleStreamlinedApplication.coverageYear',
'singleStreamlinedApplication.eligibilityStatus',
'singleStreamlinedApplication.currentPage',
'singleStreamlinedApplication.maxAchievedPage',
'singleStreamlinedApplication.finalized',
'singleStreamlinedApplication.acceptPrivacyIndicator',
'singleStreamlinedApplication.ssapApplicationId',
'singleStreamlinedApplication.IP',
'singleStreamlinedApplication.applicationGuid',
'singleStreamlinedApplication.applicationSignatureDate',
'singleStreamlinedApplication.getHelpIndicator',
'singleStreamlinedApplication.helpType',
'singleStreamlinedApplication.authorizedRepresentative.signatureisProofIndicator',
'singleStreamlinedApplication.authorizedRepresentative.phone',
'singleStreamlinedApplication.authorizedRepresentative.partOfOrganizationIndicator',
'singleStreamlinedApplication.authorizedRepresentative.address.postalCode',
'singleStreamlinedApplication.authorizedRepresentative.address.county',
'singleStreamlinedApplication.authorizedRepresentative.address.primaryAddressCountyFipsCode',
'singleStreamlinedApplication.authorizedRepresentative.address.streetAddress1',
'singleStreamlinedApplication.authorizedRepresentative.address.streetAddress2',
'singleStreamlinedApplication.authorizedRepresentative.address.state',
'singleStreamlinedApplication.authorizedRepresentative.address.city',
'singleStreamlinedApplication.authorizedRepresentative.address.countyCode',
'singleStreamlinedApplication.authorizedRepresentative.address.addressId',
'singleStreamlinedApplication.authorizedRepresentative.name.middleName',
'singleStreamlinedApplication.authorizedRepresentative.name.lastName',
'singleStreamlinedApplication.authorizedRepresentative.name.firstName',
'singleStreamlinedApplication.authorizedRepresentative.name.suffix',
'singleStreamlinedApplication.authorizedRepresentative.emailAddress',
'singleStreamlinedApplication.authorizedRepresentative.organizationId',
'singleStreamlinedApplication.authorizedRepresentative.companyName',
'singleStreamlinedApplication.authorizedRepresentative.hasUserElectronicallySigned',
'singleStreamlinedApplication.applyingForhouseHold',
'singleStreamlinedApplication.applyingForFinancialAssistanceIndicator',
'singleStreamlinedApplication.consentAgreement',
'singleStreamlinedApplication.agreeToUseIncomeData',
'singleStreamlinedApplication.agreeToEndCoverage',
'singleStreamlinedApplication.numberOfYearsAgreed',
'singleStreamlinedApplication.authorizedRepresentativeIndicator',
'singleStreamlinedApplication.applicationStartDate',
'singleStreamlinedApplication.applicationDate',
'singleStreamlinedApplication.taxFilerDependants',
'singleStreamlinedApplication.isRidpVerified',
'singleStreamlinedApplication.clientIp',
'singleStreamlinedApplication.primaryTaxFilerPersonId',
'singleStreamlinedApplication.broker.internalBrokerId',
'singleStreamlinedApplication.broker.brokerFederalTaxIdNumber',
'singleStreamlinedApplication.broker.brokerName',
'singleStreamlinedApplication.broker.brokerFirstName',
'singleStreamlinedApplication.broker.brokerLastName',
'singleStreamlinedApplication.assister.assisterName',
'singleStreamlinedApplication.assister.assisterFirstName',
'singleStreamlinedApplication.assister.assisterLastName',
'singleStreamlinedApplication.assister.assisterID',
'singleStreamlinedApplication.assister.internalAssisterId',
'singleStreamlinedApplication.taxHousehold',
'singleStreamlinedApplication.incarcerationAsAttestedIndicator',
'singleStreamlinedApplication.mecCheckStatus',
'singleStreamlinedApplication.jointTaxFilerSpousePersonId',
'singleStreamlinedApplication.currentApplicantId',
'singleStreamlinedApplication.ssapVersion',
'singleStreamlinedApplication.exemptHousehold',
'singleStreamlinedApplication.brokerChanged',
'singleStreamlinedApplication.renewalConsentChanged',
'singleStreamlinedApplication.incomeConsentChanged',
'singleStreamlinedApplication.isEditApplication',
'singleStreamlinedApplication.manuallyCreated',
'singleStreamlinedApplication.claimNoticeDMAHSAcknowledgment',
'singleStreamlinedApplication.paybackDMAHSAcknowledgment',
'singleStreamlinedApplication.uiProperties',
'singleStreamlinedApplication.externalApplicationId',
'singleStreamlinedApplication.outboundCaseNumber'],
dtype='object')
1条答案
按热度按时间lpwwtiir1#
你已经接近了,你只需要在 *
[-1]
之前加上str
*:输出: