当我调用将两个DataFrame合并在一起时,我总是得到与我试图进行内部合并的DataFrame相同的输出。我让这个函数为另一个做同样事情的函数工作,但是具有不同的两个 Dataframe 的集合。(我处理健康数据,因此我尝试自动编辑并合并这些文件,以使我的新用户(ef_in)与系统中当前的所有用户相对应(ul_in),以及从在ul_in上但不在ef_in上的用户禁用它们)。
def client_merge(ef_in, ul_in):
# ef_in = pd. read_csv("COC_Ready_EF.csv", sep=',')
# ul_in = pd.read_csv("Ready UserList.csv", sep=',')
pd.set_option('mode.chained_assignment', None)
ef_in['UniqueID'] = ef_in['UniqueID'].astype(object)
ef_in['ZipCode'] = ef_in['ZipCode'].astype(object)
ef_in['HireDate'] = ef_in['HireDate'].astype(object)
ef_in['DateOfBirth'] = ef_in['DateOfBirth'].astype(object)
ul_in['UniqueID'] = ul_in['UniqueID'].astype(object)
ul_in['Action'] = ul_in['Action'].astype(object)
ul_in['ZipCode'] = ul_in['ZipCode'].astype(object)
df = pd.concat(([ef_in, ul_in]), axis=0, ignore_index=True, sort=False)
df.drop_duplicates(subset=['UniqueID'], keep=False, inplace=True)
new_users = df.merge(ef_in)
disable_users = df.merge(ul_in)
disable_users['Action'].fillna('Disable', inplace=True)
ready_to_print_file = pd.concat([new_users, disable_users], ignore_index=False)
rtpf1 = ready_to_print_file[ready_to_print_file["FirstName"].str.contains("Admin") == False]
rtpf2 = rtpf1[rtpf1["FirstName"].str.contains("Clarks") == False]
rtpf3 = rtpf2[rtpf2["FirstName"].str.contains("Test") == False]
rtpf3.to_csv(path, header=True, index=False)
我从这得到ul_in:
def client_ul_formatter(in_file):
pd.set_option('mode.chained_assignment', None)
in_file = pd.read_csv(in_file, sep=',')
df = in_file[['FirstName', 'LastName', 'Region', 'UniqueID', 'DateOfBirth', 'Gender',
'ZipCode', 'Email', 'Role', 'HireDate', 'Company', 'Action']]
return df
我的名字来自:
def rotate_date(strg, n):
return strg[n:] + strg[:n]
def client_ef_formatter(input_file):
pd.set_option('mode.chained_assignment', None)
input_file = pd.read_csv(input_file, sep=',',
dtype={'HIREDATE': str, 'DATE OF BIRTH': str})
df = input_file[['LAST NAME', 'FIRST AND MIDDLE', 'DATE OF BIRTH', 'GENDER', 'RELATIONSHIP',
'HIREDATE', 'ZIP', 'ALT ID', 'EMAIL ADDRESS']]
df = df.drop(df[df['RELATIONSHIP'] != 'E'].index)
df = df.drop(['RELATIONSHIP'], axis=1)
p = -1
hdf = []
for _ in (df['HIREDATE']):
p = (p + len(df['HIREDATE']) - (len(df['HIREDATE']) - 1))
hd = df['HIREDATE'].iloc[p]
f = rotate_date(hd, -4)
hdf.append(f)
q = -1
ddf = []
for _ in (df['DATE OF BIRTH']):
q = (q + len(df['DATE OF BIRTH']) - (len(df['DATE OF BIRTH']) - 1))
dob = df['DATE OF BIRTH'].iloc[q]
f = rotate_date(dob, -4)
ddf.append(f)
df['HIREDATE'] = hdf
df['DATE OF BIRTH'] = ddf
df['DATE OF BIRTH'] = pd.to_datetime(df['DATE OF BIRTH'], errors='coerce', format='%m%d%Y')
df['HIREDATE'] = pd.to_datetime(df['HIREDATE'], errors='coerce', format='%m%d%Y')
df.rename(columns={'HIREDATE': 'HireDate', 'LAST NAME': 'LastName', 'FIRST AND MIDDLE': 'FirstName',
'DATE OF BIRTH': 'DateOfBirth', 'ALT ID': 'UniqueID', 'GENDER': 'Gender',
'ZIP': 'ZipCode', 'EMAIL ADDRESS': 'Email'}, inplace=True)
df['Region'] = pd.Series(dtype=str)
df['Role'] = pd.Series(dtype=str)
df['Company'] = pd.Series(dtype=str)
df['Action'] = pd.Series(dtype=str)
df = df.reindex(columns=['FirstName', 'LastName', 'Region', 'UniqueID', 'DateOfBirth', 'Gender', 'ZipCode', 'Email',
'Role', 'HireDate', 'Company', 'Action'])
df['Company'].fillna('client_account', inplace=True)
df['Role'].fillna('Employee On Plan', inplace=True)
df.to_csv(path, header=True, index=False)
这让我停了下来。我很不确定,因为它不是在抛出错误。
1条答案
按热度按时间f5emj3cl1#
必须将两个传入 Dataframe 的“UniqueID”转换为浮点数,而不是类型(对象)。