我有一个 DataFrame,其中部分列的值带有特殊字符(例如 "1:"、"2:" 这样的编码前缀)。我想用一个函数遍历这些列,把这些字符删除。
import pandas as pd
import numpy as np
# 40 rows of complete dataframe
data = {'Region': ['East', 'South', 'South', 'North', 'North', 'South', 'East', 'North', 'North', 'West ', 'West ', 'South', 'North', 'East', 'South', 'South', 'North', 'East', 'South', 'North', 'East', 'North', 'West ', 'North', 'East', 'South', 'West ', 'North', 'South', 'West ', 'West ', 'South', 'South', 'South', 'East', 'East', 'South', 'West ', 'East', 'South'],
'District': ['Kono', 'Bo', 'Pujehun', 'Port Loko', 'Koinadugu', 'Bo', 'Kono', 'Koinadugu', 'Koinadugu', 'Western Urban', 'Western Rural ', 'Pujehun', 'Bombali', 'Kono', 'Pujehun', 'Pujehun', 'Port Loko', 'Kenema', 'Bo', 'Koinadugu', 'Kono', 'Koinadugu', 'Western Urban', 'Koinadugu', 'Kailahun', 'Bo', 'Western Urban', 'Bombali', 'Bo', 'Western Rural ', 'Western Urban', 'Pujehun', 'Pujehun', 'Moyamba', 'Kono', 'Kenema', 'Bo', 'Western Urban', 'Kailahun', 'Bo'],
'Chiefdom': ['Nimiyama', 'Bo Town', 'Barri', 'Lokomasama', 'Nieni', 'Bo Town', 'Gbense', 'Diang', 'Dembelia-Sinkunia', 'West III', 'Waterloo Rural', 'Makpele', 'Sella Limba', 'Nimikoro', np.nan, 'Pejeh(Futa peje', 'Lokomasama', 'Kandu Leppiama', 'Bo Town', 'Sengbe', 'Gbane Kandor', 'Nieni', 'West III', 'Nieni', 'Mandu', 'Valunia', 'East III', 'Sella Limba', 'Bo Town', 'Mountain Rural', 'West I', 'Kpaka', 'Makpele', 'Ribbi', 'Nimikoro', 'Gaura', 'Bo Town', 'West II', 'Mandu', 'Bo Town'],
'Section': ['Sewafeh', 'Second Bongay street coronation field', 'Sembehun Barri', 'Raka', 'Bandakarifya', 'J 11 Nikibu Section', 'Hill station', 'Kondaibaia', 'Gbindi Central', 'Aberdeen', 'Bass Street', 'Dombu', 'Kathantha Yimbor', 'jiama', 'Mandaima', 'Njagbema', 'mapang', 'Baoma oil mill', 'Gbonda Road Coronation Field', 'Dankawalie', 'Fanda Viĺlage', 'Kumala', 'Lumley', 'Kumala', 'Mobai', 'Baomahun', 'Jalloh terrace', 'Kabba Ferry', 'Tenp.nant Street Niagboima', 'Regent', 'Siaka Steven Stadium', 'Soso', 'Jorporwahun', 'foryah village', 'Yendema', 'Perrie', 'Forth Street', 'Assesion Twon', 'Yoyah', 'Cee Line'],
'wp Name': ['sokoya', 'More Rogers compound', 'Sembehun drilled well', 'community well', 'Bandakarifya Health facility hand pump', 'Mr Bangle', 'Kamara family', 'Hand dug well', 'Samba Water well', '9 Kimbima road', 'indian mac', 'Guboi Community', 'Wcsl pri School well kathantha.', 'public stand pipe', 'Mandaima', 'Njagbema Community', 'school well', 'CHC', 'More Sarah water point', 'mamayeria water well', 'Strime', 'Kumala community tap', '16 E Freetown Road', 'Kumala community tap', 'Tobu', 'Baomahun community water', 'Amidu Bah water point 12 city road', 'Kabba ferry com well 3', 'SALWACO Water Point', '7 Gloucester Road Regent', 'Stadium Hostels', 'Sowa compand', 'Kallonla', 'foryah community well', 'Hand dug well', 'Lanssana Jumu', 'Forth Street', 'Gray Bush', 'community pump', 'Private'],
'Latitude': [8.58331956, 7.954980701482942, 7.40037027, 8.707599, 9.26926281, 7.959563252331055, 8.64424692, 9.38030499, 9.91149193, 8.49867403, 8.32211437, 7.31525401, 9.54705837071811, 8.54967284, 7.44251431, 7.50650933, 8.778933309645701, 8.04202, 7.956939396996258, 9.63100657, 8.64097544, 9.06444497, 8.45597167, 9.06357503, 7.99293476, 8.41437496755645, 8.464235217224767, 9.586626399662924, 7.954810813602998, 8.43581747, 8.48045449, 7.29593482, 7.34045448, 8.291899626644375, 8.57198629, 7.660193333333332, 7.955800507520509, 8.481692669842559, 7.93007759, 7.952044236023938],
'Longitude': [-11.22175896, -11.738578683547583, -11.38590836, -12.99610243, -11.19311269, -11.749480176861558, -10.95233617, -11.56538161, -11.4472028, -13.28776409, -13.07541178, -11.26317706, -12.170908329699612, -11.08732622, -11.74650559, -11.52460214, -13.092156324868172, -11.328288333333337, -11.740514057032058, -11.32949058, -10.59901501, -11.40249037, -13.27166235, -11.40196698, -10.75225434, -11.669409575710674, -13.181634554478194, -12.217858620215289, -11.749735856785582, -13.21830899, -13.24944949, -11.65502583, -11.33420895, -12.77962831263424, -11.09309991, -11.079631666666668, -11.74833729662934, -13.24936134294358, -10.76663342, -11.733189998375863],
'Elevation': [328.0, 150.3, 80.0, 58.0, 505.0, 132.3, 421.0, 394.0, 388.0, 51.0, 54.8, 172.0, 129.3, 413.0, 94.0, 113.0, 31.5, 141.6, 193.3, 457.0, 264.0, 441.0, 58.0, 428.0, 223.6, 147.5, 120.3, 42.7, 124.2, 409.0, 31.0, 57.0, 127.0, -74.9, 432.0, 103.8, 152.5, 87.3, 253.0, 231.3],
'Type of water point': ['11:Dam/pan (runoff harvesting)', '9:Unprotected dug well', '3:Tube well or borehole', '2:Protected dug well', '2:Protected dug well', '9:Unprotected dug well', '2:Protected dug well', '2:Protected dug well', '2:Protected dug well', '7:Piped water into dwelling/plot/yard', '2:Protected dug well', '2:Protected dug well', '2:Protected dug well', '6:Public tap/standpipe (stand-alone or water kiosk', '1:Protected spring', '2:Protected dug well', '2:Protected dug well', '2:Protected dug well', '9:Unprotected dug well', '2:Protected dug well', '12:Surface water (lake/river/stream)', '6:Public tap/standpipe (stand-alone or water kiosk', '7:Piped water into dwelling/plot/yard', '6:Public tap/standpipe (stand-alone or water kiosk', '6:Public tap/standpipe (stand-alone or water kiosk', '9:Unprotected dug well', '2:Protected dug well', '2:Protected dug well', '2:Protected dug well', '7:Piped water into dwelling/plot/yard', '6:Public tap/standpipe (stand-alone or water kiosk', '2:Protected dug well', '2:Protected dug well', '2:Protected dug well', '3:Tube well or borehole', '6:Public tap/standpipe (stand-alone or water kiosk', '9:Unprotected dug well', '6:Public tap/standpipe (stand-alone or water kiosk', '2:Protected dug well', '9:Unprotected dug well'],
'Water point Functionality': ['1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '4:No - Broken down', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '4:No - Broken down', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '3:Yes - But damaged', '1:Yes – Functional (and in use)', '2:Yes – Functional (but not in use)', '1:Yes – Functional (and in use)', '4:No - Broken down', '4:No - Broken down', '4:No - Broken down', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '6:No - Under rehabilitation', '4:No - Broken down', '1:Yes – Functional (and in use)', '4:No - Broken down', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '4:No - Broken down', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '2:Yes – Functional (but not in use)', '1:Yes – Functional (and in use)', '3:Yes - But damaged', '1:Yes – Functional (and in use)', '3:Yes - But damaged', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)', '1:Yes – Functional (and in use)'],
'Water Sustainability': ['Seasonal', 'Seasonal', 'Seasonal', 'Seasonal', 'Seasonal', 'Seasonal', 'Always water', 'Always water', 'Always water', 'Always water', 'Seasonal', 'Seasonal', 'Seasonal', 'Always water', 'Dry always / Never water', 'Always water', 'Seasonal', 'Seasonal', 'Seasonal', 'Seasonal', 'Seasonal', 'Always water', 'Always water', 'Always water', 'Seasonal', 'Always water', 'Always water', 'Dry always / Never water', 'Seasonal', 'Seasonal', 'Always water', 'Seasonal', 'Seasonal', 'Always water', 'Always water', 'Seasonal', 'Seasonal', 'Always water', 'Seasonal', 'Seasonal'],
'WP_suitability': ['1:Yes', '1:Yes', '1:Yes', '1:Yes', '2:No', '1:Yes', '1:Yes', '2:No', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '2:No', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '2:No', '1:Yes', '2:No', '1:Yes', '1:Yes', '1:Yes', '9999:Uknown', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes', '1:Yes'],
'Year of construction': [1990, 2003, 2016, 1984, 2010, 2006, 2011, 2013, 1998, 2011, 2013, 2005, 2013, 1986, 2000, 2008, 2013, 2015, 2008, 1985, 1970, 1991, 2013, 1991, 1982, 2010, 2000, 2005, 2009, 2010, 1979, 2014, 2014, 1985, 2009, 1988, 2006, 2015, 2004, 2013],
'Ownership': ['1:Community', '8:Private Individual', '1:Community', '1:Community', '6:Health Facility', '8:Private Individual', '8:Private Individual', '1:Community', '8:Private Individual', '8:Private Individual', '8:Private Individual', '1:Community', '5:School', '1:Community', '5:School', '1:Community', '5:School', '6:Health Facility', '8:Private Individual', '1:Community', '1:Community', '1:Community', '8:Private Individual', '1:Community', '1:Community', '8:Private Individual', '8:Private Individual', '1:Community', '8:Private Individual', '8:Private Individual', '3:GUMA', '1:Community', '1:Community', '1:Community', '1:Community', '1:Community', '8:Private Individual', '1:Community', '1:Community', '8:Private Individual'],
'Maintenance in charge': ['1:WASH management committee', '3:Privat owner', '2:Other community group', '2:Other community group', '2:Other community group', '3:Privat owner', '3:Privat owner', '1:WASH management committee', '3:Privat owner', '3:Privat owner', '3:Privat owner', '2:Other community group', '10:NGO', '1:WASH management committee', '10:NGO', '2:Other community group', '1:WASH management committee', '7:Health Facility', '3:Privat owner', '1:WASH management committee', '11:No management', '2:Other community group', '3:Privat owner', '2:Other community group', '2:Other community group', '3:Privat owner', '3:Privat owner', '10:NGO', '3:Privat owner', '3:Privat owner', '5:GUMA', '1:WASH management committee', '10:NGO', '2:Other community group', '2:Other community group', '1:WASH management committee', '3:Privat owner', '1:WASH management committee', '1:WASH management committee', '3:Privat owner'],
'WASH management committee?': ['1:Yes', '2:No', '2:No', '2:No', '2:No', '2:No', '2:No', '1:Yes', '2:No', '2:No', '2:No', '2:No', '1:Yes', '1:Yes', '1:Yes', '2:No', '1:Yes', '1:Yes', '2:No', '1:Yes', '2:No', '2:No', '2:No', '2:No', '1:Yes', '2:No', '2:No', '3:Unknown', '2:No', '2:No', '2:No', '2:No', '3:Unknown', '1:Yes', '2:No', '1:Yes', '2:No', '1:Yes', '1:Yes', '2:No'],
'Proximity of spare part supplier': [0, 42, 180, 1000, 200, 45, 22, 90, 26, 80, 2, 5000, 60, 35, 800, 60, 1200, 10, 40, 5, 2, 200, 10, 200, 40, 138, 60, 48, 45, 45, 30, 56, 410, 0, 30, 180, 35, 5, 98, 3],
'declared ODF?': ['No', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'No', "Don't know", 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', "Don't know", 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', "Don't know", 'No', 'Yes', "Don't know", 'No', 'No', "Don't know", 'No', 'No', 'No', 'No']}
# Build the working DataFrame from the raw column -> values mapping.
wp_clean = pd.DataFrame(data)

# Pull out the columns under review, one name per line so the list
# is easy to scan and to diff.
cols_x = wp_clean.loc[:, [
    'Region',
    'District',
    'Type of water point',
    'Water point Functionality',
    'WP_suitability',
    'Ownership',
    'Maintenance in charge',
    'WASH management committee?',
]]
# top 5 rows of columns to clean
Region District Type of water point Water point Functionality WP_suitability Ownership Maintenance in charge WASH management committee?
0 East Kono 11:Dam/pan (runoff harvesting) 1:Yes – Functional (and in use) 1:Yes 1:Community 1:WASH management committee 1:Yes
1 South Bo 9:Unprotected dug well 1:Yes – Functional (and in use) 1:Yes 8:Private Individual 3:Privat owner 2:No
2 South Pujehun 3:Tube well or borehole 1:Yes – Functional (and in use) 1:Yes 1:Community 2:Other community group 2:No
3 North Port Loko 2:Protected dug well 1:Yes – Functional (and in use) 1:Yes 1:Community 2:Other community group 2:No
4 North Koinadugu 2:Protected dug well 4:No - Broken down 2:No 6:Health Facility 2:Other community group 2:No
上面的数据框列出了所有带有特殊字符的列。
我尝试了下面这段代码,它可以正常工作,但看起来很杂乱,所以我想改用一个统一的函数来完成。
def strip_code_prefix(df, columns, sep=':'):
    """Remove the leading "<code><sep>" prefix from string columns, in place.

    Each value is split once on the first occurrence of ``sep`` and only
    the trailing part is kept, e.g. ``'2:Protected dug well'`` becomes
    ``'Protected dug well'``. Values that do not contain ``sep`` are left
    unchanged, and missing values stay missing.

    Args:
        df: DataFrame whose columns are rewritten in place.
        columns: Iterable of column names to clean. Every column must hold
            strings, because the pandas ``.str`` accessor is used.
        sep: Separator between the code and the label.
    """
    for column in columns:
        # n=1 splits on the first separator only, so labels that contain
        # the separator themselves are not truncated; get(-1) picks the
        # label when a prefix exists and the whole value when it does not.
        df[column] = df[column].str.split(sep, n=1).str.get(-1)


# Columns whose values carry a numeric code prefix such as "1:" or "11:".
coded_columns = [
    'Type of water point',
    'Water point Functionality',
    'WP_suitability',
    'Ownership',
    'Maintenance in charge',
    'WASH management committee?',
]

strip_code_prefix(wp_clean, coded_columns)
1条答案
按热度按时间7cjasjjr1#
将 `.apply` 与 `lambda` 函数配合使用,可以把同一个函数应用到多个列上。结果: