我写了一些使用 pandas 读/写 CSV 的函数,结果我的整数不知为何变成了浮点数。原始数据:
Open,5,0,-1,-1
处理后的数据:
Open,5.0,0.0,-1.0,-1.0
我不确定是哪个函数在我不知情的情况下更改了我的数据类型。现在我的数据无法被 Elasticsearch 解析。
下面是我的代码示例,问题就出在这些函数中的某一个。
def format_location(location):
    """Normalize a location code held as a digit-only string.

    A string made of exactly three digits is wrapped as ``9<digits>2``.
    Every other value -- five-digit strings, digit strings of any other
    length, non-digit strings, empty strings, ``None`` and non-string
    objects -- is returned unchanged.

    Args:
        location: The cell value to normalize; may be of any type.

    Returns:
        The reformatted string for three-digit inputs, otherwise the
        original value.
    """
    # fullmatch, unlike match() with a trailing ``$``, rejects a trailing
    # newline, so a value such as "12\n" is not mistaken for a
    # three-character digit string.
    if (
        isinstance(location, str)
        and len(location) == 3
        and re.fullmatch(r'\d+', location)
    ):
        return f"9{location}2"
    return location
def drop_extra_column(input_directory, output_directory):
    """Copy each ``.csv`` file to *output_directory* without its 19th field.

    Every entry of *input_directory* whose suffix is ``.csv`` is read with
    the ``csv`` module and written to a file of the same name inside
    *output_directory*. Rows with more than 18 fields lose the field at
    index 18; shorter rows are copied as they are.

    A failure while handling one file is reported on stdout and the loop
    moves on to the next file.

    Args:
        input_directory: Directory object providing ``iterdir()`` whose
            entries expose ``suffix`` and ``name``.
        output_directory: Destination directory for the rewritten files.
    """
    for file in input_directory.iterdir():
        # Only CSV files are of interest; everything else is ignored.
        if file.suffix != '.csv':
            continue
        try:
            destination = os.path.join(output_directory, f"{file.name}")
            with open(file, 'r', newline='') as input_file, \
                    open(destination, 'w', newline='') as output_file:
                writer = csv.writer(output_file)
                for row in csv.reader(input_file):
                    # Index 18 is the 19th field; it exists only when the
                    # row has more than 18 fields.
                    if len(row) > 18:
                        del row[18]
                    writer.writerow(row)
        except Exception as e:
            print(f"Error processing file '{file.name}': {e}. Skipping this file.")
def process_csv_files(intermediate_directory, output_directory):
    """Rewrite each ``.csv`` file with its ``location`` column normalized.

    Every entry of *intermediate_directory* whose suffix is ``.csv`` is
    loaded with pandas, the ``location`` column (when present) is passed
    cell by cell through ``format_location``, and the result is written to
    a file of the same name inside *output_directory*.

    All cells are read as text so that values are written back exactly as
    they appeared in the input: integers are not turned into floats and
    empty cells stay empty.

    A failure while handling one file is reported on stdout and the loop
    moves on to the next file.

    Args:
        intermediate_directory: Directory object providing ``iterdir()``
            whose entries expose ``suffix`` and ``name``.
        output_directory: Destination directory; must support the ``/``
            path-joining operator.
    """
    for file in intermediate_directory.iterdir():
        # Only CSV files are of interest; everything else is ignored.
        if file.suffix != '.csv':
            continue
        try:
            # Without an explicit dtype pandas infers column types: an
            # integer column containing an empty cell becomes float64
            # (5 -> 5.0 on output) and an all-digit 'location' column
            # becomes int64, which format_location leaves untouched because
            # it only handles strings. Reading everything as str, with NaN
            # detection disabled, keeps each cell verbatim.
            df = pd.read_csv(file, dtype=str, keep_default_na=False)

            if 'location' in df.columns:
                df['location'] = df['location'].apply(format_location)

            output_path = output_directory / f"{file.name}"
            df.to_csv(output_path, index=False)
            print(f"Formatted data saved to '{output_path}'")
        except Exception as e:
            print(f"Error processing file '{file.name}': {e}. Skipping this file.")
字符串
1条答案
按热度按时间yhqotfr81#
我设法解决了这个问题:强制为那些出问题的列指定数据类型,使其不再被转换为浮点数(原回答中的代码片段在此处缺失)。