pandas 处理 Excel 文件的 Python 问题:各列没有并排写入

tf7tbtn2  于 2023-05-05  发布在  Python


def import_files():
    """Copy the "TT Matrix" data of every input workbook into the template, side by side.

    Reads the "TT Matrix" sheet of each path in the module-level
    ``excelfiles``, drops the first three and the last four columns plus any
    column that is entirely empty, and stores the resulting frames in the
    module-level ``dfs2``.  The frames are then concatenated column-wise and
    written to the "TT Matrix" sheet of ``template_wb`` starting at column D,
    row 1, so each file's columns sit next to the previous file's columns
    instead of below them.

    Does nothing beyond resetting ``dfs2`` when ``excelfiles`` is empty.
    """
    template_sheet2 = template_wb["TT Matrix"]
    global dfs2
    dfs2 = []
    for excelfile in excelfiles:
        # With header=None the column labels are the non-negative positions
        # 0..n-1, so a test against range(-4, 0) can never match; the last
        # four columns are dropped positionally below instead.
        df2 = pd.read_excel(
            excelfile,
            sheet_name="TT Matrix",
            header=None,
            usecols=lambda col: col not in range(3),
        )
        df = df2.iloc[:, :-4]  # drop last 4 columns
        df = df.loc[:, df.notna().any()]  # only select columns with non-null values
        dfs2.append(df)

    # pd.concat raises ValueError on an empty list, so stop early.
    if not dfs2:
        return

    # axis=1 places the frames next to each other; ignore_index renumbers the
    # combined columns 0..n-1 so labels from different files cannot collide.
    combined_df2 = pd.concat(dfs2, ignore_index=True, axis=1)

    start_col = 4  # starting from column D
    col_letters = [
        get_column_letter(j + start_col) for j in range(combined_df2.shape[1])
    ]

    # Write the combined frame row by row.  Every file shares the same sheet
    # rows, which is what keeps the output side by side instead of stacked.
    rows = combined_df2.itertuples(index=False, name=None)
    for row_index, row in enumerate(rows, start=1):
        for col_letter, value in zip(col_letters, row):
            # Skip missing values (including the padding concat adds when the
            # files have different row counts) and leave the template cell as is.
            if pd.isna(value):
                continue
            template_sheet2[col_letter + str(row_index)].value = value

实际代码更长,这里只贴出了与问题相关的部分,其余代码不会影响此工作表。我需要把所有文件的列并排写在前 28 行中。我已经尝试在 `df = df.loc[:, df.notna().any()]` 中添加 `axis=1`,但这不起作用,并且会产生一个与布尔值有关的错误。(原帖此处附有两张图片:"This is the desired output" 和 "This is the actual output"。)我该如何解决这个问题?谢谢!



TL;DR:使用 `combined_df = pd.concat(dfs, axis=1)`,即沿列方向(axis=1)拼接,使各个 DataFrame 并排排列,而不是上下堆叠。

import pandas as pd

# How many workbooks the example combines
n_files = 5

# Stand-in file names; substitute the real locations of your workbooks
paths = [f'path_to_file{file_no}.xlsx' for file_no in range(n_files)]

# Read every workbook into its own DataFrame, keeping the order of `paths`
dfs = list(map(pd.read_excel, paths))


import numpy as np

m_rows = 28

# Random generator with a fixed seed so the example is reproducible
rng = np.random.default_rng(seed=42)

# Column labels for three example frames of different widths (3, 2 and 4 columns)
columns = [
    ['A', 'B', 'C'],
    ['E', 'F'],
    ['G', 'H', 'I', 'J'],
]

# One DataFrame of random data per label list, each with m_rows rows
dfs = [pd.DataFrame(rng.random((m_rows, len(cols))), columns=cols) for cols in columns]


# Place the frames side by side: axis=1 concatenates along the columns
combined_df = pd.concat(dfs, axis=1)
