python 求最小迭代

olhwl3o2  于 2023-03-06  发布在  Python
关注(0)|答案(2)|浏览(86)
sales_data = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']
}

sales_data = pd.DataFrame(sales_data)

capacity_data = {
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310]
}

capacity_data = pd.DataFrame(capacity_data)

我想这样输出

output = {'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date':['2022-02-04', '2022-02-05', '2022-02-06', '2022-02-06', '2022-02-06']

我只想找到容量空闲的最近日期,如果空闲,则为下一次迭代减少一个容量。
这是我的剧本:

order_number = None
 confirmation_date = None
    
 grouped = sales_data['order_number'].unique()
   
    # Iterate over the groups and rows within each group
    for group in grouped:
        
            for order_row in range(len(capacity_data)):
                    if capacity_data['capacity'][order_row] > 0:
                        try:
                            order_number.append(group) 
                            confirmation_date.append(capacity_data['date'][order_row])
                            capacity_data['capacity'][order_row] = capacity_data['capacity'][order_row] - 1
                        except:
                             pass
                    else:
                        pass
    orderdict =  dict(zip(order_number, caonfirmation_date))

我还想问一下,是否有办法使此脚本在遍历超过100k行时更加优化

ut6juiuv

ut6juiuv1#

您可以这样做:

import pandas as pd
import numpy as np

sales_data = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']
}

sales_data = pd.DataFrame(sales_data)

capacity_data = {
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303]
}

capacity_data = pd.DataFrame(capacity_data)

confirmation_dates = []

for index, row in sales_data.iterrows():
    
    order_date = pd.to_datetime(row['order_date'])
    
    available_date = capacity_data[capacity_data['capacity'] > 0]['date'].sub(order_date).abs().idxmin()
    confirmation_dates.append(str(capacity_data.loc[available_date, 'date'].date()))
    capacity_data.loc[available_date, 'capacity'] -= 1

output = {'order_number': sales_data['order_number'].tolist(), 'confirmation_date': confirmation_dates}

print(output)

返回输出:

{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-07', '2022-02-10', '2022-02-14']}

尝试在完整数据上使用它。如果速度太慢,请按以下方式使用apply()

import pandas as pd
import numpy as np

sales_data = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']
}

sales_data = pd.DataFrame(sales_data)

capacity_data = {
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303]
}

capacity_data = pd.DataFrame(capacity_data)

def find_confirmation_date(row):
    order_date = row['order_date']
    capacity_data_copy = capacity_data.copy()  # create a copy of the DataFrame
    available_dates = capacity_data_copy[capacity_data_copy['capacity'] >= 0]['date']
    closest_date = available_dates.iloc[(available_dates - pd.to_datetime(order_date)).abs().argsort()[0]]
    capacity_data_copy.loc[capacity_data_copy['date'] == closest_date, 'capacity'] -= 1
    return closest_date.strftime('%Y-%m-%d')

output = sales_data.apply(find_confirmation_date, axis=1)
output_dict = {'order_number': sales_data['order_number'].to_list(), 'confirmation_date': output.tolist()}

这就给了你

{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']}
sbtkgmzw

sbtkgmzw2#

我想你是想在订单到达并且有货的情况下确认订单,你可以按照下面的代码来实现。

sales_data = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']
}

sales_data = pd.DataFrame(sales_data)
sales_data['order_date'] = pd.to_datetime(sales_data['order_date'])

capacity_data = {
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 200]
}

capacity_data = pd.DataFrame(capacity_data)

order_number = []
confirmation_date = []

# Iterate over the groups and rows within each group
for sales_idx in range(len(sales_data)):
    
    sales_row = sales_data.iloc[sales_idx]
    
    for capacity_idx in range(len(capacity_data)):
            
        capacity_row = capacity_data.iloc[capacity_idx]
            
        if capacity_row[0] < sales_row[1] or capacity_row[1]<=0:
            continue
        
        order_number.append(sales_row[0])
        confirmation_date.append(capacity_row[0])
        
        capacity_data._set_value(capacity_idx,'capacity',capacity_row[1]-1)
        break

final_data = {
    'order_number':order_number,
    'confirm_date':confirmation_date
}

final_data = pd.DataFrame(final_data)

print(final_data)

相关问题