如何解决在循环中使用自定义函数时Pandas dataframe中的“KeyError”问题?

xwbd5t1u  于 2023-06-04  发布在  其他
关注(0)|答案(1)|浏览(181)

无法找出在循环中使用的已定义函数的问题。
我正在尝试开发的程序是关于供应链管理的,涉及计划和管理交付和仓库容量。我写了一个在主代码之外工作的函数,但是当我试图在主过程中使用它时,它会遇到错误。假设有一个名为df的pandas dataframe,如下所示:

df = 
{'Name': {0: 'a', 1: 'c', 2: 'j', 3: 'd', 4: 'e'},
 'Type': {0: 1, 1: 1, 2: 1, 3: 2, 4: 2},
 'Number of Beams': {0: 60, 1: 60, 2: 60, 3: 60, 4: 60},
 'Number of Columns': {0: 25, 1: 25, 2: 25, 3: 25, 4: 25},
 'Total Weight': {0: 120, 1: 125, 2: 130, 3: 145, 4: 145},
 'Warehouse1 Distance to Site Location': {0: 968, 1: 447, 2: 580, 3: 245, 4: 100},
 'Warehouse2 Distance to Site Location': {0: 220, 1: 513, 2: 123, 3: 35, 4: 940},
 'Warehouse3 Distance to Site Location': {0: 215, 1: 617, 2: 319, 3: 175, 4: 228},
 'Distance to Site Location': {0: 215, 1: 447, 2: 123, 3: 35, 4: 100},
 'Date of Registeration': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0},
 'Earliest Time to Deliver': {0: 8, 1: 9, 2: 7, 3: 8, 4: 8},
 'Latest Time to Deliver': {0: 10, 1: 10, 2: 11, 3: 12, 4: 9},
 'Frame Cost': {0: 3720, 1: 3875, 2: 4030, 3: 4495, 4: 4495},
 'Transportation Cost': {0: 516, 1: 1117.5, 2: 319.8, 3: 101.5, 4: 290},
 'Date of Delivery': {0: 8, 1: 9, 2: 7, 3: 8, 4: 8}}

有一些变量需要定义如下:

current_day = 0
Charge = [250, 130, 140, 200]
Capacities = {1:{"Warehouse1":250, "Warehouse2":250, "Warehouse3":250}, 
              2:{"Warehouse1":130, "Warehouse2":130, "Warehouse3":130}, 
              3:{"Warehouse1":140, "Warehouse2":140, "Warehouse3":140},
              4:{"Warehouse1":200, "Warehouse2":200, "Warehouse3":200}}
Daily_Capacities = []
Indicator = 0

我之前提到的函数是:

def Select_Warhouse(df, li):
    # Initialize variables
    min_col = None
    min_val = float('inf')
    results = []

    # Loop over dictionaries
    for dict_item in li:
        for key in dict_item.keys():
            # Check if key is a partial match with any column name
            matching_col = [col for col in df.columns if key in col]
            if matching_col:
                # Retrieve column name and value
                col_name = matching_col[0]
                col_val = df.loc[0, col_name]
                results.append((col_name, col_val))
                # Update minimum value if necessary
                if col_val < min_val:
                    min_col = col_name
                    min_val = col_val
    output = [el for el in li if list(el.keys())[0] in min_col.split()[0]]
    return str(*output[0].keys())

最后,试图确定交付的可行分配计划的循环是这样的:

while len(df) > 0:
    
    if current_day % 15 == 0 and current_day != 0 and Indicator == 0:
        for i, j in zip(Capacities, Charge):
            for key in Capacities[i]:
                Capacities[i][key] = Capacities[i][key] + j
    
    Indicator = 0
    
    select = df[df['Date of Delivery'] == current_day]
    if len(select) == 0:
        Daily_Capacities.append(copy.deepcopy(Capacities))        
        current_day += 1
        continue
    
    if len(select) > 1:
        Indicator = 1
        select = select[select['Total Weight'] == select['Total Weight'].max()]
    
    if len(select) > 1:
        select = select[select['Latest Time to Deliver'] == select['Latest Time to Deliver'].min()]
    
    if len(select) > 1:
        select = select.sample()
  
    Available_Warehouses = []
    for key, value in Capacities[select['Type'].iloc[0]].items():
        if select['Total Weight'].iloc[0] <= value:
                Available_Warehouses.append({key:value})
    
    if len(Available_Warehouses) == 0:
        df.loc[select.index, 'Date of Delivery'] += 1
        if Indicator == 1:
            continue        
        Daily_Capacities.append(copy.deepcopy(Capacities))
        current_day += 1
        continue
                
    elif len(Available_Warehouses) == 1:
        Warehouse = str(*Available_Warehouses[0].keys())
        
    elif len(Available_Warehouses) > 1:
        Warehouse = Select_Warhouse(select, Available_Warehouses)
                        
    Feasible_Distribution_Plan = Feasible_Distribution_Plan.append(select.iloc[0], ignore_index= True)
    df.drop(select.index, inplace= True)
    Capacities[select['Type'].iloc[0]][Warehouse] -= select.iloc[0]['Total Weight']
    
    if Indicator == 1:
        continue
    
    for key in dailycapacities:
        Daily_Capacities.append(copy.deepcopy(Capacities))        
    current_day += 1             

Feasible_Distribution_Plan['Date of Delivery'] = Feasible_Distribution_Plan['Date of Delivery'].astype('int32')
        
Feasible_Distribution_Plan

当我运行程序时,它面临一个错误:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3801             try:
-> 3802                 return self._engine.get_loc(casted_key)
   3803             except KeyError as err:

~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-8-61768d41f965> in <module>
     41 
     42     elif len(Available_Warehouses) > 1:
---> 43         Warehouse = Select_Warhouse(select, Available_Warehouses)
     44 
     45     Feasible_Distribution_Plan = Feasible_Distribution_Plan.append(select.iloc[0], ignore_index= True)

<ipython-input-7-54081f8dbf92> in Select_Warhouse(df, li)
     13                 # Retrieve column name and value
     14                 col_name = matching_col[0]
---> 15                 col_val = df.loc[0, col_name]
     16                 results.append((col_name, col_val))
     17                 # Update minimum value if necessary

~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
   1064             key = tuple(com.apply_if_callable(x, self.obj) for x in key)
   1065             if self._is_scalar_access(key):
-> 1066                 return self.obj._get_value(*key, takeable=self._takeable)
   1067             return self._getitem_tuple(key)
   1068         else:

~\anaconda3\lib\site-packages\pandas\core\frame.py in _get_value(self, index, col, takeable)
   3922             #  results if our categories are integers that dont match our codes
   3923             # IntervalIndex: IntervalTree has no get_loc
-> 3924             row = self.index.get_loc(index)
   3925             return series._values[row]
   3926 

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3802                 return self._engine.get_loc(casted_key)
   3803             except KeyError as err:
-> 3804                 raise KeyError(key) from err
   3805             except TypeError:
   3806                 # If we have a listlike key, _check_indexing_error will raise

KeyError: 0

我对python有点陌生,不能解决这个问题。函数在循环外工作,但在循环内不工作。你知道吗?ps:如果你对程序逻辑有任何疑问,请随时提出。

carvr3hs

carvr3hs1#

当你在循环中使用func时,你试图访问索引= 0的df.loc[0, col_name],它可能不在循环中传递的 Dataframe 中。为了解决这个问题,你应该将定义函数中的df.loc[0, col_name]行改为df.iloc[0][col_name]。此外,在你的代码中,没有dailycapacities,但你在循环的最后一个for中使用了它。那个也需要修理一下

相关问题