如何将每组的系数相乘,然后计算R中原始值的百分比

mwkjh3gx  于 2023-05-11  发布在  其他
关注(0)|答案(3)|浏览(128)

我有一个类似下面的玩具数据集的数据集:

s=structure(list(month_id = c(202306L, 202305L, 202307L, 202305L, 
202306L, 202307L), MDM_Key = c(1L, 1L, 1L, 2L, 2L, 2L), sale_count = c(NA, 
19161L, NA, 17726L, NA, NA), iek_max_discount = c(0.5356, 0.5256, 
0.5456, 0.559, 0.569, 0.589)), class = "data.frame", row.names = c(NA, 
-6L))

另一个数据集的系数

cf=structure(list(MDM_Key = 1:2, ML.coef = c(1.46, 1.67)), class = "data.frame", row.names = c(NA, 
-2L))

iek_max_discount是百分比值,即不是0.5356,而是53.56
我需要每个mdm_key(组变量,来自cf)的每个系数乘以当前上个月(来自s数据集)的相应iek_max_discount(到相应mdm_key)值。
例如,现在的月份是五月。这意味着对于mdm_key =1,我们取系数1.46并乘以June的值(month_id=2023061,46*53,56=78)。
结果值也是百分比;现在,让我们计算初始值sale_count的78%的比例是多少(它也仅针对当月呈现)。19161/100*78=14945
然后,结果必须添加到19161和总和插入到六月。
类似地,我们为7月(month_id=202307)执行此操作;乘以系数1,46*54,56=79。计算比例:19161/100*79=15137
我们将结果值添加到19161,并将结果插入到7月份。
例如,mdm_key=1的所需输出

month_id    MDM_Key sale_count  iek_max_discount    percent prop    final
202306           1               0,5356                  78 14945   34106
202305           1  19161        0,5256         
202307           1               0,5456                  79 15137   34298

在对mdm_key=2(或任何其他mdm_key,如果有的话)执行相同的过程之后
做这样一系列算术运算的最好和最简单的方法是什么?

js4nwp54

js4nwp541#

假设每个 MDM_Key 的 * 销售_count* 总是存在第一个月

library(dplyr)

merge(s, cf) %>% 
  arrange(MDM_Key, month_id) %>% 
  mutate(ML.coef = if_else(!is.na(sale_count), NA, ML.coef), 
         percent = iek_max_discount * ML.coef * 100, 
         prop = (percent / 100) * sale_count[1], 
         final = sale_count[1] + prop, .by = MDM_Key)
  MDM_Key month_id sale_count iek_max_discount ML.coef percent     prop
1       1   202305      19161           0.5256      NA      NA       NA
2       1   202306         NA           0.5356    1.46 78.1976 14983.44
3       1   202307         NA           0.5456    1.46 79.6576 15263.19
4       2   202305      17726           0.5590      NA      NA       NA
5       2   202306         NA           0.5690    1.67 95.0230 16843.78
6       2   202307         NA           0.5890    1.67 98.3630 17435.83
     final
1       NA
2 34144.44
3 34424.19
4       NA
5 34569.78
6 35161.83
axkjgtzd

axkjgtzd2#

library(dplyr)

cur_month <- as.integer(202305)

s %>% 
  right_join(cf, by = "MDM_Key") %>% 
  mutate(percent = floor(if_else(month_id == cur_month, 
                           NA, ML.coef * iek_max_discount * 100)),
         prop = floor(percent * sale_count[month_id == cur_month]/100),
         final = sale_count[month_id == cur_month] + prop, 
         .by = MDM_Key)
#>   month_id MDM_Key sale_count iek_max_discount ML.coef percent  prop final
#> 1   202306       1         NA           0.5356    1.46      78 14945 34106
#> 2   202305       1      19161           0.5256    1.46      NA    NA    NA
#> 3   202307       1         NA           0.5456    1.46      79 15137 34298
#> 4   202305       2      17726           0.5590    1.67      NA    NA    NA
#> 5   202306       2         NA           0.5690    1.67      95 16839 34565
#> 6   202307       2         NA           0.5890    1.67      98 17371 35097
rn0zuynd

rn0zuynd3#

我有点难以理解你想做什么。以下是我最好的猜测:

# merge them together to have the key in the same row
scf <- merge(s, cf, by = "MDM_Key")

# split into lists by each unique MDM_Key
s_by_key <- split(scf, s$MDM_Key)

# loop through each element of the list
for(i in 1:length(s_by_key)){

  # trunc to remove the decimals
  s_by_key[[i]]$percent <- trunc(s_by_key[[i]]$iek_max_discount *  s_by_key[[i]]$ML.coef * 100)
  
  # get the most recent sales month
  months_with_sales <- which(!is.na(s_by_key[[i]]$sale_count))
  latest_month_with_sales <- which.max(s_by_key[[i]]$month_id[months_with_sales])
  
  last_sales <- s_by_key[[i]][latest_month_with_sales, "sale_count"]
  s_by_key[[i]]$prop <- trunc(last_sales / 100 * s_by_key[[i]]$percent)
  s_by_key[[i]]$final <- last_sales + s_by_key[[i]]$prop
  
  # and set those rows to NA to be like the desired output
  s_by_key[[i]][latest_month_with_sales, c("percent","prop","final")] <- NA
  
}

output <- do.call(rbind, s_by_key)
output
#    MDM_Key month_id sale_count iek_max_discount ML.coef percent  prop final
#1.1       1   202306         NA           0.5356    1.46      78 14945 34106
#1.2       1   202305      19161           0.5256    1.46      NA    NA    NA
#1.3       1   202307         NA           0.5456    1.46      79 15137 34298
#2.4       2   202305      17726           0.5590    1.67      NA    NA    NA
#2.5       2   202306         NA           0.5690    1.67      95 16839 34565
#2.6       2   202307         NA           0.5890    1.67      98 17371 35097

相关问题