R语言 列名中包含日期的条件求和

owfi6suc  于 2023-04-03  发布在  其他
关注(0)|答案(3)|浏览(84)

我想根据r中指定的日期计算条件和。我的示例df是

start_date = c("7/24/2017", "7/1/2017", "7/25/2017")
end_date   = c("7/27/2017", "7/4/2017", "7/28/2017")
`7/23/2017` = c(1,5,1)
`7/24/2017` = c(2,0,2)
`7/25/2017` = c(0,0,10)
`7/26/2017` = c(2,2,2)
`7/27/2017` = c(0,0,0)
df = data.frame(start_date,end_date,`7/23/2017`,`7/24/2017`,`7/25/2017`,`7/26/2017`,`7/27/2017`)

在Excel中,它看起来像:

我想根据 * 列A和B* 中指定的日期执行 * 列H* 中指定的计算,该计算是 * 列C到G* 的条件总和
显然,Excel允许列为日期,但不允许R。

8yparm6h

8yparm6h1#

#wide to long format
  dat <- reshape(df, direction="long", varying=list(names(df)[3:7]), v.names="Value", 
          idvar=c("start_date","end_date"), timevar="Date",
          times=seq(as.Date("2017/07/23"),as.Date("2017/07/27"), "day"))

  #convert from factor to date class
  dat$end_date <- as.Date(dat$end_date, format = "%m/%d/%Y")
  dat$start_date <- as.Date(dat$start_date, format = "%m/%d/%Y")

  library(dplyr)
  dat %>% group_by(start_date, end_date) %>% 
          mutate(mval = ifelse(between(Date, start_date, end_date), Value, 0)) %>% 
          summarise(conditional_sum=sum(mval)) 

# # A tibble: 3 x 3 
# # Groups:   start_date [?] 
#   start_date   end_date conditional_sum 
#       <date>     <date>           <dbl> 
# 1 2017-07-01 2017-07-04               0 
# 2 2017-07-24 2017-07-27               4 
# 3 2017-07-25 2017-07-28              12
nwlqm0z1

nwlqm0z12#

您可以通过以下方式实现:

# number of trailing columns without numeric values
c = 2

# create a separate vector with the dates
dates = as.Date(gsub("X","",tail(colnames(df),-c)),format="%m.%d.%Y")

# convert date columns in dataframe
df$start_date = as.Date(df$start_date,format="%m/%d/%Y")
df$end_date = as.Date(df$end_date,format="%m/%d/%Y")

# calculate sum
sapply(1:nrow(df),function(x) {y = df[x,(c+1):ncol(df)][dates %in% 
  seq(df$start_date[x],df$end_date[x],by="day")  ]; ifelse(length(y)>0,sum(y),0) })

退货:

[1]  4  0 12

希望这有帮助!

9ceoxa92

9ceoxa923#

以下是一个dplyr管道中的解决方案:

library(dplyr)
library(lubridate)
library(tidyr)

df %>%
  gather(date, value, -c(1, 2)) %>%
  mutate(date = gsub('X', '', date)) %>%
  mutate(date = gsub('\\.', '/', date)) %>%
  mutate(date = mdy(date)) %>%
  filter(date >= mdy(start_date) & date <=mdy(end_date)) %>%
  group_by(start_date, end_date) %>%
  summarize(Conditional_Sum = sum(value)) %>%
  right_join(df) %>%
  mutate(Conditional_Sum = ifelse(is.na(Conditional_Sum), 0, Conditional_Sum)) %>%
  select(-one_of('Conditional_Sum'), one_of('Conditional_Sum'))

##   start_date  end_date X7.23.2017 X7.24.2017 X7.25.2017 X7.26.2017 X7.27.2017 Conditional_Sum
##       <fctr>    <fctr>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>           <dbl>
## 1  7/24/2017 7/27/2017          1          2          0          2          0               4
## 2   7/1/2017  7/4/2017          5          0          0          2          0               0
## 3  7/25/2017 7/28/2017          1          2         10          2          0              12

相关问题