R中的For循环提取单行中唯一引用的所有行值

2g32fytz  于 2023-06-19  发布在  其他
关注(0)|答案(1)|浏览(84)

我有一个数据集,有5列(Ref,A,B,C,D)和2行(1,2),但在每行2下有多行从0到n值为这4列的每一行。我用R写了一个for循环代码来提取所有行的值并将它们粘贴到行上。但是我的代码跳过了第0行的值。
Excel data entry formatDataset
文件(数据集、输出、支持文件)可在此处找到:[ https://drive.google.com/drive/folders/1wfBs9oTpHuxhLeXvjLp_sLnvnZSO7Bk9?usp=sharing ]
任何帮助将不胜感激。谢谢你

library(dplyr)

# Import the data from the CSV file
data <- read.csv("/file path/data.csv")

EUR <- data.frame(Ref = data$Value[data$Column == "A" & data$Row == '1'])

Investment_strategies <- data[data$Row == '2', ]
Investment_strategies <- Investment_strategies[Investment_strategies$Line >= 0, ]
line_count <- length(Investment_strategies[, "Row"])

line_num <- 0
for (line in seq_len(line_count)) {
  col_suffix <- paste0("_", line_num)

temp_col_A <- Investment_strategies[Investment_strategies$Column == "A" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, ][match(EUR[,"Ref"], 
              Investment_strategies[Investment_strategies$Column=="A" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, 'Ref']), 'Value']
  
temp_col_B <- Investment_strategies[Investment_strategies$Column == "B" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, ][match(EUR[,"Ref"], 
              Investment_strategies[Investment_strategies$Column=="B" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, 'Ref']), 'Value']
  
temp_col_C <- Investment_strategies[Investment_strategies$Column == "C" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, ][match(EUR[,"Ref"], 
              Investment_strategies[Investment_strategies$Column=="C" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, 'Ref']), 'Value']
  
temp_col_D <- Investment_strategies[Investment_strategies$Column == "D" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, ][match(EUR[,"Ref"], 
              Investment_strategies[Investment_strategies$Column=="D" & Investment_strategies$Row=='2'& Investment_strategies$Line == line, 'Ref']), 'Value']

  if (any(!is.na(temp_col_A)) || any(!is.na(temp_col_B)) || any(!is.na(temp_col_C)) || any(!is.na(temp_col_D))) {
    EUR[, paste0("Strategy", col_suffix)] <- temp_col_A
    EUR[, paste0("Primary", col_suffix)] <- temp_col_B
    EUR[, paste0("Value", col_suffix)] <- temp_col_C
    EUR[, paste0("Other Strategy", col_suffix)] <- temp_col_D
    
    line_num <- line_num + 1
  }
  
  if (line_num > line_count) {
    break
  }
}

write.csv(EUR, "/file path/output.csv", row.names = FALSE)
jv4diomz

jv4diomz1#

library(dplyr)

# Import the data from the CSV file
data <- read.csv("/file path/data.csv")

EUR <- data.frame(Ref = data$Value[data$Column == "A" & data$Row == '1'])

Investment_strategies <- data[data$Row == '2', ]
Investment_strategies <- Investment_strategies[Investment_strategies$Line >= 0, ]
line_count <- length(Investment_strategies[, "Row"])

line_num <- 0
temp_col_A <- vector("list", line_count)
temp_col_B <- vector("list", line_count)

for (line in seq_len(line_count)) {
  col_suffix <- paste0("_", line_num)

  temp_col_A[[line]] <- Investment_strategies[Investment_strategies$Column == "A" &
                                             Investment_strategies$Row == '2' &
                                             Investment_strategies$Line == line, ] %>%
    filter(Ref %in% EUR$Ref) %>%
    select(Value)

  temp_col_B[[line]] <- Investment_strategies[Investment_strategies$Column == "B" &
                                             Investment_strategies$Row == '2' &
                                             Investment_strategies$Line == line, ] %>%
    filter(Ref %in% EUR$Ref) %>%
    select(Value)

  line_num <- line_num + 1
}

# Combine the values from each line into a single row
combined_values_A <- do.call(cbind, temp_col_A)
combined_values_B <- do.call(cbind, temp_col_B)

# Rename the columns
colnames(combined_values_A) <- paste0("A", 1:line_count)
colnames(combined_values_B) <- paste0("B", 1:line_count)

# Combine the extracted values with the EUR data
result <- cbind(EUR, combined_values_A, combined_values_B)

更新后的代码创建了两个列表temp_col_A和temp_col_B,分别为每行存储A列和B列的值。代码遍历每一行,根据条件筛选数据,并将值存储在相应的列表中。最后,提取的值被合并成一行,列名称附加行号,然后与EUR数据合并。

相关问题