是否有一个R函数可以删除行和列以生成对称方阵?

kfgdxczn  于 2023-07-31  发布在  其他
关注(0)|答案(2)|浏览(94)

我为每位参与者都准备了一个网络矩阵,分别保存在各自的Excel工作表中。然而,这些矩阵的格式不正确,因为它们目前是不对称的(行数与列数不一致)。
是否可以删除多余的行和列,从而得到行数与列数相同的对称方阵?也就是说,现在的数据是这样的:

# Example network matrix as imported from Excel: one row per receiver and one
# column per sender. Sender8 to Sender10 contain no data at all (all NA), so
# the table has more receiver rows than usable sender columns.
df <- data.frame(
  Receiver = paste0("Receiver", 1:10),
  Sender1  = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0),
  Sender2  = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  Sender3  = rep(0, 10),
  Sender4  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
  Sender5  = c(1, 0, 1, 0, 0, 1, 0, 0, 0, 0),
  Sender6  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
  Sender7  = c(0, 0, 1, 0, 1, 0, 0, 0, 0, 0),
  Sender8  = rep(NA, 10),
  Sender9  = rep(NA, 10),
  Sender10 = rep(NA, 10)
)

字符串
这就是我在寻找的:

# Desired result: the all-NA sender columns are gone and only the receivers
# that still have a matching sender column remain (7 rows, 7 sender columns).
df_clean <- data.frame(
  Receiver = paste0("Receiver", 1:7),
  Sender1  = c(0, 1, 0, 0, 0, 1, 0),
  Sender2  = c(0, 0, 0, 1, 0, 0, 0),
  Sender3  = rep(0, 7),
  Sender4  = c(0, 1, 0, 0, 0, 0, 1),
  Sender5  = c(1, 0, 1, 0, 0, 1, 0),
  Sender6  = c(0, 1, 0, 0, 0, 0, 1),
  Sender7  = c(0, 0, 1, 0, 1, 0, 0)
)


1.我能否编写一个for循环,对导入到R中的每位参与者各自的Excel工作表都执行这一操作?
我有200名参与者的数据需要这样处理,所以我想找一种自动化的方法,而不是一个一个地手动处理。
提前感谢!

wfauudbj

wfauudbj1#

# Drop every column that consists solely of NA (senders without any data).
df1 <- df[!vapply(df, function(col) all(is.na(col)), logical(1))]

# Keep only the receivers whose number matches one of the remaining sender
# columns ("SenderN" -> "ReceiverN"); the first column (Receiver) is skipped.
df1[df1$Receiver %in% sub("Sender", "Receiver", names(df1)[-1]), , drop = FALSE]

   Receiver Sender1 Sender2 Sender3 Sender4 Sender5 Sender6 Sender7
1 Receiver1       0       0       0       0       1       0       0
2 Receiver2       1       0       0       1       0       1       0
3 Receiver3       0       0       0       0       1       0       1
4 Receiver4       0       1       0       0       0       0       0
5 Receiver5       0       0       0       0       0       0       1
6 Receiver6       1       0       0       0       1       0       0
7 Receiver7       0       0       0       1       0       1       0

字符串

yhuiod9q

yhuiod9q2#

如果我理解正确的话,这个问题有很多可能的解决方法(例如:这里有一些删除“所有NA”列的选项:Remove columns from dataframe where ALL values are NA)。
使用dplyr包中的函数的一个潜在解决方案是:

library(dplyr)

# Step 1: keep only the columns that hold at least one non-missing value.
# Step 2: keep the first (number of remaining columns - 1) rows, so the row
#         count equals the number of sender columns left (the Receiver column
#         accounts for the "- 1").
df_clean <- df %>%
  select(where(function(col) !all(is.na(col)))) %>%
  slice_head(n = ncol(.) - 1)

字符串
至于循环,你可以把这些数据框(data frame)放进一个列表,然后逐个遍历处理,例如:
示例数据:

# First example participant: Sender8 to Sender10 are entirely NA, every other
# sender column is fully populated.
df1 <- data.frame(
  Receiver = paste0("Receiver", 1:10),
  Sender1  = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0),
  Sender2  = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  Sender3  = rep(0, 10),
  Sender4  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
  Sender5  = c(1, 0, 1, 0, 0, 1, 0, 0, 0, 0),
  Sender6  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
  Sender7  = c(0, 0, 1, 0, 1, 0, 0, 0, 0, 0),
  Sender8  = rep(NA, 10),
  Sender9  = rep(NA, 10),
  Sender10 = rep(NA, 10)
)

# Second example participant, deliberately different from df1: the last row of
# Sender1 to Sender6 is NA, and Sender7 to Sender10 are entirely NA.
df2 <- data.frame(
  Receiver = paste0("Receiver", 1:10),
  Sender1  = c(0, 1, 0, 0, 0, 1, 0, 0, 0, NA),
  Sender2  = c(0, 0, 0, 1, 0, 0, 0, 0, 0, NA),
  Sender3  = c(rep(0, 9), NA),
  Sender4  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, NA),
  Sender5  = c(1, 0, 1, 0, 0, 1, 0, 0, 0, NA),
  Sender6  = c(0, 1, 0, 0, 0, 0, 1, 0, 0, NA),
  Sender7  = rep(NA, 10),
  Sender8  = rep(NA, 10),
  Sender9  = rep(NA, 10),
  Sender10 = rep(NA, 10)
)


for循环代码示例:

# Spell the argument out in full: `warn = FALSE` only worked through partial
# argument matching of `warn.conflicts`.
library(dplyr, warn.conflicts = FALSE)

# Collate all dataframes you want to 'clean' into a named list: every object
# in the global environment whose name starts with "df" and that is a data
# frame. ls() returns the names sorted, so the list order is deterministic.
# NOTE: any other data frame named "df..." in the workspace is picked up too.
list_of_dfs <- Filter(
  is.data.frame,
  mget(grep("^df", ls(envir = .GlobalEnv), value = TRUE), envir = .GlobalEnv)
)

# Clean every data frame: drop the columns that are entirely NA, then keep the
# first (remaining columns - 1) rows so that the number of receiver rows
# equals the number of sender columns. lapply() builds the result list in one
# step instead of growing it inside a for-loop.
clean_dfs <- lapply(list_of_dfs, function(network_df) {
  network_df %>%
    select(where(function(col) !all(is.na(col)))) %>%
    slice_head(n = ncol(.) - 1)
})

# Name each cleaned dataframe with the prefix "clean_"
names(clean_dfs) <- paste0("clean_", names(list_of_dfs))

# Export them to your work space
list2env(clean_dfs, envir = .GlobalEnv)
#> <environment: R_GlobalEnv>

# View them
print(clean_df1)
#>    Receiver Sender1 Sender2 Sender3 Sender4 Sender5 Sender6 Sender7
#> 1 Receiver1       0       0       0       0       1       0       0
#> 2 Receiver2       1       0       0       1       0       1       0
#> 3 Receiver3       0       0       0       0       1       0       1
#> 4 Receiver4       0       1       0       0       0       0       0
#> 5 Receiver5       0       0       0       0       0       0       1
#> 6 Receiver6       1       0       0       0       1       0       0
#> 7 Receiver7       0       0       0       1       0       1       0
print(clean_df2)
#>    Receiver Sender1 Sender2 Sender3 Sender4 Sender5 Sender6
#> 1 Receiver1       0       0       0       0       1       0
#> 2 Receiver2       1       0       0       1       0       1
#> 3 Receiver3       0       0       0       0       1       0
#> 4 Receiver4       0       1       0       0       0       0
#> 5 Receiver5       0       0       0       0       0       0
#> 6 Receiver6       1       0       0       0       1       0


创建于2023-07-28使用reprex v2.0.2

编辑

我刚刚重新读了一遍你的问题,看到你需要这样处理200次;如果你有200个单独的xlsx文件(每个文件只有1个工作表),你可以逐个读取工作表,“清理”之后再把它写成csv文件:

library(tidyverse)
library(readxl)
library(fs)

# Locate every Excel workbook in the folder. The resulting vector is used both
# as the list of files to read and (via its names) as the output file stem.
path <- dir_ls(path = "~/Desktop", glob = "*.xlsx")

# Read each workbook. Both blank cells and cells holding the literal text "NA"
# are imported as real missing values, so empty sender columns can be detected
# with is.na() no matter how the sheet encodes them.
list_of_dfs <- map(path, ~ read_excel(.x, na = c("", "NA")))

for (i in seq_along(list_of_dfs)) {
  list_of_dfs[[i]] %>%
    # Drop columns that are entirely missing. (Comparing against the string
    # "NA" yields NA for truly missing cells, which where() rejects.)
    select(where(~ !all(is.na(.)))) %>%
    # Keep as many rows as there are sender columns left (all columns minus
    # the Receiver column).
    slice_head(n = ncol(.) - 1) %>%
    # Written next to the source file as "<original name>.xlsx.csv".
    write_csv(file = paste0(names(list_of_dfs)[[i]], ".csv"))
}

dir_ls(path = "~/Desktop/", glob = "*.xlsx.csv")
#> /Users/jared/Desktop/df1.xlsx.csv /Users/jared/Desktop/df2.xlsx.csv


创建于2023-07-28使用reprex v2.0.2
不确定这对你的实际用例是否有帮助,但我还是把它写了出来,以防对你有用。祝你好运!

相关问题