R语言 方便地移动列

uqdfh47h  于 2023-01-06  发布在  其他
关注(0)|答案(6)|浏览(138)

关于如何将列移到第一个或最后一个位置,有很棒的问题和答案。
使用dplyr,最佳答案分别模拟为:

iris2 <- iris %>% head(2)
iris2 %>% select( Sepal.Width, everything()) # move Sepal.Width to first
#   Sepal.Width Sepal.Length Petal.Length Petal.Width Species
# 1         3.5          5.1          1.4         0.2  setosa
# 2         3.0          4.9          1.4         0.2  setosa

iris2 %>% select(-Sepal.Width, Sepal.Width) # move Sepal.Width to last
#   Sepal.Length Petal.Length Petal.Width Species Sepal.Width
# 1          5.1          1.4         0.2  setosa         3.5
# 2          4.9          1.4         0.2  setosa         3.0

然而,我没有找到任何简单的方法来移动一列之后或之前的一个给定的。
预期产出:

iris2 %>% move_at(Species, Sepal.Width, side = "before") 
#   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
# 1          5.1  setosa         3.5          1.4         0.2
# 2          4.9  setosa         3.0          1.4         0.2

iris2 %>% move_at(Species, Sepal.Width, side = "after")
#   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
# 1          5.1         3.5  setosa          1.4         0.2
# 2          4.9         3.0  setosa          1.4         0.2
1hdlvixo

1hdlvixo1#

  • 更新:使用rlang::enquo我可以使它更好,然后使用@Zsombor的答案我可以使它更短,更优雅。旧的解决方案(在基地R)在答案的结尾 *
#' Move column or selection of columns
#'
#' Column(s) described by \code{cols} are moved before (default) or after the reference 
#'   column described by \code{ref}
#'
#' @param data A \code{data.frame}
#' @param cols unquoted column name or numeric or selection of columns using a select helper
#' @param ref unquoted column name
#' @param side \code{"before"} or \code{"after"}
#'
#' @return A data.frame with reordered columns
#' @export
#'
#' @examples
#' iris2 <- head(iris,2)
#' move(iris2, Species, Sepal.Width)
#' move(iris2, Species, Sepal.Width, "after")
#' move(iris2, 5, 2)
#' move(iris2, 4:5, 2)
#' move(iris2, one_of("Sepal.Width","Species"), Sepal.Width)
#' move(iris2, starts_with("Petal"), Sepal.Width)
move <- function(data, cols, ref, side = c("before","after")){
  if(! requireNamespace("dplyr")) 
    stop("Make sure package 'dplyr' is installed to use function 'move'")
  side <- match.arg(side)
  cols <- rlang::enquo(cols)
  ref  <- rlang::enquo(ref)
  if(side == "before") 
    dplyr::select(data,1:!!ref,-!!ref,-!!cols,!!cols,dplyr::everything()) 
  else
    dplyr::select(data,1:!!ref,-!!cols,!!cols,dplyr::everything())
}

示例:

iris2 %>% move(Species, Sepal.Width)
#   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
# 1          5.1  setosa         3.5          1.4         0.2
# 2          4.9  setosa         3.0          1.4         0.2

iris2 %>% move(Species, Sepal.Width, "after")
#   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
# 1          5.1         3.5  setosa          1.4         0.2
# 2          4.9         3.0  setosa          1.4         0.2

iris2 %>% move(5, 2)
#   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
# 1          5.1  setosa         3.5          1.4         0.2
# 2          4.9  setosa         3.0          1.4         0.2

iris2 %>% move(4:5, 2)
#   Sepal.Length Petal.Width Species Sepal.Width Petal.Length
# 1          5.1         0.2  setosa         3.5          1.4
# 2          4.9         0.2  setosa         3.0          1.4

iris2 %>% move(one_of("Sepal.Width","Species"), Sepal.Width)
#   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
# 1          5.1         3.5  setosa          1.4         0.2
# 2          4.9         3.0  setosa          1.4         0.2

iris2 %>% move(starts_with("Petal"), Sepal.Width)
#   Sepal.Length Petal.Length Petal.Width Sepal.Width Species
# 1          5.1          1.4         0.2         3.5  setosa
# 2          4.9          1.4         0.2         3.0  setosa
  • 旧解决方案 *

下面是一个简单的解决方案,只使用基本R编程:

move_at <- function(data, col, ref, side = c("before","after")){
  side = match.arg(side)
  col_pos <- match(as.character(substitute(col)),names(data))
  ref_pos <- match(as.character(substitute(ref)),names(data))
  sorted_pos <- c(col_pos,ref_pos)
  if(side =="after") sorted_pos <- rev(sorted_pos)
  data[c(setdiff(seq_len(ref_pos-1),col_pos),
         sorted_pos,
         setdiff(seq_along(data),c(seq_len(ref_pos),col_pos)))]
}

iris2 %>% move_at(Species, Sepal.Width)
#   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
# 1          5.1  setosa         3.5          1.4         0.2
# 2          4.9  setosa         3.0          1.4         0.2

iris2 %>% move_at(Species, Sepal.Width, "after")
#   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
# 1          5.1         3.5  setosa          1.4         0.2
# 2          4.9         3.0  setosa          1.4         0.2
vuktfyat

vuktfyat2#

不管原始的列顺序如何,这似乎都能起作用(感谢@Moody_Mudskipper的评论):

iris %>% select(1:Sepal.Width, -Species, Species, everything()) %>% head(2)
#>   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
#> 1          5.1         3.5  setosa          1.4         0.2
#> 2          4.9         3.0  setosa          1.4         0.2
iris %>% select(1:Sepal.Width, -Sepal.Width, -Species, Species, everything()) %>% head(2)
#>   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
#> 1          5.1  setosa         3.5          1.4         0.2
#> 2          4.9  setosa         3.0          1.4         0.2
j2datikz

j2datikz3#

顺便说一句另一个解决办法是

library(tidyverse)
data(iris)

iris %>% 
  select(-Species) %>% 
  add_column(Specis = iris$Species, .before = "Petal.Length") %>% 
  head()

#>   Sepal.Length Sepal.Width Specis Petal.Length Petal.Width
#> 1          5.1         3.5 setosa          1.4         0.2
#> 2          4.9         3.0 setosa          1.4         0.2
#> 3          4.7         3.2 setosa          1.3         0.2
#> 4          4.6         3.1 setosa          1.5         0.2
#> 5          5.0         3.6 setosa          1.4         0.2
#> 6          5.4         3.9 setosa          1.7         0.4

reprex package(v0.2.0)于2018年8月31日创建。

klr1opcd

klr1opcd4#

我发现了一个有趣的函数(moveMe,由@A5C1D2H2I1M1N2O1R2T1编写),它非常适合这个问题:

source('https://raw.githubusercontent.com/mrdwab/SOfun/master/R/moveMe.R')

head(iris[ moveMe(names(iris), 'Species before Sepal.Width') ], 2)
#   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
# 1          5.1  setosa         3.5          1.4         0.2
# 2          4.9  setosa         3.0          1.4         0.2

head(iris[ moveMe(names(iris), 'Species after Sepal.Width') ], 2)
#   Sepal.Length Sepal.Width Species Petal.Length Petal.Width
# 1          5.1         3.5  setosa          1.4         0.2
# 2          4.9         3.0  setosa          1.4         0.2

它还允许更复杂的指令:

head(iris[ moveMe(names(iris), 'Species after Sepal.Width; Petal.Width first; Sepal.Length last') ], 2)
#   Petal.Width Sepal.Width Species Petal.Length Sepal.Length
# 1         0.2         3.5  setosa          1.4          5.1
# 2         0.2         3.0  setosa          1.4          4.9
6uxekuva

6uxekuva5#

为了完成这些答案,有一个名为relocate()的函数,因为dplyr 1.0.0

library(dplyr)

iris %>% 
  head(n = 2) %>%
  relocate(Species, .before = Sepal.Width)
#>   Sepal.Length Species Sepal.Width Petal.Length Petal.Width
#> 1          5.1  setosa         3.5          1.4         0.2
#> 2          4.9  setosa         3.0          1.4         0.2

创建于2022年10月18日,使用reprex v2.0.2

x6492ojm

x6492ojm6#

另一个Base R解决方案(不确定它如何与其他解决方案进行基准测试)。

# Function to move column names before or after another column name: 
# .move_vec_name_to => function 
.move_vec_name_to <- function(vec_names, move_vec_name, near_vec_name, side = c("before", "after")){
  # Resolve the side to move the col vector to: 
  # .side => character scalar
  .side <- match.arg(side)
  # Resolve the number of column vectors: n => integer scalar
  n <- length(vec_names)
  # Resolve the index of the col vector to be moved: 
  # move_vec_idx => integer scalar
  move_vec_idx <- which(vec_names == move_vec_name)
  # Resolve the index of where the col vector is to be 
  # moved to: near_vec_idx => integer scalar
  near_vec_idx <- which(vec_names == near_vec_name)
  # If we want to move something before or after and there is no need:
  if((move_vec_idx <= near_vec_idx & .side == "before") || (near_vec_idx <= move_vec_idx && .side == "after")){
    # Keep the names the same: new_col_name_vec => character vector 
    new_col_name_vec <- vec_names
    # Otherwise: 
  }else{
    # Drop the name of the vector to be moved from the col 
    # name vector: vec_wo_move_vec => character vector
    vec_wo_move_vec <- vec_names[-move_vec_idx]
    # Resolve the new column name vector: 
    # if we want to move the column before a given col vector: 
    if(.side == "before"){
      # new_col_name_vec => character vector
      new_col_name_vec <- c(
        vec_wo_move_vec[seq_len(near_vec_idx - 1)], 
        move_vec_name,
        near_vec_name, 
        vec_wo_move_vec[seq(pmin(near_vec_idx + 1, n), length(vec_names))]
      )[seq_len(n)]
      # Otherwise if we want to move it after: 
    }else{
      # new_col_name_vec => character vector
      new_col_name_vec <- c(
        vec_wo_move_vec[seq_len(pmax(near_vec_idx-2, 0))], 
        near_vec_name, 
        move_vec_name,
        vec_wo_move_vec[seq(pmax(near_vec_idx, 1), n, 1)]
      )[seq_len(n)]
    }
  }
  # Explicitly define the returned object: 
  # character vector => env
  return(new_col_name_vec)
}

# Function to move multiple vector names to a certain side of another vector name: 
# .move_vec_names_to => function 
.move_vec_names_to <- function(vec_names, move_vec_names, near_vec_name, side = c("before", "after")){
  # Resolve the side: .side => character vector
  .side <- match.arg(side)
  # Reverse the input vectors to be moved: .move_vec_names => character vector
  .move_vec_names <- if(.side == "after"){
    rev(move_vec_names)
  }else{
    move_vec_names
  }
  # Set the termination case: 
  if(length(.move_vec_names) <= 1){
    # Return vector names with columns moved: character vector => env
    return(
      .move_vec_name_to(
        vec_names, 
        .move_vec_names, 
        near_vec_name, 
        .side
      )
    )
    # Otherwise: 
  }else{
    # Apply the column movement function recursively: 
    # character vector => env
    return(
      .move_vec_name_to(
        .move_vec_name_to(
          vec_names, 
          .move_vec_names[1], 
          near_vec_name, 
          .side
        ), 
        .move_vec_names[-1], 
        near_vec_name, 
        .side
      )
    )
  }
}

# Function to move column vector before or after another column vector: 
# move_to => function
move_to <- function(df, move_vec_name, near_vec_name, side = c("before", "after")){
  # Resolve the side to move the col vector to: 
  # .side => character scalar
  .side <- match.arg(side)
  # Apply vector name move function: df => data.frame
  df <- if(length(move_vec_name) > 1){
    df[,.move_vec_names_to(colnames(df), move_vec_name, near_vec_name, .side), drop = FALSE]
  }else{
    df[,.move_vec_name_to(colnames(df), move_vec_name, near_vec_name, .side), drop = FALSE]
  }
  # Explicitly define the returned object: 
  # data.frame => env
  return(df)
}

# Function to test the move_to user defined function: 
# test_single_col_move_to => function 
test_single_col_move_to <- function(df){
  # Import required pacakage:
  library(dplyr)
  # Generate a data.frame of test cases: 
  # test_val_df => data.frame
  test_val_df <- setNames(
    expand.grid(
      names(df), 
      names(df),
      c("before", "after")
    ),
    c(
      "move_vec",
      "near_vec", 
      "side"
    )
  )
  # Convert vals to chars: test_val_df => data.frame
  test_val_df[] <- lapply(
    test_val_df, 
    as.character
  )
  # Test all vector names in iris are in the resulting df 
  # and that all names are where they are supposed to be: 
  # test_vec_names => list of boolean vectors
  test_vec_names <- lapply(
    seq_len(
      nrow(test_val_df)
    ), 
    function(i){
      # Resolve the test values: 
      move_vec <- test_val_df[i, 1, drop = TRUE] 
      near_vec <- test_val_df[i, 2, drop = TRUE] 
      side <- test_val_df[i, 3, drop = TRUE]
      # Test 1 base R functionality:
      test1 <- names(
        move_to(
          df, 
          move_vec,
          near_vec,
          side
        )
      )
      # Test 2 base R functionality: 
      test2 <- df |> move_to(move_vec, near_vec, side) |> names()
      # Test 3 dplyr functionality:
      test3 <- df %>% move_to(move_vec, near_vec, side) %>% names
      # Test 4 dply functionality: 
      test4 <- df %>% move_to(., move_vec, near_vec, side) %>% names
      # Store all tests in a list: test_list => list of character vectors
      test_list <- list(test1, test2, test3, test4)
      # list of tests: list of lists of boolean vectors => env
      list(
        # Test all names in new col vectors are in df: 
        unlist(Map(function(x){all(x %in% names(df))}, test_list)),
        # Test befores & afters: 
        unlist(Map(function(y){
          ifelse(
            side == "before", 
            which(y == move_vec) <= which(y == near_vec), 
            which(y == move_vec) >= which(y == near_vec)
          )
        },
        test_list
        )
        )
      )
    }
  )
  # Resolve if all tests have been passed:
  # tests_passed => boolean scalar
  tests_passed <- all(unlist(test_vec_names))
  # Explicitly define returned argument: 
  # boolean scalar => env
  return(tests_passed)
}

# Test any move of any column vector to anywhere on iris: 
# boolean scalar => stdout(console)
test_single_col_move_to(iris)

# Apply the function to move multiple vectors before or after another
# vector: data.frame => stdout(console)
names(iris)
move_to(
  iris, 
  c("Sepal.Width", "Petal.Length"), 
  "Species", 
  "after"
)
move_to(
  iris, 
  c("Species", "Petal.Width"), 
  "Petal.Length", 
  "before"
)

相关问题