创建一个新的 Dataframe,其中包含另一个 Dataframe 每一列的 class()

fykwrbwg  于 2023-01-28  发布在  其他
关注(0)|答案(2)|浏览(112)

我有下面的示例数据框,我想使用R创建一个新的数据框,它有2列。第一列将命名为"Name",并将包括数据集的所有列名。数据集每次都不同,因此列数可能会有所不同。第二列将命名为"Class",并将包括每列的class()

# Example data from the question (appears to be dput() output). The answers
# below refer to this object as `df`, so assign the expression to `df` before
# running them. Besides the usual data-frame classes it carries the classes
# "eventlog" and "log" plus several *_id attributes naming key columns.
structure(list(case_id = c("3397364", "3397364"), action = c("3397364-RAAMELK", 
"3397364-RAAMELK"), resource = c("RAAMELK", "RAAMELK"), lifecycle = c(1, 
1), registration_type = structure(1:2, .Label = c("start", "complete"
), class = "factor"), timestamp = structure(c(1667523600, 1667531220
), tzone = "UTC", class = c("POSIXct", "POSIXt")), activity = c("RAAMELK", 
"RAAMELK"), activity_description = c("Forbrukt r<e5>melk", "Forbrukt r<e5>melk"
), ...9 = c(NA, NA), product = c("K101152", "K101152"), product_type_text = c("200100 - Milk", 
"200100 - Milk"), qty = c(NA, 31), in_out = c("in", "out"), qty_scrap = c(NA_real_, 
NA_real_), `FP ordre` = c(NA_character_, NA_character_), Artikkeltype = c("SF", 
"SF"), .order = 1:2), row.names = c(NA, -2L), class = c("eventlog", 
"log", "tbl_df", "tbl", "data.frame"), case_id = "case_id", activity_id = "activity", activity_instance_id = "action", lifecycle_id = "registration_type", resource_id = "resource", timestamp = "timestamp")
rmbxnbpk

rmbxnbpk1#

将 tibble()(因为数据本身是 tbl_df)与 sapply 这类逐列应用函数配合使用。由于某些列的 class() 会返回多个字符串,因此需要用 paste 将它们合并为一个字符串

library(tibble)

# Build a two-column tibble with one row per column of `df`:
#   Name  - the column name
#   Class - the column's class(); multi-class columns (e.g. POSIXct/POSIXt)
#           are collapsed into one comma-separated string so that every
#           column yields exactly one value.
# vapply() with a character(1) template is used instead of sapply() so the
# result is always a character vector: sapply() returns an empty list for a
# data frame with no columns, which would turn `Class` into a list column.
tibble(
  Name = colnames(df),
  Class = vapply(
    df,
    function(x) paste(class(x), collapse = ", "),
    character(1)
  )
)
# A tibble: 17 × 2
   Name                 Class          
   <chr>                <chr>          
 1 case_id              character      
 2 action               character      
 3 resource             character      
 4 lifecycle            numeric        
 5 registration_type    factor         
 6 timestamp            POSIXct, POSIXt
 7 activity             character      
 8 activity_description character      
 9 ...9                 logical        
10 product              character      
11 product_type_text    character      
12 qty                  numeric        
13 in_out               character      
14 qty_scrap            numeric        
15 FP ordre             character      
16 Artikkeltype         character      
17 .order               integer
qxsslcnc

qxsslcnc2#

# Long-format class table: stack() turns the named list of class vectors into
# a data frame with columns `values` (the class strings) and `ind` (the source
# column name). A column with several classes therefore produces several rows.
# The two columns are put in name-first order and renamed in one step.
dat <- setNames(
  stack(lapply(df, class))[c("ind", "values")],
  c("Name", "Class")
)

> dat
#                    Name     Class
# 1               case_id character
# 2                action character
# 3              resource character
# 4             lifecycle   numeric
# 5     registration_type    factor
# 6             timestamp   POSIXct
# 7             timestamp    POSIXt
# 8              activity character
# 9  activity_description character
# 10                 ...9   logical
# 11              product character
# 12    product_type_text character
# 13                  qty   numeric
# 14               in_out character
# 15            qty_scrap   numeric
# 16             FP ordre character
# 17         Artikkeltype character
# 18               .order   integer

您也可以直接调用 summary.default,作为一种快速查看的方法:

# Calling summary.default() explicitly bypasses the data-frame summary method
# and prints one row per column with its Length, Class and Mode. As the output
# below shows, only the first class of a multi-class column is reported
# (timestamp -> POSIXct) and columns without a class attribute show "-none-".
summary.default(df)
#                      Length Class   Mode     
# case_id              2      -none-  character
# action               2      -none-  character
# resource             2      -none-  character
# lifecycle            2      -none-  numeric  
# registration_type    2      factor  numeric  
# timestamp            2      POSIXct numeric  
# activity             2      -none-  character
# activity_description 2      -none-  character
# ...9                 2      -none-  logical  
# product              2      -none-  character
# product_type_text    2      -none-  character
# qty                  2      -none-  numeric  
# in_out               2      -none-  character
# qty_scrap            2      -none-  numeric  
# FP ordre             2      -none-  character
# Artikkeltype         2      -none-  character
# .order               2      -none-  numeric

相关问题