# Split a string scalar into chunks that each hold `n` "counted" characters.
#
# Only characters matching `consider_elements_pattern` count towards the
# chunk size. Characters that do not match are kept in the output but are
# not counted:
#   * those lying between two counted characters of one chunk stay in it;
#   * those following a chunk's last counted character are placed at the
#     start of the next chunk;
#   * those before the first counted character or after the last counted
#     character of the whole string are not returned.
#
# Arguments:
#   s => character scalar; the string to split.
#   n => numeric scalar greater than zero; the number of counted characters
#        per chunk (intended to be a whole number; the final chunk holds
#        whatever counted characters remain).
#   consider_elements_pattern => regular expression handed to grepl() and
#        tested against every single character of `s`.
#
# Returns: unnamed character vector with one element per chunk, or
# character(0) when no character of `s` matches the pattern.
#
# Errors: stops when `s` is not a character scalar, or when `n` is not a
# single, non-missing number greater than zero.
split_string_to_vec <- function(s, n,
                                consider_elements_pattern = "[[:alpha:]]") {
  # Ensure s is a character scalar:
  stopifnot(is.character(s), length(s) == 1L)
  # Ensure n is a usable group size. A missing, zero or negative n would
  # otherwise yield dropped, merged or reordered groups without any error:
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n > 0)

  # Split the string into single characters: chars => character vector
  chars <- strsplit(s, "")[[1L]]

  # Positions of the characters that count towards the chunk size:
  # counted_pos => integer vector (strictly increasing)
  counted_pos <- which(grepl(consider_elements_pattern, chars))

  # Nothing to count means there are no chunks at all:
  if (length(counted_pos) == 0L) {
    return(character(0))
  }

  # Group number of every counted character: group_id => numeric vector
  # (non-decreasing, because n > 0)
  group_id <- ceiling(seq_along(counted_pos) / n)

  # Each chunk ends at the last counted character of its group:
  # chunk_end => integer vector
  chunk_end <- counted_pos[!duplicated(group_id, fromLast = TRUE)]

  # The first chunk starts at the first counted character; every later
  # chunk starts right after the previous chunk's end, which is how
  # uncounted characters between groups land at the front of the next chunk:
  # chunk_start => integer vector
  chunk_start <- c(counted_pos[1L], chunk_end[-length(chunk_end)] + 1L)

  # Glue the characters of every chunk back together:
  # character vector => env
  vapply(
    seq_along(chunk_end),
    function(i) {
      paste0(chars[chunk_start[i]:chunk_end[i]], collapse = "")
    },
    character(1)
  )
}
# Example run ----
# Demo string: letters interleaved with "%" characters.
# s => character scalar
s <- "QW%ERT%ZU%I%O%P"
# Number of pattern-matching characters wanted per chunk.
# n => numeric scalar
n <- 3
# Call the splitter on the demo input, counting alphabetic characters only;
# at top level the returned character vector is auto-printed to the console.
split_string_to_vec(
  s = s,
  n = n,
  consider_elements_pattern = "[[:alpha:]]"
)
2条答案
按热度按时间pgvzfuti1#
用 `regmatches`(而不是 `strsplit`)怎么样?或者用 `tapply` + `strsplit`:
它给出了
bwitn5fc2#
比上面的简洁得多,但基本解决方案都是一样的: