对于 `df.nodup` 中的每一列,我希望消除小于第25百分位数或大于第75百分位数的值。
# Build df.sig: the rows of df.nodup whose value lies strictly inside Tukey's
# fences (Q1 - 1.5 * IQR, Q3 + 1.5 * IQR) in EVERY column.
#
# A single logical row mask is accumulated across columns, so the filter from
# each column is combined instead of being overwritten by the next one. The
# fences are always computed from the full, unfiltered column.
keep <- rep(TRUE, nrow(df.nodup))
for (i in seq_len(ncol(df.nodup))) { # for-loop over columns
  x <- df.nodup[[i]]
  # quantile() returns a plain named vector of length 2 (25% and 75%), so it
  # is indexed with one subscript only.
  Q <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  # na.rm must match the quantile() call, otherwise IQR() stops on any NA.
  iqr <- IQR(x, na.rm = TRUE)
  lower <- Q[[1]] - 1.5 * iqr
  upper <- Q[[2]] + 1.5 * iqr
  # subset() drops rows whose condition evaluates to NA; the explicit
  # !is.na() keeps that behaviour and prevents NA entries in the mask.
  keep <- keep & !is.na(x) & x > lower & x < upper
}
df.sig <- df.nodup[keep, , drop = FALSE]
追溯:
Error in Q[, i] : incorrect number of dimensions
数据:
> dput(df.nodup[1:20,1:5])
structure(list(C_1 = c(1.02, 1.02, 0.49, 0.78, 0.66, 0.73, 0.69,
0.63, 0.71, 0.71, 0.83, 0.87, 0.79, 0.78, 0.63, 0.58, 0.78, 0.72,
0.73, 0.68), C_2 = c(0.86, 0.71, 0.48, 0.71, 0.62, 0.61, 0.6,
0.63, 0.61, 0.86, 0.71, 0.92, 0.72, 0.79, 0.5, 0.56, 0.63, 0.59,
0.63, 0.61), C_3 = c(0.67, 0.87, 0.58, 0.7, 0.64, 0.66, 0.63,
0.76, 0.63, 0.77, 0.79, 0.96, 0.75, 1.22, 0.61, 0.65, 0.6, 0.79,
0.64, 0.67), C_4 = c(1.3, 1, 0.75, 1.06, 0.78, 0.88, 0.98, 1.03,
0.96, 1, 1.01, 1.14, 1.12, 1.01, 0.77, 0.84, 0.97, 0.8, 0.86,
1.04), D_5 = c(0.73, 0.92, 0.66, 0.71, 0.75, 0.68, 0.65, 0.64,
0.65, 0.95, 0.8, 1, 0.74, 1.02, 0.55, 0.67, 0.73, 0.75, 0.66,
0.63)), row.names = c("AAAS", "AAK1", "AAMDC", "AARS", "AASDHPPT",
"ABCD3", "ABCE1", "ABCF1", "ABHD10", "ABI1", "ACAA1", "ACACA",
"ACAD9", "ACADVL", "ACBD3", "ACIN1", "ACLY", "ACO1", "ACO2",
"ACOT7"), class = "data.frame")
1条答案
按热度按时间46qrfjad1#
我们可以在 `lapply` 中使用 `replace`:
然而,使用Tukey的方法可能更好:
**注意**:有许多方法可以删除离群值,但始终确保您可以论证为什么某个观测值可能是离群值。