我的数据结构是基于151个个体x51个变量(1个分类变量由3个类别或组(OO、NUTS、LFD)组成,50个连续数值变量),实验背景是基于3种干预,患者接受不同的治疗,变量是以数字形式表示的基因表达结果,我打算形成聚类或观察分组。
当然,数据中也有缺失值。个体样本的缺失可能与临床试验本身有关,但在我的数据库中(我会附上其中一部分样本),我认为缺失是完全随机的。为什么这么说呢?受试者按期来访并采血,之后由于处理上的失误,有些基因扩增成功,而另一些没有扩增;从我的角度来看,这是随机的。当然,从根源上说,这可能与临床试验存在某种联系。
在查阅可能的方法时,我发现了 missMDA 包,最近一直在研究它。我的第一个疑问是:MFA 是否是分析我这个数据库的最佳方法(我认为是)。其他选项可能是:
- PCA排除分类变量运行定量变量?
- FAMD看起来更像是定量和定性的结合,而不是像我的情况那样作为一个分组变量
missMDA 包中提供的 'gene' 示例数据集与我的数据类似,所用方法是 MFA;其理论基础在我看来是正确的(不过我不是专家)。然而,按照《missMDA: A Package for Handling Missing Values in Multivariate Data Analysis》中的步骤操作时,我遇到了下面的错误(我尝试了几种指定 group 参数的方式):
# ncp was estimated as 5 using the PCA approach on the quantitative continuous
# variables only (grouping column excluded); the variables are scaled, hence type = "s".
# NOTE(review): this call uses an object named `PCA`, while the next call uses `df` --
# confirm which data frame is intended.
# NOTE(review): per the missMDA documentation, `group` gives the number of variables
# in each group, not column indices -- verify that c(2:51) is what is meant here.
res.mfa <- imputeMFA(PCA[, -1], group = c(2:51), type = "s", ncp = 5)
# Error in if (type[g] == "s") { : missing value where TRUE/FALSE needed
# Second attempt: `group = 50`, presumably declaring all variables as one single group.
res.mfa <- imputeMFA(df[, -1], group = 50, type = "s", ncp = 5)
# Error in (cumsum(group.mod)[g - 1] + 1):cumsum(group.mod)[g] : NA/NaN argument
由于帖子正文长度有限,这里给出简化后的 df 示例(已使用 scale() 函数标准化):
df <- structure(list(grup_int = structure(c(3L, 3L, 3L, 2L, 3L, 1L,
1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 2L,
1L, 1L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 3L, 2L,
1L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 1L, 3L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 2L,
1L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 3L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L,
3L), levels = c("OO", "NUTS", "LFD"), label = "GENERAL: Grupo de intervención", class = "factor"),
ppara = structure(c(2.882, -0.091, -0.519, NA, NA, NA, 0.102,
NA, NA, -0.292, NA, 0.73, 0.555, -0.11, -1.022, -0.027, 0.114,
NA, 0.593, -0.768, -0.53, NA, NA, -0.224, -0.327, -0.952,
-0.185, NA, NA, -0.52, -1.175, 0.286, NA, -0.398, NA, -0.144,
NA, NA, NA, -0.903, NA, -0.258, -0.897, -0.751, -0.662, -0.628,
-0.779, NA, NA, 0.743, 0.142, NA, NA, NA, NA, -0.607, -0.739,
-0.437, NA, -1.152, -1.272, 0.608, 1.317, -0.547, 3.081,
0.647, -0.799, -0.682, -0.354, NA, 0.031, NA, 1.992, 3.665,
NA, NA, -0.027, -0.318, -0.916, NA, NA, NA, -0.31, -1.113,
-0.089, -0.391, NA, 0.134, -0.574, -0.291, -0.012, NA, NA,
-0.607, NA, -1.018, -0.702, NA, 1.624, 0.841, 0.869, NA,
0.373, -0.857, 0.007, 0.099, NA, -0.613, -0.005, 1.059, 1.525,
2.504, NA, NA, -0.737, -0.864, NA, NA, -0.901, -0.257, 0.968,
NA, -0.743, -0.023, NA, NA, NA, -0.219, NA, 0.226, -0.366,
-0.788, NA, -0.942, -0.215, NA, -0.659, -0.532, 3.052, 1.426,
NA, 0.366, -0.52, 0.377, NA, 1.421, NA, NA, NA, NA, NA), dim = c(151L,
1L), "`scaled:center`" = 1.35197894736842, "`scaled:scale`" = 0.835827593173089),
ppard = structure(c(0.214, -0.376, -0.152, -0.121, -0.147,
0.681, 0.373, 0.144, 0.291, -0.554, -0.344, -0.463, 0.565,
-0.612, -1.073, -0.038, 0.373, -0.801, 0.052, -0.765, -0.891,
0.56, 0.059, -0.396, -0.447, -0.83, -0.088, -0.543, -0.126,
-0.678, -0.769, 0.079, NA, -0.345, NA, -0.12, -0.845, 0.182,
-0.234, -0.785, 0.257, -0.035, -0.426, -0.428, -0.668, -0.51,
-0.626, -0.264, -0.588, -0.508, -0.184, NA, 2.647, -0.403,
-0.99, -0.727, 0.047, -0.487, -0.286, -0.865, -0.841, 3.273,
0.653, -0.439, 3.509, 0.653, -0.971, -0.298, -0.343, NA,
0.44, 0.143, 0.847, 5.239, 0.973, 2.861, 0.36, -0.538, 4.029,
-0.154, -0.361, -0.047, -0.222, -1.302, -0.047, -0.005, NA,
0.507, -0.244, -0.38, -0.116, -0.958, NA, -0.194, NA, -0.713,
-0.498, 0.239, 0.405, 1.012, 0.542, 0.22, 0.846, -0.455,
-0.003, 0.477, -0.096, -0.301, 0.85, 0.575, 0.606, 0.713,
NA, -1.197, -0.766, -0.846, -0.583, -0.53, -0.246, -1.062,
-0.221, -0.265, -1.083, -0.449, -1.117, -0.899, 0.146, -0.561,
-0.471, 0.171, -0.263, -0.33, 0.028, -0.625, -0.564, 4.08,
-0.444, 0.092, 0.581, 0.14, -0.112, -0.349, 0.198, 0.077,
NA, -0.409, NA, 0.05, 0.489, NA, 0.903), dim = c(151L, 1L
), "`scaled:center`" = 1.40111428571429, "`scaled:scale`" = 0.811830324669782),
pparg = structure(c(1.298, -0.171, 0.056, 0.017, -0.428,
0.257, 0.719, -0.1, 1.28, -0.19, 0.344, -0.629, 1.572, -0.713,
-0.739, 1.027, 0.22, -0.321, -1.283, -1.283, 0.1, -0.38,
0.257, -0.325, -0.572, -0.705, -0.442, -1.129, NA, -1.129,
-0.869, 2.129, NA, 0.044, NA, -0.352, -0.568, 1.976, -0.823,
-0.843, 0.529, -0.046, -0.223, -0.646, -0.308, 0.227, 0.028,
-0.352, -0.097, 0.054, -0.512, NA, 3.624, 0.399, -0.588,
-0.986, -0.672, -0.612, 0.362, -1.199, -0.896, -0.675, 0.354,
-0.641, 3.271, 0.3, -1.196, -0.789, -0.285, NA, -0.244, -0.156,
1.765, 4.562, 1.241, NA, -0.407, -0.84, 2.605, 0.016, -0.619,
-0.317, -0.472, -1.337, -0.555, -0.932, NA, 0.312, -0.4,
-1.241, 0.22, -0.937, NA, -0.134, NA, -0.241, -0.176, 0.138,
-0.716, 0.312, 0.061, -0.682, 0.609, 0.174, -0.07, -0.554,
-0.224, 0.107, 0.228, 0.491, 2.581, 1.164, NA, -0.368, -0.816,
-0.471, -0.126, -0.144, -0.281, -0.635, 0.618, 0.649, -1.601,
-0.913, -1.271, -0.756, 0.529, 0.047, -0.922, 1.729, -1.05,
0.01, 0.8, 1.488, 0.452, 1.876, -0.13, 0.485, -0.041, 0.211,
-0.859, -0.621, -0.515, -0.177, NA, -0.632, NA, -0.545, 1.322,
NA, 1.602), dim = c(151L, 1L), "`scaled:center`" = 1.2910652173913, "`scaled:scale`" = 0.700929843387113),
nr1h3 = structure(c(0.407, 0.244, -0.42, -0.013, -0.829,
0.492, 0.881, 0.171, -0.709, 0.22, 0.026, -0.45, 0.262, 1.012,
-0.847, 0.484, -0.607, -0.601, -0.821, -0.194, -0.568, 0.673,
-0.607, -0.458, -0.494, -0.492, 0.623, -0.93, -0.058, -0.41,
-0.784, 0.052, -2.094, 0.07, NA, 0.272, 0.024, 0.458, 0.832,
-0.75, 0.074, 0.766, -0.158, 0.463, -0.184, -0.469, -0.269,
-0.387, -0.337, -0.271, 1.308, NA, NA, -0.497, -0.944, -1.006,
-1.024, -0.783, 0.276, -1.132, -0.827, -0.343, 2.697, -0.497,
NA, 0.897, NA, -0.294, 0.517, NA, 0.236, 0.241, 0.679, NA,
2.847, 2.11, 0.069, 1.473, 4.445, NA, NA, NA, -0.207, -1.697,
-0.069, 0.105, NA, 1.004, NA, NA, -0.428, -1.257, NA, 1.736,
NA, -0.691, 0.09, 0.226, 0.674, -0.55, 0.699, -0.258, 0.906,
-0.691, 0.768, 0.484, -0.199, -0.62, 0.128, 0.742, 2.149,
0.737, NA, -1.296, NA, -0.545, 0.495, -0.888, -0.926, -0.97,
-0.279, -0.028, -1.396, -1.321, -1.254, -0.858, -0.592, -0.691,
-0.885, 0.077, 0.641, -0.643, -0.286, -0.932, -0.77, 4.228,
-0.589, 0.254, 0.947, -0.461, -0.469, 0.023, -0.476, -0.071,
NA, -0.445, NA, -0.207, 0.469, NA, 2.62), dim = c(151L, 1L
), "`scaled:center`" = 1.30208396946565, "`scaled:scale`" = 0.609461200104939),
nr1h2 = structure(c(-0.49, 0.333, -0.525, -0.673, -0.442,
-0.029, 0.504, 0.169, -0.193, -0.641, -0.405, 0.01, 1.152,
0.002, -0.478, -0.514, -0.077, -0.61, -0.404, -0.544, -0.434,
0.448, 0.162, -0.463, -0.447, -0.81, -0.209, -0.194, 0.637,
-0.427, -0.366, -0.211, -1.252, -0.205, NA, -0.003, -0.277,
0.027, 0.172, -0.569, 0.343, 0.601, -0.468, -0.254, 0.001,
0.092, -0.416, -0.224, -0.518, -0.356, -0.401, NA, 1.952,
-0.607, -0.721, -0.481, -0.375, -0.499, -0.01, -0.822, -0.704,
0.01, 0.686, -0.429, 6.483, 0.214, -0.302, 0.019, -0.323,
NA, 0.566, 0.441, 0.273, 2.845, 0.409, 4.469, 0.115, -0.21,
4.747, 0.324, -0.453, -0.528, -0.057, -0.943, -0.463, -0.127,
NA, 0.056, -0.343, -0.05, -0.539, -0.785, NA, -0.327, NA,
-0.466, -0.413, -0.181, 0.43, 0.548, -0.098, 0.565, 0.487,
-0.571, 0.046, 0.251, 0.113, 0.382, 0.343, 0.919, 0.215,
0.433, NA, -0.757, -0.711, -0.656, -0.525, -0.482, -0.268,
-0.714, -0.101, 0.093, -0.493, -0.626, -0.845, -0.7, -0.48,
-0.404, -0.668, -0.028, 0.2, -0.506, -0.078, -0.84, -0.54,
3.735, -0.366, -0.326, 0.271, 0.199, -0.624, -0.504, 0.573,
-0.131, NA, -0.212, NA, 0.092, 0.175, NA, 0.61), dim = c(151L,
1L), "`scaled:center`" = 1.38648936170213, "`scaled:scale`" = 1.07206008371747),
rxra = structure(c(0.003, -0.137, -0.372, -0.339, 0.001,
-0.083, 0.371, -0.04, -0.286, -0.707, -0.405, 0.067, 0.205,
-0.515, -0.48, -0.25, -0.28, -0.408, -0.649, -0.735, -0.722,
0.391, -0.008, 0.584, -0.517, -0.531, -0.268, -0.405, NA,
0.39, -0.025, -0.02, NA, -0.119, NA, -0.586, -0.449, -0.152,
-0.343, -0.126, -0.528, 0.012, -0.769, 0.074, -0.213, -0.501,
-0.624, -0.602, -0.847, -0.655, -0.619, NA, 2.641, -0.764,
-0.931, -0.833, 0.084, -0.065, -0.135, -0.77, -0.665, 0.893,
0.544, -0.512, 7.66, 0.111, -0.297, 0.248, -0.305, NA, 0.041,
0.515, 0.562, 4.191, 0.701, 1.726, -0.371, -0.225, 2.191,
0.063, -0.714, 0.049, 0.049, -1.099, -0.875, -0.101, NA,
-0.152, -0.058, 0.084, -0.564, -0.49, NA, 0.138, NA, -0.592,
-0.782, 0.033, 0.165, 0.161, 0.576, 1.449, 2.191, -0.21,
-0.182, 0.547, 0.163, 0.057, 1.104, 0.842, 0.208, 0.941,
NA, -0.817, -0.543, -0.4, -0.344, -0.747, -0.811, -1.077,
-0.334, 0.312, -0.541, -0.726, -0.872, -0.365, -0.232, 0.04,
-0.334, -0.111, -0.304, -0.476, 0.172, -0.389, -0.233, 3.064,
-0.104, -0.063, -0.049, 0.686, -0.851, -0.243, -0.098, 0.067,
NA, -0.076, NA, 0.112, 0.429, NA, 0.241), dim = c(151L, 1L
), "`scaled:center`" = 1.39937410071942, "`scaled:scale`" = 0.869319134487005),
rxrb = structure(c(-0.489, -0.548, 0.591, 0.137, 0.814, 0.161,
-0.204, -0.242, -0.408, -0.65, -0.242, 0.987, 0.468, -0.6,
-1.373, -0.739, -0.917, -0.565, -0.633, -0.833, -0.81, 0.509,
0.217, 0.388, -0.067, -0.614, -0.055, -0.185, 0.416, 0.94,
-0.399, 0.106, -1.652, -0.902, NA, -0.224, -0.289, -0.568,
0.219, -0.61, -0.913, 0.299, -0.743, -0.183, -0.251, -0.699,
-1.232, -0.43, -0.297, -0.664, -0.273, NA, 3.264, 0.207,
-0.849, -0.253, -0.134, -0.762, 0.206, -0.693, -0.627, 0.747,
-0.139, -0.043, 4.043, 0.643, -0.319, -0.16, -0.185, NA,
0.083, 0.399, 0.147, 6.611, 0.054, -1.572, 0.721, -0.009,
-1.597, -0.062, 1.607, 0.272, -0.169, -1.139, 1.191, 0.293,
NA, 0.196, 0.383, -0.531, 0.02, 3.971, NA, 0.246, NA, -0.154,
-0.51, 0.083, 0.838, 0.19, 0.446, 0.083, 0.219, 1.676, -0.353,
0.421, -0.163, 0.091, 0.758, 0.625, 0.79, 0.084, NA, -0.956,
-0.775, -0.26, -0.446, -0.898, -0.439, -0.713, -0.93, -0.587,
-0.918, -0.161, -0.408, -0.216, -0.311, -0.168, 0.107, 0.15,
0.634, 0.853, 0.179, -0.47, -0.796, -1.533, 0.429, -0.155,
0.578, 0.741, 0.193, 0.067, -0.334, -0.016, NA, -0.006, NA,
0.053, 0.837, NA, -0.265), dim = c(151L, 1L), "`scaled:center`" = 1.39233333333333, "`scaled:scale`" = 0.828513485248186),
cyp27a1 = structure(c(-0.366, -0.342, -0.257, -0.731, 0.498,
-0.251, 1.016, -0.209, 0.086, -0.84, 0.399, -0.466, 0.022,
-0.54, -0.085, 0.018, -0.248, -0.82, -0.666, -0.821, -0.443,
0.187, -0.135, -0.433, 0.494, -0.57, -0.332, -0.088, NA,
-0.004, -0.705, 0.502, NA, -0.313, NA, -0.471, -0.423, -0.398,
-0.272, -0.82, 0.102, 0.183, -0.703, -0.155, -0.437, -0.427,
-0.739, -0.292, -0.586, -0.574, -0.284, NA, NA, -0.42, -1.099,
-0.879, -0.527, -0.609, -0.227, -0.827, -0.765, 6.111, 1.02,
-0.66, NA, 0.53, -0.424, 0.055, -0.184, NA, 0.633, 0.364,
0.418, NA, 0.229, 2.879, -0.264, -0.285, 3.73, 0.569, 0.053,
-0.368, -0.2, -1.217, -0.249, -0.318, NA, 0.37, 0.15, -0.23,
0.258, -0.623, NA, 0.183, NA, -0.321, -0.534, 0.537, -0.046,
0.809, 0.899, 1.415, 1.484, -0.158, -0.109, 0.863, -0.256,
0.281, 0.94, 1.062, 1.988, 0.064, NA, -0.455, -0.959, -0.437,
-0.657, -0.82, 0.022, -0.927, -0.821, 0.878, -1.189, -0.121,
-1.278, -0.811, -0.653, -0.54, -0.576, -0.266, 0.11, -0.433,
-0.075, -0.617, -0.682, 4.587, 0.149, -0.146, 0.384, 0.917,
-0.38, -0.17, 0.082, -0.301, NA, -0.176, NA, 0.135, -0.007,
NA, 2.875), dim = c(151L, 1L), "`scaled:center`" = 1.37367647058824, "`scaled:scale`" = 0.862860015103544),
abca1 = structure(c(-0.275, 0.339, -0.655, -0.414, -0.611,
0.548, -0.053, -0.25, 0.157, -0.472, -0.555, -0.329, 0.027,
-0.017, -0.442, 0.044, -0.191, -0.52, -0.685, -0.778, -0.503,
0.33, 0.045, -0.593, -0.279, -0.651, -0.192, 0.161, -0.159,
0.023, -0.304, 1.422, NA, -0.06, NA, -0.379, -0.488, -0.313,
-0.336, -0.056, -0.7, -0.563, -0.631, -0.275, -0.742, -0.394,
-0.655, -0.432, -0.616, -0.642, -0.141, NA, 0.3, -0.336,
-0.975, -0.667, -0.418, -0.325, 0.318, -0.852, -0.577, -0.766,
0.772, -0.559, 7.974, 0.385, -0.653, -0.261, -0.474, NA,
-0.007, 0.203, 0.908, 2.893, 1.629, 1.134, -0.125, 0.149,
4.345, 1.047, 0.223, -0.071, -0.296, -0.814, -0.64, 0.255,
NA, -0.277, -0.065, 0.479, -0.375, -0.422, NA, 0.292, NA,
-0.422, 0.515, 0.309, 0.11, 0.717, 0.468, 1.66, 1.869, -0.286,
0.075, 0.288, 0.092, -0.088, 0.473, -0.19, 0.548, 0.37, NA,
-0.824, -0.697, -0.561, -0.549, -0.529, -0.846, -0.768, -0.818,
0.101, -0.316, -0.727, -0.192, -0.498, -0.784, 0.324, -0.654,
0.626, -0.297, -0.268, -0.002, -0.21, -0.193, 2.531, 0.516,
-0.403, -0.064, 0.461, -0.481, 0.154, 0.215, -0.275, NA,
-0.527, NA, -0.642, 0.003, NA, 0.59), dim = c(151L, 1L), "`scaled:center`" = 1.66513571428571, "`scaled:scale`" = 1.46050537823946)), row.names = c("50109018",
"50109019", "50109025", "50109026", "50109027", "50118001", "50202099",
"50203004", "50203006", "50203008", "50203009", "50203010", "50203011",
"50203012", "50203013", "50203014", "50203015", "50203016", "50203017",
"50203019", "50203020", "50203022", "50203026", "50203027", "50203029",
"50203030", "50203031", "50203032", "50430001", "50508026", "50508027",
"50521001", "50521002", "50527001", "50601001", "50705001", "60901020",
"60901021", "60901023", "60901024", "60901026", "60901027", "60901028",
"60901029", "60901030", "60901031", "60901033", "60901034", "60901035",
"60901036", "60901037", "60901038", "70107034", "70111021", "70111022",
"70111023", "70111024", "70201047", "70204055", "70204056", "70211014",
"70710002", "70713001", "70713002", "70802011", "70802012", "70802013",
"70802015", "71801001", "71801002", "71801003", "110104017",
"110104019", "110104023", "110104024", "110104027", "110104028",
"110104029", "110104030", "110110005", "110113001", "110113003",
"110113005", "110113006", "110113007", "110113008", "110606056",
"110606061", "111201006", "111201007", "111201014", "111201017",
"111201019", "111201026", "111202007", "111202009", "111202015",
"120715011", "120715012", "120715019", "120715020", "120715021",
"120715022", "120715025", "120715026", "120715027", "120715029",
"120715030", "120715032", "120715033", "120715034", "120715035",
"120715037", "130102008", "130102009", "130102010", "130102012",
"130102013", "130102014", "130104004", "130105044", "130105045",
"130106034", "130106037", "130106038", "130108008", "130108009",
"140101088", "140101091", "140101096", "140101097", "140101099",
"140102087", "140102088", "140102089", "140102090", "140102092",
"140102095", "140103019", "140103020", "140103023", "140103024",
"140103026", "140103027", "140103028", "140103029", "140103030",
"140103033", "140103035", "140103036", "140103038"), class = "data.frame")
1条答案
按热度按时间jobtbby31#
似乎问题出现在组数为 1 的情况下。imputeMFA 中有几行类似下面这样的循环代码(原代码片段在转载时丢失,此处根据上文的错误信息推断):
for (g in 2:length(group))
当只有一个组时,这会失败,因为它不是从 2 向上计数到组数,而是从 2 向下计数到 1。如果用 if() 语句使该行仅在 length(group) > 1 时才执行,它就可以正常工作。我在 CRAN 上 missMDA 包的一个分支(fork)中实现了这一修改。下面是一个示例。