我尝试运行 sklearn 中的 `PolynomialFeatures`,并使用一个大型稀疏矩阵作为输入:
[1] x
>>> <11967295x120006 sparse matrix of type '<class 'numpy.int64'>'
with 55375058 stored elements in Compressed Sparse Row format>
[2] from sklearn.preprocessing import PolynomialFeatures
[3] pf = PolynomialFeatures(interaction_only=True, include_bias=False, degree=2)
[4] xinter = pf.fit_transform(x)
结果得到错误 `ValueError: negative column index found`:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-78-dc5dc18d59d2> in <module>
1 start = time.time()
----> 2 xinter = pf.fit_transform(x)
3 end = time.time()
/venv/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
551 if y is None:
552 # fit method of arity 1 (unsupervised transformation)
--> 553 return self.fit(X, **fit_params).transform(X)
554 else:
555 # fit method of arity 2 (supervised transformation)
/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py in transform(self, X)
1523 break
1524 to_stack.append(Xp_next)
-> 1525 XP = sparse.hstack(to_stack, format='csr')
1526 elif sparse.isspmatrix_csc(X) and self.degree < 4:
1527 return self.transform(X.tocsr()).tocsc()
/venv/lib/python3.6/site-packages/scipy/sparse/construct.py in hstack(blocks, format, dtype)
463
464 """
--> 465 return bmat([blocks], format=format, dtype=dtype)
466
467
/venv/lib/python3.6/site-packages/scipy/sparse/construct.py in bmat(blocks, format, dtype)
572 for j in range(N):
573 if blocks[i,j] is not None:
--> 574 A = coo_matrix(blocks[i,j])
575 blocks[i,j] = A
576 block_mask[i,j] = True
/venv/lib/python3.6/site-packages/scipy/sparse/coo.py in __init__(self, arg1, shape, dtype, copy)
170 self._shape = check_shape(arg1.shape)
171 else:
--> 172 coo = arg1.tocoo()
173 self.row = coo.row
174 self.col = coo.col
/venv/lib/python3.6/site-packages/scipy/sparse/compressed.py in tocoo(self, copy)
1015 from .coo import coo_matrix
1016 return coo_matrix((self.data, (row, col)), self.shape, copy=copy,
-> 1017 dtype=self.dtype)
1018
1019 tocoo.__doc__ = spmatrix.tocoo.__doc__
/venv/lib/python3.6/site-packages/scipy/sparse/coo.py in __init__(self, arg1, shape, dtype, copy)
196 self.data = self.data.astype(dtype, copy=False)
197
--> 198 self._check()
199
200 def reshape(self, *args, **kwargs):
/venv/lib/python3.6/site-packages/scipy/sparse/coo.py in _check(self)
289 raise ValueError('negative row index found')
290 if self.col.min() < 0:
--> 291 raise ValueError('negative column index found')
292
293 def transpose(self, axes=None, copy=False):
ValueError: negative column index found
这看起来不正确,因为 `col` 索引的类型是 int64,按理不应在这样的数值范围内出现负值:
> /venv/lib/python3.6/site-packages/scipy/sparse/coo.py(291)_check()
289 raise ValueError('negative row index found')
290 if self.col.min() < 0:
--> 291 raise ValueError('negative column index found')
292
293 def transpose(self, axes=None, copy=False):
ipdb> self.col.max()
2147482788
ipdb> self.col.dtype
dtype('int64')
ipdb> self.col.min()
-2147480639
我使用以下版本:
scipy.__version__
'1.3.1'
sklearn.__version__
'0.21.2'
非常感谢任何有助于解决这个问题的建议!
1条答案
按热度按时间ercv8c1e1#
我认为你的问题已经在 scikit-learn 的 GitHub 仓库中讨论过,修复方案也已合并到主分支。我也遇到了同样的错误,将 scikit-learn 升级到 v1.3.2 后问题就解决了。