如果 sym 这个 DataFrame 的 "Composite_Element_REF" 列的值与 df_normal_symbol 的索引匹配,我想用 sym 中与之相邻的列(即 Gene_Symbol 列)的值来替换 df_normal_symbol 的索引。我尝试了下面的写法:
df_normal_symbol.index = df_normal.loc[sym["Composite_Element_REF"], df_normal.index].values
错误回溯(Traceback):
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Input In [38], in <cell line: 1>()
----> 1 df_normal_symbol.index = df_normal.loc[sym["Composite_Element_REF"], df_normal.index].values
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexing.py:961, in _LocationIndexer.__getitem__(self, key)
959 if self._is_scalar_access(key):
960 return self.obj._get_value(*key, takeable=self._takeable)
--> 961 return self._getitem_tuple(key)
962 else:
963 # we by definition only have the 0th axis
964 axis = self.axis or 0
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexing.py:1147, in _LocIndexer._getitem_tuple(self, tup)
1145 # ugly hack for GH #836
1146 if self._multi_take_opportunity(tup):
-> 1147 return self._multi_take(tup)
1149 return self._getitem_tuple_same_dim(tup)
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexing.py:1098, in _LocIndexer._multi_take(self, tup)
1082 """
1083 Create the indexers for the passed tuple of keys, and
1084 executes the take operation. This allows the take operation to be
(...)
1095 values: same type as the object being indexed
1096 """
1097 # GH 836
-> 1098 d = {
1099 axis: self._get_listlike_indexer(key, axis)
1100 for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
1101 }
1102 return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexing.py:1099, in <dictcomp>(.0)
1082 """
1083 Create the indexers for the passed tuple of keys, and
1084 executes the take operation. This allows the take operation to be
(...)
1095 values: same type as the object being indexed
1096 """
1097 # GH 836
1098 d = {
-> 1099 axis: self._get_listlike_indexer(key, axis)
1100 for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
1101 }
1102 return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexing.py:1327, in _LocIndexer._get_listlike_indexer(self, key, axis)
1324 ax = self.obj._get_axis(axis)
1325 axis_name = self.obj._get_axis_name(axis)
-> 1327 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1329 return keyarr, indexer
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexes/base.py:5782, in Index._get_indexer_strict(self, key, axis_name)
5779 else:
5780 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5782 self._raise_if_missing(keyarr, indexer, axis_name)
5784 keyarr = self.take(indexer)
5785 if isinstance(key, Index):
5786 # GH 42790 - Preserve name from an Index
File /scg/apps/software/jupyter/python_3.9/lib/python3.9/site-packages/pandas/core/indexes/base.py:5845, in Index._raise_if_missing(self, key, indexer, axis_name)
5842 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5844 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 5845 raise KeyError(f"{not_found} not in index")
预期输出:
pd.DataFrame({'TCGA-CZ-5457-11A': {nan: 0.102035759907132,
'VDAC3': 0.893345348116849,
'ACTN1': 0.847131904106541,
'ATP2A1': 0.580488869725658,
'SFRP1': 0.470767306311169,
nan: 0.147416341092933,
'NIPA2': 0.0120942766037886},
'TCGA-BQ-5888-11A': {nan: 0.147149659097321,
'VDAC3': 0.910195291355705,
'ACTN1': 0.816669300689161,
'ATP2A1': 0.514358122653833,
'SFRP1': 0.441313292788889,
nan: 0.245573257728479,
'NIPA2': 0.0147939578910346},
'TCGA-B0-4846-11A': {nan: 0.113480434528015,
'VDAC3': 0.886088576813537,
'ACTN1': 0.664793188247786,
'ATP2A1': 0.516081593815069,
'SFRP1': 0.400027063258341,
nan: 0.190871544331105,
'NIPA2': 0.0099210543418163},
'TCGA-CJ-4920-11A': {nan: 0.111657157534977,
'VDAC3': 0.918934002233238,
'ACTN1': 0.773517265412361,
'ATP2A1': 0.571990354691741,
'SFRP1': 0.489779654823996,
nan: 0.24188997202946,
'NIPA2': 0.0097521875052793,
'cg00000658': 0.919644862137697,
'cg00000721': 0.94229345837988},
'TCGA-B0-4849-11A': {nan: 0.13898299158527,
'VDAC3': 0.892691778501429,
'ACTN1': 0.697181652158477,
'ATP2A1': 0.47288614791789,
'SFRP1': 0.377593967259526,
nan: 0.149294919317939,
'NIPA2': 0.0107697567839102},
'TCGA-BQ-5891-11A': {nan: 0.0943910860490585,
'VDAC3': 0.798899904372697,
'ACTN1': 0.689450514637892,
'ATP2A1': 0.568046821756013,
'SFRP1': 0.464626018317553,
nan: 0.231639837864006,
'NIPA2': 0.0487962187571897},
'TCGA-BP-5186-11A': {'cg00000165': 0.110112361205661,
'VDAC3': 0.827523582109836,
'ACTN1': 0.757610109046985,
'ATP2A1': 0.484209696051666,
'SFRP1': 0.412811564854099,
nan: 0.167420794630144,
'NIPA2': 0.0104916507529456},
'TCGA-A3-3373-11A': {nan: 0.117830727124756,
'VDAC3': 0.90581935721054,
'ACTN1': 0.761457792189881,
'ATP2A1': 0.507633250448944,
'SFRP1': 0.51611998698701,
nan: 0.1737386620934,
'NIPA2': 0.0108894792403789},
'TCGA-BP-5180-11A': {nan: 0.119205137521098,
'VDAC3': 0.891261719087507,
'ACTN1': 0.746767379239554,
'ATP2A1': 0.463089282194905,
'SFRP1': 0.464692516947339,
nan: 0.228609755811405,
'NIPA2': 0.0095536851256427}})
数据:df_normal_symbol
pd.DataFrame({'TCGA-CZ-5457-11A': {'cg00000165': 0.102035759907132,
'cg00000236': 0.893345348116849,
'cg00000289': 0.847131904106541,
'cg00000292': 0.580488869725658,
'cg00000321': 0.470767306311169,
'cg00000363': 0.147416341092933,
'cg00000622': 0.0120942766037886,
'cg00000658': 0.93695494977688,
'cg00000721': 0.975854444522775},
'TCGA-BQ-5888-11A': {'cg00000165': 0.147149659097321,
'cg00000236': 0.910195291355705,
'cg00000289': 0.816669300689161,
'cg00000292': 0.514358122653833,
'cg00000321': 0.441313292788889,
'cg00000363': 0.245573257728479,
'cg00000622': 0.0147939578910346,
'cg00000658': 0.933589698841974,
'cg00000721': 0.93311604425552},
'TCGA-B0-4846-11A': {'cg00000165': 0.113480434528015,
'cg00000236': 0.886088576813537,
'cg00000289': 0.664793188247786,
'cg00000292': 0.516081593815069,
'cg00000321': 0.400027063258341,
'cg00000363': 0.190871544331105,
'cg00000622': 0.0099210543418163,
'cg00000658': 0.863861413753196,
'cg00000721': 0.935039379256587},
'TCGA-CJ-4920-11A': {'cg00000165': 0.111657157534977,
'cg00000236': 0.918934002233238,
'cg00000289': 0.773517265412361,
'cg00000292': 0.571990354691741,
'cg00000321': 0.489779654823996,
'cg00000363': 0.24188997202946,
'cg00000622': 0.0097521875052793,
'cg00000658': 0.919644862137697,
'cg00000721': 0.94229345837988},
'TCGA-B0-4849-11A': {'cg00000165': 0.13898299158527,
'cg00000236': 0.892691778501429,
'cg00000289': 0.697181652158477,
'cg00000292': 0.47288614791789,
'cg00000321': 0.377593967259526,
'cg00000363': 0.149294919317939,
'cg00000622': 0.0107697567839102,
'cg00000658': 0.855919013625267,
'cg00000721': 0.927295110742551},
'TCGA-BQ-5891-11A': {'cg00000165': 0.0943910860490585,
'cg00000236': 0.798899904372697,
'cg00000289': 0.689450514637892,
'cg00000292': 0.568046821756013,
'cg00000321': 0.464626018317553,
'cg00000363': 0.231639837864006,
'cg00000622': 0.0487962187571897,
'cg00000658': 0.879745629519866,
'cg00000721': 0.575514399845868},
'TCGA-BP-5186-11A': {'cg00000165': 0.110112361205661,
'cg00000236': 0.827523582109836,
'cg00000289': 0.757610109046985,
'cg00000292': 0.484209696051666,
'cg00000321': 0.412811564854099,
'cg00000363': 0.167420794630144,
'cg00000622': 0.0104916507529456,
'cg00000658': 0.889507665618008,
'cg00000721': 0.956223420054809},
'TCGA-A3-3373-11A': {'cg00000165': 0.117830727124756,
'cg00000236': 0.90581935721054,
'cg00000289': 0.761457792189881,
'cg00000292': 0.507633250448944,
'cg00000321': 0.51611998698701,
'cg00000363': 0.1737386620934,
'cg00000622': 0.0108894792403789,
'cg00000658': 0.831762722499429,
'cg00000721': 0.950671976784028},
'TCGA-BP-5180-11A': {'cg00000165': 0.119205137521098,
'cg00000236': 0.891261719087507,
'cg00000289': 0.746767379239554,
'cg00000292': 0.463089282194905,
'cg00000321': 0.464692516947339,
'cg00000363': 0.228609755811405,
'cg00000622': 0.0095536851256427,
'cg00000658': 0.922630855301534,
'cg00000721': 0.958168591617036}})
sym
pd.DataFrame({'Composite_Element_REF': {1: 'cg00000108',
2: 'cg00000109',
3: 'cg00000165',
4: 'cg00000236',
5: 'cg00000289',
6: 'cg00000292',
7: 'cg00000321',
8: 'cg00000363',
9: 'cg00000622'},
'Gene_Symbol': {1: 'C3orf35',
2: 'FNDC3B',
3: nan,
4: 'VDAC3',
5: 'ACTN1',
6: 'ATP2A1',
7: 'SFRP1',
8: nan,
9: 'NIPA2'}})
1条答案
按热度按时间xlpyo6sf1#
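你的方法有两个问题。
第一个问题是:.loc 的参数必须是该 DataFrame 自身的索引和列中存在的标签列表;而你的代码把 df_normal.index 作为列标签传给了 .loc。
第二个问题是:.loc 索引器无法处理缺失的标签。df_normal_symbol 的最后两个索引(cg00000658 和 cg00000721)在 sym 的 Composite_Element_REF 列中没有对应条目,所以直接用全部索引去查找会抛出 KeyError。例如,在 sym 以 Composite_Element_REF 为索引的前提下,去掉最后两个标签的 sym.loc[df_normal_symbol.index[:-2],'Gene_Symbol'].values 就可以正常工作。
第二个问题可以通过拆分 DataFrame、重写索引再重新拼接来解决。不过,我更建议先创建一个 dict,然后在列表推导式中使用它,找不到对应条目的标签保持原样,例如:
mapping = dict(zip(sym["Composite_Element_REF"], sym["Gene_Symbol"]))
df_normal_symbol.index = [mapping.get(key, key) for key in df_normal_symbol.index]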