pandas 只读某些列

yhuiod9q  于 2023-06-20  发布在  其他
关注(0)|答案(2)|浏览(128)

我想只从excel文件中读取某些列到pandas数据框中。我想通过usecols的列范围来指定这一点,但我遇到了一个错误。将列范围指定为字母的正确方法是什么(是的,我知道我可以提供列索引)?

import pandas as pd
df = pd.read_excel("test.xlsx", usecols="A:AV", header=None)

~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in read_excelexcel.py208 true_values = true_values,false_values = false_values,squeeze = squeeze,--> 209kwds)210 211 (io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze,kwds) 207 skip_footer=skip_footer, converters=converters, dtype=dtype, 208 true_values=true_values, false_values=false_values, squeeze=squeeze, --> 209kwds) 210 211
~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in_parse_excelexcel.py- -> 510
kwds)511 512输出[asheetname]= www.example.com()(self, sheetname, header, skiprows, names, skip_footer, index_col, has_index_names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, true_values, false_values, verbose, dtype, squeeze,kwds) 508 squeeze=squeeze, 509 dtype=dtype, --> 510kwds) 511 512 output[asheetname] = parser.read ()
python3.5/site-packages/pandas/io/www.example.com in TextParser(* args,*kwds)1910 """1911 kwds ['engine ']=' python '-> 1912 return TextFileReader( args,**kwds)1913 1914 parsers.py in TextParser(args,kwds) 1910 """ 1911 kwds['engine'] = 'python' -> 1912 return TextFileReader(args,kwds) 1913 1914
~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in
init
(self,f,engine,kwds)762 self. options ['has_index_names']= kwds ['has_index_names'] 763--> 764 self._make_engine(self. engine)765 766 def close(self)(self)(self)parsers.py ininit
(self, f, engine,kwds) 762 self.options['has_index_names'] = kwds['has_index_names'] 763 --> 764 self._make_engine(self.engine) 765 766 def close(self):
python3.5/site-packages/pandas/io/www.example.com in_make_engine(self,engine)993 '"c","python",or''"python-fwf")'. format(994 engine = engine))--> 995 self._engine = klass(self. f,self. options)996 997 def_failover_to_python(self):parsers.py in _make_engine(self, engine) 993 ' "c", "python", or' ' "python-fwf")'.format( 994 engine=engine)) --> 995 self._engine = klass(self.f,self.options) 996 997 def _failover_to_python(self):
~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in
init
(self,f,kwds)1994 #如果指定is,则从self.usecols推断列索引。parsers.py ininit
(self, f,**kwds) 1994 # infer column indices from self.usecols if is is specified. 1995 self._col_indices = None -> 1996 self.columns, self.num_original_columns = self._infer_columns() 1997 1998 # Now self.columns has the set of columns that we will process.
~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in_inferred_columns(self)2387 else:parsers.py2391如果self. usecols为None或len(names)>= num_original_columns: 2388 columns = [lrange(ncols)] -> 2389 columns = self._handle_usecols(columns, columns[0]) 2390 else: 2391 if self.usecols is None or len(names) >= num_original_columns:
~/anaconda3/lib/python3.5/site-packages/pandas/io/www.example.com in_handle_usecols(self,columns,usecols_key)2422 for col in self. usecols:parsers.py- > 2424 col_indices. append(usecols_key. index(col))2425 2423 if isinstance(col, string_types): -> 2424 col_indices.append(usecols_key.index(col)) 2425
else:2426 col_indices. append(col)

In [20]: pd.__version__
Out[20]: '0.20.2'

文件:test.xlsx

tuwxkamq

tuwxkamq1#

需要升级到pandas 0.21.0dev discussion):

df = pd.read_excel("test.xlsx", usecols='A:B', header=None)                              
print (df.head())

          0                    1
0  sequence  2015-02-11 00:00:00
1        Aa                 6239
2    AaBpL5                  NaN
3    AaCjL5                  NaN
4    AaDrL2                  NaN
wlp8pajw

wlp8pajw2#

df=pd.read_excel(“test.xlsx”,usecols=“A:AE”)
上面的命令在我的IDLE中使用python版本-3.11.3,pandas版本-2.0.2工作

相关问题