Python SQL代码错误- sqlite3,操作错误:SQL变量太多

rnmwe5a2  于 2023-04-30  发布在  SQLite
关注(0)|答案(1)|浏览(164)

我正在尝试下面的代码,但我得到了错误

if not os.path.isfile('train.db'):
    disk_engine = create_engine('sqlite:///train.db')
    start = dt.datetime.now()
    chunksize = 15000
    j = 0
    index_start = 1
    for df in pd.read_csv('final_features.csv', names=['Unnamed: 0','id','is_duplicate','cwc_min','cwc_max','csc_min','csc_max','ctc_min','ctc_max','last_word_eq','first_word_eq','abs_len_diff','mean_len','token_set_ratio','token_sort_ratio','fuzz_ratio','fuzz_partial_ratio','longest_substr_ratio','freq_qid1','freq_qid2','q1len','q2len','q1_n_words','q2_n_words','word_Common','word_Total','word_share','freq_q1+q2','freq_q1-q2','0_x','1_x','2_x','3_x','4_x','5_x','6_x','7_x','8_x','9_x','10_x','11_x','12_x','13_x','14_x','15_x','16_x','17_x','18_x','19_x','20_x','21_x','22_x','23_x','24_x','25_x','26_x','27_x','28_x','29_x','30_x','31_x','32_x','33_x','34_x','35_x','36_x','37_x','38_x','39_x','40_x','41_x','42_x','43_x','44_x','45_x','46_x','47_x','48_x','49_x','50_x','51_x','52_x','53_x','54_x','55_x','56_x','57_x','58_x','59_x','60_x','61_x','62_x','63_x','64_x','65_x','66_x','67_x','68_x','69_x','70_x','71_x','72_x','73_x','74_x','75_x','76_x','77_x','78_x','79_x','80_x','81_x','82_x','83_x','84_x','85_x','86_x','87_x','88_x','89_x','90_x','91_x','92_x','93_x','94_x','95_x','0_y','1_y','2_y','3_y','4_y','5_y','6_y','7_y','8_y','9_y','10_y','11_y','12_y','13_y','14_y','15_y','16_y','17_y','18_y','19_y','20_y','21_y','22_y','23_y','24_y','25_y','26_y','27_y','28_y','29_y','30_y','31_y','32_y','33_y','34_y','35_y','36_y','37_y','38_y','39_y','40_y','41_y','42_y','43_y','44_y','45_y','46_y','47_y','48_y','49_y','50_y','51_y','52_y','53_y','54_y','55_y','56_y','57_y','58_y','59_y','60_y','61_y','62_y','63_y','64_y','65_y','66_y','67_y','68_y','69_y','70_y','71_y','72_y','73_y','74_y','75_y','76_y','77_y','78_y','79_y','80_y','81_y','82_y','83_y','84_y','85_y','86_y','87_y','88_y','89_y','90_y','91_y','92_y','93_y','94_y','95_y'], chunksize=chunksize, iterator=True, encoding='utf-8', ):
        df.index += index_start
        j+=1
        print('{} rows'.format(j*chunksize))
        df.to_sql('data', disk_engine, if_exists='append')
        index_start = df.index[-1] + 1

这是我得到的o/p

15000 rows
---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
    506     def do_execute(self, cursor, statement, parameters, context=None):
--> 507         cursor.execute(statement, parameters)
    508 

OperationalError: too many SQL variables

The above exception was the direct cause of the following exception:

OperationalError                          Traceback (most recent call last)
<ipython-input-83-b376654c990a> in <module>()
     14         j+=1
     15         print('{} rows'.format(j*chunksize))
---> 16         df.to_sql('data', disk_engine, if_exists='append')
     17         index_start = df.index[-1] + 1

~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
   2125     ...     df2.to_excel(writer, sheet_name='Sheet_name_2')
   2126 
-> 2127     ExcelWriter can also be used to append to an existing Excel file:
   2128 
   2129     >>> with pd.ExcelWriter('output.xlsx',

~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
    448     index=True,
    449     index_label=None,
--> 450     chunksize=None,
    451     dtype=None,
    452     method=None,

~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1147 
   1148     @staticmethod
-> 1149     def _query_iterator(
   1150         result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
   1151     ):

~\Anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize)
    661         ----------
    662         conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
--> 663         keys : list of str
    664            Column names
    665         data_iter : generator of list

~\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
    636         return str(CreateTable(self.table).compile(self.pd_sql.connectable))
    637 
--> 638     def _execute_create(self):
    639         # Inserting table into database, add to MetaData object
    640         self.table = self.table.tometadata(self.pd_sql.meta)

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object, *multiparams, **params)
    946             raise exc.ObjectNotExecutableError(object)
    947         else:
--> 948             return meth(self, multiparams, params)
    949 
    950     def _execute_function(self, func, multiparams, params):

~\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
    267     def _execute_on_connection(self, connection, multiparams, params):
    268         if self.supports_execution:
--> 269             return connection._execute_clauseelement(self, multiparams, params)
    270         else:
    271             raise exc.ObjectNotExecutableError(self)

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
   1058             compiled_sql,
   1059             distilled_params,
-> 1060             compiled_sql, distilled_params
   1061         )
   1062         if self._has_events or self.engine._has_events:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1198                 parameters,
   1199                 cursor,
-> 1200                 context)
   1201 
   1202         if self._has_events or self.engine._has_events:

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1411                 util.raise_from_cause(
   1412                     sqlalchemy_exception,
-> 1413                     exc_info
   1414                 )
   1415             else:

~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
    201     exc_type, exc_value, exc_tb = exc_info
    202     cause = exc_value if exc_value is not exception else None
--> 203     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    204 
    205 if py3k:

~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
    184             value.__cause__ = cause
    185         if value.__traceback__ is not tb:
--> 186             raise value.with_traceback(tb)
    187         raise value
    188 

~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1191                         statement,
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:
   1195             self._handle_dbapi_exception(

~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
    505 
    506     def do_execute(self, cursor, statement, parameters, context=None):
--> 507         cursor.execute(statement, parameters)
    508 
    509     def do_execute_no_params(self, cursor, statement, context=None):

我已经尝试了各种chunksize值,但它不会工作。有人可以建议我解决这个错误。我在jupyter笔记本中运行这段代码。我已经更新了pandas和其他库的版本,所以不存在兼容性问题。

xu3bshqb

xu3bshqb1#

此错误与传递给sqlite3的参数数有关。本质上,在幕后发生的事情是向db引擎发出一个SQL查询:INSERT INTO myTable (col1, col2, col3,..., col_n) VALUES (?, ?, ?,..., ?),其中?是数据框架中传递到数据库的值。
发生此错误是因为您的数据框非常宽(有很多列),因此在插入过程中,许多参数被传递到SQL语句中。实际上,你可以在错误堆栈中看到我的解释:

--> 507         cursor.execute(statement, parameters)

简单地说,SQLite只能处理有限数量的参数。这只是SQLite的一个局限性。您可以向下滚动到此page中的#9以了解有关此的更多信息。
设置chunksize不会解决您的问题。我的建议是使用另一个数据库,如postgres或mysql。

相关问题