Scrapy:使用 POST 方法抓取数据时报错

yftpprvb  于 2022-11-09  发布在  其他
关注(0)|答案(1)|浏览(145)

我在 Scrapy 中使用 POST 方法发送请求,但出现了如下错误:TypeError: __init__() got an unexpected keyword argument 'data'。有什么解决办法吗?基本上,我想从表格中抓取数据,页面链接是 https://www.benrishi-navi.com/english/english1_2.php

import scrapy
from scrapy.http import Request
from scrapy.selector import Selector

class TestSpider(scrapy.Spider):
    """Spider that POSTs a search form to ``url`` and prints the matched nodes.

    NOTE(review): this is the snippet from the question; as written,
    ``start_requests`` raises the reported
    ``TypeError: __init__() got an unexpected keyword argument 'data'``.
    """
    name = 'test'

    # Page that hosts the search form; also the target of the POST request.
    url = "https://www.benrishi-navi.com/english/english1_2.php"

    # Form fields already serialized as a single urlencoded string.
    # Most fields are empty; only display_flag, search and the four
    # *_count / search_default fields carry values.
    payload='tuusan_year=&tuusan_month=&tuusan_chk=&methodAndOr1=&methodAndOr2=&methodAndOr3=&text_sen=&text_skill=&text_business=&tokkyo_data=&fuki_day_chk=&shuju=&kensyuu_bunya=&text_kensyuu=&methodAndOr_kensyuu=&keitai_kikan=&keitai_hisu=&display_flag=1&search=2&text=&method=&methodAndOr=&area=&pref=&name=&kana=&id=&year=&month=&day=&day_chk=&exp01=&exp02=&exp03=&trip=&venture_support=&venture_flag=&university_support=&university_flag=&university1=&university2=&university=&college=&high_pref=&junior_pref=&elementary_pref=&tyosaku=&hp=&jukoureki=&experience1=&experience2=&experience3=&experience4=&sort=&fuki_year=&fuki_month=&fuki_day=&fuki_day_chk=&id_chk=&shugyou=&fuki=&address1=&address2=&trip_pref=&expref=&office=&max_count=1437&search_count=10&start_count=1&search_default=10'
    # Request headers sent with the POST.
    # NOTE(review): these look copied from a browser session; the hard-coded
    # Cookie value is presumably session-specific -- confirm it is still valid.
    headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': 'CAKEPHP=u6u40lefkqnm45j49a5i0h6bs3',
    'Origin': 'https://www.benrishi-navi.com',
    'Referer': 'https://www.benrishi-navi.com/english/english1_2.php',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"'
    }

    def start_requests(self):
        """Yield the single POST request that submits ``payload`` to ``url``."""
        # NOTE(review): ``data`` is not a keyword FormRequest accepts, which is
        # what triggers the TypeError quoted in the question. The accepted
        # answer passes the fields as a dict through ``formdata`` instead.
        yield scrapy.FormRequest(
            url=self.url,
            method='POST',
            data=self.payload,
            headers=self.headers,
            callback=self.parse,
        )

    def parse(self,response):
        """Select nodes with the XPath ``table`` and print the selector list."""
        # NOTE(review): 'table' is a relative XPath, not '//table'; it may
        # match nothing when evaluated from the response root -- confirm.
        t=response.xpath('table')
        print(t)
lmyy7pcs

lmyy7pcs1#

1. 必须使用 FormRequest.from_response 来代替直接构造 FormRequest
2. 使用 formdata 作为参数名,而不是 data
3. formdata(即 payload)要以键值对的形式传入,也就是一个字典
4. 避免添加太多不必要的请求头(headers)

完整的工作代码示例:

import scrapy
from scrapy import FormRequest
from scrapy.crawler import CrawlerProcess

class TestSpider(scrapy.Spider):
    """Spider that loads the search page and submits its form with preset fields."""

    name = 'tes'
    start_urls = ['https://www.benrishi-navi.com/english/english1_2.php']

    def parse(self, response):
        """Submit the search form found in ``response``.

        Builds the form payload as a dict and yields a request created with
        ``FormRequest.from_response``; the resulting page is passed to
        ``parse_item``.
        """
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        # Every submitted field, listed once and in submission order.
        # The original literal repeated 'fuki_day_chk'; a duplicate key in a
        # dict literal is silently overwritten, so it is listed only once here.
        field_names = (
            'tuusan_year',
            'tuusan_month',
            'tuusan_chk',
            'methodAndOr1',
            'methodAndOr2',
            'methodAndOr3',
            'text_sen',
            'text_skill',
            'text_business',
            'tokkyo_data',
            'fuki_day_chk',
            'shuju',
            'kensyuu_bunya',
            'text_kensyuu',
            'methodAndOr_kensyuu',
            'keitai_kikan',
            'keitai_hisu',
            'display_flag',
            'search',
            'text',
            'method',
            'methodAndOr',
            'area',
            'pref',
            'name',
            'kana',
            'id',
            'year',
            'month',
            'day',
            'day_chk',
            'exp01',
            'exp02',
            'exp03',
            'trip',
            'venture_support',
            'venture_flag',
            'university_support',
            'university_flag',
            'university1',
            'university2',
            'university',
            'college',
            'high_pref',
            'junior_pref',
            'elementary_pref',
            'tyosaku',
            'hp',
            'jukoureki',
            'experience1',
            'experience2',
            'experience3',
            'experience4',
            'sort',
            'fuki_year',
            'fuki_month',
            'fuki_day',
            'id_chk',
            'shugyou',
            'fuki',
            'address1',
            'address2',
            'trip_pref',
            'expref',
            'office',
            'max_count',
            'search_count',
            'start_count',
            'search_default',
        )

        # Start with every field empty, then fill in the few that carry a
        # value. Updating an existing key keeps its position in the dict.
        formdata = dict.fromkeys(field_names, '')
        formdata.update({
            'display_flag': '1',
            'search': '2',
            'max_count': '1437',
            'search_count': '10',
            'start_count': '1',
            'search_default': '10',
        })

        yield FormRequest.from_response(
            response,
            formdata=formdata,
            headers=headers,
            callback=self.parse_item,
        )

    def parse_item(self, response):
        """Callback for the form submission result; extraction is not implemented."""
        pass

if __name__ == "__main__":
    # CrawlerProcess takes optional settings, not a spider class; the spider
    # class is handed to crawl(), and start() then runs the crawl.
    process = CrawlerProcess()
    process.crawl(TestSpider)
    process.start()

相关问题