python Scrapy未从网站返回任何内容

jchrr9hc  于 2022-11-09  发布在  Python
关注(0)|答案(1)|浏览(125)

我想爬取网站 https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3 的用户评论部分,但返回的结果为空,尽管选择器的路径是正确的。

import scrapy

from scrapy import Selector,Request

class LaptopSpider(scrapy.Spider):
    """Request one Zappos product page and print the text of a review node."""

    name = 'cs'

    def start_requests(self):
        """Yield the single request for the product page."""
        # The URL must not carry leading whitespace; the original literal
        # started with a stray space.
        url = 'https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3'
        yield Request(url, callback=self.parse)

    def parse(self, response):
        """Print the text of the first node matched by the review selector."""
        # `.get()` returns None when the selector matches nothing in the
        # downloaded HTML.
        # NOTE(review): the selector relies on generated class names
        # (`p--z`, `Oi-z`) — confirm they exist in the raw response.
        review_text = response.css(
            '#productRecap > div.p--z > div:nth-child(3) > div > div > div > div > div.Oi-z > div::text'
        ).get()
        print(review_text)
lb3vh1jj

lb3vh1jj1#

请尝试以下方法从帖子中的链接获取评论:

import scrapy

class ZapposSpider(scrapy.Spider):
    """Fetch the reviews of one Zappos product from the reviews JSON endpoint."""

    name = 'zappos'

    # Product page whose reviews are wanted; the product id is parsed from it.
    link = 'https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3'
    # Endpoint that is queried for the review data.
    base_url = 'https://api.prod.cassiopeia.ugc.zappos.com/display/v2/reviews'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
    }

    # Query-parameter template; 'productId' is filled in per request.
    params = {
        'offset': '0',
        'page': '1',
        'productId': '',
        'sort': 'upVotes:desc,overallRating:desc,reviewDate:desc'
    }

    def start_requests(self):
        """Yield one GET request to the reviews endpoint for the product in `link`."""
        # The id is the path segment right after "product/" in the link.
        product_id = self.link.split("product/")[1].split("/")[0]
        # Build a per-request copy rather than writing into the class-level
        # dict, which is shared by every instance and subclass. Key order is
        # preserved, so the resulting query string is unchanged.
        query = {**self.params, 'productId': product_id}

        # With method="GET", FormRequest sends `formdata` as the URL query string.
        yield scrapy.FormRequest(
            url=self.base_url,
            headers=self.headers,
            callback=self.parse,
            method="GET",
            formdata=query,
        )

    def parse(self, response):
        """Yield a reviewer/review dict for each entry under the 'reviews' key."""
        for item in response.json()['reviews']:
            yield {"reviewer": item['name'], "review": item['summary']}

相关问题