python Scrapy亚马逊未能返回所有评论

hpcdzsge  于 2022-11-23  发布在  Python
关注(0)|答案(1)|浏览(134)

我想从亚马逊刮亚马逊评论,退货结果总是没有,但是,有产品评论可以正确退货。有什么问题吗?

import scrapy
from scrapy import Selector, Request
from test1.items import Test1Item

class hiSpider(scrapy.Spider):

    name = 'hello'

    def start_requests(self):
        urls =  ['https://www.amazon.com/s?k=t-shirts+for+men&page=2&crid=2GLXHBOKVG093&qid=1668745933&sprefix=t-shirts+for+men%2Caps%2C280&ref=sr_pg_1']
       
    def parse(self, response):
        sel = Selector(response)
        items = sel.css('span[data-component-type="s-search-results"]>div>div[data-component-type="s-search-result"]')
        for item in items:
            help = Test1Item()
            detail_url = item.css('a[class="a-link-normal s-no-outline"]::attr(href)').get()
            url = response.urljoin(detail_url)
            yield Request(url=url,
                          callback=self.parse_detail,
                          cb_kwargs={'item': help}

                          )

    def parse_detail(self,response,**kwargs):
        help = kwargs['item']
        sel = Selector(response)
        comment_url = response.request.url
        product_id = comment_url.split("dp/")[1].split("/")[0]
        print(product_id)
        nexturl = f'https://www.amazon.com/product-reviews/{product_id}/ref=cm_cr_arp_d_viewopt_srt?sortBy=recent&pageNumber=1'
        yield Request(url=nexturl,
                      callback=self.parse_detail1,
                      cb_kwargs={'item': help}
                      )

    def parse_detail1(self, response, **kwargs):
        help = kwargs['item']
        sel = Selector(response)
        help["name"] = sel.css('a[data-hook="review-title"]>span::text').get()
        yield help

我想把所有的评论都退回去,怎么办

enyaitl3

enyaitl31#

测试此代码。

import scrapy
from scrapy import Request
from test1.items import Test1Item

class hiSpider(scrapy.Spider):

    name = 'hello'

    # if you use ( statr_urls ) you don't need start_request() function 

    def start_requests(self):
        url =  'https://www.amazon.com/s?k=t-shirts+for+men&page=2&crid=2GLXHBOKVG093&qid=1668745933&sprefix=t-shirts+for+men%2Caps%2C280&ref=sr_pg_1'
        yield scrapy.Request(
            url=url,
            callback=self.parse
        )
       
    def parse(self, response):
        # you dont need Selector here because response is a selector object 

        items = response.css('your css selector')
        help = Test1Item()

        for item in items:
            detail_url = item.css('your css selector').get()
            url = response.urljoin(detail_url)
            yield Request(url=url,
                          callback=self.parse_detail,
                          cb_kwargs={'item': help}

                          )

    def parse_detail(self,response,**kwargs):
        help = response.cb_kwargs['item'] # response.cb_kwargs['item'] not kwargs['item']

        comment_url = response.request.url
        product_id = comment_url.split("dp/")[1].split("/")[0]

        print(product_id)

        nexturl = f'https://www.amazon.com/product-reviews/{product_id}/ref=cm_cr_arp_d_viewopt_srt?sortBy=recent&pageNumber=1'

        yield Request(url=nexturl,
                      callback=self.parse_detail1,
                      cb_kwargs={'item': help}
                      )

    def parse_detail1(self, response, **kwargs):
        help = response.cb_kwargs['item']
        help["name"] = response.css('your css selector').get()
        yield help

相关问题