How to iterate over pages with a Scrapy CrawlSpider

Asked by 0kjbasz6 on 2022-11-09
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class LaptopCrawlSpider(CrawlSpider):
    name = 'laptop_crawl'
    start_urls = ["https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=8&pi=2"]

    # Rule: extract links to the individual product detail pages and send them
    # to parse_item; follow=False keeps the crawl from following further links.
    laptop_detail_link_rules = LinkExtractor(restrict_xpaths='//*[@class="p-card-wrppr"]//a[@href]')
    laptop_detail = Rule(laptop_detail_link_rules,
                         callback='parse_item',
                         follow=False)
    rules = (
        laptop_detail,
    )

    def parse_item(self, response):
        all_data_pc = response.xpath('/html/body')

        for info in all_data_pc:
            # Rating count: take the leading number from the rating text,
            # guarding against pages that have no ratings element at all.
            rating_text = info.xpath("//*[@class='pr-in-ratings']//a/text()").get()
            DeğerlendirmeSayısı = rating_text.split(" ")[0] if rating_text else None
            Fiyat = info.xpath("//*[@class='product-price-container']//span/text()").get()
            Marka = str(info.xpath('//*[@class="detail-name"][1]/text()').get())
            # Spec rows: lN holds the spec label, lNö holds the bolded spec value.
            l1 = str(info.xpath('//*[@class="detail-border"]/ul/li[1]/span[1]/text()').getall())
            l1ö = info.xpath('//*[@class="detail-border"]/ul/li[1]//span/b/text()').getall()
            l2 = str(info.xpath('//*[@class="detail-border"]/ul/li[2]/span[1]/text()').getall())
            l2ö = info.xpath('//*[@class="detail-border"]/ul/li[2]//span/b/text()').getall()
            l3 = str(info.xpath('//*[@class="detail-border"]/ul/li[3]/span[1]/text()').getall())
            l3ö = info.xpath('//*[@class="detail-border"]/ul/li[3]//span/b/text()').getall()
            l4 = str(info.xpath('//*[@class="detail-border"]/ul/li[4]/span[1]/text()').getall())
            l4ö = info.xpath('//*[@class="detail-border"]/ul/li[4]//span/b/text()').getall()
            l5 = str(info.xpath('//*[@class="detail-border"]/ul/li[5]/span[1]/text()').getall())
            l5ö = info.xpath('//*[@class="detail-border"]/ul/li[5]//span/b/text()').getall()
            l6 = str(info.xpath('//*[@class="detail-border"]/ul/li[6]/span[1]/text()').getall())
            l6ö = info.xpath('//*[@class="detail-border"]/ul/li[6]//span/b/text()').getall()
            l7 = str(info.xpath('//*[@class="detail-border"]/ul/li[7]/span[1]/text()').getall())
            l7ö = info.xpath('//*[@class="detail-border"]/ul/li[7]//span/b/text()').getall()
            l8 = str(info.xpath('//*[@class="detail-border"]/ul/li[8]/span[1]/text()').getall())
            l8ö = info.xpath('//*[@class="detail-border"]/ul/li[8]//span/b/text()').getall()
            l9 = str(info.xpath('//*[@class="detail-border"]/ul/li[9]/span[1]/text()').getall())
            l9ö = info.xpath('//*[@class="detail-border"]/ul/li[9]//span/b/text()').getall()

            yield {
                "Marka": Marka,
                "Fiyat": Fiyat,
                "Değerlendirme Sayısı": DeğerlendirmeSayısı,
                l1: l1ö,
                l2: l2ö,
                l3: l3ö,
                l4: l4ö,
                l5: l5ö,
                l6: l6ö,
                l7: l7ö,
                l8: l8ö,
                l9: l9ö,
            }

"""import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

df = pd.read_csv('')
df.head(25)"""

How can I also crawl the following pages, for example:

https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=2
https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=3
https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=4

I have tried every approach I found online, but none of them worked.
Thanks in advance for your answers.

p3rjfoxz (answer #1)

# Class-level attributes on the spider: build start_urls by iterating
# the "pi" (page index) query parameter from 2 up to 6.
l = "https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=8&pi="
start_urls = [l + str(2)]
i = 2
while i < 6:
    i += 1
    start_urls.append(l + str(i))

# Not strictly required: CrawlSpider reads start_urls directly.
def justreturn(self):
    return self.start_urls

I know this isn't the most efficient way to solve it, but still... it works :)
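
For reference, a more conventional alternative is to generate the paginated listing requests in start_requests() and let the existing Rule pick the product links off each result page. The following is a minimal sketch, not the original poster's code: the spider name, the page range, and the bare parse_item body are illustrative, and it assumes the listing paginates through the pi query parameter exactly as in the URLs above.

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class LaptopPageSpider(CrawlSpider):
    name = 'laptop_crawl_pages'  # hypothetical name for this sketch

    # Same product-detail rule as in the question.
    rules = (
        Rule(LinkExtractor(restrict_xpaths='//*[@class="p-card-wrppr"]//a[@href]'),
             callback='parse_item',
             follow=False),
    )

    def start_requests(self):
        # Hypothetical page range; adjust to however many result pages exist.
        base = "https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=8&pi="
        for page in range(1, 7):
            # Requests yielded without an explicit callback go through
            # CrawlSpider's default parsing, which applies the rules above.
            yield scrapy.Request(base + str(page))

    def parse_item(self, response):
        # Extract the product fields here as in the original parse_item.
        yield {"url": response.url}

Because the Rule handles the product links, only the listing pages need to be enumerated by hand; everything else stays as in the original spider.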
