import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class LaptopCrawlSpider(CrawlSpider):
    """Crawl Trendyol laptop search results and scrape each product detail page.

    Follows every product-card link on the start (listing) page and parses
    each detail page into a flat dict of brand, price, rating count and the
    nine specification rows of the "detail-border" table.
    """

    name = 'laptop_crawl'
    start_urls = ["https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=8&pi=2"]

    # Links to individual product cards on the listing page.
    laptop_detail_link_rules = LinkExtractor(restrict_xpaths='//*[@class="p-card-wrppr"]//a[@href]')
    laptop_detail = Rule(laptop_detail_link_rules,
                         callback='parse_item',
                         follow=False)
    rules = (
        laptop_detail,
    )

    def parse_item(self, response):
        """Yield one item dict per product detail page.

        Keys "Marka", "Fiyat" and "Değerlendirme Sayısı" are fixed; the
        remaining keys are the spec-row labels scraped from the page itself
        (stringified label list -> list of bold values), matching the
        original output format.
        """
        for info in response.xpath('/html/body'):
            # .get() returns None when the element is missing; guard the
            # split so a product without ratings doesn't crash the spider.
            rating_text = info.xpath("//*[@class='pr-in-ratings']//a/text()").get()
            rating_count = rating_text.split(" ")[0] if rating_text else None

            item = {
                "Marka": str(info.xpath('//*[@class="detail-name"][1]/text()').get()),
                "Fiyat": info.xpath("//*[@class='product-price-container']//span/text()").get(),
                "Değerlendirme Sayısı": rating_count,
            }

            # Spec rows li[1]..li[9]: label in span[1], value(s) in span/b.
            # The original unrolled these by hand and queried li[2] twice,
            # silently dropping row 3 — the loop fixes that bug.
            for i in range(1, 10):
                label = str(info.xpath(f'//*[@class="detail-border"]/ul/li[{i}]/span[1]/text()').getall())
                values = info.xpath(f'//*[@class="detail-border"]/ul/li[{i}]//span/b/text()').getall()
                item[label] = values

            yield item
"""import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df = pd.read_csv('laptops.csv')  # TODO: path of the CSV exported by the spider (scrapy crawl laptop_crawl -o laptops.csv)
df.head(25)"""
如何才能可以得到以下页面如:
https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=2
https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=3
https://www.trendyol.com/sr?q=laptop&qt=laptop&st=laptop&os=4
我试了网上的所有方法,但都不管用。
谢谢你们的回答。
1 条答案（可按热度 / 按时间排序）
回答者：p3rjfoxz1 #1
我知道这不是解决这个问题最有效的方法,但仍然...它在起作用:)