我试着搜索到到网的景点,但是我找不到每个景点的名字和地址。我怀疑我把 * product.css(...)* 写错了(有json吗?)
有谁能告诉我如何更正代码,以获得每个景点的名称和地址?
我的当前代码:
import scrapy
class QuotesSpider(scrapy.Spider):
name = "quotes"
start_urls = [
'https://www.tripadvisor.com/Attractions-g187427-Activities-oa90-Spain'
]
def parse(self, response):
for link in response.css('.EsZYd a::attr(href)'):
yield response.follow(link.get(), callback=self.parse_categories)
def parse_categories(self, response):
products = response.css('div.eeqnt')
for product in products:
yield {
'name' : product.css('h1.WlYyy cPsXC GeSzT::text').get().strip(),
'address' : product.css('span.WlYyy cacGK Wb::text').get().strip(),
}
更新代码(从列表中每页上的每个附件导出信息):
import scrapy
class QuotesSpider(scrapy.Spider):
name = "quotes"
start_urls = [
'https://www.tripadvisor.com/Attractions-g274862-Activities-a_allAttractions.true-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa30-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa60-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa90-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa120-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa150-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa180-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa210-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa240-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa270-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa300-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa330-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa360-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa390-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa420-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa450-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa480-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa510-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa540-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa570-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa600-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa630-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa660-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa690-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa720-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa750-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa780-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa810-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa840-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa870-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa900-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa930-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa960-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa990-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1020-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1050-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1080-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1110-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1140-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1170-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1200-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1230-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1260-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1290-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1320-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1350-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1380-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1410-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1440-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1470-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1500-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1530-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1560-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1590-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1620-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1650-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1680-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1710-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1740-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1770-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1800-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1830-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1860-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1890-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1920-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1950-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa1980-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2010-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2040-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2070-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2100-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2130-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2160-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2190-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2220-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2250-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2280-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2310-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2340-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2370-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2400-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2430-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2460-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2490-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2520-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2550-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2580-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2610-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2640-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2670-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2700-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2730-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2760-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2790-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2820-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2850-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2880-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2910-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2940-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa2970-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3000-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3030-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3060-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3090-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3120-Slovenia.html',
'https://www.tripadvisor.com/Attractions-g274862-Activities-oa3150-Slovenia.html'
]
def parse(self, response):
for link in response.css('.EsZYd a::attr(href)').getall():
yield response.follow(link, callback=self.parse_categories)
def parse_categories(self, response):
yield {
'name': response.css('h1.WlYyy.cPsXC.GeSzT::text').get(),
'reviews': response.xpath('(//*[@class="cfIVb"])[1]//text()').getall(),
'address': response.xpath('(//*[@class="dGWve"])//text()').getall(),
'url': response.url,
}
2条答案
按热度按时间f4t66c6m1#
它实际上与python无关,而是与css选择器有关。
CSS类应该用点分隔,而不是空格
WlYyy.cPsXC.GeSzT
。最好的建议是在dev-toolbar中使用chrome。它可以给予你通过css-selector或xpath获得特定元素的路径,只需在DOM树中右键单击该元素,然后选择copy menu-item。
避免使用类(尤其是没有语义的类)作为锚点。它们可能会随页面或时间的变化而变化。
最好使用语义上有意义的节点,就像你的例子:标题的XPath看起来像
//main//header//div[@data-automation="main_h1"]//h1
。yptwkmov2#
你不能在每个刊登页面使用
for loop