我想写一个网页爬虫来抓取 Stack Overflow 的问题列表,但第三列的数据没有被抓取下来,你能帮我看看吗?
from scrapy.item import Field
from scrapy.item import Item
from scrapy.spiders import Spider
from scrapy.selector import Selector
from scrapy.loader import ItemLoader
class Question(Item):
    """Scraped record for a single question summary.

    NOTE(review): the a_/b_/c_ name prefixes presumably exist to keep the
    exported columns in a fixed order - confirm against the feed export.
    """

    # Running number assigned by the spider.
    a_id = Field()
    # Text taken from the question's title link.
    b_question = Field()
    # Text taken from the question's excerpt block.
    c_desc = Field()
class StackOverflowSpider(Spider):
    """Spider that scrapes the question list at stackoverflow.com/questions.

    For every question summary found on the page it yields a ``Question``
    item holding a 1-based running number, the title text and the excerpt
    text.
    """

    name = "MyFirstSpider"
    custom_settings = {
        # The Scrapy setting is spelled USER_AGENT (underscore). The previous
        # hyphenated key 'USER-AGENT' is not a setting Scrapy reads, so the
        # custom user agent was never sent.
        'USER_AGENT': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
    }
    start_urls = ['https://stackoverflow.com/questions']

    def parse(self, response):
        """Yield one loaded ``Question`` item per question summary.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            The item returned by ``ItemLoader.load_item()`` for each
            question summary block found in the page.
        """
        # Exact class match on purpose: a contains() test here would also
        # match "s-post-summary--content-excerpt" and similar class names.
        questions = response.xpath(
            '//div[@id="questions"]//div[@class="s-post-summary--content"]'
        )
        # a_id is a 1-based running number in page order.
        for number, quest in enumerate(questions, start=1):
            loader = ItemLoader(Question(), quest)
            loader.add_value('a_id', number)
            loader.add_xpath('b_question', './/h3/a/text()')
            # normalize-space() trims and collapses whitespace, so the value
            # no longer starts with newlines/indentation; contains() keeps
            # the match working if the div carries additional classes.
            # NOTE(review): assumes the excerpt text is whitespace-padded in
            # the page markup, which made the CSV column look empty - confirm
            # against the live HTML.
            loader.add_xpath(
                'c_desc',
                'normalize-space(.//div[contains(@class, "s-post-summary--content-excerpt")])',
            )
            yield loader.load_item()
picture from csv file output
picture from website and the html code
1条答案
按热度按时间oyxsuwqo1#
试试下面的写法:我添加了一些行内注释来解释这些改动。