我有下面的文件和代码
import logging
from scrapy import signals
from scrapy.exceptions import NotConfigured
logger = logging.getLogger(__name__)
class SpiderOpenCloseLogging:
def __init__(self, item_count):
self.item_count = item_count
self.items_scraped = 0
@classmethod
def from_crawler(cls, crawler):
print('Hey I am called')
# first check if the extension should be enabled and raise
# NotConfigured otherwise
# if not crawler.settings.getbool('MYEXT_ENABLED'):
# raise NotConfigured
# get the number of items from settings
item_count = 1000 #crawler.settings.getint('MYEXT_ITEMCOUNT', 1000)
# instantiate the extension object
ext = cls(crawler.settings,crawler.stats)
# connect the extension object to signals
crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
# return the extension object
return ext
def spider_opened(self, spider):
logger.info("opened spider %s", spider.name)
def spider_closed(self, spider):
logger.info("closed spider %s", spider.name)
def item_scraped(self, item, spider):
self.items_scraped += 1
if self.items_scraped % self.item_count == 0:
logger.info("scraped %d items", self.items_scraped)
我已经更改了设置
MYEXT_ENABLED = True
EXTENSIONS = {
'project.custom_extension.SpiderOpenCloseLogging': 300
}
但是没有信号被调用,我已经检查了设置中给出的路径,蜘蛛被调用
未记录我提供的打印事件
有人能告诉我我错过了什么吗
谢谢
1条答案
按热度按时间cl25kdpy1#
所有的信号都是从我对你的脚本的改编中调用的。你犯了一些错误,对我来说没有任何意义,因为你没有具体说明任何东西。这就是为什么你没有得到信号,而是得到错误:
几个错误:
1.
i.通过更新,我们进行了以下更正:
二.
三.
以下是一个完整的例子:
输出量: