# Way 1 of running Scrapy from a script: CrawlerProcess.
# Merge the project settings with any per-run overrides, then run the
# spider inside a self-managed Twisted reactor.
custom_settings = {}
project_settings = get_project_settings()
settings = dict(project_settings.copy())
# BUG FIX: dict.get('settings') returns None when the key is absent
# (custom_settings is empty here), and dict.update(None) raises
# TypeError. Default to an empty dict so the merge is a no-op instead.
settings.update(custom_settings.get('settings', {}))

process = CrawlerProcess(settings)
process.crawl(Example2Spider)
process.start()  # blocks until the crawl finishes (starts/stops the reactor)
# Way 2: CrawlerRunner — the caller owns the Twisted reactor.
configure_logging()
runner = CrawlerRunner()


@defer.inlineCallbacks
def run_and_stop():
    # Wait for the crawl to finish, then shut the reactor down so
    # reactor.run() below returns.
    yield runner.crawl(Example2Spider)
    reactor.stop()


run_and_stop()
reactor.run()  # the script blocks here until the crawl has finished
# Way 3: drive Scrapy's command-line entry point programmatically.
custom_settings = {}
project_settings = get_project_settings()
settings = dict(project_settings.copy())
settings.update(custom_settings)
# FIX: "{}".format(name) was a no-op wrapper (assumes `name` is already a
# str — TODO confirm at the call site); pass it directly.
# NOTE(review): scrapy.cmdline.execute normally expects a Settings object
# as its second argument, not a plain dict — verify this works as intended.
execute(["scrapy", "crawl", name], settings)
# custom_settings setup — auto-run every spider found under
# demo_project.spiders in one CrawlerProcess.
process = CrawlerProcess(settings=get_project_settings())

for mod_path in find_modules('demo_project.spiders'):
    module = import_string(mod_path)
    # Derive the class name from the module name by convention,
    # e.g. module "example" -> class "ExampleSpider".
    class_string = mod_path.split('.')[-1].capitalize() + 'Spider'
    print(f"正在处理的spider:-> {class_string} ")
    spider_class = getattr(module, class_string)
    process.crawl(spider_class)

process.start()  # run all scheduled crawls, block until done
# Run `pools` copies of the same spider in parallel OS processes and
# wait for all of them to finish.
subpros = []
for _ in range(pools):
    # SECURITY FIX: the original used shell=True with an interpolated
    # command string ('scrapy crawl {} >/dev/null 2>&1'), which is
    # shell-injectable if spider_name is ever untrusted. A list argv
    # needs no shell, and DEVNULL reproduces the ">/dev/null 2>&1"
    # output redirection.
    subpro = subprocess.Popen(
        ["scrapy", "crawl", spider_name],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    subpros.append(subpro)
    time.sleep(2)  # stagger start-up so the children don't all spin up at once
for por in subpros:
    por.wait()  # block until every child process exits
具体可以参考:https://github.com/SummerWorm-Bullfrog/ScrapyTemplate