Python crawler: a script for running Scrapy spiders on a schedule

How it works:

1 parent process -> multiple child processes -> scrapy processes

The parent starts one long-lived child process per spider, and each child runs every crawl in a fresh process of its own. This is necessary because Twisted's reactor cannot be restarted once it has stopped, so `scrapy crawl` cannot simply be executed twice in the same process.

Code example

Put the following file anywhere inside your Scrapy project and run it from within the project directory, so that `scrapy crawl` can locate scrapy.cfg.

# -*- coding: utf-8 -*-
# @File    : run_spider.py
# @Date    : 2018-08-06
# @Author  : Peng Shiyu
from multiprocessing import Process
from scrapy import cmdline
import time
import logging
# Configure the parameters here: spider name and run interval (seconds between crawls)
confs = [
    {
        "spider_name": "hexun_pdf",
        "frequency": 2,
    },
]
def start_spider(spider_name, frequency):
    args = ["scrapy", "crawl", spider_name]
    while True:
        start = time.time()
        # Each crawl runs in a fresh child process: Twisted's reactor
        # cannot be restarted, so cmdline.execute must not be called
        # twice within the same process.
        p = Process(target=cmdline.execute, args=(args,))
        p.start()
        p.join()  # wait for this crawl to finish
        logging.debug("### use time: %s" % (time.time() - start))
        time.sleep(frequency)  # wait before the next run
if __name__ == '__main__':
    # Without this the root logger stays at WARNING and the
    # logging.debug() timing line above never prints.
    logging.basicConfig(level=logging.DEBUG)
    for conf in confs:
        process = Process(target=start_spider,
                          args=(conf["spider_name"], conf["frequency"]))
        process.start()
        # Stagger spider start-ups by 10 seconds.
        time.sleep(10)
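
If you would rather not import Scrapy inside the scheduler itself, the same pattern can be driven through the command line instead of cmdline.execute. Below is a minimal sketch of that variant, not the author's original script: the file name run_spider_cli.py is hypothetical, subprocess.run assumes Python 3.5+, and the spider name hexun_pdf and 2-second interval are just the sample values from the config above.

# -*- coding: utf-8 -*-
# run_spider_cli.py - subprocess variant (hypothetical file name)
import subprocess
import time

def start_spider(spider_name, interval):
    while True:
        start = time.time()
        # subprocess.run blocks until "scrapy crawl" exits, so every
        # crawl still gets its own fresh process and Twisted reactor.
        subprocess.run(["scrapy", "crawl", spider_name])
        print("### use time: %s" % (time.time() - start))
        time.sleep(interval)

if __name__ == "__main__":
    start_spider("hexun_pdf", 2)

Either variant is started the same way, e.g. `python run_spider.py` from inside the project directory.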