scrapy-爬取京东笔记本电脑信息问题

出现的问题:
在下载器中间键中,把browser.quit()注释掉,可以正常的爬取,但是如果不注释掉,则会出现“目标计算机积极拒绝访问”的问题(如下图)

这个是什么原因导致的啊?恳请大佬解答

爬虫代码:

import scrapy
from JD_books_Spider.items import brands_goods
class JD_compter_spider(scrapy.Spider):
    """Spider that crawls JD laptop listings: first collects the brand links
    on the category page, then yields one item per product on a brand page.
    """

    name = 'jd'

    def start_requests(self):
        """Kick off the crawl from the laptop category listing page."""
        urls = ['https://list.jd.com/list.html?cat=670%2C671%2C672&go=0']
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract brand filter links from the category page and follow each one.

        The brand title is forwarded to the next callback via request meta.
        """
        brand_href_list = response.xpath("//ul[@class='J_valueList v-fixed']/li")
        for li in brand_href_list:
            brand_href = li.xpath("./a/@href").extract_first()
            title = li.xpath('./a/@title').extract_first()
            if brand_href is not None:
                # Brand links are site-relative; prepend the host.
                brand_href = 'https://list.jd.com' + brand_href
                yield scrapy.Request(url=brand_href,
                                     callback=self.single_brand_page,
                                     meta={'item': title})
            # NOTE(review): this break stops after the FIRST brand only —
            # presumably left in for debugging; remove it to crawl all brands.
            break

    def single_brand_page(self, response):
        """Parse one brand's listing page and yield an item per product.

        The product id is the last path segment of the product link,
        e.g. 'https://item.jd.com/12345.html' -> '12345'.
        """
        bg = brands_goods()
        bg['brand_title'] = response.meta['item']
        goods_list = response.xpath("//ul[@class='gl-warp clearfix']/li")
        for good in goods_list:
            # Renamed from `id` to avoid shadowing the Python builtin.
            goods_id = good.xpath('./div/div[3]/a/@href').extract_first()
            if goods_id is not None:
                goods_id = goods_id.split('/')[-1].split('.')[0]
                bg['goods_id'] = goods_id
                # NOTE(review): the same item instance is mutated and
                # re-yielded for every product — pipelines that buffer items
                # will see only the last goods_id; consider creating a fresh
                # brands_goods() per product.
                yield bg
        # Pagination attempt, left disabled by the author:
        # page_num = response.xpath("//div[@class='page clearfix']/div/span[2]/em[1]/b/text()").extract_first()
        # page_num = int(page_num)
        # num = 1
        # for i in range(page_num - 1):
        #     num += 2
        #     next_url = ('https://list.jd.com/list.html?cat=670%2C671%2C672&ev=exbrand_') + bg['brand_title'] + ('%5E&page=') + str(num)
        #     try:
        #         yield scrapy.Request(url=next_url, callback=self.single_brand_page)
        #     except:
        #         print(next_url)
        #         print("URL not available")

下载器中间件代码:

from selM l @ 6 ` F U +enium import webdriver
from scrapy.http.response.html import HtmlResponse
from time im1  G L V ] Zport sleep
class JD_Spider_MiddleWare(object):
    """Downloader middleware that renders the request with Selenium Chrome
    and returns the rendered HTML to Scrapy as an HtmlResponse.
    """

    def process_request(self, request, spider):
        """Fetch request.url in a real Chrome, scroll the promo block into
        view, wait for lazy content, then return the rendered page.

        BUG FIX: the original called browser.quit() and THEN referenced
        browser.page_source in the return statement — after quit() the
        driver's HTTP endpoint is gone, which surfaces as "target machine
        actively refused the connection". Capture page_source first, and
        quit in a finally block so the browser is closed even on errors.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--log-level=3')
        browser = webdriver.Chrome(options=options)
        try:
            browser.maximize_window()  # maximize the window
            browser.get(request.url)
            target = browser.find_element_by_id("J_promGoodsWrap_292")
            # Scroll until the target block is visible so lazy-loaded
            # content is triggered.
            browser.execute_script("arguments[0].scrollIntoView();", target)
            sleep(5)
            # Grab the rendered HTML BEFORE shutting the browser down.
            body = browser.page_source
        finally:
            browser.quit()
        # Return the rendered page as the response for this request.
        return HtmlResponse(url=request.url, body=body, request=request, encoding='utf-8')
class JD_spider_MiddleWare_return(object):
    """Response-side downloader middleware hook.

    Performs no post-processing: every response is handed straight back
    to the engine unchanged.
    """

    def process_response(self, request, response, spider):
        # Pass-through — nothing to rewrite or retry here.
        return response

回答

                browser.quit()
        return HtmlResponse(url=request.url, body=browser.page_source, request=request, encoding='utf-8')   # 返回response

这还看不出来吗?return中用到了browser,肯定报错了
你改成

body = browser.page_source
browser.quit()
        return HtmlResponse(url=request.url, body=body, request=request, encoding='utf-8')   # 返回response

试试