问题
得到的网易云页面不完整,这是为什么啊?恳请大佬解答
爬虫代码
import scrapy
class wangyiyun_spider(scrapy.Spider):
    """Spider that fetches the NetEase Cloud Music homepage and saves it to disk."""

    name = 'wy'

    def start_requests(self):
        # Entry point: schedule one request per seed URL.
        urls = ['https://music.163.com/']
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        # Dump the raw response bytes so the rendered page can be inspected offline.
        with open('wz.html', 'wb') as f:
            f.write(response.body)
MiddleWares代码
from selenium import webdriver
from scrapy.http.response.html import HtmlResponse
import time
class SeleniumParseMiddleware_req(object):
    """Downloader middleware that renders the requested page with Selenium.

    Returning an HtmlResponse from process_request short-circuits Scrapy's
    own download, so the spider's callback receives the browser-rendered HTML.
    """

    def process_request(self, request, spider):
        # Bug fix: render the URL actually being requested instead of a
        # hard-coded constant, so the middleware works for every request.
        url = request.url
        options = webdriver.ChromeOptions()
        options.add_argument('--log-level=3')  # suppress Chrome console noise
        brower = webdriver.Chrome(options=options)  # instantiate the browser
        try:
            brower.maximize_window()  # maximize the window
            brower.get(url)  # open the page
            # Scroll to the bottom to trigger lazy-loaded content.
            brower.execute_script('window.scrollTo(0,document.body.scrollHeight)')
            time.sleep(10)  # crude wait for async content; WebDriverWait would be more robust
            # NOTE(review): music.163.com renders its main content inside the
            # 'g_iframe' iframe, and page_source only covers the top document —
            # the likely cause of the "incomplete page". Calling
            # brower.switch_to.frame('g_iframe') before reading page_source may
            # be needed; confirm against the live site.
            data = brower.page_source.encode()  # rendered HTML as bytes
        finally:
            # quit() closes all windows and ends the driver session; the extra
            # close() the original called first was redundant. try/finally
            # guarantees the browser is released even if rendering raises.
            brower.quit()
        return HtmlResponse(url=url, body=data, request=request, encoding='utf-8')
class SeleniumParseMiddleware_res(object):
    """Downloader middleware hook that passes every response through unchanged."""

    def process_response(self, request, response, spider):
        # No post-processing is needed; return the response as-is so the
        # middleware chain continues normally.
        return response
settings 中 Middlewares 已经打开
发表评论