Using an IP proxy in Scrapy relies on the downloader middleware mechanism.
First, enable the middleware in settings. For downloader middlewares, the smaller the priority number, the earlier its process_request is called (process_response is called in the reverse order).
DOWNLOADER_MIDDLEWARES = {
    'spider.spider.middlewares.ProxyMiddleWare': 542,
    'spider.spider.middlewares.SelenuimDownloaderMiddleware': 543,
}
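If you only want the proxy middleware to apply to one spider rather than the whole project, the same dict can instead go into that spider's custom_settings. A minimal sketch (the spider name and the module path spider.spider.middlewares are just the ones used above):

import scrapy

class ExampleSpider(scrapy.Spider):
    name = 'example'
    start_urls = ['http://httpbin.org/ip']

    # Per-spider override: these downloader middlewares only apply to this spider
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'spider.spider.middlewares.ProxyMiddleWare': 542,
            'spider.spider.middlewares.SelenuimDownloaderMiddleware': 543,
        }
    }

    def parse(self, response):
        pass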
Then write the middleware itself: it intercepts each request and attaches a proxy to it.
# RedisClient is the project's own proxy-pool client; adjust the import path to your project
from proxy_pool.db import RedisClient


class ProxyMiddleWare(object):
    def process_request(self, request, spider):
        '''Attach a proxy to the outgoing request'''
        # Scrapy expects a full proxy URL such as 'http://host:port'
        proxy = RedisClient().pop_proxy().decode("utf8")
        print("-------this is request ip----------:" + proxy)
        request.meta['proxy'] = proxy

    def process_response(self, request, response, spider):
        '''Handle the returned response'''
        # If the response status is not 200, re-issue the current request with a new proxy
        if response.status != 200:
            proxy = RedisClient().pop_proxy().decode("utf8")
            print("this is response ip:" + proxy)
            # Attach a new proxy to the current request
            request.meta['proxy'] = proxy
            # Mark the retried request so the dupefilter does not drop it
            request.dont_filter = True
            return request
        return response
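The middleware assumes a RedisClient helper backed by a proxy pool stored in Redis. That class isn't shown in this post, so the following is only a minimal sketch of what pop_proxy() might look like (the connection defaults and the Redis key 'proxies' are assumptions), using redis-py. Note it returns bytes, which is why the middleware calls .decode("utf8"):

import redis

class RedisClient(object):
    '''Minimal proxy-pool client: proxies are kept in a Redis list'''

    def __init__(self, host='localhost', port=6379, db=0):
        self.db = redis.StrictRedis(host=host, port=port, db=db)

    def pop_proxy(self):
        # Pop one proxy (as bytes) from the 'proxies' list,
        # e.g. b'http://1.2.3.4:8080'; the caller decodes it to str
        return self.db.rpop('proxies')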