
6. Scrapy Middleware: Custom Proxies

A downloader middleware sits in front of every request Scrapy sends, which makes it a natural place to assign a random HTTP proxy (and, when the proxy requires it, a Proxy-Authorization header) on a per-request basis. Two pieces are needed: the middleware itself in middlewares.py, and its registration in settings.py.

1. middlewares.py

import random, base64, six
def to_bytes(text, encoding=None, errors='strict'):
    if isinstance(text, bytes):
        return text
    if not isinstance(text, six.string_types):
        raise TypeError('to_bytes must receive a unicode, str or bytes '
                        'object, got %s' % type(text).__name__)
    if encoding is None:
        encoding = 'utf-8'
    return text.encode(encoding, errors)
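
Note that current Scrapy releases bundle an equivalent helper, so the local copy above can also be replaced with an import from Scrapy itself:

from scrapy.utils.python import to_bytes  # same behavior as the helper defined above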

# Custom proxy middleware: assign a random proxy to every outgoing request
class ProxyMiddleware(object):
    def process_request(self, request, spider):
        PROXIES = [
            {'ip_port': '111.11.228.75:80', 'user_pass': ''},
            {'ip_port': '120.198.243.22:80', 'user_pass': ''},
            {'ip_port': '111.8.60.9:8123', 'user_pass': ''},
            {'ip_port': '101.71.27.120:80', 'user_pass': ''},
            {'ip_port': '122.96.59.104:80', 'user_pass': ''},
            {'ip_port': '122.224.249.122:8088', 'user_pass': ''},
        ]
        proxy = random.choice(PROXIES)
        # The proxy URL is always set, whether or not credentials exist
        request.meta['proxy'] = "http://%s" % proxy['ip_port']
        if proxy['user_pass']:
            # Proxy requires authentication: attach HTTP Basic credentials
            encoded_user_pass = base64.b64encode(to_bytes(proxy['user_pass'])).decode('ascii')
            request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
            print("**************ProxyMiddleware have pass************ " + proxy['ip_port'])
        else:
            # Anonymous proxy: no credentials needed
            print("**************ProxyMiddleware no pass************ " + proxy['ip_port'])
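
As a variation, the proxy list can live in settings.py instead of being hard-coded inside process_request. A minimal sketch, assuming a custom PROXY_LIST setting (the setting name and class name below are illustrative, not part of Scrapy):

class SettingsProxyMiddleware(object):
    def __init__(self, proxies):
        self.proxies = proxies

    @classmethod
    def from_crawler(cls, crawler):
        # PROXY_LIST is an assumed custom setting, e.g. in settings.py:
        # PROXY_LIST = [{'ip_port': '111.11.228.75:80', 'user_pass': ''}, ...]
        return cls(crawler.settings.getlist('PROXY_LIST'))

    def process_request(self, request, spider):
        proxy = random.choice(self.proxies)
        request.meta['proxy'] = "http://%s" % proxy['ip_port']
        if proxy['user_pass']:
            creds = base64.b64encode(to_bytes(proxy['user_pass'])).decode('ascii')
            request.headers['Proxy-Authorization'] = 'Basic ' + creds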
           

2. settings.py

DOWNLOADER_MIDDLEWARES = {
   'xxx.middlewares.ProxyMiddleware': 500,
}
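
To confirm the middleware is actually in effect, a throwaway spider that fetches a service echoing the caller's IP works well; a minimal sketch (the spider name is arbitrary, and httpbin.org/ip simply returns the IP it sees):

import scrapy

class IpCheckSpider(scrapy.Spider):
    # Hypothetical verification spider: the reported IP should match the chosen proxy
    name = 'ip_check'
    start_urls = ['http://httpbin.org/ip']

    def parse(self, response):
        self.logger.info('Exit IP as seen by the server: %s', response.text)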