天天看点

检查网址是否存在

# -*- coding: utf-8 -*-

from urlparse import urlsplit
from httplib import HTTPConnection


class HttpChecker(object):
    """Check whether URLs exist by issuing HTTP ``HEAD`` requests.

    One ``HTTPConnection`` to the current host is kept on the instance.
    The checker can be used as a context manager, or ``check`` can be
    called directly (the connection is then created on first use)::

        with HttpChecker("www.example.com") as checker:
            checker.check("/index.html", splited=True)

    Requests always go through ``HTTPConnection`` (plain HTTP); the scheme
    of a URL passed in is not consulted.
    """

    def __init__(self, domain):
        """Remember the host that will be probed.

        :param domain: a bare host name, or a full URL containing ``//``
            from which only the network location part is kept.
        """
        if "//" in domain:  # a full URL, not just a domain name
            self.netloc = urlsplit(domain).netloc
        else:
            self.netloc = domain
        # Created lazily so check() also works outside a ``with`` block.
        self.connection = None

    def __enter__(self):
        """Create the connection object for the current host; return self."""
        self.connection = HTTPConnection(self.netloc)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close and drop the connection; exceptions are not suppressed."""
        connection, self.connection = self.connection, None
        if connection is not None:
            connection.close()

    def check(self, url, splited=False):
        """Return True when a ``HEAD`` request for *url* answers 200.

        :param url: a full URL, or -- when *splited* is true -- only the
            part of the URL after the host, which must start with ``/``.
        :param splited: set to true when *url* is already the request
            target, so it is sent as-is without being parsed.
        :returns: ``True`` if the response status is exactly 200,
            ``False`` for any other status (redirects included).

        When *url* names a host different from the current one, the
        checker switches to that host and keeps using it afterwards.
        Errors raised by the underlying connection are not caught here.
        """
        if splited:
            target = url
        else:
            parts = urlsplit(url)
            target = parts.path or "/"
            if parts.query:
                # Keep the query string; without it a different resource
                # would be probed.
                target = "%s?%s" % (target, parts.query)
            if parts.netloc and parts.netloc != self.netloc:
                self.netloc = parts.netloc
                self.connection = HTTPConnection(self.netloc)
        if not target:
            # An empty request target is not a valid HTTP request line.
            target = "/"
        if self.connection is None:
            self.connection = HTTPConnection(self.netloc)
        try:
            self.connection.connect()
            self.connection.request("HEAD", target)
            status = self.connection.getresponse().status
        finally:
            # Always release the socket, even when the request failed.
            self.connection.close()
        return status == 200



if __name__ == "__main__":
    # Demo: probe one page on the Google Hong Kong site and print whether
    # the HEAD request answered 200.
    demo_url = "http://www.google.com.hk/intl/zh-CN/options/"
    with HttpChecker("www.google.com.hk") as checker:
        exists = checker.check(demo_url)
        print(exists)
      

继续阅读