天天看點

用Python從智聯招聘自動爬取招聘資訊

#coding:utf8

import re

import requests

import urllib

from lxml import etree

# Scrape a job-search result page and print, for every posting linked from it,
# the job title, company name, monthly salary and work location.
#
# The code below sticks to constructs that are valid on both Python 2.7 and
# Python 3 (function-call print with one argument, u'' literals, and a
# fallback import for quote).

# quote() lives in urllib.parse on Python 3 and in urllib on Python 2.
try:
    from urllib.parse import quote
except ImportError:
    from urllib import quote

# Seconds to wait on each HTTP request; without a timeout requests.get() can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Printed in place of a field whose pattern does not match the page, instead
# of crashing with IndexError on an empty result list.
MISSING = u"N/A"

# Percent-encode the query values so non-ASCII text is safe inside the URL.
city = quote("北京")
kw = quote("python")

# Adjacent literals keep the URL on one logical line. The previous
# triple-quoted form had a blank line after the backslash continuation, which
# put a literal newline between "?" and "jl=".
url = (
    "http://sou.zhaopin.com/jobs/searchresult.ashx?"
    "jl=%s&kw=%s&sm=0&sg=27ce606676a743128f9fbb1fa5dd09e7&p=1"
) % (city, kw)

# Link to each posting inside the "Jobname" cell of the result table.
# A stray ' target="_blank" rel="external nofollow" ' fragment (apparently
# injected by an HTML sanitizer when the script was republished) was removed
# from this pattern and from the salary pattern below, where it sat inside the
# valign attribute value and made the pattern unmatchable.
reg = u'<td class="Jobname">.*?href="(.*?)".*?</span>.*?</td>'

# Detail-page patterns, compiled once because they are reused for every
# posting. re.S lets ".*?" span line breaks in the HTML.
# NOTE(review): the field labels are written with traditional characters;
# the site presumably serves simplified ones -- confirm against a live page.
TITLE_RE = re.compile(u'<td colspan="2">.*?<h1>(.*?)</h1>.*?</td>', re.S)
COMPANY_RE = re.compile(
    u'<td colspan="2">.*?<h2>.*?<a target="_blank" href=".*?>(.*?)</a></h2>.*?</td>',
    re.S,
)
SALARY_RE = re.compile(
    u'<tr>.*?職位月薪:</td>.*?<td valign="top">(.*?)</td>.*?</tr>', re.S
)
LOCATION_RE = re.compile(
    u'td class=.*?>工作地點:</td>.*?<td.*?<a.*?>(.*?)</a>', re.S
)


def _page_text(response):
    """Return the body of *response* decoded as UTF-8 text.

    The patterns are text and contain non-ASCII characters, so they must be
    matched against decoded text rather than the raw bytes in ``.content``.
    Undecodable bytes are replaced instead of raising.
    """
    return response.content.decode("utf-8", "replace")


def _first_match(pattern, html):
    """Return group 1 of the first match of *pattern* in *html*, or MISSING."""
    found = pattern.search(html)
    return found.group(1) if found else MISSING


ret = requests.get(url, timeout=REQUEST_TIMEOUT)
urlAll = re.findall(reg, _page_text(ret), re.S)

for url1 in urlAll:
    ret1 = requests.get(url1, timeout=REQUEST_TIMEOUT)
    cont1 = _page_text(ret1)

    title = _first_match(TITLE_RE, cont1)
    cmName = _first_match(COMPANY_RE, cont1)
    money = _first_match(SALARY_RE, cont1)
    position = _first_match(LOCATION_RE, cont1)

    # Joined with single spaces to reproduce the layout of the former
    # comma-separated print statement.
    print(u" ".join(
        [title, u"+", cmName, u"+月薪:", money, u"+工作地點:", position]
    ))