#coding:utf8
import re
import requests
import urllib
from lxml import etree
# Scrape the first page of zhaopin.com search results for a keyword in a city
# and print title, company, monthly salary and work location of every posting.

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Percent-encode the UTF-8 bytes of the city name so it is safe inside a URL.
city = urllib.quote("北京")
kw = "python"
# The backslash-newline joins the two source lines into a single URL string.
# The keyword is quoted as well so values with spaces or non-ASCII text work.
url = """http://sou.zhaopin.com/jobs/searchresult.ashx?\
jl=%s&kw=%s&sm=0&sg=27ce606676a743128f9fbb1fa5dd09e7&p=1""" % (city, urllib.quote(kw))

# Captures the detail-page link of each posting in the result table.
# NOTE(review): the earlier pattern required a literal
# ' target="_blank" rel="external nofollow" ' right after the href value; that
# fragment looks like markup injected when the snippet was copied from a web
# page, so the pattern is restored to accept any attributes after the href.
reg = '<td class="Jobname">.*?href="(.*?)".*?</span>.*?</td>'

# Detail-page patterns, compiled once instead of on every loop iteration.
# NOTE(review): the Chinese labels must match the page text byte-for-byte
# (script variant and colon included) -- confirm against a live page.
title_re = re.compile('<td colspan="2">.*?<h1>(.*?)</h1>.*?</td>', re.S)
cmName_re = re.compile(
    '<td colspan="2">.*?<h2>.*?<a target="_blank" href=".*?>(.*?)</a></h2>.*?</td>',
    re.S)
# NOTE(review): the same stray fragment had been spliced into the middle of the
# valign attribute here, which no real HTML could match; valign="top" is the
# assumed original form.
money_re = re.compile('<tr>.*?職位月薪:</td>.*?<td valign="top">(.*?)</td>.*?</tr>', re.S)
position_re = re.compile('td class=.*?>工作地點:</td>.*?<td.*?<a.*?>(.*?)</a>', re.S)


def _first(pattern, html):
    """Return group 1 of the first match of *pattern* in *html*, or "" if none.

    Equivalent to ``re.findall(...)[0]`` for a single-group pattern, except
    that a page without the field yields an empty string instead of raising
    IndexError.
    """
    found = pattern.search(html)
    return found.group(1) if found else ""


ret = requests.get(url, timeout=REQUEST_TIMEOUT)
urlAll = re.findall(reg, ret.content, re.S)
for url1 in urlAll:
    ret1 = requests.get(url1, timeout=REQUEST_TIMEOUT)
    cont1 = ret1.content
    title = _first(title_re, cont1)
    cmName = _first(cmName_re, cont1)
    money = _first(money_re, cont1)
    position = _first(position_re, cont1)
    # Joining with single spaces reproduces the spacing of the comma-separated
    # print statement; print(...) with one argument emits the same text.
    print(" ".join([title, "+", cmName, "+月薪:", money, "+工作地點:", position]))