天天看點

python mongodb查詢速度優化_python實戰計劃-對mongodb進行查詢

目的:依照 http://www.jianshu.com/p/ef1028a4668e 介紹的方法爬取資料,將爬取到的內容存入 MongoDB,並查詢價格超過 500 的資訊。

"""Scrape short-term rental listings from bj.xiaozhu.com into MongoDB.

For every search-result page in the requested range, each linked listing
detail page is downloaded, parsed, and inserted into the ``xinxi``
collection of the ``xiaozu`` database. Afterwards every stored listing whose
price is greater than 500 is printed.
"""
from bs4 import BeautifulSoup
import requests
import pymongo

client = pymongo.MongoClient('localhost', 27017)
xiaozu = client['xiaozu']
xinxi = xiaozu['xinxi']

# Seconds to wait on each HTTP request so a stalled server cannot hang the run.
REQUEST_TIMEOUT = 10


def sex_judge(sex):
    """Map the landlord icon's CSS classes to a gender label.

    Args:
        sex: Iterable of CSS class names taken from the icon element's
            ``class`` attribute, or ``None`` when the attribute is missing.

    Returns:
        ``'male'`` if ``member_ico`` is present, ``'female'`` if
        ``member_ico1`` is present, otherwise the string ``'None'``.
    """
    # Inspect every class before giving up: the marker class need not be the
    # first one, and the attribute may be empty or absent altogether.
    for isex in sex or ():
        if isex == 'member_ico':
            return 'male'
        if isex == 'member_ico1':
            return 'female'
    return 'None'


def _fetch_soup(url):
    """Download *url* and return the response body parsed with lxml."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, 'lxml')


def _parse_listing(page):
    """Extract the fields stored for one listing from its detail page.

    Args:
        page: Parsed detail page (a ``BeautifulSoup`` document).

    Returns:
        A dict ready to be inserted into the ``xinxi`` collection.

    Raises:
        IndexError: If one of the expected elements is not on the page.
        ValueError: If the price text cannot be converted to ``int``.
    """
    image = page.select('#curBigImage')[0].get('src')
    landlord_image = page.select(
        '#floatRightBox > div.js_box.clearfix > div.member_pic > a > img'
    )[0].get('src')
    title = page.select(
        'body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em'
    )[0].get_text()
    address = list(page.select(
        'body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span'
    )[0].stripped_strings)
    price = page.select('#pricePart > div.day_l > span')[0].get_text()
    name = page.select(
        '#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a'
    )[0].get_text()
    sexs = sex_judge(page.select(
        '#floatRightBox > div.js_box.clearfix > div.member_pic > div'
    )[0].get('class'))
    return {
        'image': image,
        'landlord_image': landlord_image,
        'title': title,
        'address': address,
        # Stored as an int so the numeric ``$gt`` query below works.
        'price': int(price),
        'name': name,
        'sexs': sexs,
    }


def main():
    """Prompt for the page range, scrape it, then print listings over 500."""
    end_page = input('end_page:')
    # NOTE(review): the upper bound is exclusive, so entering 5 scrapes pages
    # 1-4. Kept as in the original; confirm whether inclusive was intended.
    for page_number in range(1, int(end_page)):
        base_url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page_number)
        search_page = _fetch_soup(base_url)
        for link in search_page.find_all('a', {'class': 'resule_img_a'}):
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be fetched; skip it.
                continue
            xinxi.insert_one(_parse_listing(_fetch_soup(href)))

    for item in xinxi.find({'price': {'$gt': 500}}):
        print(item)


if __name__ == '__main__':
    main()