天天看点

boren - 学习用python爬取脑筋急转弯

import requests
import re
# Fetch brain-teaser list pages 1..74 from 2345.com and append every question
# found, one per line, to the local file 'text'.
#   yema    -- page number being fetched
#   wangzhi -- URL of that page
for yema in range(1, 75):
    print("正在爬取第%d页"%yema)
    wangzhi ="http://www.2345.com/inner/jzw/%d.htm"%yema
    response = requests.get(wangzhi)
    # '.' matches any single character (except a newline); '*?' repeats it as
    # few times as possible. The non-greedy capture therefore stops at the
    # FIRST '</span>', yielding only the text of the "table_left" cell and
    # not the answer markup that follows it on the same line.
    r = re.findall(r'<li><span class="table_left">(.*?)</span>', response.text)
    # 'with' closes the file even if a write fails; an explicit UTF-8 encoding
    # keeps non-ASCII text writable regardless of the platform default.
    with open('text', 'a', encoding='utf-8') as f:
        for i in r:
            f.write(i)
            f.write('\n')

           
# Pattern matching with regular expressions (the `re` module).
# `s` is a scratch string of arbitrary characters used to try patterns against.
s = """
nbvenkghmd ,fv,kgm x,dtr5foguhijhugyfgvhbjnkljhkgjhvbjjbcgfxvhbjkhvgchvbjknhvgcvhbjkhvghbjnkbhvg njbhvgbjSDTRFGJBHKNLKJHGVBJNKML;NBHVGCVHBJNKLJXFCGVHBJNKL;VFCGVHJKLHUGYFTCGVHBJHKGHCHBJKNJHVGB000000000152154@#$%^&*u(p)(trcgvhbfdxfcgASDADSASDDDD45/AD4./ADA4,4TS5RS.RASF2CZ46R87Z4knhv njkbhvjnkbhvbjn
"""
# f,g R F B @  -- presumably the characters the experiments below anchor on
import re
# Disabled experiments; re.findall returns every non-overlapping match.
# # r = re.findall('f[a-z]g',s) # 'f', one lowercase letter, then 'g'
# # r = re.findall('r[0-9]f',s) # 'r', one digit, then 'f'
# #r = re.findall('B([0-9]*)@',s) # capture the run of digits between 'B' and '@'
# r =re.findall("fdxfcg(.*)k",s)
# print(r)

# Disabled str.split example: split on '?' and print the part before it.
# a = '大吉大利?几万吃鸡'
# a = a.split('?')
# print(a[0])

# Print the integers 0 through 9 in order, leaving out 5.
for i in range(10):
    if i != 5:
        print(i)