代碼如下
import re
import requests
import json
#from multiprocessing import Pool
# 多程序
#url = 'https://maoyan.com/board/4?offset=0'
#r = requests.get(url)
#r.encoding = r.apparent_encoding
#print(r.status_code)
#print(r.request.headers)
# 擷取單頁
def get_one_page(url):
    """Download a single page and return its HTML text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body as a string, or ``None`` when the request
        fails (a failure notice is printed in that case).
    """
    # Send a browser-like User-Agent instead of the library default.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        # Without a timeout a stalled connection would block indefinitely.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; a bare ``except``
        # would also hide KeyboardInterrupt and genuine programming errors.
        print("爬取失敗")
        return None
    # Decode with the encoding guessed from the body content.
    r.encoding = r.apparent_encoding
    return r.text
# 提取需要的資訊
def parse_one_page(html):
    """Extract the movie entries contained in one board page.

    Args:
        html: Page markup as a string. ``None`` or an empty string (for
            example after a failed download) yields nothing.

    Yields:
        One dict per ``<dd>`` entry with the string-valued keys ``index``,
        ``title``, ``actor``, ``time`` and ``score``.
    """
    if not html:
        # Nothing to parse; avoids a TypeError from the regex on None.
        return
    # Raw strings keep ``\d`` a regex escape instead of an invalid string
    # escape. re.S lets ``.`` span the newlines inside each <dd> element.
    pattern = re.compile(
        r'<dd>.*?board-index.*?>(\d+)</i>.*?name"><a.*?>(.*?)</a>'
        r'.*?star">(.*?)</p>.*?releasetime">(.*?)</p>'
        r'.*?integer">(.*?)</i>.*?fraction">(.*?)</i>.*?</dd>',
        re.S,
    )
    for index, title, star, release, integer, fraction in pattern.findall(html):
        yield {
            'index': index,
            'title': title,
            # Drop the first 3 characters (the label preceding the names).
            'actor': star.strip()[3:],
            # Drop the first 5 characters (the label preceding the date).
            'time': release.strip()[5:],
            # The score is split into integer and fraction parts in the markup.
            'score': integer + fraction,
        }
# 将資訊寫入檔案
def write_to_file(content):
    """Append one record to ``result.txt`` as a single JSON line.

    Args:
        content: JSON-serialisable object (the scraper passes a dict).
    """
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    line = json.dumps(content, ensure_ascii=False) + '\n'
    # The ``with`` block closes the file on exit, so no explicit close() call
    # is needed.
    with open('result.txt', 'a', encoding='utf-8') as f:
        f.write(line)
# 主函數
def main(offset):
    """Scrape one board page, then print and save every entry found on it.

    Args:
        offset: Value of the ``offset`` query parameter selecting the page.
    """
    url = 'https://maoyan.com/board/4?' + 'offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # The download failed (already reported by get_one_page); skip this
        # page instead of handing None to the parser, which would raise.
        return
    for item in parse_one_page(html):
        print(item)
        write_to_file(item)
if __name__ == '__main__':
    # Walk the ten pages one after another: offsets 0, 10, ..., 90.
    for offset in range(0, 100, 10):
        main(offset)
    # Parallel alternative (needs `from multiprocessing import Pool`);
    # with it the output order is no longer sequential:
    # pool = Pool()
    # pool.map(main, [i*10 for i in range(10)])
運作結果如下:
{“index”: “1”, “title”: “霸王别姬”, “actor”: “張國榮,張豐毅,鞏俐”, “time”: “1993-01-01”, “score”: “9.6”}
{“index”: “2”, “title”: “肖申克的救贖”, “actor”: “蒂姆·羅賓斯,摩根·弗裡曼,鮑勃·岡頓”, “time”: “1994-10-14(美國)”, “score”: “9.5”}
{“index”: “3”, “title”: “羅馬假日”, “actor”: “格利高裡·派克,奧黛麗·赫本,埃迪·艾伯特”, “time”: “1953-09-02(美國)”, “score”: “9.1”}
{“index”: “4”, “title”: “這個殺手不太冷”, “actor”: “讓·雷諾,加裡·奧德曼,娜塔莉·波特曼”, “time”: “1994-09-14(法國)”, “score”: “9.5”}
{“index”: “5”, “title”: “教父”, “actor”: “馬龍·白蘭度,阿爾·帕西諾,詹姆斯·肯恩”, “time”: “1972-03-24(美國)”, “score”: “9.3”}
{“index”: “6”, “title”: “泰坦尼克号”, “actor”: “萊昂納多·迪卡普裡奧,凱特·溫絲萊特,比利·贊恩”, “time”: “1998-04-03”, “score”: “9.5”}
{“index”: “7”, “title”: “唐伯虎點秋香”, “actor”: “周星馳,鞏俐,鄭佩佩”, “time”: “1993-07-01(中國香港)”, “score”: “9.2”}
{“index”: “8”, “title”: “千與千尋”, “actor”: “柊瑠美,入野自由,夏木真理”, “time”: “2001-07-20(日本)”, “score”: “9.3”}
{“index”: “9”, “title”: “魂斷藍橋”, “actor”: “費雯·麗,羅伯特·泰勒,露塞爾·沃特森”, “time”: “1940-05-17(美國)”, “score”: “9.2”}
{“index”: “10”, “title”: “亂世佳人”, “actor”: “費雯·麗,克拉克·蓋博,奧利維娅·德哈維蘭”, “time”: “1939-12-15(美國)”, “score”: “9.1”}
{“index”: “11”, “title”: “天空之城”, “actor”: “寺田農,鹫尾真知子,龜山助清”, “time”: “1992”, “score”: “9.1”}
{“index”: “12”, “title”: “喜劇之王”, “actor”: “周星馳,莫文蔚,張柏芝”, “time”: “1999-02-13(中國香港)”, “score”: “9.2”}
{“index”: “13”, “title”: “辛德勒的名單”, “actor”: “連姆·尼森,拉爾夫·費因斯,本·金斯利”, “time”: “1993-12-15(美國)”, “score”: “9.2”}
{“index”: “14”, “title”: “大鬧天宮”, “actor”: “邱嶽峰,畢克,富潤生”, “time”: “1965-12-31”, “score”: “9.0”}
{“index”: “15”, “title”: “音樂之聲”, “actor”: “朱莉·安德魯斯,克裡斯托弗·普盧默,埃琳諾·帕克”, “time”: “1965-03-02(美國)”, “score”: “9.0”}
{“index”: “16”, “title”: “剪刀手愛德華”, “actor”: “約翰尼·德普,薇諾娜·瑞德,黛安·韋斯特”, “time”: “1990-12-06(美國)”, “score”: “8.8”}
{“index”: “17”, “title”: “春光乍洩”, “actor”: “張國榮,梁朝偉,張震”, “time”: “1997-05-30(中國香港)”, “score”: “9.2”}
{“index”: “18”, “title”: “美麗人生”, “actor”: “羅伯托·貝尼尼,尼可萊塔·布拉斯基,喬治·坎塔裡尼”, “time”: “1997-12-20(意大利)”, “score”: “9.3”}
{“index”: “19”, “title”: “海上鋼琴師”, “actor”: “蒂姆·羅斯,普路特·泰勒·文斯,比爾·努恩”, “time”: “1998-10-28(意大利)”, “score”: “9.2”}
{“index”: “20”, “title”: “黑客帝國”, “actor”: “基努·裡維斯,凱瑞-安·莫斯,勞倫斯·菲什伯恩”, “time”: “2000-01-14”, “score”: “9.0”}
{“index”: “21”, “title”: “哈利·波特與魔法石”, “actor”: “丹尼爾·雷德克裡夫,魯伯特·格林特,艾瑪·沃特森”, “time”: “2002-01-26”, “score”: “9.1”}
{“index”: “22”, “title”: “加勒比海盜”, “actor”: “約翰尼·德普,凱拉·奈特莉,奧蘭多·布魯姆”, “time”: “2003-11-21”, “score”: “8.9”}
{“index”: “23”, “title”: “指環王3:王者無敵”, “actor”: “伊萊賈·伍德,伊恩·麥克萊恩,麗芙·泰勒”, “time”: “2004-03-15”, “score”: “9.2”}
{“index”: “24”, “title”: “無間道”, “actor”: “劉德華,梁朝偉,黃秋生”, “time”: “2003-09-05”, “score”: “9.1”}
{“index”: “25”, “title”: “射雕英雄傳之東成西就”, “actor”: “張國榮,梁朝偉,張學友”, “time”: “1993-02-05(中國香港)”, “score”: “8.9”}
{“index”: “26”, “title”: “楚門的世界”, “actor”: “金·凱瑞,勞拉·琳妮,諾亞·艾默裡奇”, “time”: “1998-06-01(美國)”, “score”: “8.9”}
{“index”: “27”, “title”: “蝙蝠俠:黑暗騎士”, “actor”: “克裡斯蒂安·貝爾,希斯·萊傑,阿倫·伊克哈特”, “time”: “2008-07-18(美國)”, “score”: “9.3”}
{“index”: “28”, “title”: “教父2”, “actor”: “阿爾·帕西諾,羅伯特·德尼羅,黛安·基頓”, “time”: “1974-12-12(美國)”, “score”: “9.0”}
{“index”: “29”, “title”: “指環王2:雙塔奇兵”, “actor”: “伊萊賈·伍德,伊恩·麥克萊恩,麗芙·泰勒”, “time”: “2003-04-25”, “score”: “9.1”}
{“index”: “30”, “title”: “機器人總動員”, “actor”: “本·貝爾特,艾麗莎·奈特,傑夫·格爾林”, “time”: “2008-06-27(美國)”, “score”: “9.3”}
{“index”: “31”, “title”: “天堂電影院”, “actor”: “菲利浦·諾瓦雷,賽爾喬·卡斯特利托,蒂茲亞娜·羅達托”, “time”: “1988-11-17(意大利)”, “score”: “9.2”}
{“index”: “32”, “title”: “活着”, “actor”: “葛優,鞏俐,牛犇”, “time”: “1994-05-18(法國)”, “score”: “9.0”}
{“index”: “33”, “title”: “拯救大兵瑞恩”, “actor”: “湯姆·漢克斯,馬特·達蒙,湯姆·塞茲摩爾”, “time”: “1998-07-24(美國)”, “score”: “8.9”}
{“index”: “34”, “title”: “哈爾的移動城堡”, “actor”: “倍賞千惠子,木村拓哉,美輪明宏”, “time”: “2004-11-20(日本)”, “score”: “9.0”}
{“index”: “35”, “title”: “阿凡達”, “actor”: “薩姆·沃辛頓,佐伊·索爾達娜,米歇爾·羅德裡格茲”, “time”: “2010-01-04”, “score”: “9.1”}
{“index”: “36”, “title”: “盜夢空間”, “actor”: “萊昂納多·迪卡普裡奧,渡邊謙,約瑟夫·高登-萊維特”, “time”: “2010-09-01”, “score”: “9.2”}
{“index”: “37”, “title”: “忠犬八公的故事”, “actor”: “Forest,理查·基爾,瓊·艾倫”, “time”: “2010-03-12(英國)”, “score”: “9.3”}
{“index”: “38”, “title”: “幽靈公主”, “actor”: “松田洋治,石田百合子,田中裕子”, “time”: “1997-07-12(日本)”, “score”: “8.9”}
{“index”: “39”, “title”: “搏擊俱樂部”, “actor”: “愛德華·哈裡森·諾頓,布拉德·皮特,海倫娜·伯翰·卡特”, “time”: “1999-10-15(美國)”, “score”: “8.8”}
{“index”: “40”, “title”: “東邪西毒”, “actor”: “張國榮,梁朝偉,劉嘉玲”, “time”: “1994-09-17”, “score”: “8.8”}
{“index”: “41”, “title”: “風之谷”, “actor”: “島本須美,永井一郎,坂本千夏”, “time”: “1992”, “score”: “8.9”}
{“index”: “42”, “title”: “瘋狂原始人”, “actor”: “尼古拉斯·凱奇,艾瑪·斯通,瑞安·雷諾茲”, “time”: “2013-04-20”, “score”: “9.5”}
{“index”: “43”, “title”: “當幸福來敲門”, “actor”: “威爾·史密斯,賈登·史密斯,坦迪·牛頓”, “time”: “2008-01-17”, “score”: “8.9”}
{“index”: “44”, “title”: “V字仇殺隊”, “actor”: “娜塔莉·波特曼,雨果·維文,斯蒂芬·瑞”, “time”: “2006-03-17(美國)”, “score”: “8.8”}
{“index”: “45”, “title”: “十二怒漢”, “actor”: “亨利·方達,李·科布,馬丁·鮑爾薩姆”, “time”: “1957-04-13(美國)”, “score”: “9.1”}
{“index”: “46”, “title”: “放牛班的春天”, “actor”: “熱拉爾·朱尼奧,讓-巴蒂斯特·莫尼耶,瑪麗·布奈爾”, “time”: “2004-10-16”, “score”: “8.8”}
{“index”: “47”, “title”: “三傻大鬧寶萊塢”, “actor”: “阿米爾·汗,黃渤,卡琳娜·卡普”, “time”: “2011-12-08”, “score”: “9.1”}
{“index”: “48”, “title”: “勇敢的心”, “actor”: “梅爾·吉布森,蘇菲·瑪索,帕特裡克·麥高漢”, “time”: “1995-05-24(美國)”, “score”: “8.8”}
{“index”: “49”, “title”: “黑客帝國3:矩陣革命”, “actor”: “基努·裡維斯,雨果·維文,凱瑞-安·莫斯”, “time”: “2003-11-05”, “score”: “8.8”}
{“index”: “50”, “title”: “速度與激情5”, “actor”: “範·迪塞爾,保羅·沃克,道恩·強森”, “time”: “2011-05-12”, “score”: “9.2”}
{“index”: “51”, “title”: “馴龍高手”, “actor”: “傑伊·巴魯切爾,傑拉德·巴特勒,亞美莉卡·費雷拉”, “time”: “2010-05-14”, “score”: “9.0”}
{“index”: “52”, “title”: “神偷奶爸”, “actor”: “史蒂夫·卡瑞爾,傑森·席格爾,拉塞爾·布蘭德”, “time”: “2010-07-09(美國)”, “score”: “9.0”}
{“index”: “53”, “title”: “少年派的奇幻漂流”, “actor”: “蘇拉·沙瑪,伊爾凡·可汗,塔布”, “time”: “2012-11-22”, “score”: “9.1”}
{“index”: “54”, “title”: “聞香識女人”, “actor”: “阿爾·帕西諾,克裡斯·奧唐納,加布裡埃爾·安瓦爾”, “time”: “1992-12-23(美國)”, “score”: “8.8”}
{“index”: “55”, “title”: “斷背山”, “actor”: “希斯·萊傑,傑克·吉倫哈爾,米歇爾·威廉姆斯”, “time”: “2006-01-13(美國)”, “score”: “9.0”}
{“index”: “56”, “title”: “飛屋環遊記”, “actor”: “愛德華·阿斯納,喬丹·長井,鮑勃·彼德森”, “time”: “2009-08-04”, “score”: “8.9”}
{“index”: “57”, “title”: “大話西遊之月光寶盒”, “actor”: “周星馳,莫文蔚,吳孟達”, “time”: “2014-10-24”, “score”: “9.6”}
{“index”: “58”, “title”: “飛越瘋人院”, “actor”: “傑克·尼科爾森,路易絲·弗萊徹,威爾·薩姆森”, “time”: “1975-11-19(美國)”, “score”: “8.8”}
{“index”: “59”, “title”: “怦然心動”, “actor”: “瑪德琳·卡羅爾,卡蘭·麥克奧利菲,艾丹·奎因”, “time”: “2010-08-06(美國)”, “score”: “8.9”}
{“index”: “60”, “title”: “美國往事”, “actor”: “羅伯特·德尼羅,詹姆斯·伍茲,伊麗莎白·麥戈文”, “time”: “1984-02-17(美國)”, “score”: “9.1”}
{“index”: “61”, “title”: “緻命魔術”, “actor”: “休·傑克曼,克裡斯蒂安·貝爾,邁克爾·凱恩”, “time”: “2006-10-20(美國)”, “score”: “8.8”}
{“index”: “62”, “title”: “鬼子來了”, “actor”: “姜文,姜宏波,陳強”, “time”: “2000-05-12(法國戛納)”, “score”: “8.9”}
{“index”: “63”, “title”: “無敵破壞王”, “actor”: “約翰·C·賴利,薩拉·西爾弗曼,簡·林奇”, “time”: “2012-11-06”, “score”: “9.1”}
{“index”: “64”, “title”: “美麗心靈”, “actor”: “羅素·克洛,詹妮弗·康納利,艾德·哈裡斯”, “time”: “2001-12-21(美國)”, “score”: “8.8”}
{“index”: “65”, “title”: “蝙蝠俠:黑暗騎士崛起”, “actor”: “克裡斯蒂安·貝爾,邁克爾·凱恩,加裡·奧德曼”, “time”: “2012-08-27”, “score”: “8.9”}
{“index”: “66”, “title”: “夜訪吸血鬼”, “actor”: “湯姆·克魯斯,布拉德·皮特,克爾斯滕·鄧斯特”, “time”: “1994-11-11(美國)”, “score”: “8.8”}
{“index”: “67”, “title”: “倩女幽魂”, “actor”: “張國榮,王祖賢,午馬”, “time”: “2011-04-30”, “score”: “9.2”}
{“index”: “68”, “title”: “哈利·波特與死亡聖器(下)”, “actor”: “丹尼爾·雷德克裡夫,魯伯特·格林特,艾瑪·沃特森”, “time”: “2011-08-04”, “score”: “9.0”}
{“index”: “69”, “title”: “本傑明·巴頓奇事”, “actor”: “布拉德·皮特,凱特·布蘭切特,塔拉吉·P·漢森”, “time”: “2008-12-25(美國)”, “score”: “8.8”}
{“index”: “70”, “title”: “鋼琴家”, “actor”: “艾德裡安·布洛迪,艾米莉娅·福克斯,米哈烏·熱布羅夫斯基”, “time”: “2002-09-25(法國)”, “score”: “8.8”}
{“index”: “71”, “title”: “觸不可及”, “actor”: “弗朗索瓦·克魯塞,奧瑪·希,安娜·勒尼”, “time”: “2011-11-02(法國)”, “score”: “9.1”}
{“index”: “72”, “title”: “熔爐”, “actor”: “孔劉,鄭有美,金智英”, “time”: “2011-09-22(南韓)”, “score”: “8.8”}
{“index”: “73”, “title”: “初戀這件小事”, “actor”: “馬裡奧·毛瑞爾,平采娜·樂維瑟派布恩,阿查拉那·阿瑞亞衛考”, “time”: “2012-06-05”, “score”: “8.8”}
{“index”: “74”, “title”: “大話西遊之大聖娶親”, “actor”: “周星馳,朱茵,莫文蔚”, “time”: “2014-10-24”, “score”: “8.8”}
{“index”: “75”, “title”: “新龍門客棧”, “actor”: “張曼玉,梁家輝,甄子丹”, “time”: “2012-02-24”, “score”: “8.8”}
{“index”: “76”, “title”: “甜蜜蜜”, “actor”: “黎明,張曼玉,曾志偉”, “time”: “2015-02-13”, “score”: “9.2”}
{“index”: “77”, “title”: “素媛”, “actor”: “李來,薛耿求,嚴志媛”, “time”: “2013-10-02(南韓)”, “score”: “9.1”}
{“index”: “78”, “title”: “小鞋子”, “actor”: “默罕默德·阿米爾·納吉,Kamal Mirkarimi,Behzad Rafi”, “time”: “1999-01-22(美國)”, “score”: “9.1”}
{“index”: “79”, “title”: “螢火之森”, “actor”: “内山昂輝,佐倉绫音,後藤弘樹”, “time”: “2011-09-17(日本)”, “score”: “9.0”}
{“index”: “80”, “title”: “時空戀旅人”, “actor”: “瑞秋·麥克亞當斯,多姆納爾·格裡森,比爾·奈伊”, “time”: “2013-09-04(英國)”, “score”: “8.9”}
{“index”: “81”, “title”: “穿條紋睡衣的男孩”, “actor”: “阿沙·巴特菲爾德,維拉·法梅加,大衛·休裡斯”, “time”: “2008-09-12(英國)”, “score”: “9.0”}
{“index”: “82”, “title”: “竊聽風暴”, “actor”: “烏爾裡希·穆埃,塞巴斯蒂安·科赫,馬蒂娜·格德克”, “time”: “2006-03-23(德國)”, “score”: “9.0”}
{“index”: “83”, “title”: “7号房的禮物”, “actor”: “柳承龍,鄭鎮榮,樸信惠”, “time”: “2013-01-23(南韓)”, “score”: “8.9”}
{“index”: “84”, “title”: “借東西的小人阿莉埃蒂”, “actor”: “志田未來,神木隆之介,大竹忍”, “time”: “2010-07-17(日本)”, “score”: “8.8”}
{“index”: “85”, “title”: “恐怖直播”, “actor”: “河正宇,李璟榮,李大為”, “time”: “2013-07-31(南韓)”, “score”: “8.8”}
{“index”: “86”, “title”: “海豚灣”, “actor”: “裡克·奧巴瑞,路易·西霍尤斯,哈迪·瓊斯”, “time”: “2009-07-31(美國)”, “score”: “8.9”}
{“index”: “87”, “title”: “忠犬八公物語”, “actor”: “仲代達矢,春川真澄,井川比佐志”, “time”: “1987-08-01(日本)”, “score”: “9.0”}
{“index”: “88”, “title”: “上帝之城”, “actor”: “亞曆桑德雷·羅德裡格斯,艾莉絲·布拉加,萊安德魯·菲爾米諾”, “time”: “2002-08-30(巴西)”, “score”: “8.9”}
{“index”: “89”, “title”: “辯護人”, “actor”: “宋康昊,郭度沅,吳達洙”, “time”: “2013-12-18(南韓)”, “score”: “8.8”}
{“index”: “90”, “title”: “七武士”, “actor”: “三船敏郎,志村喬,千秋實”, “time”: “1954-04-26(日本)”, “score”: “9.1”}
{“index”: “91”, “title”: “英雄本色”, “actor”: “狄龍,張國榮,周潤發”, “time”: “2017-11-17”, “score”: “9.2”}
{“index”: “92”, “title”: “一一”, “actor”: “吳念真,金燕玲,李凱莉”, “time”: “2000-09-20(法國)”, “score”: “8.9”}
{“index”: “93”, “title”: “完美的世界”, “actor”: “凱文·科斯特納,克林特·伊斯特伍德,T·J·勞瑟”, “time”: “1993-11-24(美國)”, “score”: “8.9”}
{“index”: “94”, “title”: “海洋”, “actor”: “雅克·貝漢,姜文,蘭斯洛特·佩林”, “time”: “2011-08-12”, “score”: “9.0”}
{“index”: “95”, “title”: “愛·回家”, “actor”: “俞承豪,金藝芬,童孝熙”, “time”: “2002-04-05(南韓)”, “score”: “9.0”}
{“index”: “96”, “title”: “黃金三镖客”, “actor”: “克林特·伊斯特伍德,李·範·克裡夫,埃裡·瓦拉赫”, “time”: “1966-12-23(意大利)”, “score”: “8.9”}
{“index”: “97”, “title”: “我愛你”, “actor”: “宋在浩,李順才,尹秀晶”, “time”: “2011-02-17(南韓)”, “score”: “9.0”}
{“index”: “98”, “title”: “遷徙的鳥”, “actor”: “雅克·貝漢,Philippe Labro”, “time”: “2001-12-12(法國)”, “score”: “9.1”}
{“index”: “99”, “title”: “阿飛正傳”, “actor”: “張國榮,張曼玉,劉德華”, “time”: “2018-06-25”, “score”: “8.8”}
{“index”: “100”, “title”: “龍貓”, “actor”: “帕特·卡洛爾,蒂姆·達利,麗娅·薩隆加”, “time”: “2018-12-14”, “score”: “9.2”}