由于之前用scrapy寫過爬蟲,很喜歡用裡面的callback與Pipeline,特别是callback,讓方法調用路徑看起來非常直覺。于是突發奇想,模仿scrapy中的callback寫一個demo。
# 模拟scrapy中的回調函數
# @File:模拟scrapy中的回調函數.py
# @Author:王星球
# @E-mail:[email protected]
# Created_date:20200528
# Demo 1
class FormRequest(object):
    """Bundle a callback with a pre-captured argument, mimicking scrapy's Request.

    Calling the instance invokes the stored callback with the stored
    argument; whatever is passed at call time is deliberately ignored.
    """
    def __init__(self, callback, args):
        self.callback, self.args = callback, args

    def __call__(self, *args, **kwargs):
        # Only the payload captured at construction time is forwarded;
        # call-time positional/keyword arguments are dropped on purpose.
        handler = self.callback
        return handler(self.args)
class Pipeline(object):
    """Write items to an append-mode text file, mimicking a scrapy pipeline.

    The output filename embeds *return_or_yield* so the return-based and
    generator-based demos write to separate files.
    """
    def __init__(self, return_or_yield):
        # Append mode: repeated runs accumulate output in the same file.
        self.file = open("20200528回調函數_%s.txt" % return_or_yield, encoding='utf-8', mode='a')

    def write_csv(self, obj):
        """Append str(obj) plus a newline to the output file."""
        self.file.write(str(obj) + "\n")
        print("已經寫入檔案")

    def __del__(self):
        # Bug fix: if open() raised in __init__, self.file was never set and
        # the original unconditional close() raised AttributeError during
        # interpreter teardown. Guard the attribute and skip already-closed files.
        file = getattr(self, "file", None)
        if file is not None and not file.closed:
            file.close()
class Spider1(object):
    """Demo spider whose callbacks *return* FormRequest objects directly.

    __init__ drives the whole chain: start_url() returns a request, calling
    it runs parse() which returns a second request, and calling that writes
    the item through the Pipeline — hence the triple call.
    """
    def __init__(self):
        self.start_url()()()

    def start_url(self):
        """Step 1: return a FormRequest whose callback is parse()."""
        print("return FormRequest 1")
        return FormRequest(callback=self.parse,
                           args=({
                               "url": "baidu.com",
                               "text": "回調函數測試",
                               "Author": "王星球",
                               "E-mail": "[email protected]"
                           }))

    def parse(self, response):
        """Step 2: return a FormRequest that hands the item to the Pipeline."""
        # Bug fix: this is the second step, so log "2" — the original printed
        # "return FormRequest 1" again (copy-paste error; compare Spider2,
        # whose parse() prints "yield FormRequest 2").
        print("return FormRequest 2")
        return FormRequest(callback=Pipeline("return").write_csv, args=(response,))
class Spider2(object):
    """Demo spider whose callbacks *yield* FormRequest objects (generators).

    Construction drains the generator chain: each request yielded by
    start_url() is called to obtain the parse() generator, whose single
    request is then called to push the item through the Pipeline.
    """
    def __init__(self):
        for request in self.start_url():
            # request() runs parse() and returns its generator; next() pulls
            # the pipeline-bound FormRequest out of it, and the trailing call
            # executes the write.
            next(request())()

    def start_url(self):
        """Step 1: yield a FormRequest whose callback is parse()."""
        print("yield FormRequest 1")
        yield FormRequest(callback=self.parse,
                          args=({
                              "url": "baidu.com",
                              "text": "回調函數測試",
                              "Author": "王星球",
                              "E-mail": "[email protected]"
                          }))

    def parse(self, response):
        """Step 2: yield a FormRequest that hands the item to the Pipeline."""
        print("yield FormRequest 2")
        yield FormRequest(callback=Pipeline("yield").write_csv, args=(response,))
if __name__ == '__main__':
    # Run the return-based demo first, then the generator-based one.
    for spider_cls in (Spider1, Spider2):
        spider_cls()
說明
1.demo版本的代碼
2.核心代碼:__call__方法
3.生成器方式或者直接return都可以
運作結果截圖
生成的檔案截圖
檔案1内容截圖
檔案2内容截圖