首先贴一个多线程、多进程、单线程的差别对比，结论来自：https://www.runoob.com/w3cnote/python-single-thread-multi-thread-and-multi-process.html
一、多线程
脚本要点：判断文件是否为空；遍历获取目录下的所有文件。
import sys,json,os,threading
dres={}
def an(file):
reslist=[]
print file+"###################start"
if os.path.getsize(file)==0:
return
fl=open(file)
qos=fl.read()
GJSON=json.loads(qos)
for i in GJSON:
if i["xxxxxxxxxx"] != []:
reslist.append(xxxxxxxxxxxxxxxxx)
dres[file]=reslist
print "################"+file+"################end"
threads = []
fw=open("./qos_all_"+sys.argv[1],"a")
for root, dirs, files in os.walk(sys.argv[1], topdown=False):
for name in files:
print(os.path.join(root, name))
x=os.path.join(root, name)
t=threading.Thread(target=an,args=(x,))
threads.append(t)
for i in threads:
print i
# i.setDaemon(True)
i.start()
for i in threads:
i.join()
for i in dres:
print dres[i]
for j in dres[i]:
fw.write(j+"\n")
fw.close()
二、python多进程
方法1,未控制并发数,不推荐
跟多线程调用的方式类似，用 multiprocessing.Process 替换 threading.Thread 即可。
要注意的是子进程无法修改父进程里的数据结构，所以在子进程里面直接进行写文件操作。
这里也没有控制进程数，脚本会启动非常多的子进程。
import sys,json,os,threading
from multiprocessing import Process
def an(file):
reslist=[]
print file+"###################start"
if os.path.getsize(file)==0:
return
fl=open(file)
qos=fl.read()
GJSON=json.loads(qos)
for i in GJSON:
if i["*******"] != []:
fw.write(************************************************)
# print "#########"
print "################"+file+"################end"
fl.close()
print sys.argv[1]
threads = []
fw=open("./qos_all_"+sys.argv[1],"a")
for root, dirs, files in os.walk(sys.argv[1], topdown=False):
for name in files:
print(os.path.join(root, name))
x=os.path.join(root, name)
t=Process(target=an,args=(x,))
threads.append(t)
for i in threads:
print i
# i.setDaemon(True)
i.start()
for i in threads:
i.join()
#for i in dres:
# print dres[i]
# for j in dres[i]:
# print j
# fw.write(j+"\n")
fw.close()
方法2,使用Pool控制进程数
import sys,json,os,threading
from multiprocessing import Pool
def an(file):
reslist=[]
print file+"###################start"
if os.path.getsize(file)==0:
return
fl=open(file)
qos=fl.read()
GJSON=json.loads(qos)
for i in GJSON:
if i["productQos"] != []:
fw.write("*************************************************")
print "################"+file+"################end"
fl.close()
print sys.argv[1]
fw=open("./qos_all_"+sys.argv[1],"a")
pool = Pool(processes=16)
for root, dirs, files in os.walk(sys.argv[1], topdown=False):
for name in files:
print(os.path.join(root, name))
x=os.path.join(root, name)
pool.apply_async(an,args=(x,))
pool.close()
pool.join()
fw.close()
pool = Pool(processes=16)：将并发进程数控制为 16。
pool.apply_async(an, args=(x,))：非阻塞提交任务，真正并发执行。
pool.apply(an, args=(x,))：阻塞提交，任务逐个执行，实际上没有并发。