# 音量获取和静音截取工具
import math
import numpy as np
from scipy.io import wavfile
# method 1: absSum
def calVolume(waveData, frameSize, overLap):
    """Return the per-frame volume of a signal as a sum of absolute amplitudes.

    The signal is cut into frames of ``frameSize`` samples whose starting
    points are ``frameSize - overLap`` samples apart.  Each frame is shifted
    by its own median before the absolute values are summed.  Trailing
    frames are shorter than ``frameSize`` when the signal runs out.

    Args:
        waveData: One-dimensional sequence of samples (NumPy array or any
            array-like such as a list).
        frameSize: Number of samples per frame.
        overLap: Number of samples shared by consecutive frames; must be
            smaller than ``frameSize``.

    Returns:
        A ``numpy.ndarray`` of shape ``(frameNum, 1)`` with one volume value
        per frame, where ``frameNum = ceil(len(waveData) / step)``.

    Raises:
        ValueError: If ``frameSize - overLap`` is not positive.
    """
    step = frameSize - overLap
    if step <= 0:
        # A non-positive hop would otherwise surface as a ZeroDivisionError
        # or as a "negative dimensions" error from np.zeros.
        raise ValueError(
            "overLap (%s) must be smaller than frameSize (%s)"
            % (overLap, frameSize)
        )

    waveData = np.asarray(waveData)
    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    volume = np.zeros((frameNum, 1))
    for i in range(frameNum):
        start = i * step
        curFrame = waveData[start:min(start + frameSize, wlen)]
        curFrame = curFrame - np.median(curFrame)  # zero-justified
        volume[i] = np.sum(np.abs(curFrame))
    return volume
# method 2: 10 times log10 of square sum
def calVolumeDB(waveData, frameSize, overLap):
    """Return the per-frame volume of a signal in decibels.

    The signal is cut into frames of ``frameSize`` samples whose starting
    points are ``frameSize - overLap`` samples apart.  Each frame is shifted
    by its own median, and its volume is ``10 * log10`` of the sum of the
    squared samples.  Trailing frames are shorter than ``frameSize`` when
    the signal runs out.

    Args:
        waveData: One-dimensional sequence of samples (NumPy array or any
            array-like such as a list).
        frameSize: Number of samples per frame.
        overLap: Number of samples shared by consecutive frames; must be
            smaller than ``frameSize``.

    Returns:
        A ``numpy.ndarray`` of shape ``(frameNum, 1)`` with one value per
        frame.  A frame whose samples all equal its median has zero energy
        and is reported as ``-inf``.

    Raises:
        ValueError: If ``frameSize - overLap`` is not positive.
    """
    step = frameSize - overLap
    if step <= 0:
        # A non-positive hop would otherwise surface as a ZeroDivisionError
        # or as a "negative dimensions" error from np.zeros.
        raise ValueError(
            "overLap (%s) must be smaller than frameSize (%s)"
            % (overLap, frameSize)
        )

    waveData = np.asarray(waveData)
    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    volume = np.zeros((frameNum, 1))
    # log10(0) for a silent frame is a legitimate -inf here, so keep the
    # value but do not emit a divide-by-zero RuntimeWarning for it.
    with np.errstate(divide="ignore"):
        for i in range(frameNum):
            start = i * step
            curFrame = waveData[start:min(start + frameSize, wlen)]
            curFrame = curFrame - np.median(curFrame)  # zero-justified
            volume[i] = 10 * np.log10(np.sum(curFrame * curFrame))
    return volume
def findIndex(vol, thres):
    """Locate the positions where a volume curve crosses a threshold.

    Position ``k`` is reported when ``vol[k]`` and ``vol[k + 1]`` lie
    strictly on opposite sides of ``thres``; a value exactly equal to the
    threshold never counts as a crossing.

    Args:
        vol: Sequence of per-frame volume values.
        thres: Threshold to compare against.

    Returns:
        A tuple ``(silence_start, index)``.  ``index`` is an ``int32`` array
        of the crossing positions in ascending order.  ``silence_start`` is
        ``True`` when the value at the first crossing is below the
        threshold, and ``False`` otherwise (including when there is no
        crossing at all).
    """
    crossings = []
    for pos in range(len(vol) - 1):
        here = vol[pos] - thres
        there = vol[pos + 1] - thres
        # Opposite signs on the two sides give a strictly negative product.
        if here * there < 0:
            crossings.append(pos)

    silence_start = False
    if crossings and vol[crossings[0]] - thres < 0:
        silence_start = True
    return silence_start, np.array(crossings, dtype=np.int32)
def cutMute(strData, frameSize, threshold=1):
    """Remove the quiet stretches from raw 16-bit PCM audio.

    The bytes are decoded as ``int16`` samples, scaled by their peak
    absolute value, and split into non-overlapping frames of ``frameSize``
    samples.  Frame volumes come from ``calVolume``; ``findIndex`` reports
    where the volume crosses ``threshold``.  Everything between a rising
    and the following falling crossing is kept, plus the head of the data
    when it starts loud and the tail when it ends loud.  With no crossing at
    all the input is returned in full.

    Args:
        strData: Bytes-like buffer of ``int16`` samples.  Byte offsets are
            computed as ``frame * frameSize * 2``, i.e. one 2-byte sample
            per time step.
        frameSize: Number of samples per analysis frame.
        threshold: Volume level separating "loud" from "quiet" frames,
            compared against the absolute-sum volume of the peak-normalized
            signal.  Defaults to ``1``.

    Returns:
        A writable ``int16`` ``numpy.ndarray`` holding the retained samples.
    """
    rawSamples = np.frombuffer(strData, dtype=np.int16)
    if rawSamples.size == 0:
        # Nothing to analyse; avoid taking the max of an empty array.
        return rawSamples.copy()

    # Convert before abs(): in int16, abs(-32768) overflows back to -32768.
    waveData = rawSamples.astype(np.float64)
    peak = np.max(np.abs(waveData))
    if peak > 0:
        # An all-zero buffer is left as is instead of dividing by zero.
        waveData = waveData / peak

    vol = calVolume(waveData, frameSize, 0)
    silence_start, findex = findIndex(vol, threshold)

    # Turn frame indices into byte offsets using Python ints, so large
    # files cannot overflow the int32 index array.  Each offset is the
    # start of the frame just before the crossing.
    edges = [int(k) * frameSize * 2 for k in findex]
    if not silence_start:
        # The data opens with sound: keep everything up to the first crossing.
        edges.insert(0, 0)
    if len(edges) % 2:
        # The data ends with sound: keep everything after the last crossing.
        edges.append(len(strData))

    # Consecutive pairs of edges now delimit the segments to keep.
    kept = b"".join(
        strData[edges[k]:edges[k + 1]] for k in range(0, len(edges), 2)
    )
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as fromstring's result was.
    return np.frombuffer(kept, dtype=np.int16).copy()
# 音频文件频谱图和临界点绘图测试
import wave
import numpy as np
import matplotlib.pyplot as plt
import volume as vp
from scipy.io import wavfile
def findIndex(vol, thres):
    """Locate the positions where a volume curve crosses a threshold.

    Position ``k`` is reported when ``vol[k]`` and ``vol[k + 1]`` lie
    strictly on opposite sides of ``thres``; a value exactly equal to the
    threshold never counts as a crossing.

    Args:
        vol: Sequence of per-frame volume values.
        thres: Threshold to compare against.

    Returns:
        A tuple ``(silence_start, index)``.  ``index`` is an ``int32`` array
        of the crossing positions in ascending order.  ``silence_start`` is
        ``True`` when the value at the first crossing is below the
        threshold, and ``False`` otherwise (including when there is no
        crossing at all).
    """
    crossings = []
    for pos in range(len(vol) - 1):
        here = vol[pos] - thres
        there = vol[pos + 1] - thres
        # Opposite signs on the two sides give a strictly negative product.
        if here * there < 0:
            crossings.append(pos)

    silence_start = False
    if crossings and vol[crossings[0]] - thres < 0:
        silence_start = True
    return silence_start, np.array(crossings, dtype=np.int32)
# Read every frame of the recording.  The context manager closes the file
# even when reading raises.
with wave.open('1.wav','r') as fw:
    params = fw.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)
print(type(strData))

# The bytes are decoded as int16 samples; nchannels and sampwidth are read
# above but not checked, so a 16-bit single-channel file is assumed.
# Converting to float first avoids abs(-32768) overflowing in int16.
waveData = np.frombuffer(strData, dtype=np.int16).astype(np.float64)
peak = np.max(np.abs(waveData))
if peak > 0:
    # Peak-normalize; an all-zero recording is left untouched rather than
    # divided by zero.
    waveData = waveData / peak

frameSize = 160
overLap = 0
vol = vp.calVolume(waveData,frameSize,overLap)

# Three candidate thresholds: a fraction of the loudest frame, a multiple
# of the quietest frame, and a fixed level.
threshold1 = max(vol)*0.10
threshold2 = min(vol)*10.0
#threshold3 = max(vol)*0.05+min(vol)*5.0
threshold3 = 2
print('max: %s, min: %s, threshold1: %s, threshold2: %s, threshold3: %s'%(max(vol),min(vol), threshold1, threshold2, threshold3))

# Time axes in seconds: one entry per sample and one per volume frame.
time = np.arange(0,nframes) * (1.0/framerate)
secondsPerFrame = nframes*1.0/len(vol)/framerate
frame = np.arange(0,len(vol)) * secondsPerFrame

# Threshold crossings, converted from frame indices to seconds.
index1 = findIndex(vol,threshold1)[1]*secondsPerFrame
index2 = findIndex(vol,threshold2)[1]*secondsPerFrame
index3 = findIndex(vol,threshold3)[1]*secondsPerFrame
end = nframes * (1.0/framerate)
print(index1)

# Accumulate the spacing between every crossing and the one two positions
# later; from the second iteration on, the running value is halved each time.
interval = 0
for i in range(1,len(index1)-4):
    print(index1[i+2])
    interval += (index1[i+2] - index1[i])
    if i >= 2:
        interval/=2
# NOTE(review): the nesting of this print was not recoverable from the
# source; it is assumed to report the final value once, after the loop.
print(interval)
# Panel 1: normalized waveform with the threshold-3 crossings marked.
plt.subplot(311)
plt.plot(time,waveData,color="black")
#plt.plot([index1,index1],[-1,1],'-r')
#plt.plot([index2,index2],[-1,1],'-g')
if len(index3):
    # One vertical line per crossing; skipped when there is nothing to draw.
    plt.plot([index3,index3],[-1,1],'-b')
plt.ylabel('Amplitude')

# Panel 2: per-frame volume with the three candidate thresholds.
plt.subplot(312)
plt.plot(frame,vol,color="black")
plt.plot([0,end],[threshold1,threshold1],'-r', label="threshold 1")
plt.plot([0,end],[threshold2,threshold2],'-g', label="threshold 2")
plt.plot([0,end],[threshold3,threshold3],'-b', label="threshold 3")
plt.legend()
plt.ylabel('Volume(absSum)')
plt.xlabel('time(seconds)')

# A disabled inline copy of the splice loop used to sit here as a string
# literal; vp.cutMute performs that work.
# Use the same frame size as the volume analysis above.
waveNew = vp.cutMute(strData, frameSize)
time = np.arange(0,len(waveNew)) * (1.0/framerate)
print(waveNew)

# Panel 3: what remains after the quiet stretches are removed.  No legend
# here: the panel has no labelled artists.
plt.subplot(313)
plt.plot(time,waveNew,color="black")
plt.ylabel('Amplitude')
plt.savefig("test.png")
plt.show()

# Write the result at the input's sample rate rather than a fixed 8000 Hz,
# so recordings at other rates keep their speed and pitch.
wavfile.write("cut_file.wav", framerate, waveNew)