天天看点

Python WAV 文件音频频谱图、音量分析与静音截取

音量获取和静音截取工具

import math
import numpy as np
from scipy.io import wavfile

# method 1: absSum
def calVolume(waveData, frameSize, overLap):
    """Compute a per-frame volume as the sum of absolute sample values.

    The signal is cut into frames of ``frameSize`` samples whose start
    positions are ``frameSize - overLap`` samples apart. Frames that would
    run past the end of the signal are truncated. Each frame is shifted by
    its own median before the absolute values are summed.

    Args:
        waveData: 1-D NumPy array of samples.
        frameSize: Number of samples per frame.
        overLap: Number of samples shared by consecutive frames.

    Returns:
        Float array of shape ``(frame_count, 1)`` holding one volume value
        per frame, where ``frame_count = ceil(len(waveData) / step)``.
    """
    total = len(waveData)
    hop = frameSize - overLap
    frame_count = int(math.ceil(total * 1.0 / hop))
    volume = np.zeros((frame_count, 1))
    for k in range(frame_count):
        begin = k * hop
        frame = waveData[begin:min(begin + frameSize, total)]
        # Remove the frame's median so a constant offset does not count as volume.
        centered = frame - np.median(frame)
        volume[k] = np.abs(centered).sum()
    return volume

# method 2: 10 times log10 of square sum
def calVolumeDB(waveData, frameSize, overLap):
    """Compute a per-frame volume as ``10 * log10`` of the squared-sample sum.

    Framing is identical to the absolute-sum variant: frames of
    ``frameSize`` samples start every ``frameSize - overLap`` samples and
    are truncated at the end of the signal. Each frame is shifted by its
    own median before squaring.

    Args:
        waveData: 1-D NumPy array of samples.
        frameSize: Number of samples per frame.
        overLap: Number of samples shared by consecutive frames.

    Returns:
        Float array of shape ``(frame_count, 1)`` with one value per frame.
        A frame whose centred samples are all zero yields ``-inf``.
    """
    total = len(waveData)
    hop = frameSize - overLap
    frame_count = int(math.ceil(total * 1.0 / hop))
    volume = np.zeros((frame_count, 1))
    for k in range(frame_count):
        begin = k * hop
        frame = waveData[begin:min(begin + frameSize, total)]
        # Remove the frame's median so a constant offset carries no energy.
        centered = frame - np.median(frame)
        energy = np.sum(np.square(centered))
        volume[k] = 10 * np.log10(energy)
    return volume

def findIndex(vol, thres):
    """Locate the positions where a volume curve crosses a threshold.

    A crossing is recorded at position ``i`` when ``vol[i]`` and
    ``vol[i + 1]`` lie strictly on opposite sides of ``thres``. Values
    exactly equal to the threshold never produce a crossing.

    Args:
        vol: Sequence of volume values (one per frame).
        thres: Threshold to compare against.

    Returns:
        A tuple ``(silence_start, index)``. ``silence_start`` is True when
        the first crossing goes from below the threshold to above it, and
        False otherwise (including when there is no crossing). ``index`` is
        an int32 array of the crossing positions in ascending order.
    """
    crossings = []
    silence_start = False
    for pos, (cur, nxt) in enumerate(zip(vol[:-1], vol[1:])):
        before = cur - thres
        after = nxt - thres
        if before * after < 0:
            # The direction of the very first crossing tells whether the
            # signal begins below the threshold.
            if not crossings and before < 0:
                silence_start = True
            crossings.append(pos)
    return silence_start, np.array(crossings, dtype=np.int32)

def cutMute(strData, frameSize, threshold=1):
    """Strip the silent stretches from raw 16-bit PCM audio.

    The samples are peak-normalised, split into non-overlapping frames of
    ``frameSize`` samples, and each frame's volume (``calVolume``) is
    compared with ``threshold``. The audio is cut wherever the volume
    crosses the threshold (``findIndex``); the stretches lying above the
    threshold are concatenated and returned.

    Args:
        strData: Bytes-like buffer interpreted as int16 samples.
        frameSize: Number of samples per analysis frame.
        threshold: Volume separating silence from sound, in the units that
            ``calVolume`` returns for the normalised signal. The default
            of 1 is the value that used to be hard-coded.

    Returns:
        A writable int16 array with the retained samples. The input is
        returned unchanged (as a copy) when it is empty, entirely zero, or
        never crosses the threshold.
    """
    samples = np.frombuffer(strData, dtype=np.int16)
    if samples.size == 0:
        return samples.copy()

    # Convert to float before abs(): abs() of int16 -32768 overflows.
    scaled = samples.astype(np.float64)
    peak = np.max(np.abs(scaled))
    if peak == 0:
        # Nothing to normalise and no crossing possible in an all-zero signal.
        return samples.copy()
    scaled /= peak

    vol = calVolume(scaled, frameSize, 0)
    silence_start, crossings = findIndex(vol, threshold)

    # A crossing at frame i is mapped to the byte offset where frame i
    # starts (2 bytes per int16 sample). Python ints avoid int32 overflow.
    bytes_per_frame = frameSize * 2
    offsets = [int(c) * bytes_per_frame for c in crossings]
    if not offsets:
        return samples.copy()

    # Crossings alternate between "sound begins" and "sound ends". Pad the
    # list so that it always reads begin, end, begin, end, ...
    if not silence_start:
        offsets.insert(0, 0)  # the recording opens with sound
    if len(offsets) % 2:
        offsets.append(len(strData))  # the recording closes with sound

    pieces = [strData[lo:hi] for lo, hi in zip(offsets[0::2], offsets[1::2])]
    # frombuffer replaces the deprecated binary mode of np.fromstring;
    # copy() keeps the result writable, as fromstring's result was.
    return np.frombuffer(b"".join(pieces), dtype=np.int16).copy()

           

音频文件频谱图和临界点绘图测试

import wave
import numpy as np
import matplotlib.pyplot as plt
import volume as vp
from scipy.io import wavfile

def findIndex(vol, thres):
    """Locate the positions where a volume curve crosses a threshold.

    A crossing is recorded at position ``i`` when ``vol[i]`` and
    ``vol[i + 1]`` lie strictly on opposite sides of ``thres``. Values
    exactly equal to the threshold never produce a crossing.

    Args:
        vol: Sequence of volume values (one per frame).
        thres: Threshold to compare against.

    Returns:
        A tuple ``(silence_start, index)``. ``silence_start`` is True when
        the first crossing goes from below the threshold to above it, and
        False otherwise (including when there is no crossing). ``index`` is
        an int32 array of the crossing positions in ascending order.
    """
    crossings = []
    silence_start = False
    for pos, (cur, nxt) in enumerate(zip(vol[:-1], vol[1:])):
        before = cur - thres
        after = nxt - thres
        if before * after < 0:
            # The direction of the very first crossing tells whether the
            # signal begins below the threshold.
            if not crossings and before < 0:
                silence_start = True
            crossings.append(pos)
    return silence_start, np.array(crossings, dtype=np.int32)

# Read the whole recording; the context manager closes the file even when
# reading fails part-way.
with wave.open('1.wav','r') as fw:
    params = fw.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)
print(type(strData))

# Interpret the bytes as int16 samples.
# NOTE(review): assumes 16-bit mono audio; sampwidth/nchannels are not checked.
waveData = np.frombuffer(strData, dtype=np.int16)
# Peak-normalise. Converting to float first avoids the int16 overflow of
# abs(-32768); a completely silent file is left as zeros instead of NaN.
waveData = waveData.astype(np.float64)
peak = np.max(np.abs(waveData))
if peak > 0:
    waveData = waveData / peak

frameSize = 160
overLap = 0
vol = vp.calVolume(waveData,frameSize,overLap)
# Candidate silence thresholds: relative to the loudest frame, relative to
# the quietest frame, and a fixed value.
threshold1 = max(vol)*0.10
threshold2 = min(vol)*10.0
threshold3 = 2
print('max: %s, min: %s, threshold1: %s, threshold2: %s, threshold3: %s'%(max(vol),min(vol), threshold1, threshold2, threshold3))

# Seconds covered by one volume frame; converts frame indices to time.
frameDuration = nframes*1.0/len(vol)/framerate
time = np.arange(0,nframes) * (1.0/framerate)
frame = np.arange(0,len(vol)) * frameDuration
index1 = findIndex(vol,threshold1)[1]*frameDuration
index2 = findIndex(vol,threshold2)[1]*frameDuration
index3 = findIndex(vol,threshold3)[1]*frameDuration
end = nframes * (1.0/framerate)
print(index1)

# Running estimate of the spacing between every second threshold-1
# crossing: each step adds the newest spacing and, from the second step
# on, halves the accumulated value.
interval = 0
for i in range(1,len(index1)-4):
    print(index1[i+2])
    interval += (index1[i+2] - index1[i])
    if i >= 2:
        interval/=2
print(interval)

# Top panel: normalised waveform with a vertical line at every
# threshold-3 crossing.
plt.subplot(311)
plt.plot(time,waveData,color="black")
#plt.plot([index1,index1],[-1,1],'-r')
#plt.plot([index2,index2],[-1,1],'-g')
plt.plot([index3,index3],[-1,1],'-b')
plt.ylabel('Amplitude')

# Middle panel: per-frame volume with the three candidate thresholds.
plt.subplot(312)
plt.plot(frame,vol,color="black")
plt.plot([0,end],[threshold1,threshold1],'-r', label="threshold 1")
plt.plot([0,end],[threshold2,threshold2],'-g', label="threshold 2")
plt.plot([0,end],[threshold3,threshold3],'-b', label="threshold 3")
plt.legend()
plt.ylabel('Volume(absSum)')
plt.xlabel('time(seconds)')

# Remove the silent stretches, using the same frame size as the analysis
# above rather than a repeated literal.
waveNew = vp.cutMute(strData, frameSize)
timeNew = np.arange(0,len(waveNew)) * (1.0/framerate)
print(waveNew)

# Bottom panel: the audio that remains after silence removal. No legend
# here: the panel has no labelled artists.
plt.subplot(313)
plt.plot(timeNew,waveNew,color="black")
plt.ylabel('Amplitude')

plt.savefig("test.png")
plt.show()

# Write the result at the input file's sample rate so playback speed is
# preserved whatever rate the source was recorded at.
wavfile.write("cut_file.wav", framerate, waveNew)