Author: 位元組流動 (ByteFlow)
Source: https://blog.csdn.net/Kennethdroid/article/details/107405505

On the subject of audio visualization: in an earlier article we implemented rendering the real-time single-channel PCM data captured by Android AudioRecorder into a bar chart with OpenGL. For the rendering process and details, see that article; its code is open source: OpenGL ES Visualization of Real-Time Audio.

Extracting the Audio Data of One Channel
In the previous article, when we built the OpenSL ES player, we defined the data format as follows:
SLDataFormat_PCM pcm = {
        SL_DATAFORMAT_PCM,            // format type
        (SLuint32)2,                  // channel count: stereo (2)
        SL_SAMPLINGRATE_44_1,         // 44100 Hz
        SL_PCMSAMPLEFORMAT_FIXED_16,  // bits per sample: 2 bytes = 16 bit
        SL_PCMSAMPLEFORMAT_FIXED_16,  // container size
        SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT, // channel mask
        SL_BYTEORDER_LITTLEENDIAN     // endianness: little-endian
};
As the code above shows, the PCM data the audio driver receives has a 44.1 kHz sample rate, two channels, and a 2-byte sample size, so one interleaved stereo frame occupies 2 bytes × 2 channels = 4 bytes. Since we want to render the PCM data of a single channel, we have to extract it from the two-channel stream.

[Figure: left- and right-channel samples stored interleaved in the PCM buffer]

As the figure above shows, the decoded PCM data of the two channels is stored interleaved, so when extracting one channel's samples via pointer offsets, each offset step is 2 bytes × channel count = 4 bytes.

One channel's PCM data can be extracted as follows; in this way we can separate out every channel's data from an audio frame.
// Audio data stored little-endian
uint8_t *pByte = audioFrame->data;
for (int i = 0; i < audioFrame->dataSize / 4; i++) { // 4 bytes per stereo frame
    short *pShort = (short *) (pByte + i * 4);
    // left-channel value
    short leftChannelValue = *pShort;
    pShort = (short *) (pByte + i * 4 + 2);
    // right-channel value
    short rightChannelValue = *pShort;
}
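To hand one channel to the renderer (or any other consumer), it is convenient to copy its samples into a contiguous buffer. A minimal sketch, assuming interleaved 16-bit stereo input and a caller-allocated output buffer (ExtractChannel is an illustrative helper, not from the original code):

#include <cstdint>
#include <cstddef>

// Copy one channel (0 = left, 1 = right) out of interleaved 16-bit stereo PCM.
// 'dataSize' is the input size in bytes; returns the number of samples written.
static size_t ExtractChannel(const uint8_t *data, size_t dataSize,
                             int channel, int16_t *out) {
    const int16_t *samples = reinterpret_cast<const int16_t *>(data);
    size_t frames = dataSize / 4; // 4 bytes per interleaved stereo frame
    for (size_t i = 0; i < frames; ++i) {
        out[i] = samples[i * 2 + channel]; // skip over the other channel
    }
    return frames;
}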
Also be aware that data can be stored in big-endian or little-endian order. Little-endian stores the low-order byte at the lower address and the high-order byte at the higher address; big-endian is the opposite, storing the high-order byte at the lower address. This matters when separating channel data: if the stream's byte order differs from the host's, each 16-bit sample must be byte-swapped.
// Audio data stored big-endian: swap each 16-bit sample's bytes
uint8_t *pByte = audioFrame->data;
for (int i = 0; i < audioFrame->dataSize / 4; i++) {
    short *pShort = (short *) (pByte + i * 4);
    // left-channel value
    short leftChannelValue = (short) (((*pShort & 0xFF00) >> 8) | ((*pShort & 0x00FF) << 8));
    pShort = (short *) (pByte + i * 4 + 2);
    // right-channel value
    short rightChannelValue = (short) (((*pShort & 0xFF00) >> 8) | ((*pShort & 0x00FF) << 8));
}
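The mask-and-shift swap above works, but a dedicated helper reads more clearly. A small sketch, assuming a GCC/Clang toolchain such as the Android NDK's (SwapInt16 is an illustrative name):

#include <cstdint>

// Swap the byte order of a single 16-bit sample.
static inline int16_t SwapInt16(int16_t v) {
#if defined(__GNUC__) || defined(__clang__)
    return (int16_t) __builtin_bswap16((uint16_t) v);
#else
    uint16_t u = (uint16_t) v;
    return (int16_t) ((u >> 8) | (u << 8)); // portable fallback
#endif
}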
Rendering Audio Data with OpenGL ES

OpenGL ES (OpenGL for Embedded Systems) is a subset of the OpenGL 3D graphics API. It is essentially a cross-language, cross-platform programming interface specification, intended mainly for embedded devices such as phones and tablets.

Earlier articles in this series covered OpenGL ES systematically, so we won't expand on it here; for details see: Android OpenGL ES: A Systematic Learning Tutorial from Beginner to Expert.

Rendering audio data with OpenGL essentially means using the audio sample values to build a mesh of thin bars, each bar a quad split into two triangles (see the sketch below), and rendering the mesh as a bar chart.
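As a rough illustration of that geometry (a sketch only; the article's real mesh-building code is in UpdateMesh() further below, and EmitBar here is a hypothetical helper): each sampled amplitude becomes one bar spanning dx horizontally, split into the triangles (p1, p2, p4) and (p4, p2, p3):

#include <glm/glm.hpp>

// Emit the six vertices of one bar (two triangles) into 'out'.
// 'x' is the bar's left edge, 'dx' its width, 'h' its height, all normalized.
static void EmitBar(float x, float dx, float h, glm::vec2 *out) {
    glm::vec2 p1(x, 0.0f);      // bottom-left
    glm::vec2 p2(x, h);         // top-left
    glm::vec2 p3(x + dx, h);    // top-right
    glm::vec2 p4(x + dx, 0.0f); // bottom-right
    out[0] = p1; out[1] = p2; out[2] = p4; // triangle 1
    out[3] = p4; out[4] = p2; out[5] = p3; // triangle 2
}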
Next comes the implementation. First, create the GLSurfaceView.Renderer in the Java layer, and add the corresponding native functions to FFMediaPlayer:
private GLSurfaceView.Renderer mAudioGLRender = new GLSurfaceView.Renderer() {
    @Override
    public void onSurfaceCreated(GL10 gl10, EGLConfig eglConfig) {
        FFMediaPlayer.native_OnAudioVisualSurfaceCreated();
    }

    @Override
    public void onSurfaceChanged(GL10 gl10, int w, int h) {
        FFMediaPlayer.native_OnAudioVisualSurfaceChanged(w, h);
    }

    @Override
    public void onDrawFrame(GL10 gl10) {
        FFMediaPlayer.native_OnAudioVisualDrawFrame();
    }
};
public class FFMediaPlayer {
    static {
        System.loadLibrary("learn-ffmpeg");
    }

    //......

    //for audio visual render
    public static native void native_OnAudioVisualSurfaceCreated();
    public static native void native_OnAudioVisualSurfaceChanged(int width, int height);
    public static native void native_OnAudioVisualDrawFrame();
}
The corresponding JNI implementations of the Java-layer interface:
// Render interface for audio visualization
JNIEXPORT void JNICALL
Java_com_byteflow_learnffmpeg_media_FFMediaPlayer_native_1OnAudioVisualSurfaceCreated(JNIEnv *env, jclass clazz) {
    AudioVisualRender::GetInstance()->OnAudioVisualSurfaceCreated();
}

JNIEXPORT void JNICALL
Java_com_byteflow_learnffmpeg_media_FFMediaPlayer_native_1OnAudioVisualSurfaceChanged(JNIEnv *env, jclass clazz,
                                                                                      jint width, jint height) {
    AudioVisualRender::GetInstance()->OnAudioVisualSurfaceChanged(width, height);
}

JNIEXPORT void JNICALL
Java_com_byteflow_learnffmpeg_media_FFMediaPlayer_native_1OnAudioVisualDrawFrame(JNIEnv *env, jclass clazz) {
    AudioVisualRender::GetInstance()->OnAudioVisualDrawFrame();
}
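The renderer below also relies on an AudioFrame container that the article never lists. Judging from how its members are used, it presumably looks roughly like this (a sketch; the deep-copying constructor is an assumption based on how UpdateAudioFrame re-creates and deletes frames):

#include <cstdint>
#include <cstring>

// Minimal PCM frame container (sketch): owns a private copy of the bytes.
struct AudioFrame {
    uint8_t *data = nullptr;
    int dataSize = 0;

    AudioFrame(uint8_t *src, int size) : dataSize(size) {
        data = new uint8_t[size];
        memcpy(data, src, size);
    }
    ~AudioFrame() { delete[] data; }
};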
The native-layer class that implements the audio visualization rendering:
#include <LogUtil.h>
#include <GLUtils.h>
#include "AudioVisualRender.h"
#include <gtc/matrix_transform.hpp>
#include <detail/type_mat.hpp>
#include <detail/type_mat4x4.hpp>
#include <render/video/OpenGLRender.h>

AudioVisualRender* AudioVisualRender::m_pInstance = nullptr;
std::mutex AudioVisualRender::m_Mutex;

AudioVisualRender *AudioVisualRender::GetInstance() {
    if (m_pInstance == nullptr) {
        std::unique_lock<std::mutex> lock(m_Mutex);
        if (m_pInstance == nullptr) {
            m_pInstance = new AudioVisualRender();
        }
    }
    return m_pInstance;
}

void AudioVisualRender::ReleaseInstance() {
    std::unique_lock<std::mutex> lock(m_Mutex);
    if (m_pInstance != nullptr) {
        delete m_pInstance;
        m_pInstance = nullptr;
    }
}

void AudioVisualRender::OnAudioVisualSurfaceCreated() {
    ByteFlowPrintE("AudioVisualRender::OnAudioVisualSurfaceCreated");
    if (m_ProgramObj)
        return;
    char vShaderStr[] =
            "#version 300 es\n"
            "layout(location = 0) in vec4 a_position;\n"
            "layout(location = 1) in vec2 a_texCoord;\n"
            "uniform mat4 u_MVPMatrix;\n"
            "out vec2 v_texCoord;\n"
            "void main()\n"
            "{\n"
            "    gl_Position = u_MVPMatrix * a_position;\n"
            "    v_texCoord = a_texCoord;\n"
            "    gl_PointSize = 4.0f;\n"
            "}";

    char fShaderStr[] =
            "#version 300 es\n"
            "precision mediump float;\n"
            "in vec2 v_texCoord;\n"
            "layout(location = 0) out vec4 outColor;\n"
            "uniform float drawType;\n"
            "void main()\n"
            "{\n"
            "    if(drawType == 1.0)      // bars: reddish, varying with height\n"
            "    {\n"
            "        outColor = vec4(1.5 - v_texCoord.y, 0.3, 0.3, 1.0);\n"
            "    }\n"
            "    else if(drawType == 2.0) // lines: white\n"
            "    {\n"
            "        outColor = vec4(1.0, 1.0, 1.0, 1.0);\n"
            "    }\n"
            "    else if(drawType == 3.0) // points: gray\n"
            "    {\n"
            "        outColor = vec4(0.3, 0.3, 0.3, 1.0);\n"
            "    }\n"
            "}";

    // Create the shader program
    m_ProgramObj = GLUtils::CreateProgram(vShaderStr, fShaderStr);
    if (m_ProgramObj == GL_NONE) {
        LOGCATE("AudioVisualRender::OnAudioVisualSurfaceCreated create program fail");
    }

    // Set up the MVP transform matrix
    // Projection matrix
    glm::mat4 Projection = glm::ortho(-1.0f, 1.0f, -1.0f, 1.0f, 0.1f, 100.0f);
    //glm::mat4 Projection = glm::frustum(-ratio, ratio, -1.0f, 1.0f, 4.0f, 100.0f);
    //glm::mat4 Projection = glm::perspective(45.0f, ratio, 0.1f, 100.f);

    // View matrix
    glm::mat4 View = glm::lookAt(
            glm::vec3(0, 0, 4), // Camera is at (0,0,4), in World Space
            glm::vec3(0, 0, 0), // and looks at the origin
            glm::vec3(0, 1, 0)  // Head is up (set to 0,-1,0 to look upside-down)
    );

    // Model matrix (identity: no scale, rotation, or translation)
    glm::mat4 Model = glm::mat4(1.0f);
    Model = glm::scale(Model, glm::vec3(1.0f, 1.0f, 1.0f));
    Model = glm::rotate(Model, 0.0f, glm::vec3(1.0f, 0.0f, 0.0f));
    Model = glm::rotate(Model, 0.0f, glm::vec3(0.0f, 1.0f, 0.0f));
    Model = glm::translate(Model, glm::vec3(0.0f, 0.0f, 0.0f));

    m_MVPMatrix = Projection * View * Model;
}

void AudioVisualRender::OnAudioVisualSurfaceChanged(int w, int h) {
    ByteFlowPrintE("AudioVisualRender::OnAudioVisualSurfaceChanged [w, h] = [%d, %d]", w, h);
    glClearColor(1.0f, 1.0f, 1.0f, 1.0f);
    glViewport(0, 0, w, h);
}

void AudioVisualRender::OnAudioVisualDrawFrame() {
    ByteFlowPrintD("AudioVisualRender::OnAudioVisualDrawFrame");
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
    std::unique_lock<std::mutex> lock(m_Mutex);
    if (m_ProgramObj == GL_NONE || m_pAudioBuffer == nullptr) return;
    UpdateMesh();
    lock.unlock();

    // Generate VBO Ids and load the VBOs with data
    if (m_VboIds[0] == 0) {
        glGenBuffers(2, m_VboIds);
        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[0]);
        glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * m_RenderDataSize * 6 * 3, m_pVerticesCoords, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[1]);
        glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * m_RenderDataSize * 6 * 2, m_pTextureCoords, GL_DYNAMIC_DRAW);
    } else {
        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[0]);
        glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(GLfloat) * m_RenderDataSize * 6 * 3, m_pVerticesCoords);
        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[1]);
        glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(GLfloat) * m_RenderDataSize * 6 * 2, m_pTextureCoords);
    }

    if (m_VaoId == GL_NONE) {
        glGenVertexArrays(1, &m_VaoId);
        glBindVertexArray(m_VaoId);

        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[0]);
        glEnableVertexAttribArray(0);
        glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat), (const void *) 0);
        glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);

        glBindBuffer(GL_ARRAY_BUFFER, m_VboIds[1]);
        glEnableVertexAttribArray(1);
        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (const void *) 0);
        glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);

        glBindVertexArray(GL_NONE);
    }

    // Use the program object
    glUseProgram(m_ProgramObj);
    glBindVertexArray(m_VaoId);
    GLUtils::setMat4(m_ProgramObj, "u_MVPMatrix", m_MVPMatrix);

    // Pass 1: filled bars
    GLUtils::setFloat(m_ProgramObj, "drawType", 1.0f);
    glDrawArrays(GL_TRIANGLES, 0, m_RenderDataSize * 6);

    // Pass 2: white lines over the same vertices
    GLUtils::setFloat(m_ProgramObj, "drawType", 2.0f);
    glDrawArrays(GL_LINES, 0, m_RenderDataSize * 6);
}

// Called from the audio playback thread with each new frame; the mutex guards
// the buffers shared with the GL thread.
void AudioVisualRender::UpdateAudioFrame(AudioFrame *audioFrame) {
    if (audioFrame != nullptr) {
        ByteFlowPrintD("AudioVisualRender::UpdateAudioFrame audioFrame->dataSize=%d", audioFrame->dataSize);
        std::unique_lock<std::mutex> lock(m_Mutex);
        if (m_pAudioBuffer != nullptr && m_pAudioBuffer->dataSize != audioFrame->dataSize) {
            delete m_pAudioBuffer;
            m_pAudioBuffer = nullptr;
            delete[] m_pTextureCoords;
            m_pTextureCoords = nullptr;
            delete[] m_pVerticesCoords;
            m_pVerticesCoords = nullptr;
        }
        if (m_pAudioBuffer == nullptr) {
            m_pAudioBuffer = new AudioFrame(audioFrame->data, audioFrame->dataSize);
            m_RenderDataSize = m_pAudioBuffer->dataSize / RESAMPLE_LEVEL;
            m_pVerticesCoords = new vec3[m_RenderDataSize * 6]; //(x,y,z) * 6 points
            m_pTextureCoords = new vec2[m_RenderDataSize * 6];  //(x,y) * 6 points
        } else {
            memcpy(m_pAudioBuffer->data, audioFrame->data, audioFrame->dataSize);
        }
        lock.unlock();
    }
}

// Build/update the bar-chart mesh. One audio frame holds too many samples to
// draw one bar each, so the frame is downsampled by RESAMPLE_LEVEL.
void AudioVisualRender::UpdateMesh() {
    float dy = 0.25f / MAX_AUDIO_LEVEL;
    float dx = 1.0f / m_RenderDataSize;
    for (int i = 0; i < m_RenderDataSize; ++i) {
        int index = i * RESAMPLE_LEVEL; // RESAMPLE_LEVEL is the sampling interval
        short *pValue = (short *)(m_pAudioBuffer->data + index);
        float y = *pValue * dy;
        y = y < 0 ? y : -y; // force non-positive so every bar extends to the same side of the baseline
        vec2 p1(i * dx, 0 + 1.0f);
        vec2 p2(i * dx, y + 1.0f);
        vec2 p3((i + 1) * dx, y + 1.0f);
        vec2 p4((i + 1) * dx, 0 + 1.0f);

        // texture coordinates of the two triangles (p1,p2,p4) and (p4,p2,p3)
        m_pTextureCoords[i * 6 + 0] = p1;
        m_pTextureCoords[i * 6 + 1] = p2;
        m_pTextureCoords[i * 6 + 2] = p4;
        m_pTextureCoords[i * 6 + 3] = p4;
        m_pTextureCoords[i * 6 + 4] = p2;
        m_pTextureCoords[i * 6 + 5] = p3;

        // corresponding vertex coordinates
        m_pVerticesCoords[i * 6 + 0] = GLUtils::texCoordToVertexCoord(p1);
        m_pVerticesCoords[i * 6 + 1] = GLUtils::texCoordToVertexCoord(p2);
        m_pVerticesCoords[i * 6 + 2] = GLUtils::texCoordToVertexCoord(p4);
        m_pVerticesCoords[i * 6 + 3] = GLUtils::texCoordToVertexCoord(p4);
        m_pVerticesCoords[i * 6 + 4] = GLUtils::texCoordToVertexCoord(p2);
        m_pVerticesCoords[i * 6 + 5] = GLUtils::texCoordToVertexCoord(p3);
    }
}

void AudioVisualRender::Init() {
    m_VaoId = GL_NONE;
    m_pTextureCoords = nullptr;
    m_pVerticesCoords = nullptr;
    memset(m_VboIds, 0, sizeof(GLuint) * 2);
    m_pAudioBuffer = nullptr;
}

// Free the buffers
void AudioVisualRender::UnInit() {
    if (m_pAudioBuffer != nullptr) {
        delete m_pAudioBuffer;
        m_pAudioBuffer = nullptr;
    }
    if (m_pTextureCoords != nullptr) {
        delete[] m_pTextureCoords;
        m_pTextureCoords = nullptr;
    }
    if (m_pVerticesCoords != nullptr) {
        delete[] m_pVerticesCoords;
        m_pVerticesCoords = nullptr;
    }
}
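One helper used above, GLUtils::texCoordToVertexCoord, never appears in the article. Judging from its use in UpdateMesh(), it presumably maps a texture coordinate in [0, 1] × [0, 1] (origin top-left, y down) to a vertex coordinate in normalized device coordinates (origin center, y up). A sketch of what such a mapping could look like (an assumption, not the library's actual code):

#include <glm/glm.hpp>

// Map a texture coordinate (origin top-left, y down) to an NDC vertex
// coordinate (origin center, y up), with z fixed at 0.
static glm::vec3 texCoordToVertexCoord(const glm::vec2 &texCoord) {
    return glm::vec3(2.0f * texCoord.x - 1.0f, 1.0f - 2.0f * texCoord.y, 0.0f);
}

With this mapping, the baseline texture row y = 1.0 lands at the bottom edge of the screen, so the bars computed in UpdateMesh() rise upward from the bottom.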
Finally, we just need to call UpdateAudioFrame in the OpenSL ES player's callback (see the previous article):

AudioFrame *audioFrame = m_AudioFrameQueue.front();
if (nullptr != audioFrame && m_AudioPlayerPlay) {
    SLresult result = (*m_BufferQueue)->Enqueue(m_BufferQueue, audioFrame->data, (SLuint32) audioFrame->dataSize);
    if (result == SL_RESULT_SUCCESS) {
        // hand the frame to the visualizer before it is dequeued and freed
        AudioVisualRender::GetInstance()->UpdateAudioFrame(audioFrame);
        m_AudioFrameQueue.pop();
        delete audioFrame;
    }
}