新版FFmpeg音頻編解碼處理

ffmpeg版本更新近一年跟打了雞血一樣，刷刷刷的往上升（貌似現在很多軟體版本更新都相當快，而且都是大版本更新），應用軟體也就算了，升就升吧，最多新版可能運作速度稍慢些，可API的版本更新太快，對開發來說，就不是一件讓人愉快的事。

用ffmpeg也有快兩年的時間，前段時間將項目中的ffmpeg庫從1.0更新到了2.1（老實說，這樣更新API庫風險太大），發現不少API發生了變化，以前項目中寫的音頻編碼部分完全不能工作了，認真研究了兩天，才發現，新版的ffmpeg對音頻編碼處理已經有了很大的變化，記錄在此，做個備忘。

早期ffmpeg編碼音頻，輸入資料一般都是S16格式，解碼輸出一般也是S16，也就是說PCM資料是存儲在連續的buffer中，對一個雙聲道（左右）音頻來說，存儲格式可能就為

LRLRLR.........（左聲道在前還是右聲道在前沒有認真研究過）。所以以往編碼部分的代碼基本形如：

// Legacy (pre-planar) encode loop: the PCM samples of all channels are
// interleaved in one FIFO and are drained one codec frame at a time.
int sample_bytes = av_get_bytes_per_sample(pCodecCtx->sample_fmt);
int frame_bytes = pCodecCtx->frame_size * sample_bytes * pCodecCtx->channels;

// AVFifoBuffer* fifo; holds the PCM data
while (av_fifo_size(fifo) >= frame_bytes) {
    av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

    // The encoder writes into the caller-owned encodeBuf.
    AVPacket pkt = {0};
    av_init_packet(&pkt);
    pkt.data = encodeBuf;
    pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
    int got_packet = 0;

    // Point the frame at the samples just read from the FIFO.
    audioframe->nb_samples = pCodecCtx->frame_size;
    int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                  audioframe->nb_samples,
                                                  pCodecCtx->sample_fmt, 0);
    avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                             inputBuf, samples_size, 0);

    // pts advances by the number of samples per frame.
    audioframe->pts = audio_sync_opts;
    audio_sync_opts = audioframe->pts + audioframe->nb_samples;

    if (avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet) < 0) {
        // Stop on an encoder error instead of silently draining the FIFO.
        av_log(NULL, AV_LOG_ERROR, "avcodec_encode_audio2() failed\n");
        break;
    }

    if (got_packet) {
        // Handle pkt: mux/store it, stream it out, or hand it to the upper layer.
    }
}

項目中需要對音視訊流進行轉碼輸出，音頻處理部分一般是先解碼（得到PCM S16資料），再交由編碼（MP3、AAC）

ffmpeg更新到2.1後（具體哪個版本開始的沒去查，可能早幾個版本就已經這樣做了），音頻格式增加了plane概念（呃，不是灰機，是平面）

/* Audio sample formats. The first group is the non-planar variants; the
 * second group ("P" suffix) stores each channel in its own plane. */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,

    AV_SAMPLE_FMT_U8   = 0,  ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16  = 1,  ///< signed 16 bits
    AV_SAMPLE_FMT_S32  = 2,  ///< signed 32 bits
    AV_SAMPLE_FMT_FLT  = 3,  ///< float
    AV_SAMPLE_FMT_DBL  = 4,  ///< double

    /* Everything below is a planar format. */
    AV_SAMPLE_FMT_U8P  = 5,  ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P = 6,  ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P = 7,  ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP = 8,  ///< float, planar
    AV_SAMPLE_FMT_DBLP = 9,  ///< double, planar

    AV_SAMPLE_FMT_NB   = 10  ///< Number of sample formats. DO NOT USE if linking dynamically
};

這就有點像視訊部分的YUV資料，有的帶P，有的是不帶P的，同樣對雙聲道音頻PCM資料，以S16P為例，存儲就可能是

plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...

plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...

而不再是以前的連續buffer。

如mp3編碼就明確規定了只使用平面格式的資料

/* Excerpt of the libmp3lame encoder descriptor; the dotted lines stand for
 * fields omitted here. Every entry in .sample_fmts is a planar format, and
 * the list is terminated by AV_SAMPLE_FMT_NONE. */
AVCodec ff_libmp3lame_encoder = {

.....

.capabilities = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,

.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,

AV_SAMPLE_FMT_FLTP,

AV_SAMPLE_FMT_S16P,

AV_SAMPLE_FMT_NONE },

....

};

而AAC編碼依舊使用 AV_SAMPLE_FMT_S16格式

也就是說，音頻編碼不能再像以前那樣簡單的處理，統一輸入S16資料，而要根據具體的codec轉化為其支援的格式，否則無論是編碼還是解碼輸出的聲音會莫名其妙，幸好，轉換工作不用自己做，ffmpeg提供了相應的API，swr_convert（類似以前的audio_resample，只是audio_resample目前已不再推薦使用，因為swr_convert更強大）

基於此，對音頻編碼部分做了相應修改，主要用的資料結構為 struct SwrContext* m_SwrCtx;

step 1：判斷是否需要進行convert，初始化階段

if (pCodecCtx->channels != pInputCtx->channels

|| pCodecCtx->sample_rate != pInputCtx->sample_rate

|| pCodecCtx->sample_fmt != pInputCtx->sample_fmt)

{

u::Log::write(get_log_file(), "Audio need resample!");

if ( NULL == m_SwrCtx ) {

m_SwrCtx = swr_alloc();

}

#if LIBSWRESAMPLE_VERSION_MINOR >= 17 // 根據版本不同，選用适當函數

av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);

av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);

av_opt_set_int(m_SwrCtx, "in_sample_rate", pInputCtx->sample_rate, 0);

av_opt_set_int(m_SwrCtx, "out_sample_rate", pCodecCtx->sample_rate, 0);

av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);

av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);

#else

m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,

pInputCtx->channel_layout, AV_SAMPLE_FMT_S16, pInputCtx->sample_rate,

pInputCtx->channel_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,

0, NULL);

#endif

swr_init(m_SwrCtx);

if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {

//如果是分平面資料，為每一聲道配置設定一個fifo，單獨存儲各平面資料

for (int i = 0; i < pCodecCtx->channels; i++){

m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);

}

} else {

//不分平面，所有的資料隻要一個fifo就夠了，其實用不用fifo完全看個人了，隻是我覺得友善些

fifo = av_fifo_alloc(BUF_SIZE_20K);

}

step 2：進行轉換

//以下代碼部分抄自ffmpeg自帶的例子

if (m_SwrCtx != NULL) {

if ( !m_audioOut ) {

ret = av_samples_alloc_array_and_samples(&m_audioOut,

&dst_samples_linesize, pCodecCtx->channels, max_dst_nb_samples, pCodecCtx->sample_fmt, 0);

if (ret < 0){

av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate destination samples\n", __FILE__, __LINE__, __FUNCTION__);

return -1;

}

dst_nb_samples = av_rescale_rnd(swr_get_delay(m_SwrCtx, pCodecCtx->sample_rate) + src_nb_samples,

pCodecCtx->sample_rate, pCodecCtx->sample_rate, AV_ROUND_UP);

if (dst_nb_samples > max_dst_nb_samples) {

av_free(m_audioOut[0]);

ret = av_samples_alloc(m_audioOut, &dst_samples_linesize, pCodecCtx->channels, dst_nb_samples, pCodecCtx->sample_fmt, 0);

if (ret < 0){

av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);

return -1;

}

max_dst_nb_samples = dst_nb_samples;

}

//輸入也可能是分平面的，是以要做如下處理

uint8_t* m_ain[SWR_CH_MAX];

setup_array(m_ain, (uint8_t*)input_buf, data->ctx.sample_fmt, src_nb_samples);

len = swr_convert(m_SwrCtx, m_audioOut, dst_nb_samples, (const uint8_t**)m_ain, src_nb_samples);

if (len < 0) {

char errmsg[BUF_SIZE_1K];

av_strerror(len, errmsg, sizeof(errmsg));

av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);

return -1;

}

paudiobuf = m_audioOut[0];

decode_size = len * pCodecCtx->channels * av_get_bytes_per_sample(pCodecCtx->sample_fmt);

} else {

paudiobuf = (uint8_t*)input_buf;

decode_size = input_size;

}

//存儲PCM資料，注意：m_SwrCtx即使進行了轉換，也要判斷轉換後的資料是否分平面

if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt) ) {

for (int i = 0; i < pCodecCtx->channels; i++){

if (av_fifo_realloc2(m_fifo[i], av_fifo_size(m_fifo[i]) + len*av_get_bytes_per_sample(pCodecCtx->sample_fmt)) < 0){

av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");

return -1;

}

av_fifo_generic_write(m_fifo[i], m_audioOut[0]+i*dst_samples_linesize, len*av_get_bytes_per_sample(pCodecCtx->sample_fmt), NULL);

}

} else {

if (av_fifo_realloc2(fifo, av_fifo_size(fifo) + decode_size) < 0) {

av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");

return -1;

}

av_fifo_generic_write(fifo, paudiobuf, decode_size, NULL);

}

setup_array函數摘自ffmpeg例程

/**
 * Build the per-channel pointer table for one contiguous sample buffer.
 *
 * @param out     pointer table to fill (SWR_CH_MAX entries)
 * @param in      start of the contiguous sample buffer
 * @param format  sample format of the data in `in` (an AVSampleFormat value)
 * @param samples number of samples per channel held in `in`
 *
 * Planar format: all SWR_CH_MAX entries are set, entry i pointing
 * i * plane_size bytes into `in`. This is done regardless of how many
 * channels the buffer really holds, so only the entries for real channels
 * may be dereferenced.
 * Non-planar format: only out[0] is set; the other entries are left untouched.
 */
static void setup_array(uint8_t* out[SWR_CH_MAX], uint8_t* in, int format, int samples){
    if (av_sample_fmt_is_planar((AVSampleFormat)format)) {
        // Size in bytes of one channel's plane.
        int plane_size = av_get_bytes_per_sample((AVSampleFormat)(format & 0xFF)) * samples;

        for (int i = 0; i < SWR_CH_MAX; i++) {
            out[i] = in + i*plane_size;
        }
    } else {
        out[0] = in;
    }
}

step 3：進行編碼

//編碼格式要求是分平面資料

if (m_SwrCtx && ( av_sample_fmt_is_planar(pCodecCtx->sample_fmt) )) {

//這裡為簡單示例，隻判斷第一個聲道（因為左右聲道資料大小是一緻的），實際應用中應考慮每個聲道具體情況

while(av_fifo_size(m_fifo[0]) >= pCodecCtx->frame_size * sample_bytes){

for (int i = 0; i < pCodecCtx->channels; i++) {

//inputBuf是一塊連續記憶體

av_fifo_generic_read(m_fifo[i], inputBuf+i*pCodecCtx->frame_size * sample_bytes, pCodecCtx->frame_size * sample_bytes, NULL);

}

AVPacket pkt = {0};

av_init_packet(&pkt);

pkt.data = encodeBuf;

pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;

int got_packet = 0;

audioframe->nb_samples = pCodecCtx->frame_size;

int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,

audioframe->nb_samples,

pCodecCtx->sample_fmt, 0);

avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,

inputBuf, samples_size, 0);

int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

if (got_packet ) {

//處理pkt

}

} else {

//不分平面

while(av_fifo_size(fifo) >= frame_bytes) {

av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

AVPacket pkt = {0};

av_init_packet(&pkt);

pkt.data = encodeBuf;

pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;

int got_packet = 0;

audioframe->nb_samples = pCodecCtx->frame_size;

int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,

audioframe->nb_samples,

pCodecCtx->sample_fmt, 0);

avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,

inputBuf, samples_size, 0);

int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

if (got_packet ) {

//處理pkt

}

另：

對於解碼也可能需要做swr_convert，比如做播放器，很多時候我們是將S16格式資料丟給音效卡，而新版ffmpeg解碼音頻輸出的格式可能不是S16，如AAC解碼後得到的是FLT（浮點型），AC3解碼是FLTP（帶平面）等，需要根據具體的情況決定是否需要convert，轉換過程與上類似

新版FFmpeg音頻編解碼處理

繼續閱讀

Android ffmpeg yuv原始資料寫入yuv檔案

FFmpeg視訊解碼為YUV像素資料檔案

FFmpeg視訊檔案解碼為YUV資料

FFmpeg 解碼視訊流實作yuv播放

YUV RGB常見視訊格式解析

FFmpeg 将多幅jpg/png圖檔轉為mp4/avi/yuv視訊序列的方法轉YUV轉MP4/AVI播放YUV視訊

ffmpeg擷取視訊時長(秒數)

Java通過調用FFMPEG擷取視訊時長（已測試）

asp.net中将各種視訊檔案轉換成.flv格式

利用ffmpeg把視訊檔案轉換為flv檔案

用ffmpeg 轉換flv 2 wma

c#中調用Ffmpeg轉換視訊格式的問題

c#使用 FFMPEG 視訊格式轉換

ffmpeg windows下編譯ffmpeg

ffmpeg視訊轉換工具

ffmpeg開發出現的問題(四) ftp/rstp/ts 流輸出