FFmpeg音頻編解碼處理

新版的ffmpeg對音頻編碼處理已經有了很大的變化，記錄在此，做個備忘。

早期ffmpeg編碼音頻，輸入資料一般都是S16格式，解碼輸出一般也是S16，也就是說PCM資料是存儲在連續的buffer中，對一個雙聲道（左右）音頻來說，存儲格式可能就為

LRLRLR.........（左聲道在前還是右聲道在前沒有認真研究過）。因此以往編碼部分的代碼基本形如：

// Legacy encode loop for packed (interleaved) PCM: pulls one encoder frame at a
// time out of a single FIFO and feeds it to avcodec_encode_audio2().

// Size in bytes of one sample in the encoder's sample format.
int sample_bytes = av_get_bytes_per_sample(pCodecCtx->sample_fmt);

     // Size in bytes of one full encoder frame across all channels.
     int frame_bytes = pCodecCtx->frame_size * sample_bytes * pCodecCtx->channels;

    // AVFifoBuffer* fifo;    holds the PCM data
     // Encode for as long as at least one complete frame is queued.
     while(av_fifo_size(fifo) >= frame_bytes) {
         av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

         // The packet writes into the caller-provided output buffer encodeBuf.
         AVPacket pkt = {0};
         av_init_packet(&pkt);
         pkt.data = encodeBuf;
         pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
         int got_packet = 0;

         // Wrap inputBuf in the AVFrame without copying the samples.
         audioframe->nb_samples = pCodecCtx->frame_size;
         int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                   audioframe->nb_samples,
                                                   pCodecCtx->sample_fmt, 0);
         avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                 inputBuf, samples_size, 0);
         // The running pts advances by the number of samples in each frame.
         audioframe->pts = audio_sync_opts;
         audio_sync_opts = audioframe->pts + audioframe->nb_samples;

         avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
         if (got_packet ) {
             // Handle pkt: mux and store it, stream it out, or hand it to the upper-layer application
         }

     }

項目中需要對音視訊流進行轉碼輸出，音頻處理部分一般是先解碼（得到PCM S16資料），再交由編碼（MP3、AAC）

ffmpeg更新到2.1後（具體哪個版本開始的沒去查，可能早幾個版本就已經這樣做了），音頻格式增加了plane概念（呃，不是灰機，是平面）

// Audio sample formats as quoted by the article: the first group is the
// non-planar set, the second group repeats each format as a planar variant.
enum AVSampleFormat {
     AV_SAMPLE_FMT_NONE = -1,
     AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
     AV_SAMPLE_FMT_S16,         ///< signed 16 bits
     AV_SAMPLE_FMT_S32,         ///< signed 32 bits
     AV_SAMPLE_FMT_FLT,         ///< float
     AV_SAMPLE_FMT_DBL,         ///< double

 // All formats below are planar
     AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
     AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
     AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
     AV_SAMPLE_FMT_FLTP,        ///< float, planar
     AV_SAMPLE_FMT_DBLP,        ///< double, planar

     AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
 };

這就有點像視訊部分的YUV資料，有的帶P，有的是不帶P的，同樣對雙聲道音頻PCM資料，以S16P為例，存儲就可能是

plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...
 plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...

// Excerpt of the libmp3lame encoder descriptor; the dotted lines stand for
// fields left out of the excerpt. Every format listed in sample_fmts is a
// planar one (S32P, FLTP, S16P), and the list ends with AV_SAMPLE_FMT_NONE.
AVCodec ff_libmp3lame_encoder = {
 .....
     .capabilities          = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
                                                              AV_SAMPLE_FMT_FLTP,
                                                              AV_SAMPLE_FMT_S16P,
                                                              AV_SAMPLE_FMT_NONE },
 ....
 };

step 1：初始化階段，判斷是否需要進行格式轉換（convert）
 // Initialisation: a resampler is needed whenever the decoded input differs from
 // what the encoder expects in channel count, sample rate or sample format.
 if (pCodecCtx->channels != pInputCtx->channels
     || pCodecCtx->sample_rate != pInputCtx->sample_rate
     || pCodecCtx->sample_fmt != pInputCtx->sample_fmt)
 {
     u::Log::write(get_log_file(), "Audio need resample!");
     if (NULL == m_SwrCtx) {
         m_SwrCtx = swr_alloc();
     }
 #if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the API that matches the library version
     av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);
     av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);
     av_opt_set_int(m_SwrCtx, "in_sample_rate", pInputCtx->sample_rate, 0);
     av_opt_set_int(m_SwrCtx, "out_sample_rate", pCodecCtx->sample_rate, 0);
     av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);
     av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);
 #else
     {
         // swr_alloc_set_opts() takes the OUTPUT description first, then the INPUT
         // description. The output side must describe the encoder (pCodecCtx);
         // otherwise the converter would not produce the format that the FIFO
         // setup below and the encoder rely on.
         // A layout of 0 means "unset"; fall back to the default layout for the
         // channel count so swr_init() has something valid to work with.
         int64_t out_layout = pCodecCtx->channel_layout
                                  ? (int64_t)pCodecCtx->channel_layout
                                  : av_get_default_channel_layout(pCodecCtx->channels);
         int64_t in_layout = pInputCtx->channel_layout
                                 ? (int64_t)pInputCtx->channel_layout
                                 : av_get_default_channel_layout(pInputCtx->channels);
         m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,
                                       out_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate,
                                       in_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,
                                       0, NULL);
     }
 #endif
     // Never hand a NULL context to swr_init(), and do not ignore its result.
     if (NULL == m_SwrCtx || swr_init(m_SwrCtx) < 0) {
         u::Log::write(get_log_file(), "Audio resampler init failed!");
     }

     if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
         // Planar output: allocate one FIFO per channel so each plane is stored separately.
         for (int i = 0; i < pCodecCtx->channels; i++) {
             m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);
         }
     } else {
         // Packed output: a single FIFO is enough for all the data. Whether to use a
         // FIFO at all is a matter of taste; the author simply finds it convenient.
         fifo = av_fifo_alloc(BUF_SIZE_20K);
     }
     // NOTE(review): the FIFOs are only allocated inside this "needs resampling"
     // branch; confirm that `fifo` is allocated elsewhere for the pass-through case.

 }

 step 2：進行轉換
 // The code below is partly copied from the examples shipped with ffmpeg.
 // Converts one chunk of decoded input with the resampler (when one is
 // configured) and appends the resulting PCM to the FIFO(s) read by the encoder.
 if (m_SwrCtx != NULL) {
 // Allocate the destination sample buffers on first use.
 if ( !m_audioOut ) {
 ret = av_samples_alloc_array_and_samples(&m_audioOut,
 &dst_samples_linesize, pCodecCtx->channels, max_dst_nb_samples, pCodecCtx->sample_fmt, 0);
 if (ret < 0){
 av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate destination samples\n", __FILE__, __LINE__, __FUNCTION__);
 return -1;
 }
 }

 // Estimate how many output samples this call may produce, counting the
 // samples reported by swr_get_delay() in addition to the new input.
 // NOTE(review): every rate argument here is pCodecCtx->sample_rate. If the
 // input rate can differ from the encoder rate, the delay base and the last
 // rate argument presumably should be the input rate — confirm against
 // FFmpeg's resampling example.
 dst_nb_samples = av_rescale_rnd(swr_get_delay(m_SwrCtx, pCodecCtx->sample_rate) + src_nb_samples,
 pCodecCtx->sample_rate, pCodecCtx->sample_rate, AV_ROUND_UP);
 // Reallocate the destination buffer when this chunk needs more room than before.
 if (dst_nb_samples > max_dst_nb_samples) {
 av_free(m_audioOut[0]);
 ret = av_samples_alloc(m_audioOut, &dst_samples_linesize, pCodecCtx->channels, dst_nb_samples, pCodecCtx->sample_fmt, 0);
 if (ret < 0){
 av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
 return -1;
 }
 max_dst_nb_samples = dst_nb_samples;
 }

 // The input may be planar as well, so build a per-plane pointer array for it.
 uint8_t* m_ain[SWR_CH_MAX];
 setup_array(m_ain, (uint8_t*)input_buf, data->ctx.sample_fmt, src_nb_samples);

 // len is negative on failure (handled below); otherwise it is used as the
 // number of samples per channel that were produced.
 len = swr_convert(m_SwrCtx, m_audioOut, dst_nb_samples, (const uint8_t**)m_ain, src_nb_samples);

 if (len < 0) {
 char errmsg[BUF_SIZE_1K];
 av_strerror(len, errmsg, sizeof(errmsg));
 av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
 return -1;
 }

 // Start and total byte count of the converted data; only the packed
 // (single FIFO) branch below consumes these two values.
 paudiobuf = m_audioOut[0];
 decode_size = len * pCodecCtx->channels * av_get_bytes_per_sample(pCodecCtx->sample_fmt);

 } else {
 // No resampler configured: queue the input bytes unchanged.
 paudiobuf = (uint8_t*)input_buf;
 decode_size = input_size;
 }

 // Store the PCM data. Note: even when m_SwrCtx performed a conversion, we still
 // have to check whether the converted data is planar.
 if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt) ) {
 // Planar: append each channel's plane to that channel's own FIFO.
 for (int i = 0; i < pCodecCtx->channels; i++){
 // Grow the FIFO first so that the new samples are sure to fit.
 if (av_fifo_realloc2(m_fifo[i], av_fifo_size(m_fifo[i]) + len*av_get_bytes_per_sample(pCodecCtx->sample_fmt)) < 0){
 av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
 return -1;
 }
 // NOTE(review): plane i is addressed as m_audioOut[0] + i*dst_samples_linesize,
 // which assumes the planes lie back to back in one allocation; m_audioOut[i]
 // would not depend on that assumption — confirm.
 av_fifo_generic_write(m_fifo[i], m_audioOut[0]+i*dst_samples_linesize, len*av_get_bytes_per_sample(pCodecCtx->sample_fmt), NULL);
 }
 } else {
 // Packed (or pass-through): everything goes into the single FIFO.
 if (av_fifo_realloc2(fifo, av_fifo_size(fifo) + decode_size) < 0) {
 av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
 return -1;
 }
 av_fifo_generic_write(fifo, paudiobuf, decode_size, NULL);
 }


 setup_array函數摘自ffmpeg例程
 /*
  * Fill `out` with the plane pointers for the sample buffer `in`.
  *
  * out     - pointer array with SWR_CH_MAX slots
  * in      - start of the sample buffer
  * format  - sample format of the buffer (an AVSampleFormat value)
  * samples - number of samples per channel held in the buffer
  *
  * Packed formats: only out[0] is written and it points at `in`.
  * Planar formats: all SWR_CH_MAX slots are written, slot i pointing
  * i * (bytes per sample * samples) bytes past `in`.
  */
 static void setup_array(uint8_t* out[SWR_CH_MAX], uint8_t* in, int format, int samples){
     if (!av_sample_fmt_is_planar((AVSampleFormat)format)) {
         out[0] = in;
         return;
     }

     // Size in bytes of one channel's plane.
     const int bytes_per_plane =
         av_get_bytes_per_sample((AVSampleFormat)(format & 0xFF)) * samples;

     for (int ch = 0; ch < SWR_CH_MAX; ++ch) {
         out[ch] = in + ch * bytes_per_plane;
     }
 }

 step 3：進行編碼
 // Encode every complete frame that is currently queued.
 // First case: the encoder requires planar data and the resampler produced it.
 if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
     // Bytes of actual sample data in one plane (one channel) of an encoder frame.
     const int plane_bytes = pCodecCtx->frame_size * sample_bytes;

     // Simplified example: only the first channel's FIFO is checked, because the
     // channels carry the same amount of data. Real code should check every channel.
     while (av_fifo_size(m_fifo[0]) >= plane_bytes) {
         audioframe->nb_samples = pCodecCtx->frame_size;

         // Ask FFmpeg where each plane must start. avcodec_fill_audio_frame() below
         // derives its plane pointers from this same linesize (align == 0), which
         // can be larger than plane_bytes because of padding. Laying the planes out
         // at plane_stride keeps the data and the frame's pointers in agreement
         // for every frame size.
         int plane_stride = 0;
         int samples_size = av_samples_get_buffer_size(&plane_stride, pCodecCtx->channels,
                                                       audioframe->nb_samples,
                                                       pCodecCtx->sample_fmt, 0);
         if (samples_size < 0) {
             // Invalid frame parameters: nothing sensible can be encoded.
             break;
         }

         // inputBuf is one contiguous block; it has to hold samples_size bytes,
         // which is also the size reported to avcodec_fill_audio_frame().
         for (int i = 0; i < pCodecCtx->channels; i++) {
             av_fifo_generic_read(m_fifo[i], inputBuf + i * plane_stride, plane_bytes, NULL);
         }

         AVPacket pkt = {0};
         av_init_packet(&pkt);
         pkt.data = encodeBuf;
         pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
         int got_packet = 0;

         avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                                  inputBuf, samples_size, 0);

         int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

         if (ret >= 0 && got_packet) {
             // Handle pkt
         }
     }

 } else {
     // Packed (non-planar) data: one FIFO, one contiguous frame.
     while (av_fifo_size(fifo) >= frame_bytes) {
         av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

         AVPacket pkt = {0};
         av_init_packet(&pkt);
         pkt.data = encodeBuf;
         pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
         int got_packet = 0;

         audioframe->nb_samples = pCodecCtx->frame_size;
         int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                       audioframe->nb_samples,
                                                       pCodecCtx->sample_fmt, 0);

         avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                                  inputBuf, samples_size, 0);

         int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

         if (ret >= 0 && got_packet) {
             // Handle pkt
         }
     }
 }

FFmpeg音頻編解碼處理

繼續閱讀

CSU 1563 Lexicography

UVA 110 Meta-Loopless Sorts

HDU 4723 How Long Do You Have to Draw

HDU 1402 A * B Problem Plus

UVA 348 Optimal Array Multiplication Sequence

HDU 6232 Confliction

UESTC 1269 ZhangYu Speech

ZOJ 1041 Transmitters

CSU 1562 Fun House

HDU 5923 Prediction

CodeForces 21B Intersection

HDU 5344 MZL's xor

UVA 590 Always on the run

ZOJ 1104 Leaps Tall Buildings

HDU 2821 Pusher

詳解STM32單片機的堆棧