天天看點

FFmpeg音頻編解碼處理

新版的ffmpeg對音頻編碼處理已經有了很大的變化,記錄在此,做個備忘。

早期ffmpeg編碼音頻,輸入資料一般都是S16格式,解碼輸出一般也是S16,也就是說PCM資料是存儲在連續的buffer中,對一個雙聲道(左右)音頻來說,存儲格式可能就為

LRLRLR.........(左聲道在前還是右聲道在前沒有認真研究過)。是以以往編碼部分的代碼基本形如:

// Legacy (pre-planar) encode loop: pulls interleaved (packed) PCM out of a FIFO
// one full encoder frame at a time and feeds it to avcodec_encode_audio2().
int sample_bytes = av_get_bytes_per_sample(pCodecCtx->sample_fmt);

     // Size in bytes of one encoder frame of packed data: samples * bytes * channels.
     int frame_bytes = pCodecCtx->frame_size * sample_bytes * pCodecCtx->channels;

    // AVFifoBuffer* fifo;    holds the raw PCM data
     while(av_fifo_size(fifo) >= frame_bytes) {
         av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

         AVPacket pkt = {0};
         av_init_packet(&pkt);
         pkt.data = encodeBuf;
         pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
         int got_packet = 0;

         audioframe->nb_samples = pCodecCtx->frame_size;
         int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                   audioframe->nb_samples,
                                                   pCodecCtx->sample_fmt, 0);
         // Point the frame's data pointers at inputBuf (no copy is made).
         avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                 inputBuf, samples_size, 0);
         // Simple PTS bookkeeping: advance by the number of samples handed to the encoder.
         audioframe->pts = audio_sync_opts;
         audio_sync_opts = audioframe->pts + audioframe->nb_samples;

         avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
         if (got_packet ) {
             // Consume pkt: mux into a container, stream it out, or hand to the caller.
         }

     }      

項目中需要對音視訊流進行轉碼輸出,音頻處理部分一般是先解碼(得到PCM S16資料),再交由編碼器編碼(MP3、AAC)。

ffmpeg更新到2.1後(具體哪個版本開始的沒去查,可能早幾個版本就已經這樣做了),音頻格式增加了plane概念(呃,不是灰機,是平面)

enum AVSampleFormat {
     AV_SAMPLE_FMT_NONE = -1,
     AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
     AV_SAMPLE_FMT_S16,         ///< signed 16 bits
     AV_SAMPLE_FMT_S32,         ///< signed 32 bits
     AV_SAMPLE_FMT_FLT,         ///< float
     AV_SAMPLE_FMT_DBL,         ///< double

 // All formats below are the planar variants (one plane per channel)
     AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
     AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
     AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
     AV_SAMPLE_FMT_FLTP,        ///< float, planar
     AV_SAMPLE_FMT_DBLP,        ///< double, planar

     AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
 };      

這就有點像視訊部分的YUV資料,有的帶P,有的是不帶P的,同樣對雙聲道音頻PCM資料,以S16P為例,存儲就可能是

plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...
 plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...      
// Abridged excerpt (the "....." lines stand for omitted fields) of FFmpeg's
// libmp3lame encoder descriptor: note that sample_fmts lists ONLY planar
// formats, so packed decoder output must be converted before encoding.
AVCodec ff_libmp3lame_encoder = {
 .....
     .capabilities          = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
                                                              AV_SAMPLE_FMT_FLTP,
                                                              AV_SAMPLE_FMT_S16P,
                                                              AV_SAMPLE_FMT_NONE },
 ....
 };      
step 1:判斷是否需要進行convert,初始化階段
 // Initialization: create and configure the resampler only when the decoded
 // input differs from what the encoder wants (channels, rate or sample format).
 if (pCodecCtx->channels != pInputCtx->channels
 || pCodecCtx->sample_rate != pInputCtx->sample_rate
 || pCodecCtx->sample_fmt != pInputCtx->sample_fmt)
 {
 u::Log::write(get_log_file(), "Audio need resample!");
 if ( NULL == m_SwrCtx ) {
 m_SwrCtx = swr_alloc();
 }
 #if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the appropriate API for this libswresample version
 av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);
 av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);
 av_opt_set_int(m_SwrCtx, "in_sample_rate",  pInputCtx->sample_rate, 0);
 av_opt_set_int(m_SwrCtx, "out_sample_rate",  pCodecCtx->sample_rate, 0);
 av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);
 av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);

 #else
 // NOTE(review): this branch only converts the sample format to S16 and keeps the
 // input's own channel layout and rate — unlike the option-based branch above it
 // does NOT convert channels or sample rate; confirm this is intended for old-API builds.
 m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,
 pInputCtx->channel_layout, AV_SAMPLE_FMT_S16, pInputCtx->sample_rate,
 pInputCtx->channel_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,
 0, NULL);
 #endif
 swr_init(m_SwrCtx);
 if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
 // Planar output: one FIFO per channel so each plane is buffered separately.
 for (int i = 0; i < pCodecCtx->channels; i++){
 m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);
 }
 } else {
 // Packed output: a single FIFO holds the interleaved data (the FIFO is a convenience,
 // not a requirement).
 fifo = av_fifo_alloc(BUF_SIZE_20K);
 }

 }

 step 2:進行轉換
 // The code below is partly adapted from FFmpeg's own examples.
 if (m_SwrCtx != NULL) {
 // Lazily allocate the destination sample buffers (a single allocation covering all planes).
 if ( !m_audioOut ) {
 ret = av_samples_alloc_array_and_samples(&m_audioOut,
 &dst_samples_linesize, pCodecCtx->channels, max_dst_nb_samples, pCodecCtx->sample_fmt, 0);
 if (ret < 0){
 av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate destination samples\n", __FILE__, __LINE__, __FUNCTION__);
 return -1;
 }
 }

 // NOTE(review): swr_get_delay() should be given the INPUT sample rate, and
 // av_rescale_rnd() should rescale from the input rate to the output rate.
 // As written, both rescale arguments are pCodecCtx->sample_rate (a no-op), so
 // dst_nb_samples is wrong whenever input and output rates differ — verify
 // against the libswresample documentation.
 dst_nb_samples = av_rescale_rnd(swr_get_delay(m_SwrCtx, pCodecCtx->sample_rate) + src_nb_samples,
 pCodecCtx->sample_rate, pCodecCtx->sample_rate, AV_ROUND_UP);
 if (dst_nb_samples > max_dst_nb_samples) {
 // Grow the destination buffer; freeing plane 0 releases the single underlying allocation.
 av_free(m_audioOut[0]);
 ret = av_samples_alloc(m_audioOut, &dst_samples_linesize, pCodecCtx->channels, dst_nb_samples, pCodecCtx->sample_fmt, 0);
 if (ret < 0){
 av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
 return -1;
 }
 max_dst_nb_samples = dst_nb_samples;
 }

 // The input itself may be planar, so build a per-plane pointer array first.
 uint8_t* m_ain[SWR_CH_MAX];
 setup_array(m_ain, (uint8_t*)input_buf, data->ctx.sample_fmt, src_nb_samples);

 // len = number of samples converted per channel (<= dst_nb_samples), or <0 on error.
 len = swr_convert(m_SwrCtx, m_audioOut, dst_nb_samples, (const uint8_t**)m_ain, src_nb_samples);

 if (len < 0) {
 char errmsg[BUF_SIZE_1K];
 av_strerror(len, errmsg, sizeof(errmsg));
 av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
 return -1;
 }

 paudiobuf = m_audioOut[0];
 decode_size = len * pCodecCtx->channels * av_get_bytes_per_sample(pCodecCtx->sample_fmt);

 } else {
 // No conversion needed: pass the decoded buffer straight through.
 paudiobuf = (uint8_t*)input_buf;
 decode_size = input_size;
 }

 // Store the PCM. Note: even when conversion ran, check whether the CONVERTED
 // format is planar and split it per channel accordingly.
 if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt) ) {
 for (int i = 0; i < pCodecCtx->channels; i++){
 if (av_fifo_realloc2(m_fifo[i], av_fifo_size(m_fifo[i]) + len*av_get_bytes_per_sample(pCodecCtx->sample_fmt)) < 0){
 av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
 return -1;
 }
 // Planes live in one contiguous allocation, dst_samples_linesize bytes apart.
 av_fifo_generic_write(m_fifo[i], m_audioOut[0]+i*dst_samples_linesize, len*av_get_bytes_per_sample(pCodecCtx->sample_fmt), NULL);
 }
 } else {
 if (av_fifo_realloc2(fifo, av_fifo_size(fifo) + decode_size) < 0) {
 av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
 return -1;
 }
 av_fifo_generic_write(fifo, paudiobuf, decode_size, NULL);
 }


 setup_array函數摘自ffmpeg例程
 /*
  * setup_array: build the per-plane pointer array that swr_convert() expects
  * from one contiguous PCM buffer `in`.
  *
  *  - Planar formats: plane i starts at in + i * plane_size, where plane_size
  *    is the byte size of `samples` samples of a single channel.
  *  - Packed formats: everything is interleaved, so only out[0] is meaningful.
  *
  * The original code masked `format & 0xFF` before the bytes-per-sample lookup
  * and then mutated `format`; the mask is a no-op (AVSampleFormat values are
  * small sequential integers and av_get_bytes_per_sample() handles planar
  * formats directly), so it has been removed.
  *
  * NOTE: all SWR_CH_MAX slots are filled with computed pointers; `in` only
  * needs to actually contain data for the channels the caller uses.
  */
 static void setup_array(uint8_t* out[SWR_CH_MAX], uint8_t* in, int format, int samples){
 if (av_sample_fmt_is_planar((AVSampleFormat)format)) {
 int plane_size = av_get_bytes_per_sample((AVSampleFormat)format) * samples;
 for (int i = 0; i < SWR_CH_MAX; i++) {
 out[i] = in + i*plane_size;
 }
 } else {
 out[0] = in;
 }
 }

 step 3:進行編碼
 // The encoder requires planar data in this path.
 if (m_SwrCtx && ( av_sample_fmt_is_planar(pCodecCtx->sample_fmt) )) {
   // For simplicity only channel 0's FIFO level is checked (left/right planes fill at
   // the same rate here); production code should consider each channel individually.
 while(av_fifo_size(m_fifo[0]) >= pCodecCtx->frame_size * sample_bytes){
 for (int i = 0; i < pCodecCtx->channels; i++) {
   // inputBuf is one contiguous buffer; plane i starts at i * frame_size * sample_bytes.
 av_fifo_generic_read(m_fifo[i], inputBuf+i*pCodecCtx->frame_size * sample_bytes, pCodecCtx->frame_size * sample_bytes, NULL);
 }
 AVPacket pkt = {0};
 av_init_packet(&pkt);
 pkt.data = encodeBuf;
 pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 int got_packet = 0;

 audioframe->nb_samples = pCodecCtx->frame_size;
 int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                   audioframe->nb_samples,
                                                   pCodecCtx->sample_fmt, 0);
 // This works for planar formats because the planes sit back-to-back in inputBuf.
 avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
 inputBuf, samples_size, 0);

 int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

 if (got_packet ) {
 // Consume pkt here.
 }

 }

 } else {
 // Packed (non-planar) data: a single FIFO, one contiguous read per frame.
 while(av_fifo_size(fifo) >= frame_bytes) {
 av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

 AVPacket pkt = {0};
 av_init_packet(&pkt);
 pkt.data = encodeBuf;
 pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 int got_packet = 0;

 audioframe->nb_samples = pCodecCtx->frame_size;
 int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                   audioframe->nb_samples,
                                                   pCodecCtx->sample_fmt, 0);

 avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
 inputBuf, samples_size, 0);

 int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

 if (got_packet ) {
 // Consume pkt here.
 }

 }
 }