作者: 葉餘 來源: https://www.cnblogs.com/leisure_chn/p/10312713.html ffplay是FFmpeg工程自帶的簡單播放器,使用FFmpeg提供的解碼器和SDL庫進行視訊播放。本文基于FFmpeg工程4.1版本進行分析,其中ffplay源碼清單如下: https://github.com/FFmpeg/FFmpeg/blob/n4.1/fftools/ffplay.c
在嘗試分析源碼前,可先閱讀如下參考文章作為鋪墊:
[1]. 雷霄骅,視音頻編解碼技術零基礎學習方法
[2]. 視訊編解碼基礎概念
[3]. 色彩空間與像素格式
[4]. 音頻參數解析
[5]. FFmpeg基礎概念
“ffplay源碼分析”系列文章如下:
[1]. ffplay源碼分析1-概述
[2]. ffplay源碼分析2-資料結構
[3]. ffplay源碼分析3-代碼架構
[4]. ffplay源碼分析4-音視訊同步
[5]. ffplay源碼分析5-圖像格式轉換
[6]. ffplay源碼分析6-音頻重采樣
[7]. ffplay源碼分析7-播放控制
6. 音頻重采樣
FFmpeg解碼得到的音頻幀的格式未必能被SDL支援,在這種情況下,需要進行音頻重采樣,即将音頻幀格式轉換為SDL支援的音頻格式,否則是無法正常播放的。
音頻重采樣涉及兩個步驟:
- 打開音頻裝置時進行的準備工作:确定SDL支援的音頻格式,作為後期音頻重采樣的目标格式
- 音頻播放線程中,取出音頻幀後,若有需要(音頻幀格式與SDL支援音頻格式不比對)則進行重采樣,否則直接輸出
6.1 打開音頻裝置
音頻裝置的打開實際是在解複用線程中實作的。解複用線程中先打開音頻裝置(設定音頻回調函數供SDL音頻播放線程回調),然後再建立音頻解碼線程。調用鍊如下:
main() -->
stream_open() -->
read_thread() -->
stream_component_open() -->
audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt);
decoder_start(&is->auddec, audio_thread, is);
audio_open()函數填入期望的音頻參數,打開音頻裝置後,将實際的音頻參數存入輸出參數is->audio_tgt中,後面音頻播放線程會用到此參數,将原始音頻資料重采樣,轉換為音頻裝置支援的格式。
/**
 * Open the SDL audio device and report the parameters it was actually opened with.
 *
 * @param opaque                 handed to SDL as the userdata of the audio callback
 * @param wanted_channel_layout  requested channel layout (replaced by a default when 0 or
 *                               inconsistent with wanted_nb_channels)
 * @param wanted_nb_channels     requested channel count
 * @param wanted_sample_rate     requested sample rate in Hz
 * @param audio_hw_params        output: sample format, rate, layout and derived sizes of the
 *                               opened device
 * @return spec.size (the SDL audio buffer size in bytes) on success, -1 on failure
 */
static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
SDL_AudioSpec wanted_spec, spec;
const char *env;
// next_nb_channels[c]: channel count to try next after opening with c channels failed;
// 0 means "no more channel counts, fall back to the next sample rate".
static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
// Fallback sample rates, tried from high to low; the leading 0 is the end-of-list sentinel.
static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;
env = SDL_getenv("SDL_AUDIO_CHANNELS");
if (env) { // SDL_AUDIO_CHANNELS, when set, overrides the requested channel count and layout
wanted_nb_channels = atoi(env);
wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
}
if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
}
// Re-derive nb_channels from the layout, which corrects the count when the
// caller's wanted_nb_channels did not match the layout.
wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
wanted_spec.channels = wanted_nb_channels; // channel count
wanted_spec.freq = wanted_sample_rate; // sample rate
if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
return -1;
}
while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
next_sample_rate_idx--; // stop at the largest table entry strictly below the wanted rate (index 0 if there is none)
// Audio sample formats come in two families, planar and packed. For a stereo stream
// with left samples L and right samples R:
// planar layout: (plane1)LLLLLLLL...LLLL (plane2)RRRRRRRR...RRRR
// packed layout: (plane1)LRLRLRLR...........................LRLR
// Each family has several concrete formats, e.g. AV_SAMPLE_FMT_S16 and AV_SAMPLE_FMT_S16P.
// Note that SDL 2.0 does not accept planar data.
// channel_layout is an int64_t bit mask with one bit per channel; see channel_layout.h.
// Data rate (bits/s) = sample rate (Hz) * sample depth (bits) * channel count
wanted_spec.format = AUDIO_S16SYS; // sample format: S = signed, 16 = bit depth, SYS = native byte order (macro defined by SDL)
wanted_spec.silence = 0; // silence value
wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC)); // SDL buffer size in sample frames: a power of two derived from freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC, but at least SDL_AUDIO_MIN_BUFFER_SIZE
wanted_spec.callback = sdl_audio_callback; // callback that feeds the device; NULL would select the SDL_QueueAudio() mechanism
wanted_spec.userdata = opaque; // argument passed to the callback
// Open the audio device. wanted_spec holds the requested parameters, spec receives the
// parameters the device was really opened with.
// 1) SDL offers two ways to feed the device:
// a. pull: SDL invokes the callback at some frequency and the callback supplies the data
// (this is what is used here, since wanted_spec.callback is set)
// b. push: the application calls SDL_QueueAudio() at regular intervals; in that case
// wanted_spec.callback is NULL
// 2) After opening, the device plays silence and the callback is not running; it starts once
// the device is unpaused (SDL_PauseAudioDevice(dev, 0) for a device opened with
// SDL_OpenAudioDevice(); that call is not part of this function).
// Passing NULL as the first argument of SDL_OpenAudioDevice() requests the default device.
while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
wanted_spec.channels, wanted_spec.freq, SDL_GetError());
// Opening failed: first walk through the fallback channel counts (next_nb_channels).
// When that table yields 0, restore the original channel count and drop to the next
// lower sample rate; give up once the rate table reaches its 0 sentinel.
wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
if (!wanted_spec.channels) {
wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
wanted_spec.channels = wanted_nb_channels;
if (!wanted_spec.freq) {
av_log(NULL, AV_LOG_ERROR,
"No more combinations to try, audio open failed\n");
return -1;
}
}
wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
}
// Validate the obtained parameters: sample format
if (spec.format != AUDIO_S16SYS) {
av_log(NULL, AV_LOG_ERROR,
"SDL advised audio format %d is not supported!\n", spec.format);
return -1;
}
// Validate the obtained parameters: channel count
if (spec.channels != wanted_spec.channels) {
wanted_channel_layout = av_get_default_channel_layout(spec.channels);
if (!wanted_channel_layout) {
av_log(NULL, AV_LOG_ERROR,
"SDL advised channel count %d is not supported!\n", spec.channels);
return -1;
}
}
// wanted_spec (requested) and spec (obtained) are SDL structures.
// audio_hw_params is the FFmpeg-side description, returned to the caller.
audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
audio_hw_params->freq = spec.freq;
audio_hw_params->channel_layout = wanted_channel_layout;
audio_hw_params->channels = spec.channels;
// frame_size: buffer size for 1 sample per channel; bytes_per_sec: buffer size for freq samples per channel
audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
return -1;
}
return spec.size;
}
打開音頻裝置,涉及到FFmpeg中音頻存儲的基礎概念,為稍顯清晰,将相關注釋摘抄如下:
6.1.1 音頻格式相關
**planar&packed**
音頻采樣格式有兩大類型:planar和packed,假設一個雙聲道音頻檔案,一個左聲道采樣點記作L,一個右聲道采樣點記作R,則:
planar存儲格式:(plane1)LLLLLLLL...LLLL (plane2)RRRRRRRR...RRRR
packed存儲格式:(plane1)LRLRLRLR...........................LRLR
在這兩種采樣類型下,又細分多種采樣格式,如AV_SAMPLE_FMT_S16、AV_SAMPLE_FMT_S16P等,注意SDL2.0目前不支援planar格式
SDL中音頻參數的資料結構定義如下:
/**
 * The calculated values in this structure are calculated by SDL_OpenAudio().
 *
 * For multi-channel audio, the default SDL channel mapping is:
 * 2: FL FR (stereo)
 * 3: FL FR LFE (2.1 surround)
 * 4: FL FR BL BR (quad)
 * 5: FL FR FC BL BR (quad + center)
 * 6: FL FR FC LFE SL SR (5.1 surround - last two can also be BL BR)
 * 7: FL FR FC LFE BC SL SR (6.1 surround)
 * 8: FL FR FC LFE BL BR SL SR (7.1 surround)
 *
 * Usage in the audio_open() listing quoted in this article: the requested spec has
 * freq, format, channels, silence, samples, callback and userdata filled in; from the
 * obtained spec, format, channels, freq and size are read back.
 */
typedef struct SDL_AudioSpec
{
int freq; /**< DSP frequency -- samples per second */
SDL_AudioFormat format; /**< Audio data format */
Uint8 channels; /**< Number of channels: 1 mono, 2 stereo */
Uint8 silence; /**< Audio buffer silence value (calculated) */
Uint16 samples; /**< Audio buffer size in sample FRAMES (total samples divided by channel count) */
Uint16 padding; /**< Necessary for some compile environments */
Uint32 size; /**< Audio buffer size in bytes (calculated) */
SDL_AudioCallback callback; /**< Callback that feeds the audio device (NULL to use SDL_QueueAudio()). */
void *userdata; /**< Userdata passed to callback (ignored for NULL callbacks). */
} SDL_AudioSpec;
SDL的音頻格式(SDL_AudioFormat)定義,以及FFmpeg中定義音頻參數的相關資料結構如下:
/**
 *  \brief Audio format flags.
 *
 *  These are what the 16 bits in SDL_AudioFormat currently mean...
 *  (Unspecified bits are always zero).
 *
 *  \verbatim
    ++-----------------------sample is signed if set
    ||
    ||       ++-----------sample is bigendian if set
    ||       ||
    ||       ||          ++---sample is float if set
    ||       ||          ||
    ||       ||          || +---sample bit size---+
    ||       ||          || |                     |
    15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
    \endverbatim
 *
 *  There are macros in SDL 2.0 and later to query these bits.
 */
typedef Uint16 SDL_AudioFormat;
/**
 *  \name Audio format flags
 *
 *  Defaults to LSB byte order.
 *
 *  Reading the values against the bit diagram above: 0x8000 is the signed flag,
 *  0x1000 the big-endian flag, and the low byte (0x08 / 0x10) the sample size in bits.
 */
/* @{ */
#define AUDIO_U8 0x0008 /**< Unsigned 8-bit samples */
#define AUDIO_S8 0x8008 /**< Signed 8-bit samples */
#define AUDIO_U16LSB 0x0010 /**< Unsigned 16-bit samples */
#define AUDIO_S16LSB 0x8010 /**< Signed 16-bit samples */
#define AUDIO_U16MSB 0x1010 /**< As above, but big-endian byte order */
#define AUDIO_S16MSB 0x9010 /**< As above, but big-endian byte order */
#define AUDIO_U16 AUDIO_U16LSB
#define AUDIO_S16 AUDIO_S16LSB
/* @} */
// This structure is defined in ffplay.c.
// It describes one set of audio parameters. audio_open() fills every field for the opened
// device; audio_decode_frame() updates freq, channels, channel_layout and fmt of
// is->audio_src from the decoded frame.
typedef struct AudioParams {
int freq; // sample rate in Hz
int channels; // channel count
int64_t channel_layout; // channel layout bit mask
enum AVSampleFormat fmt; // sample format
int frame_size; // as set by audio_open(): buffer size in bytes for one sample per channel
int bytes_per_sec; // as set by audio_open(): buffer size in bytes for freq samples per channel
} AudioParams;
/**
 * Audio sample formats
 *
 * - The data described by the sample format is always in native-endian order.
 * Sample values can be expressed by native C types, hence the lack of a signed
 * 24-bit sample format even though it is a common raw audio data format.
 *
 * - The floating-point formats are based on full volume being in the range
 * [-1.0, 1.0]. Any values outside this range are beyond full volume level.
 *
 * - The data layout as used in av_samples_fill_arrays() and elsewhere in FFmpeg
 * (such as AVFrame in libavcodec) is as follows:
 *
 * @par
 * For planar sample formats, each audio channel is in a separate data plane,
 * and linesize is the buffer size, in bytes, for a single plane. All data
 * planes must be the same size. For packed sample formats, only the first data
 * plane is used, and samples for each channel are interleaved. In this case,
 * linesize is the buffer size, in bytes, for the 1 plane.
 *
 */
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
/* packed (interleaved) formats; audio_open() reports AV_SAMPLE_FMT_S16 as the device format */
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
AV_SAMPLE_FMT_S16, ///< signed 16 bits
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
/* planar counterparts of the five formats above, in the same order */
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
/* 64-bit formats: listed after the others, packed first, then planar */
AV_SAMPLE_FMT_S64, ///< signed 64 bits
AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
**channel_layout**
channel_layout是int64_t類型,表示音頻聲道布局,每bit代表一個特定的聲道,參考channel_layout.h中的定義:
/**
 * @defgroup channel_masks Audio channel masks
 *
 * A channel layout is a 64-bits integer with a bit set for every channel.
 * The number of bits set must be equal to the number of channels.
 * The value 0 means that the channel layout is not known.
 * @note this data structure is not powerful enough to handle channels
 * combinations that have the same channel multiple times, such as
 * dual-mono.
 *
 * @{
 */
/* Speaker positions: one bit each, bits 0 through 17. */
#define AV_CH_FRONT_LEFT 0x00000001
#define AV_CH_FRONT_RIGHT 0x00000002
#define AV_CH_FRONT_CENTER 0x00000004
#define AV_CH_LOW_FREQUENCY 0x00000008
#define AV_CH_BACK_LEFT 0x00000010
#define AV_CH_BACK_RIGHT 0x00000020
#define AV_CH_FRONT_LEFT_OF_CENTER 0x00000040
#define AV_CH_FRONT_RIGHT_OF_CENTER 0x00000080
#define AV_CH_BACK_CENTER 0x00000100
#define AV_CH_SIDE_LEFT 0x00000200
#define AV_CH_SIDE_RIGHT 0x00000400
#define AV_CH_TOP_CENTER 0x00000800
#define AV_CH_TOP_FRONT_LEFT 0x00001000
#define AV_CH_TOP_FRONT_CENTER 0x00002000
#define AV_CH_TOP_FRONT_RIGHT 0x00004000
#define AV_CH_TOP_BACK_LEFT 0x00008000
#define AV_CH_TOP_BACK_CENTER 0x00010000
#define AV_CH_TOP_BACK_RIGHT 0x00020000
/* Bits 18 through 28 are not assigned in this listing; the stereo-downmix pair uses bits 29 and 30. */
#define AV_CH_STEREO_LEFT 0x20000000 ///< Stereo downmix.
#define AV_CH_STEREO_RIGHT 0x40000000 ///< See AV_CH_STEREO_LEFT.
/* From bit 31 upwards the constants carry the ULL suffix so that they are 64-bit values. */
#define AV_CH_WIDE_LEFT 0x0000000080000000ULL
#define AV_CH_WIDE_RIGHT 0x0000000100000000ULL
#define AV_CH_SURROUND_DIRECT_LEFT 0x0000000200000000ULL
#define AV_CH_SURROUND_DIRECT_RIGHT 0x0000000400000000ULL
#define AV_CH_LOW_FREQUENCY_2 0x0000000800000000ULL
/** Channel mask value used for AVCodecContext.request_channel_layout
to indicate that the user requests the channel order of the decoder output
to be the native codec channel order. */
#define AV_CH_LAYOUT_NATIVE 0x8000000000000000ULL
/**
* @}
* @defgroup channel_mask_c Audio channel layouts
* @{
* */
/* Each layout is the bitwise OR of individual AV_CH_* channel bits, usually built on top
 * of a smaller layout, so the number of set bits equals the channel count. */
#define AV_CH_LAYOUT_MONO (AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_STEREO (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1 (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1 (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_SURROUND (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_3POINT1 (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_4POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_4POINT1 (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_2 (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_QUAD (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_5POINT1 (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_5POINT0_BACK (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_6POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT0_FRONT (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_HEXAGONAL (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_FRONT (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_7POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT0_FRONT (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT1_WIDE (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_OCTAGONAL (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_HEXADECAGONAL (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
/* audio_open() clears these two bits from the default layout it derives from a channel count. */
#define AV_CH_LAYOUT_STEREO_DOWNMIX (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
6.1.2 打開音頻裝置
打開音頻裝置并建立音頻處理線程,通過調用SDL_OpenAudio()或SDL_OpenAudioDevice()實作。輸入參數是預期的參數,輸出參數是實際參數
1) SDL提供兩種使音頻裝置取得音頻資料方法:
a. pull,SDL以特定的頻率調用回調函數,在回調函數中取得音頻資料(ffplay采用此方式)
b. push,使用者程式以特定的頻率調用SDL_QueueAudio(),向音頻裝置提供資料。此種情況wanted_spec.callback=NULL
2) 音頻裝置打開後播放靜音,不啟動回調;調用SDL_PauseAudioDevice(audio_dev, 0)(若裝置由SDL_OpenAudio()打開,則為SDL_PauseAudio(0))後啟動回調,開始正常播放音頻
SDL_OpenAudioDevice()第一個參數為NULL時,表示使用預設音頻裝置,其裝置選擇方式與SDL_OpenAudio()相同
6.2 音頻重采樣
音頻重采樣在 audio_decode_frame() 中實作,audio_decode_frame() 就是從音頻frame隊列中取出一個frame,按指定格式經過重采樣後輸出。
audio_decode_frame() 函數名起得不太好,它只是進行重采樣,并不進行解碼,叫 audio_resample_frame() 可能更貼切。
重采樣的細節很瑣碎,直接看注釋:
/**
 * Decode one audio frame and return its uncompressed size.
 *
 * The processed audio frame is decoded, converted if required, and
 * stored in is->audio_buf, with size in bytes given by the return
 * value.
 *
 * Note: within this function the frame is only taken from the sample queue and,
 * if needed, resampled; no decoder call is made here.
 *
 * Returns a negative value when playback is paused, when no frame is readable,
 * or when resampling fails (AVERROR(ENOMEM) if the output buffer cannot be allocated).
 */
static int audio_decode_frame(VideoState *is)
{
int data_size, resampled_data_size;
int64_t dec_channel_layout;
av_unused double audio_clock0;
int wanted_nb_samples;
Frame *af;
if (is->paused)
return -1;
// Take frames from the queue until one carries the current serial of the audio packet queue;
// frames with a different serial are dropped.
do {
#if defined(_WIN32)
// Poll in 1 ms steps for a frame; give up once the time since audio_callback_time exceeds
// half of the duration covered by audio_hw_buf_size (presumably the hardware buffer size in bytes).
while (frame_queue_nb_remaining(&is->sampq) == 0) {
if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
return -1;
av_usleep (1000);
}
#endif
// If the head of the queue is readable, af points to that frame
if (!(af = frame_queue_peek_readable(&is->sampq)))
return -1;
frame_queue_next(&is->sampq);
} while (af->serial != is->audioq.serial);
// Buffer size required by the audio parameters of the frame
data_size = av_samples_get_buffer_size(NULL, af->frame->channels, // two arguments on this line: linesize (unused, NULL), channel count
af->frame->nb_samples, // one argument on this line: samples per channel in this frame
af->frame->format, 1); // two arguments on this line: sample format, align = 1 (no alignment)
// Channel layout of the frame; fall back to the default layout for its channel count when
// the layout is missing or inconsistent with the channel count
dec_channel_layout =
(af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
// Corrected sample count from synchronize_audio(): per the article, unchanged when audio is
// the master clock, otherwise adjusted as synchronisation requires
wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
// is->audio_tgt holds the audio parameters accepted by SDL, as obtained by audio_open().
// NOTE(review): is->audio_src is expected to start out equal to is->audio_tgt; that assignment
// is not in the audio_open() shown here (in ffplay 4.1 it appears to be done by the caller,
// stream_component_open()) - confirm.
// If the frame parameters match is->audio_src and no sample-count correction is pending,
// this branch is skipped; is->swr_ctx then stays as it was (NULL if never created) and no
// resampling takes place.
// Otherwise is->swr_ctx is set up from the frame (source) and is->audio_tgt (target), and
// is->audio_src is updated from the frame.
if (af->frame->format != is->audio_src.fmt ||
dec_channel_layout != is->audio_src.channel_layout ||
af->frame->sample_rate != is->audio_src.freq ||
(wanted_nb_samples != af->frame->nb_samples && !is->swr_ctx)) {
swr_free(&is->swr_ctx);
// Configure is->swr_ctx: output = is->audio_tgt, input = parameters of the frame
is->swr_ctx = swr_alloc_set_opts(NULL,
is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
dec_channel_layout, af->frame->format, af->frame->sample_rate,
0, NULL);
if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
swr_free(&is->swr_ctx);
return -1;
}
// Remember the frame parameters in is->audio_src. After the first update this branch is
// rarely taken again, since the frames of one stream normally share the same parameters.
is->audio_src.channel_layout = dec_channel_layout;
is->audio_src.channels = af->frame->channels;
is->audio_src.freq = af->frame->sample_rate;
is->audio_src.fmt = af->frame->format;
}
if (is->swr_ctx) {
// Resampler input: af->frame->nb_samples samples per channel ...
// ... read from the data planes of the frame
const uint8_t **in = (const uint8_t **)af->frame->extended_data;
// Resampler output buffer: is->audio_buf1 (allocated further down); a single pointer is
// enough because the target format is packed
uint8_t **out = &is->audio_buf1;
// Output capacity in samples per channel: wanted_nb_samples rescaled to the target rate,
// plus 256 samples of headroom
int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
// Output buffer size in bytes
int out_size = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
int len2;
if (out_size < 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
return -1;
}
// True when the sample count of the frame was corrected for synchronisation
if (wanted_nb_samples != af->frame->nb_samples) {
// Resampling compensation. Both arguments are rescaled from the source rate to the target rate:
// the first is the number of samples to add (positive) or drop (negative), the second is
// the number of output samples over which that correction is spread (see the libswresample
// documentation of swr_set_compensation()).
if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
return -1;
}
}
av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
if (!is->audio_buf1)
return AVERROR(ENOMEM);
// Resample; the return value is the number of samples per channel that were written
len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
if (len2 < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
return -1;
}
// The output buffer was filled completely, so samples may still be pending inside the
// resampler: warn and re-initialise the context (free it if that fails)
if (len2 == out_count) {
av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
if (swr_init(is->swr_ctx) < 0)
swr_free(&is->swr_ctx);
}
is->audio_buf = is->audio_buf1;
// Size in bytes of the resampled audio data
resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
} else {
// No resampling: point directly at the audio data of the frame
is->audio_buf = af->frame->data[0];
resampled_data_size = data_size;
}
audio_clock0 = is->audio_clock;
/* update the audio clock with the pts */
// The clock is set to the pts of the frame plus the duration of the frame
// (nb_samples / sample_rate), or NAN when the pts is not a number
if (!isnan(af->pts))
is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
else
is->audio_clock = NAN;
is->audio_clock_serial = af->serial;
#ifdef DEBUG
{
static double last_clock;
printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
is->audio_clock - last_clock,
is->audio_clock, audio_clock0);
last_clock = is->audio_clock;
}
#endif
return resampled_data_size;
}
「視訊雲技術」你最值得關注的音視訊技術公衆号,每周推送來自阿裡雲一線的實踐技術文章,在這裡與音視訊領域一流工程師交流切磋。
