2023-03-23:音視訊解混合(demuxer)為PCM和YUV420P,用go語言編寫。
答案2023-03-23:
大體步驟如下:
1.打開媒體檔案,并擷取音頻和視訊流。
2.對于每個流,找到對應的解碼器、建立解碼上下文并打開解碼器。
3.一幀一幀讀取壓縮的音頻或視訊資料AVPacket,并調用對應的解碼器進行解碼。
4.對于音頻:重采樣成16bit 22050 單聲道PCM格式,并将資料寫入輸出檔案或緩沖區。
5.對于視訊:轉換成YUV420P格式,并将資料寫入輸出檔案或緩沖區。
6.清理已配置設定的資源。
代碼見github/moonfdd/ffmpeg-go庫。
執行指令:
go run ./examples/a23.video_demuxer_mp42yuvpcm/main.go
代碼參考[23:音視訊解混合(demuxer)為PCM和YUV420P](https://feater.top/ffmpeg/ffmpeg-demuxer-video-to-pcm-and-yuv420p),代碼如下:
// https://feater.top/ffmpeg/ffmpeg-demuxer-video-to-pcm-and-yuv420p
package main
import (
"fmt"
"os"
"os/exec"
"unsafe"
"github.com/moonfdd/ffmpeg-go/ffcommon"
"github.com/moonfdd/ffmpeg-go/libavcodec"
"github.com/moonfdd/ffmpeg-go/libavdevice"
"github.com/moonfdd/ffmpeg-go/libavformat"
"github.com/moonfdd/ffmpeg-go/libavutil"
"github.com/moonfdd/ffmpeg-go/libswresample"
"github.com/moonfdd/ffmpeg-go/libswscale"
)
/*
 * Playback command for the produced PCM. NOTE(review): the actual output
 * sample rate is 22050 Hz (see the constant below), so play with:
 *   ffplay -ar 22050 -ac 1 -f s16le -i out.pcm
 */
// Output audio sample rate in Hz used by the resampler and the PCM buffer
// sizing. NOTE(review): the name suggests 44100 but the value is 22050 —
// consider renaming; several expressions below depend on this value.
const i44100 = 22050
// avcodec_save_audio_file decodes the audio stream at streamIndex of
// pFormatCtx, resamples every decoded frame to signed 16-bit mono PCM at
// i44100 (= 22050) Hz, and appends the raw samples to fileName.
// Returns 0 on success, -1 when the decoder cannot be found, allocated,
// or opened.
func avcodec_save_audio_file(pFormatCtx *libavformat.AVFormatContext, streamIndex ffcommon.FInt, fileName string) ffcommon.FInt {
	var pCodec *libavcodec.AVCodec
	var pCodecCtx *libavcodec.AVCodecContext
	codecpar := pFormatCtx.GetStream(uint32(streamIndex)).Codecpar
	// 4. Find the decoder matching the stream's codec ID.
	pCodec = libavcodec.AvcodecFindDecoder(codecpar.CodecId)
	if pCodec == nil {
		fmt.Printf("can't decoder audio\n")
		return -1
	}
	// Allocate a decoding context for this codec.
	pCodecCtx = pCodec.AvcodecAllocContext3()
	if pCodecCtx == nil {
		fmt.Printf("can't allocate a audio decoding context\n")
		return -1
	}
	// Initialize the context from the stream's codec parameters.
	pCodecCtx.AvcodecParametersToContext(codecpar)
	// Without this, FFmpeg logs: "Could not update timestamps for skipped samples".
	pCodecCtx.PktTimebase = pFormatCtx.GetStream(uint32(streamIndex)).TimeBase
	// 5. Open the decoder.
	if pCodecCtx.AvcodecOpen2(pCodec, nil) < 0 {
		fmt.Printf("can't open codec\n")
		return -1
	}
	// printf("--------------- File Information ----------------\n");
	// av_dump_format(pFormatCtx, 0, fileName, 0);
	// printf("-------------------------------------------------\n");
	// Compressed (encoded) packet read from the container.
	packet := new(libavcodec.AVPacket)
	// Decoded (raw) frame.
	frame := libavutil.AvFrameAlloc()
	// Resampler: decoded frame -> 16-bit PCM at the configured rate.
	swrCtx := libswresample.SwrAlloc()
	// Resampling options ---------------------------------------------------start
	// Input sample format, taken from the decoder.
	inSampleFmt := pCodecCtx.SampleFmt
	// Output sample format: signed 16-bit.
	var outSampleFmt libswresample.AVSampleFormat = libavutil.AV_SAMPLE_FMT_S16
	// Input sample rate, taken from the decoder.
	inSampleRate := pCodecCtx.SampleRate
	// Output sample rate. NOTE(review): i44100 is actually 22050 Hz.
	var outSampleRate ffcommon.FInt = i44100
	// Input channel layout, taken from the decoder.
	var inChannelLayout ffcommon.FUint64T = pCodecCtx.ChannelLayout
	// Output channel layout: AV_CH_LAYOUT_MONO = mono; use AV_CH_LAYOUT_STEREO for stereo.
	var outChannelLayout ffcommon.FUint64T = libavutil.AV_CH_LAYOUT_MONO
	fmt.Printf("inSampleFmt = %d, inSampleRate = %d, inChannelLayout = %d, name = %s\n", inSampleFmt, inSampleRate,
		inChannelLayout, ffcommon.StringFromPtr(pCodec.Name))
	swrCtx.SwrAllocSetOpts(int64(outChannelLayout), outSampleFmt, outSampleRate,
		int64(inChannelLayout), inSampleFmt, inSampleRate, 0, uintptr(0))
	// NOTE(review): SwrInit's return value is not checked — a failed init
	// would only surface as bad SwrConvert output later.
	swrCtx.SwrInit()
	// Resampling options -----------------------------------------------------end
	// Number of output channels implied by the output layout.
	outChannelNb := libavutil.AvGetChannelLayoutNbChannels(outChannelLayout)
	fmt.Printf("outChannelNb = %d\n", outChannelNb)
	// Scratch buffer receiving the resampled PCM data (2 bytes per 16-bit sample).
	outBuffer := (*byte)(unsafe.Pointer(libavutil.AvMalloc(2 * i44100)))
	// FILE *fp = fopen(fileName, "wb");
	// NOTE(review): the Create error is discarded; a nil fp would panic below.
	fp, _ := os.Create(fileName)
	// Seek back to the beginning of the stream before reading.
	pFormatCtx.AvSeekFrame(streamIndex, 0, libavformat.AVSEEK_FLAG_BACKWARD)
	// 6. Read compressed audio packets one at a time and decode them.
	for pFormatCtx.AvReadFrame(packet) >= 0 {
		if packet.StreamIndex == uint32(streamIndex) {
			// Decode AVPacket --> AVFrame.
			ret := pCodecCtx.AvcodecSendPacket(packet)
			if ret < 0 {
				fmt.Printf("Decode error\n")
				break
			}
			if pCodecCtx.AvcodecReceiveFrame(frame) >= 0 {
				swrCtx.SwrConvert(&outBuffer, 2*i44100, (**byte)(unsafe.Pointer(&frame.Data)), frame.NbSamples)
				// Byte size of the samples to write. NOTE(review): computed
				// from the INPUT frame's NbSamples, not the converted sample
				// count returned by SwrConvert — verify when in/out rates differ.
				outBufferSize := libavutil.AvSamplesGetBufferSize(nil, outChannelNb, frame.NbSamples, outSampleFmt, 1)
				// Append the PCM bytes to the output file.
				fp.Write(ffcommon.ByteSliceFromByteP(outBuffer, int(outBufferSize)))
			}
		}
		packet.AvPacketUnref()
	}
	fp.Close()
	libavutil.AvFrameFree(&frame)
	libavutil.AvFree(uintptr(unsafe.Pointer(outBuffer)))
	libswresample.SwrFree(&swrCtx)
	pCodecCtx.AvcodecClose()
	return 0
}
/*
* 視訊播放指令:ffplay -video_size 654x368 -i out.yuv
*/
// avcodec_save_video_file decodes the video stream at streamIndex of
// pFormatCtx, converts every decoded frame to YUV420P with libswscale, and
// appends the planar Y/U/V data to fileName.
// Returns 0 on success, -1 when the decoder cannot be found, allocated,
// or opened.
func avcodec_save_video_file(pFormatCtx *libavformat.AVFormatContext, streamIndex ffcommon.FInt, fileName string) ffcommon.FInt {
	var pCodec *libavcodec.AVCodec
	var pCodecCtx *libavcodec.AVCodecContext
	codecpar := pFormatCtx.GetStream(uint32(streamIndex)).Codecpar
	// 4. Find the decoder matching the stream's codec ID.
	// (The original comments here said "audio" — copy-paste from the audio path.)
	pCodec = libavcodec.AvcodecFindDecoder(codecpar.CodecId)
	if pCodec == nil {
		fmt.Printf("can't decoder audio\n")
		return -1
	}
	// Allocate a decoding context for this codec.
	pCodecCtx = pCodec.AvcodecAllocContext3()
	if pCodecCtx == nil {
		fmt.Printf("can't allocate a audio decoding context\n")
		return -1
	}
	// Initialize the context from the stream's codec parameters.
	pCodecCtx.AvcodecParametersToContext(codecpar)
	// Without this, FFmpeg logs: "Could not update timestamps for skipped samples".
	pCodecCtx.PktTimebase = pFormatCtx.GetStream(uint32(streamIndex)).TimeBase
	// 5. Open the decoder.
	if pCodecCtx.AvcodecOpen2(pCodec, nil) < 0 {
		fmt.Printf("can't open codec\n")
		return -1
	}
	// printf("--------------- File Information ----------------\n");
	// av_dump_format(pFormatCtx, 0, fileName, 0);
	// printf("-------------------------------------------------\n");
	// Compressed (encoded) packet read from the container.
	pPacket := new(libavcodec.AVPacket)
	// Decoded frame in the decoder's native pixel format.
	pFrame := libavutil.AvFrameAlloc()
	// Destination frame holding the YUV420P conversion result.
	pFrameYUV := libavutil.AvFrameAlloc()
	// Backing buffer sized for one YUV420P frame at the stream's dimensions.
	outBuffer := (*byte)(unsafe.Pointer(libavutil.AvMalloc(
		uint64(libavutil.AvImageGetBufferSize(libavutil.AV_PIX_FMT_YUV420P, pCodecCtx.Width, pCodecCtx.Height, 1)))))
	// Point pFrameYUV's data planes/linesizes into outBuffer.
	libavutil.AvImageFillArrays((*[4]*byte)(unsafe.Pointer(&pFrameYUV.Data)), (*[4]int32)(unsafe.Pointer(&pFrameYUV.Linesize)), outBuffer,
		libavutil.AV_PIX_FMT_YUV420P, pCodecCtx.Width,
		pCodecCtx.Height, 1)
	// Scaler/converter: native pixel format -> YUV420P, same dimensions.
	pImgConvertCtx := libswscale.SwsGetContext(pCodecCtx.Width, pCodecCtx.Height, pCodecCtx.PixFmt,
		pCodecCtx.Width, pCodecCtx.Height, libavutil.AV_PIX_FMT_YUV420P, libswscale.SWS_BICUBIC, nil, nil, nil)
	fmt.Printf("width = %d, height = %d, name = %s\n", pCodecCtx.Width, pCodecCtx.Height, ffcommon.StringFromPtr(pCodec.Name))
	// NOTE(review): the Create error is discarded; a nil fp would panic below.
	fp, _ := os.Create(fileName)
	// Seek back to the beginning of the stream before reading.
	pFormatCtx.AvSeekFrame(streamIndex, 0, libavformat.AVSEEK_FLAG_BACKWARD)
	// 6. Read compressed video packets one at a time and decode them.
	for pFormatCtx.AvReadFrame(pPacket) >= 0 {
		if pPacket.StreamIndex == uint32(streamIndex) {
			// Decode AVPacket --> AVFrame.
			ret := pCodecCtx.AvcodecSendPacket(pPacket)
			if ret < 0 {
				fmt.Printf("Decode error\n")
				break
			}
			if pCodecCtx.AvcodecReceiveFrame(pFrame) >= 0 {
				// Convert the decoded frame into pFrameYUV (YUV420P).
				pImgConvertCtx.SwsScale((**byte)(unsafe.Pointer(&pFrame.Data)), (*int32)(unsafe.Pointer(&pFrame.Linesize)), 0,
					uint32(pCodecCtx.Height), (**byte)(unsafe.Pointer(&pFrameYUV.Data)), (*int32)(unsafe.Pointer(&pFrameYUV.Linesize)))
				// YUV420P plane sizes: Y = w*h bytes, U and V = w*h/4 each.
				y_size := pCodecCtx.Width * pCodecCtx.Height
				fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[0], int(y_size)))   //Y
				fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[1], int(y_size/4))) //U
				fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[2], int(y_size/4))) //V
			}
		}
		pPacket.AvPacketUnref()
	}
	fp.Close()
	libavutil.AvFrameFree(&pFrame)
	libavutil.AvFrameFree(&pFrameYUV)
	libavutil.AvFree(uintptr(unsafe.Pointer(outBuffer)))
	pCodecCtx.AvcodecClose()
	return 0
}
// main demuxes ./resources/big_buck_bunny.mp4 into a raw PCM file and a raw
// YUV420P file under ./out, then launches ffplay on both results (audio in a
// background goroutine, video in the foreground).
func main() {
	// Make the bundled FFmpeg DLLs discoverable and register their paths.
	os.Setenv("Path", os.Getenv("Path")+";./lib")
	ffcommon.SetAvutilPath("./lib/avutil-56.dll")
	ffcommon.SetAvcodecPath("./lib/avcodec-58.dll")
	ffcommon.SetAvdevicePath("./lib/avdevice-58.dll")
	ffcommon.SetAvfilterPath("./lib/avfilter-56.dll")
	ffcommon.SetAvformatPath("./lib/avformat-58.dll")
	ffcommon.SetAvpostprocPath("./lib/postproc-55.dll")
	ffcommon.SetAvswresamplePath("./lib/swresample-3.dll")
	ffcommon.SetAvswscalePath("./lib/swscale-5.dll")
	// Ensure the output directory exists. MkdirAll replaces the original
	// Stat+Mkdir dance: it is a no-op when the directory already exists and
	// its error is actually checked.
	genDir := "./out"
	if err := os.MkdirAll(genDir, 0777); err != nil {
		fmt.Println("mkdir err = ", err)
		return
	}
	inputFile := "./resources/big_buck_bunny.mp4"
	outAudioFile := "./out/a23.pcm"
	outVideoFile := "./out/a23.yuv"
	var videoStreamIndex ffcommon.FInt = -1
	var audioStreamIndex ffcommon.FInt = -1
	var i ffcommon.FUnsignedInt = 0
	var pFormatCtx *libavformat.AVFormatContext
	// 1. Register all components (devices pull in the rest).
	libavdevice.AvdeviceRegisterAll()
	// Container (demuxing) context.
	pFormatCtx = libavformat.AvformatAllocContext()
	// 2. Open the input file.
	if libavformat.AvformatOpenInput(&pFormatCtx, inputFile, nil, nil) != 0 {
		fmt.Printf("can't open input file\n")
		return
	}
	// 3. Probe stream information.
	if pFormatCtx.AvformatFindStreamInfo(nil) < 0 {
		fmt.Printf("can't find stream info\n")
		// Fix: close the input on this error path so the demuxer context is
		// not leaked (the original returned without cleanup).
		libavformat.AvformatCloseInput(&pFormatCtx)
		return
	}
	// Locate the index of the first audio stream.
	for i = 0; i < pFormatCtx.NbStreams; i++ {
		if pFormatCtx.GetStream(i).Codecpar.CodecType == libavutil.AVMEDIA_TYPE_AUDIO {
			audioStreamIndex = int32(i)
			break
		}
	}
	// Locate the index of the first video stream.
	for i = 0; i < pFormatCtx.NbStreams; i++ {
		if pFormatCtx.GetStream(i).Codecpar.CodecType == libavutil.AVMEDIA_TYPE_VIDEO {
			videoStreamIndex = int32(i)
			break
		}
	}
	fmt.Printf("audioStreamIndex = %d, videoStreamIndex = %d\n", audioStreamIndex, videoStreamIndex)
	if audioStreamIndex == -1 {
		fmt.Printf("can't find a audio stream\n")
	} else {
		fmt.Printf("try to save audio stream\n")
		avcodec_save_audio_file(pFormatCtx, audioStreamIndex, outAudioFile)
	}
	if videoStreamIndex == -1 {
		fmt.Printf("can't find a video stream\n")
	} else {
		fmt.Printf("try to save video stream\n")
		avcodec_save_video_file(pFormatCtx, videoStreamIndex, outVideoFile)
	}
	libavformat.AvformatCloseInput(&pFormatCtx)
	fmt.Println("-----------------------------------------")
	// Play the PCM in the background while the video plays in the foreground.
	// Fix: each command uses its own local err — the original shared one err
	// variable between this goroutine and the main goroutine, a data race.
	go func() {
		_, err := exec.Command("./lib/ffplay.exe", "-ar", "22050", "-ac", "1", "-f", "s16le", "-i", outAudioFile).Output()
		if err != nil {
			fmt.Println("play err = ", err)
		}
	}()
	// Fix: -video_size uses ffplay's documented WxH syntax ("640x360"); the
	// original "640*360" only worked because av_parse_video_size skips an
	// arbitrary single separator character.
	_, err := exec.Command("./lib/ffplay.exe", "-video_size", "640x360", "-i", outVideoFile).Output()
	if err != nil {
		fmt.Println("play err = ", err)
	}
}