天天看點

2023-03-23:音視訊解混合(demuxer)為PCM和YUV420P,用go語言

作者:福大大架構師每日一題

2023-03-23:音視訊解混合(demuxer)為PCM和YUV420P,用go語言編寫。

答案2023-03-23:

大體步驟如下:

1.打開媒體檔案,并擷取音頻和視訊流。

2.對于每個流,找到對應的解碼器、建立解碼上下文并打開解碼器。

3.一幀一幀讀取壓縮的音頻或視訊資料AVPacket,并調用對應的解碼器進行解碼。

4.對于音頻:重采樣成16bit 44100 PCM格式,并将資料寫入輸出檔案或緩沖區。

5.對于視訊:轉換成YUV420P格式,并将資料寫入輸出檔案或緩沖區。

6.清理已配置設定的資源。

代碼見github/moonfdd/ffmpeg-go庫。

執行指令:

go run ./examples/a23.video_demuxer_mp42yuvpcm/main.go           

代碼參考[23:音視訊解混合(demuxer)為PCM和YUV420P](https://feater.top/ffmpeg/ffmpeg-demuxer-video-to-pcm-and-yuv420p),代碼如下:

// https://feater.top/ffmpeg/ffmpeg-demuxer-video-to-pcm-and-yuv420
package main


import (
  "fmt"
  "os"
  "os/exec"
  "unsafe"


  "github.com/moonfdd/ffmpeg-go/ffcommon"
  "github.com/moonfdd/ffmpeg-go/libavcodec"
  "github.com/moonfdd/ffmpeg-go/libavdevice"
  "github.com/moonfdd/ffmpeg-go/libavformat"
  "github.com/moonfdd/ffmpeg-go/libavutil"
  "github.com/moonfdd/ffmpeg-go/libswresample"
  "github.com/moonfdd/ffmpeg-go/libswscale"
)


/*
 * 音頻播放指令:ffplay -ar 44100 -ac 1 -f s16le -i out.pcm
 */
const i44100 = 22050


func avcodec_save_audio_file(pFormatCtx *libavformat.AVFormatContext, streamIndex ffcommon.FInt, fileName string) ffcommon.FInt {
  var pCodec *libavcodec.AVCodec
  var pCodecCtx *libavcodec.AVCodecContext
  codecpar := pFormatCtx.GetStream(uint32(streamIndex)).Codecpar


  //4.擷取解碼器(一):音頻
  //根據索引拿到對應的流
  pCodec = libavcodec.AvcodecFindDecoder(codecpar.CodecId)
  if pCodec == nil {
    fmt.Printf("can't decoder audio\n")
    return -1
  }
  //申請一個解碼上下文
  pCodecCtx = pCodec.AvcodecAllocContext3()
  if pCodecCtx == nil {
    fmt.Printf("can't allocate a audio decoding context\n")
    return -1
  }


  //用流解碼資訊初始化編碼參數
  pCodecCtx.AvcodecParametersToContext(codecpar)


  //沒有此句會出現:Could not update timestamps for skipped samples
  pCodecCtx.PktTimebase = pFormatCtx.GetStream(uint32(streamIndex)).TimeBase


  //5.打開解碼器
  if pCodecCtx.AvcodecOpen2(pCodec, nil) < 0 {
    fmt.Printf("can't open codec\n")
    return -1
  }


  //  printf("--------------- File Information ----------------\n");
  //  av_dump_format(pFormatCtx, 0, fileName, 0);
  //  printf("-------------------------------------------------\n");


  //編碼資料
  packet := new(libavcodec.AVPacket)
  //解壓縮資料
  frame := libavutil.AvFrameAlloc()


  //frame->16bit 44100 PCM統一音頻采樣格式與采樣率
  swrCtx := libswresample.SwrAlloc()
  //重采樣設定選項-----------------------------------------------------------start
  //輸入的采樣格式
  inSampleFmt := pCodecCtx.SampleFmt
  //輸出的采樣格式
  var outSampleFmt libswresample.AVSampleFormat = libavutil.AV_SAMPLE_FMT_S16
  //輸入的采樣率
  inSampleRate := pCodecCtx.SampleRate
  //輸出的采樣率
  var outSampleRate ffcommon.FInt = i44100
  //輸入的聲道布局
  var inChannelLayout ffcommon.FUint64T = pCodecCtx.ChannelLayout
  //輸出的聲道布局:CHANNEL_IN_MONO為單聲道,CHANNEL_IN_STEREO為雙聲道
  var outChannelLayout ffcommon.FUint64T = libavutil.AV_CH_LAYOUT_MONO


  fmt.Printf("inSampleFmt = %d, inSampleRate = %d, inChannelLayout = %d, name = %s\n", inSampleFmt, inSampleRate,
    inChannelLayout, ffcommon.StringFromPtr(pCodec.Name))


  swrCtx.SwrAllocSetOpts(int64(outChannelLayout), outSampleFmt, outSampleRate,
    int64(inChannelLayout), inSampleFmt, inSampleRate, 0, uintptr(0))
  swrCtx.SwrInit()
  //重采樣設定選項-----------------------------------------------------------end


  //擷取輸出的聲道個數
  outChannelNb := libavutil.AvGetChannelLayoutNbChannels(outChannelLayout)
  fmt.Printf("outChannelNb = %d\n", outChannelNb)


  //    //存儲PCM資料
  outBuffer := (*byte)(unsafe.Pointer(libavutil.AvMalloc(2 * i44100)))


  //    FILE *fp = fopen(fileName, "wb");
  fp, _ := os.Create(fileName)


  //回到流的初始位置
  pFormatCtx.AvSeekFrame(streamIndex, 0, libavformat.AVSEEK_FLAG_BACKWARD)


  //6.一幀一幀讀取壓縮的音頻資料AVPacket
  for pFormatCtx.AvReadFrame(packet) >= 0 {
    if packet.StreamIndex == uint32(streamIndex) {
      //解碼AVPacket --> AVFrame
      ret := pCodecCtx.AvcodecSendPacket(packet)
      if ret < 0 {
        fmt.Printf("Decode error\n")
        break
      }


      if pCodecCtx.AvcodecReceiveFrame(frame) >= 0 {
        swrCtx.SwrConvert(&outBuffer, 2*i44100, (**byte)(unsafe.Pointer(&frame.Data)), frame.NbSamples)
        //擷取sample的size
        outBufferSize := libavutil.AvSamplesGetBufferSize(nil, outChannelNb, frame.NbSamples, outSampleFmt, 1)
        //寫入檔案
        fp.Write(ffcommon.ByteSliceFromByteP(outBuffer, int(outBufferSize)))
      }
    }


    packet.AvPacketUnref()
  }


  fp.Close()
  libavutil.AvFrameFree(&frame)
  libavutil.AvFree(uintptr(unsafe.Pointer(outBuffer)))
  libswresample.SwrFree(&swrCtx)
  pCodecCtx.AvcodecClose()


  return 0
}


/*
 * 視訊播放指令:ffplay -video_size 654x368 -i out.yuv
 */
func avcodec_save_video_file(pFormatCtx *libavformat.AVFormatContext, streamIndex ffcommon.FInt, fileName string) ffcommon.FInt {
  var pCodec *libavcodec.AVCodec
  var pCodecCtx *libavcodec.AVCodecContext
  codecpar := pFormatCtx.GetStream(uint32(streamIndex)).Codecpar


  //4.擷取解碼器(一):音頻
  //根據索引拿到對應的流
  pCodec = libavcodec.AvcodecFindDecoder(codecpar.CodecId)
  if pCodec == nil {
    fmt.Printf("can't decoder audio\n")
    return -1
  }
  //申請一個解碼上下文
  pCodecCtx = pCodec.AvcodecAllocContext3()
  if pCodecCtx == nil {
    fmt.Printf("can't allocate a audio decoding context\n")
    return -1
  }


  //用流解碼資訊初始化編碼參數
  pCodecCtx.AvcodecParametersToContext(codecpar)


  //沒有此句會出現:Could not update timestamps for skipped samples
  pCodecCtx.PktTimebase = pFormatCtx.GetStream(uint32(streamIndex)).TimeBase


  //5.打開解碼器
  if pCodecCtx.AvcodecOpen2(pCodec, nil) < 0 {
    fmt.Printf("can't open codec\n")
    return -1
  }


  //  printf("--------------- File Information ----------------\n");
  //  av_dump_format(pFormatCtx, 0, fileName, 0);
  //  printf("-------------------------------------------------\n");


  //編碼資料
  pPacket := new(libavcodec.AVPacket)
  //解壓縮資料
  pFrame := libavutil.AvFrameAlloc()
  pFrameYUV := libavutil.AvFrameAlloc()


  outBuffer := (*byte)(unsafe.Pointer(libavutil.AvMalloc(
    uint64(libavutil.AvImageGetBufferSize(libavutil.AV_PIX_FMT_YUV420P, pCodecCtx.Width, pCodecCtx.Height, 1)))))
  libavutil.AvImageFillArrays((*[4]*byte)(unsafe.Pointer(&pFrameYUV.Data)), (*[4]int32)(unsafe.Pointer(&pFrameYUV.Linesize)), outBuffer,
    libavutil.AV_PIX_FMT_YUV420P, pCodecCtx.Width,
    pCodecCtx.Height, 1)


  pImgConvertCtx := libswscale.SwsGetContext(pCodecCtx.Width, pCodecCtx.Height, pCodecCtx.PixFmt,
    pCodecCtx.Width, pCodecCtx.Height, libavutil.AV_PIX_FMT_YUV420P, libswscale.SWS_BICUBIC, nil, nil, nil)


  fmt.Printf("width = %d, height = %d, name = %s\n", pCodecCtx.Width, pCodecCtx.Height, ffcommon.StringFromPtr(pCodec.Name))


  fp, _ := os.Create(fileName)


  //回到流的初始位置
  pFormatCtx.AvSeekFrame(streamIndex, 0, libavformat.AVSEEK_FLAG_BACKWARD)


  //6.一幀一幀讀取壓縮的視訊資料AVPacket
  for pFormatCtx.AvReadFrame(pPacket) >= 0 {
    if pPacket.StreamIndex == uint32(streamIndex) {
      //解碼AVPacket --> AVFrame
      ret := pCodecCtx.AvcodecSendPacket(pPacket)
      if ret < 0 {
        fmt.Printf("Decode error\n")
        break
      }


      if pCodecCtx.AvcodecReceiveFrame(pFrame) >= 0 {
        pImgConvertCtx.SwsScale((**byte)(unsafe.Pointer(&pFrame.Data)), (*int32)(unsafe.Pointer(&pFrame.Linesize)), 0,
          uint32(pCodecCtx.Height), (**byte)(unsafe.Pointer(&pFrameYUV.Data)), (*int32)(unsafe.Pointer(&pFrameYUV.Linesize)))


        y_size := pCodecCtx.Width * pCodecCtx.Height
        fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[0], int(y_size)))   //Y
        fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[1], int(y_size/4))) //U
        fp.Write(ffcommon.ByteSliceFromByteP(pFrameYUV.Data[2], int(y_size/4))) //V
      }
    }


    pPacket.AvPacketUnref()
  }


  fp.Close()
  libavutil.AvFrameFree(&pFrame)
  libavutil.AvFrameFree(&pFrameYUV)
  libavutil.AvFree(uintptr(unsafe.Pointer(outBuffer)))
  pCodecCtx.AvcodecClose()


  return 0
}


func main() {
  os.Setenv("Path", os.Getenv("Path")+";./lib")
  ffcommon.SetAvutilPath("./lib/avutil-56.dll")
  ffcommon.SetAvcodecPath("./lib/avcodec-58.dll")
  ffcommon.SetAvdevicePath("./lib/avdevice-58.dll")
  ffcommon.SetAvfilterPath("./lib/avfilter-56.dll")
  ffcommon.SetAvformatPath("./lib/avformat-58.dll")
  ffcommon.SetAvpostprocPath("./lib/postproc-55.dll")
  ffcommon.SetAvswresamplePath("./lib/swresample-3.dll")
  ffcommon.SetAvswscalePath("./lib/swscale-5.dll")


  genDir := "./out"
  _, err := os.Stat(genDir)
  if err != nil {
    if os.IsNotExist(err) {
      os.Mkdir(genDir, 0777) //  Everyone can read write and execute
    }
  }


  inputFile := "./resources/big_buck_bunny.mp4"
  outAudioFile := "./out/a23.pcm"
  outVideoFile := "./out/a23.yuv"


  var videoStreamIndex ffcommon.FInt = -1
  var audioStreamIndex ffcommon.FInt = -1
  var i ffcommon.FUnsignedInt = 0
  var pFormatCtx *libavformat.AVFormatContext


  //1.注冊元件
  libavdevice.AvdeviceRegisterAll()


  //封裝格式上下文
  pFormatCtx = libavformat.AvformatAllocContext()


  //2.打開輸入檔案
  if libavformat.AvformatOpenInput(&pFormatCtx, inputFile, nil, nil) != 0 {
    fmt.Printf("can't open input file\n")
    return
  }


  //3.擷取音視訊資訊
  if pFormatCtx.AvformatFindStreamInfo(nil) < 0 {
    fmt.Printf("can't find stream info\n")
    return
  }


  //音視訊編碼,找到對應的音視訊流的索引位置
  //找到音頻流的索引
  for i = 0; i < pFormatCtx.NbStreams; i++ {
    if pFormatCtx.GetStream(i).Codecpar.CodecType == libavutil.AVMEDIA_TYPE_AUDIO {
      audioStreamIndex = int32(i)
      break
    }
  }


  //找到視訊流的索引
  for i = 0; i < pFormatCtx.NbStreams; i++ {
    if pFormatCtx.GetStream(i).Codecpar.CodecType == libavutil.AVMEDIA_TYPE_VIDEO {
      videoStreamIndex = int32(i)
      break
    }
  }


  fmt.Printf("audioStreamIndex = %d, videoStreamIndex = %d\n", audioStreamIndex, videoStreamIndex)


  if audioStreamIndex == -1 {
    fmt.Printf("can't find a audio stream\n")
  } else {
    fmt.Printf("try to save audio stream\n")
    avcodec_save_audio_file(pFormatCtx, audioStreamIndex, outAudioFile)
  }


  if videoStreamIndex == -1 {
    fmt.Printf("can't find a video stream\n")
  } else {
    fmt.Printf("try to save video stream\n")
    avcodec_save_video_file(pFormatCtx, videoStreamIndex, outVideoFile)
  }


  libavformat.AvformatCloseInput(&pFormatCtx)


  fmt.Println("-----------------------------------------")
  go func() {
    _, err = exec.Command("./lib/ffplay.exe", "-ar", "22050", "-ac", "1", "-f", "s16le", "-i", outAudioFile).Output()
    if err != nil {
      fmt.Println("play err = ", err)
    }
  }()
  _, err = exec.Command("./lib/ffplay.exe", "-video_size", "640*360", "-i", outVideoFile).Output()
  if err != nil {
    fmt.Println("play err = ", err)
  }
}           
2023-03-23:音視訊解混合(demuxer)為PCM和YUV420P,用go語言