天天看點

2023-04-06:擁抱Golang,優化FFmpeg音頻編碼器,探究encode_audio.c的内部結構

作者:福大大架構師每日一題

2023-04-06:擁抱Golang,優化FFmpeg音頻編碼器,探究encode_audio.c的内部結構。

答案2023-04-06:

見moonfdd/ffmpeg-go庫。

這段代碼是一個示例程式,用于将音頻 PCM 資料編碼為 MP2 格式的音頻檔案。下面是代碼的詳細步驟:

1.導入 ffmpeg-go 和 os 等 Go 庫;

2.定義一些變量,包括輸出檔案名、音頻編解碼器、音頻編解碼上下文、音頻幀、音頻資料包等;

3.查找 MP2 編碼器并分配音頻編解碼上下文;

4.配置音頻編解碼參數,設定音頻采樣率、通道數、位率等;

5.打開音頻編解碼器;

6.建立輸出檔案;

7.開始編碼過程,并将編碼後的音頻資料寫入輸出檔案中。

具體地,編碼過程包括以下幾個步驟:

1.初始化音頻幀;

2.将音頻 PCM 資料填充到音頻幀中;

3.發送音頻幀到編解碼器中進行編碼;

4.從編解碼器中讀取編碼後的音頻資料包;

5.将編碼後的音頻資料包寫入輸出檔案中。

最後,釋放記憶體空間并關閉檔案和編碼器。在該示例程式中,我們需要手動設定 FFmpeg 庫的路徑,以便正确加載庫檔案。

指令如下:

go run ./examples/internalexamples/encode_audio/main.go ./out/encode_audio.mp2


./lib/ffplay ./out/encode_audio.mp2           

golang代碼如下:

package main


import (
  "fmt"
  "math"
  "os"
  "unsafe"


  "github.com/moonfdd/ffmpeg-go/ffcommon"
  "github.com/moonfdd/ffmpeg-go/libavcodec"
  "github.com/moonfdd/ffmpeg-go/libavutil"
)


// main0 encodes 200 frames of a 440 Hz sine tone with the MP2 encoder and
// writes the resulting packets to the file named by os.Args[1].
//
// It prints a usage line and returns 0 when no output file is given, and
// returns 0 after a successful run. Every failure along the way prints a
// message (where one is defined) and terminates the process via os.Exit(1).
func main0() (ret ffcommon.FInt) {
  var filename string
  var codec *libavcodec.AVCodec
  var c *libavcodec.AVCodecContext
  var frame *libavutil.AVFrame
  var pkt *libavcodec.AVPacket
  var i, j, k ffcommon.FInt
  var f *os.File
  var err error
  var t, tincr ffcommon.FFloat

  // sampleAt returns a pointer to the idx-th 16-bit sample counted from base.
  sampleAt := func(base unsafe.Pointer, idx ffcommon.FInt) *ffcommon.FUint16T {
    return (*ffcommon.FUint16T)(unsafe.Pointer(uintptr(base) + uintptr(idx)*unsafe.Sizeof(ffcommon.FUint16T(0))))
  }

  if len(os.Args) <= 1 {
    fmt.Printf("Usage: %s <output file>\n", os.Args[0])
    return 0
  }
  filename = os.Args[1]

  /* find the MP2 encoder */
  codec = libavcodec.AvcodecFindEncoder(libavcodec.AV_CODEC_ID_MP2)
  if codec == nil {
    fmt.Printf("Codec not found\n")
    os.Exit(1)
  }

  c = codec.AvcodecAllocContext3()
  if c == nil {
    fmt.Printf("Could not allocate audio codec context\n")
    os.Exit(1)
  }

  /* put sample parameters */
  c.BitRate = 64000

  /* check that the encoder supports s16 pcm input */
  c.SampleFmt = libavutil.AV_SAMPLE_FMT_S16
  if check_sample_fmt(codec, c.SampleFmt) == 0 {
    fmt.Printf("Encoder does not support sample format %s",
      libavutil.AvGetSampleFmtName(c.SampleFmt))
    os.Exit(1)
  }

  /* select other audio parameters supported by the encoder */
  c.SampleRate = select_sample_rate(codec)
  c.ChannelLayout = uint64(select_channel_layout(codec))
  c.Channels = libavutil.AvGetChannelLayoutNbChannels(c.ChannelLayout)

  /* open it */
  if c.AvcodecOpen2(codec, nil) < 0 {
    fmt.Printf("Could not open codec\n")
    os.Exit(1)
  }

  // Report the underlying cause instead of discarding the error.
  f, err = os.Create(filename)
  if err != nil {
    fmt.Printf("Could not open %s: %v\n", filename, err)
    os.Exit(1)
  }

  /* packet for holding encoded output */
  pkt = libavcodec.AvPacketAlloc()
  if pkt == nil {
    fmt.Printf("could not allocate the packet\n")
    os.Exit(1)
  }

  /* frame containing input raw audio */
  frame = libavutil.AvFrameAlloc()
  if frame == nil {
    fmt.Printf("Could not allocate audio frame\n")
    os.Exit(1)
  }

  frame.NbSamples = c.FrameSize
  frame.Format = int32(c.SampleFmt)
  frame.ChannelLayout = c.ChannelLayout

  /* allocate the data buffers */
  ret = frame.AvFrameGetBuffer(0)
  if ret < 0 {
    fmt.Printf("Could not allocate audio data buffers\n")
    os.Exit(1)
  }

  /* encode a single tone sound */
  t = 0
  tincr = float32(2 * libavutil.M_PI * 440.0 / float64(c.SampleRate))
  for i = 0; i < 200; i++ {
    /* make sure the frame is writable -- makes a copy if the encoder
     * kept a reference internally */
    ret = frame.AvFrameMakeWritable()
    if ret < 0 {
      os.Exit(1)
    }
    base := unsafe.Pointer(frame.Data[0])

    for j = 0; j < c.FrameSize; j++ {
      // Samples are interleaved, so sample j of channel k sits at index
      // j*Channels+k. Using the real channel count keeps the writes inside
      // the buffer even when the selected layout is not stereo.
      first := sampleAt(base, c.Channels*j)

      // Convert through a signed integer first: converting a negative
      // float directly to an unsigned type is implementation-dependent in
      // Go, whereas int32 -> uint16 is a well-defined bit truncation that
      // preserves the signed 16-bit sample pattern.
      *first = ffcommon.FUint16T(int32(math.Sin(float64(t)) * 10000))

      // Duplicate the first channel's sample into the remaining channels.
      for k = 1; k < c.Channels; k++ {
        *sampleAt(base, c.Channels*j+k) = *first
      }
      t += tincr
    }
    encode(c, frame, pkt, f)
  }

  /* flush the encoder */
  encode(c, nil, pkt, f)

  f.Close()

  libavutil.AvFrameFree(&frame)
  libavcodec.AvPacketFree(&pkt)
  libavcodec.AvcodecFreeContext(&c)

  return 0
}


// check_sample_fmt reports whether sample_fmt appears in the encoder's list of
// sample formats (codec.SampleFmts). The list is walked element by element
// until the AV_SAMPLE_FMT_NONE terminator is reached.
//
// It returns 1 when the format is found and 0 otherwise. A nil list is
// treated as "not found" so the caller gets a clean error instead of a nil
// pointer dereference.
func check_sample_fmt(codec *libavcodec.AVCodec, sample_fmt libavutil.AVSampleFormat) ffcommon.FInt {
  p := codec.SampleFmts
  if p == nil {
    return 0
  }

  for *p != libavutil.AV_SAMPLE_FMT_NONE {
    if *p == sample_fmt {
      return 1
    }
    // Advance by exactly one element. A stride larger than the element size
    // would skip entries and could step over the terminator, reading past
    // the end of the array.
    p = (*libavutil.AVSampleFormat)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
  }
  return 0
}


// select_sample_rate picks, from the encoder's zero-terminated list of
// supported sample rates, the one closest to 44100. When several rates are
// equally close, the first one encountered wins. If the encoder publishes no
// list at all, 44100 is returned.
func select_sample_rate(codec *libavcodec.AVCodec) ffcommon.FInt {
  if codec.SupportedSamplerates == nil {
    return 44100
  }

  // distance is how far a rate lies from the 44100 target.
  distance := func(rate ffcommon.FInt) int32 {
    return int32(math.Abs(float64(44100 - rate)))
  }

  var chosen ffcommon.FInt
  var cur *ffcommon.FInt = codec.SupportedSamplerates
  for *cur != 0 {
    if chosen == 0 || distance(*cur) < distance(chosen) {
      chosen = *cur
    }
    // Step to the next 4-byte entry of the list.
    cur = (*ffcommon.FInt)(unsafe.Pointer(uintptr(unsafe.Pointer(cur)) + uintptr(4)))
  }
  return chosen
}


// select_channel_layout returns the entry of the encoder's zero-terminated
// channel-layout list that has the most channels; on a tie the earliest entry
// is kept. When the encoder publishes no list, AV_CH_LAYOUT_STEREO is
// returned.
//
// Note that the chosen 64-bit layout mask is converted to ffcommon.FInt on
// return, so only the bits that fit in that narrower type survive.
func select_channel_layout(codec *libavcodec.AVCodec) ffcommon.FInt {
  if codec.ChannelLayouts == nil {
    return libavutil.AV_CH_LAYOUT_STEREO
  }

  var (
    widest      ffcommon.FUint64T
    widestCount ffcommon.FInt
  )

  var cur *ffcommon.FUint64T = codec.ChannelLayouts
  for *cur != 0 {
    if n := libavutil.AvGetChannelLayoutNbChannels(*cur); n > widestCount {
      widest, widestCount = *cur, n
    }
    // Step to the next 8-byte layout mask.
    cur = (*ffcommon.FUint64T)(unsafe.Pointer(uintptr(unsafe.Pointer(cur)) + uintptr(8)))
  }
  return ffcommon.FInt(widest)
}


// encode sends frame to the encoder ctx and writes every packet the encoder
// hands back to output, reusing pkt as the receive buffer. The caller passes
// a nil frame to flush the encoder.
//
// The function returns once the encoder reports that it needs more input
// (EAGAIN) or has reached end of stream. Any other encoder error, and any
// failure to write a packet to output, prints a message and terminates the
// process via os.Exit(1).
func encode(ctx *libavcodec.AVCodecContext, frame *libavutil.AVFrame, pkt *libavcodec.AVPacket, output *os.File) {
  var ret ffcommon.FInt

  /* send the frame for encoding */
  ret = ctx.AvcodecSendFrame(frame)
  if ret < 0 {
    fmt.Printf("Error sending the frame to the encoder\n")
    os.Exit(1)
  }

  /* read all the available output packets (in general there may be any
   * number of them */
  for ret >= 0 {
    ret = ctx.AvcodecReceivePacket(pkt)
    if ret == -libavutil.EAGAIN || ret == libavutil.AVERROR_EOF {
      return
    }
    if ret < 0 {
      fmt.Printf("Error encoding audio frame\n")
      os.Exit(1)
    }

    // A failed or short write would otherwise leave a silently truncated
    // output file, so treat it like every other fatal error here.
    if _, err := output.Write(ffcommon.ByteSliceFromByteP(pkt.Data, int(pkt.Size))); err != nil {
      fmt.Printf("Error writing encoded packet: %v\n", err)
      os.Exit(1)
    }
    pkt.AvPacketUnref()
  }
}


// main points the ffmpeg-go bindings at the shared libraries under ./lib,
// makes sure the ./out directory exists, and then runs the encoding example.
func main() {
  // Append ./lib to the "Path" environment variable before configuring the
  // individual library locations.
  os.Setenv("Path", os.Getenv("Path")+";./lib")

  ffcommon.SetAvutilPath("./lib/avutil-56.dll")
  ffcommon.SetAvcodecPath("./lib/avcodec-58.dll")
  ffcommon.SetAvdevicePath("./lib/avdevice-58.dll")
  ffcommon.SetAvfilterPath("./lib/avfilter-56.dll")
  ffcommon.SetAvformatPath("./lib/avformat-58.dll")
  ffcommon.SetAvpostprocPath("./lib/postproc-55.dll")
  ffcommon.SetAvswresamplePath("./lib/swresample-3.dll")
  ffcommon.SetAvswscalePath("./lib/swscale-5.dll")

  // Create the output directory only when Stat says it is missing; any other
  // Stat outcome leaves the filesystem untouched.
  const outDir = "./out"
  if _, statErr := os.Stat(outDir); os.IsNotExist(statErr) {
    os.Mkdir(outDir, 0777) // readable, writable and executable by everyone
  }

  main0()
}
2023-04-06:擁抱Golang,優化FFmpeg音頻編碼器,探究encode_audio.c的内部結構