ffmpeg h264和aac封裝為mp4檔案

1、I幀/P幀/B幀

I幀：幀内編碼圖像幀，也叫關鍵幀，包含一幅完整的圖像資訊，不含運動矢量，在解碼時不需要參考其它幀圖像。在閉合式GOP（畫面組）中，每個GOP的開始是IDR幀，且當前GOP的資料不會參考前後GOP的資料。

在編解碼中，為了方便，将首個I幀（IDR，Instantaneous Decoding Refresh，即時解碼刷新）和其它I幀區別開來，這樣就能方便地控制編碼和解碼流程。IDR幀的作用是立刻刷新，使錯誤不至于傳播，從IDR幀開始重新算一個新的序列開始編碼。IDR會導緻DPB（Decoded Picture Buffer，解碼圖像緩沖區，用于存放參考幀）清空，在IDR幀之後的所有幀都不能引用IDR幀之前的幀的内容。

P幀：預測編碼圖像幀，是幀間編碼幀的一種，利用之前的I幀或P幀進行預測編碼。

B幀：雙向預測編碼圖像幀，是幀間編碼幀的一種，利用之前和之後的I幀或P幀，進行雙向預測編碼。

B幀通常不作為參考幀（在 H.264 等較新的标準中，B幀也可以被用作參考幀）。B幀具有更高的壓縮率，但需要更多的緩沖時間以及更高的CPU占用率，因此B幀更适合本地存儲以及視訊點播，不适用于對實時性要求高的直播系統。

2、DTS和PTS

DTS（decoding time stamp，解碼時間戳），PTS（presentation time stamp，顯示時間戳）；

音頻中DTS和PTS是相同的；視訊中如果存在B幀（雙向預測編碼幀），由于B幀需要依賴其前面和後面的幀，因此含有B幀的碼流的解碼順序和顯示順序是不同的。

More：ffmpeg時間戳詳解

H264碼流與AAC碼流複用合并為MP4檔案

1、原料準備：

從mp4中抽取音頻碼流：

ffmpeg -i 001.mp4 -acodec copy -vn 001.aac

從mp4中抽取視訊碼流：

ffmpeg -i 001.mp4 -codec copy -bsf: h264_mp4toannexb -f h264 001.h264

2、封裝代碼：

#include <stdio.h>
#include <stdlib.h>

#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"

// State kept for one opened input: the demuxer context plus the single stream selected from it.
struct AVState {
    AVFormatContext *fmt_ctx;   // format (demuxer) context of the opened input
    AVStream *stream;   // selected stream; points into fmt_ctx->streams
    int type;           // media type requested at init (an AVMEDIA_TYPE_* value)
    int stream_index;   // index of the selected stream inside fmt_ctx->streams
};


/**
 * Open an input file and probe its stream information.
 *
 * @param fmt_ctx  Out-parameter. Receives the opened context on success and is
 *                 set to NULL on failure, so the caller never sees a stale value.
 * @param file     Path or URL of the input to open.
 * @return 0 on success, -1 on failure (a message is printed to stdout).
 *
 * On success the caller owns *fmt_ctx and must release it with
 * avformat_close_input().
 */
int init_fmt_ctx(AVFormatContext **fmt_ctx, const char *file) {
    *fmt_ctx = NULL;

    AVFormatContext *me = avformat_alloc_context();
    if (me == NULL) {
        printf("avformat_alloc failed.\n");
        return -1;
    }
    // avformat_open_input() frees a user-supplied context itself when it fails,
    // so no cleanup is needed on this path.
    if (avformat_open_input(&me, file, NULL, NULL) != 0) {
        printf("Couldn't open input stream.\n");
        return -1;
    }
    if (avformat_find_stream_info(me, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        // The input is open at this point; close it so the context is not leaked.
        avformat_close_input(&me);
        return -1;
    }
    //av_dump_format(me, 0, file, 0);
    *fmt_ctx = me;
    return 0;
}


/**
 * Allocate an AVState, open `file`, and select the best stream of the given type.
 *
 * @param state  Out-parameter. Receives the new state on success and is set to
 *               NULL on failure, so callers can test it safely in cleanup code.
 * @param type   Media type to look for (an AVMEDIA_TYPE_* value).
 * @param file   Path or URL of the input to open.
 * @return 0 on success, -1 on failure (a message is printed to stdout).
 *
 * On success the caller owns *state and releases it with AVState_Destroy().
 */
int AVState_Init(struct AVState **state, int type, const char *file) {
    *state = NULL;

    // Zero-initialised so fmt_ctx is NULL until the input is actually opened.
    struct AVState *me = calloc(1, sizeof(*me));
    if (me == NULL) {
        printf("failed to alloc AVState\n");
        return -1;
    }

    if (init_fmt_ctx(&me->fmt_ctx, file) < 0) {
        printf("failed to init audio_fmt_ctx\n");
        free(me);
        return -1;
    }

    int index = av_find_best_stream(me->fmt_ctx, type, -1, -1, NULL, 0);
    if (index < 0) {
        printf("failed to find stream_index\n");
        // Release the opened input before dropping the state.
        avformat_close_input(&me->fmt_ctx);
        free(me);
        return -1;
    }

    me->stream_index = index;
    me->type = type;
    me->stream = me->fmt_ctx->streams[me->stream_index];
    *state = me;
    return 0;
}

/**
 * Release an AVState created by AVState_Init().
 *
 * @param state  State to destroy; NULL is accepted and ignored.
 */
void AVState_Destroy(struct AVState *state) {
    if (state == NULL) {
        return;
    }
    if (state->fmt_ctx) {
        // avformat_close_input() frees the context and resets the pointer to NULL,
        // so no separate avformat_free_context() call is needed.
        avformat_close_input(&state->fmt_ctx);
    }
    free(state);
}


void muxer(const char *mp4file, const char *h264file, const char *aacfile) {
    // 初始輸入碼流狀态結構體
    struct AVState *audio, *video;
    if (AVState_Init(&audio, AVMEDIA_TYPE_AUDIO, aacfile) < 0) {
        goto _Error;
    }
    if (AVState_Init(&video, AVMEDIA_TYPE_VIDEO, h264file) < 0) {
        goto _Error;
    }

    // 初始化mp4的格式上下文
    AVFormatContext *fmt_ctx;
    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, mp4file);
    if (fmt_ctx == NULL) {
        printf("failed to alloc output fmt_ctx\n");
        goto _Error;
    }

    // 設定輸出流
    AVStream *out_stream_audio = avformat_new_stream(fmt_ctx, NULL);
    AVStream *out_stream_video = avformat_new_stream(fmt_ctx, NULL);
    avcodec_parameters_copy(out_stream_audio->codecpar, audio->stream->codecpar);
    avcodec_parameters_copy(out_stream_video->codecpar, video->stream->codecpar);
    int out_audio_index = out_stream_audio->index;
    int out_video_index = out_stream_video->index;
    printf("out_audio_index:%d out_video_index:%d\n", out_audio_index, out_video_index);
    av_dump_format(fmt_ctx, 0, mp4file, 1);

    
    // 打開輸出檔案io
    if (! (fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&fmt_ctx->pb, mp4file, AVIO_FLAG_WRITE) < 0) {
            printf("failed to open output file\n");
            goto _Error;
        }
    }

    // 寫檔案頭
    if (avformat_write_header(fmt_ctx, NULL) < 0) {
        printf("failed to write header\n");
        goto _Error;
    }
    AVPacket *packet = av_packet_alloc();

    // 音頻編碼資料和視訊編碼資料合并到MP4檔案
    int frame_index = 0;
    int64_t cur_video_pts = 0, cur_audio_pts = 0;
    while (1) {
        struct AVState *inputstate = NULL;  
        AVStream *out_stream = NULL;
        int out_index = -1;
        
        //比較時間戳，判斷目前應該寫什麼幀
        if (av_compare_ts(cur_video_pts, video->stream->time_base, \
                    cur_audio_pts, audio->stream->time_base) < 0) {
            inputstate = video;
            out_index = out_video_index;
        } else {
            inputstate = audio;
            out_index = out_audio_index;
        }
        out_stream = fmt_ctx->streams[out_index];
        
        // 從輸入流讀取編碼資料
        if (av_read_frame(inputstate->fmt_ctx, packet) < 0) {
            break;
        }
       
        // 如果該幀沒有pts，需要補上
        if (packet->pts == AV_NOPTS_VALUE) {
            AVRational timebase = inputstate->stream->time_base;
            // 兩幀之間的持續時間
            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(inputstate->stream->r_frame_rate);
            
            packet->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(timebase)*AV_TIME_BASE);
            packet->dts = packet->pts;
            packet->duration = (double)calc_duration / (double)(av_q2d(timebase)*AV_TIME_BASE);
            frame_index++;
        } 

        // 記錄pts
        if (out_index == out_video_index) cur_video_pts = packet->pts;
        else cur_audio_pts = packet->pts;

        // 更新PTS/DTS
        packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);
        packet->pos = -1;
        packet->stream_index = out_index;

        // 寫檔案
        if (av_interleaved_write_frame(fmt_ctx, packet) < 0) {
            printf("failed to muxing packet\n");
            break;
        } 

        av_packet_unref(packet);
    }

    // 寫檔案尾
    av_write_trailer(fmt_ctx);

_Error:
    if (audio) AVState_Destroy(audio);
    if (video) AVState_Destroy(video);
    if (fmt_ctx->pb) avio_close(fmt_ctx->pb);
    if (fmt_ctx) avformat_free_context(fmt_ctx);
    if (packet)  av_packet_free(&packet);
}


/**
 * Entry point: muxer <output.mp4> <input.h264> <input.aac>
 *
 * @return 0 after invoking muxer(), 1 when too few arguments are given.
 */
int main(int argc, char const* argv[])
{
    // All three paths are required; without this check NULL would be passed to muxer()
    if (argc < 4) {
        fprintf(stderr, "usage: %s <output.mp4> <input.h264> <input.aac>\n",
                argc > 0 ? argv[0] : "muxer");
        return 1;
    }
    muxer(argv[1], argv[2], argv[3]);
    return 0;
}

3、測試結果：

ffmpeg h264和aac封裝為mp4檔案

問題：檔案大小相同，但在時間和比特率上有些許差異。

原來的testvideo/test.mp4檔案：

ffmpeg h264和aac封裝為mp4檔案

得到的out.mp4檔案

ffmpeg h264和aac封裝為mp4檔案

4、優化時間基轉換過程

packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);

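替換為（注意：上面第三行把 packet->dts 而不是 packet->duration 換算後賦給了 duration，這是一處錯誤；av_packet_rescale_ts 會同時正确地換算 pts、dts 和 duration）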

// 将 packet 中的各時間值從輸入流封裝格式時間基轉換到輸出流封裝格式時間基
av_packet_rescale_ts(packet, inputstate->stream->time_base, out_stream->time_base);

優化效果：

ffmpeg h264和aac封裝為mp4檔案

ffmpeg h264和aac封裝為mp4檔案

1、I幀/P幀/B幀

2、DTS和PTS

H264碼流與AAC碼流複用合并為MP4檔案

1、原料準備：

2、封裝代碼：

3、測試結果：

4、優化時間基轉換過程

繼續閱讀

h.264 rtp打包

RTP RTSP H.264 實時視訊

mp4解包成h264資料進行分析。

H264的Profile和Level

YUV編碼為H264 H264封裝為MP4

h264 Nalu 詳解

【H264/AVC 句法和語義詳解】(四)：通過學習"描述子"實作碼流解析的第一步

淺析H.264技術發展

H.264 Profile對比簡圖h.264 profile：級别（Level）簡圖h.264 software encoder

《FFmpeg從入門到精通》讀書筆記（四）

VS2005編譯T264總結

音視訊開發進階｜第六講：色彩和色彩空間·下篇

說一說TS碼流裡面的PCR

VS中引入并使用WebRTC庫

壓縮編碼M-JPEG、MPEG4、H.264

音視訊基礎1：H264、H265、MPEG-4、VP8、VP9編碼基礎知識個人認知，程式員職業發展出路編碼器發展史編碼原理H264H265