ffmpeg h264和aac封装为mp4文件

1、I帧/P帧/B帧

I帧：帧内编码图像帧，也叫关键帧，包含一幅完整的图像信息，不含运动矢量，在解码时不需要参考其它帧图像。在闭合式GOP（画面组）中，每个GOP的开始是IDR帧，且当前GOP的数据不会参考前后GOP的数据。

在编解码中，为了将首个I帧和其它I帧区别开来，把首个I帧称为IDR帧（Instantaneous Decoding Refresh，即时解码刷新），这样便于控制编码和解码流程。IDR帧的作用是立刻刷新，使错误不至于传播：从IDR帧开始重新算一个新的序列开始编码。IDR会导致DPB（Decoded Picture Buffer，解码图像缓冲区）清空，在IDR帧之后的所有帧都不能引用IDR帧之前的帧的内容。

P帧：预测编码图像帧，是帧间编码帧的一种，利用之前的I帧或P帧进行预测编码。

B帧：双向预测编码图像帧，是帧间编码帧的一种，利用之前和之后的I帧或P帧，进行双向预测编码。

在MPEG-2等早期标准中B帧不可以作为参考帧；H.264则允许B帧作为参考帧（例如x264的b-pyramid选项）。B帧具有更高的压缩率，但需要更多的缓冲时间以及更高的CPU占用率。因此B帧更适合本地存储以及视频点播，不适用于对实时性要求高的直播系统。

2、DTS和PTS

DTS（decoding time stamp，解码时间戳），PTS（presentation time stamp，显示时间戳）；

音频中DTS和PTS是相同的；视频中如果存在B帧（双向预测编码帧），由于B帧需要依赖其前面和后面的帧，被参考的后一帧必须先于B帧解码，因此含有B帧的视频解码顺序和显示顺序不同，即DTS和PTS不同。

More：ffmpeg时间戳详解

H264码流与AAC码流复用合并为MP4文件

1、原料准备：

从mp4中抽取音频码流：

ffmpeg -i 001.mp4 -acodec copy -vn 001.aac

从mp4中抽取视频码流：

ffmpeg -i 001.mp4 -an -vcodec copy -bsf:v h264_mp4toannexb -f h264 001.h264

2、封装代码：

#include <stdio.h>
#include <stdlib.h>

#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"

/* Per-input state: one opened input file plus the single stream selected from it. */
struct AVState {
    AVFormatContext *fmt_ctx;   // format context of the opened input file
    AVStream *stream;   // selected stream; points into fmt_ctx->streams
    int type;           // media type that was requested when the stream was selected
    int stream_index;   // index of the selected stream inside fmt_ctx->streams
};


/**
 * Open an input file and probe its stream information.
 *
 * @param fmt_ctx  Receives the opened format context on success. It is set
 *                 to NULL on failure so callers never see a stale pointer.
 *                 On success the caller owns the context and must release
 *                 it with avformat_close_input().
 * @param file     Path or URL of the input to open.
 * @return 0 on success, -1 on failure (a message is printed).
 */
int init_fmt_ctx(AVFormatContext **fmt_ctx, const char *file) {
    /* avformat_open_input() allocates the context itself when handed a
     * pointer to NULL, and frees it again if opening fails. */
    AVFormatContext *me = NULL;

    if (fmt_ctx == NULL) {
        return -1;
    }
    *fmt_ctx = NULL;

    if (file == NULL || avformat_open_input(&me, file, NULL, NULL) != 0) {
        printf("Couldn't open input stream.\n");
        return -1;
    }
    if (avformat_find_stream_info(me, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        /* The input is open at this point, so it must be closed here or
         * the context and its I/O handle leak. */
        avformat_close_input(&me);
        return -1;
    }
    //av_dump_format(me, 0, file, 0);
    *fmt_ctx = me;
    return 0;
}


/**
 * Allocate an AVState, open `file` and select its best stream of `type`.
 *
 * @param state  Receives the new state on success; set to NULL on failure.
 *               Release it with AVState_Destroy().
 * @param type   Media type to look for (an AVMEDIA_TYPE_* value).
 * @param file   Path or URL of the input file.
 * @return 0 on success, -1 on failure. Nothing is leaked on failure.
 */
int AVState_Init(struct AVState **state, int type, const char *file) {
    if (state == NULL) {
        return -1;
    }
    *state = NULL;

    /* calloc so that every field starts out zero/NULL. */
    struct AVState *me = calloc(1, sizeof(*me));
    if (me == NULL) {
        printf("failed to alloc AVState\n");
        return -1;
    }

    if (init_fmt_ctx(&me->fmt_ctx, file) < 0) {
        printf("failed to init audio_fmt_ctx\n");
        free(me);
        return -1;
    }

    int index = av_find_best_stream(me->fmt_ctx, type, -1, -1, NULL, 0);
    if (index < 0) {
        printf("failed to find stream_index\n");
        /* The input was opened successfully above; close it before bailing out. */
        avformat_close_input(&me->fmt_ctx);
        free(me);
        return -1;
    }

    me->stream_index = index;
    me->type = type;
    me->stream = me->fmt_ctx->streams[me->stream_index];
    *state = me;
    return 0;
}

/**
 * Close the input held by `state` and free the state itself.
 *
 * @param state  State created by AVState_Init(); NULL is accepted and ignored.
 */
void AVState_Destroy(struct AVState *state) {
    if (state == NULL) {
        return;
    }
    /* avformat_close_input() frees the context and resets the pointer to
     * NULL, so no separate avformat_free_context() call is needed. */
    if (state->fmt_ctx) {
        avformat_close_input(&state->fmt_ctx);
    }
    free(state);
}


void muxer(const char *mp4file, const char *h264file, const char *aacfile) {
    // 初始输入码流状态结构体
    struct AVState *audio, *video;
    if (AVState_Init(&audio, AVMEDIA_TYPE_AUDIO, aacfile) < 0) {
        goto _Error;
    }
    if (AVState_Init(&video, AVMEDIA_TYPE_VIDEO, h264file) < 0) {
        goto _Error;
    }

    // 初始化mp4的格式上下文
    AVFormatContext *fmt_ctx;
    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, mp4file);
    if (fmt_ctx == NULL) {
        printf("failed to alloc output fmt_ctx\n");
        goto _Error;
    }

    // 设置输出流
    AVStream *out_stream_audio = avformat_new_stream(fmt_ctx, NULL);
    AVStream *out_stream_video = avformat_new_stream(fmt_ctx, NULL);
    avcodec_parameters_copy(out_stream_audio->codecpar, audio->stream->codecpar);
    avcodec_parameters_copy(out_stream_video->codecpar, video->stream->codecpar);
    int out_audio_index = out_stream_audio->index;
    int out_video_index = out_stream_video->index;
    printf("out_audio_index:%d out_video_index:%d\n", out_audio_index, out_video_index);
    av_dump_format(fmt_ctx, 0, mp4file, 1);

    
    // 打开输出文件io
    if (! (fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&fmt_ctx->pb, mp4file, AVIO_FLAG_WRITE) < 0) {
            printf("failed to open output file\n");
            goto _Error;
        }
    }

    // 写文件头
    if (avformat_write_header(fmt_ctx, NULL) < 0) {
        printf("failed to write header\n");
        goto _Error;
    }
    AVPacket *packet = av_packet_alloc();

    // 音频编码数据和视频编码数据合并到MP4文件
    int frame_index = 0;
    int64_t cur_video_pts = 0, cur_audio_pts = 0;
    while (1) {
        struct AVState *inputstate = NULL;  
        AVStream *out_stream = NULL;
        int out_index = -1;
        
        //比较时间戳，判断当前应该写什么帧
        if (av_compare_ts(cur_video_pts, video->stream->time_base, \
                    cur_audio_pts, audio->stream->time_base) < 0) {
            inputstate = video;
            out_index = out_video_index;
        } else {
            inputstate = audio;
            out_index = out_audio_index;
        }
        out_stream = fmt_ctx->streams[out_index];
        
        // 从输入流读取编码数据
        if (av_read_frame(inputstate->fmt_ctx, packet) < 0) {
            break;
        }
       
        // 如果该帧没有pts，需要补上
        if (packet->pts == AV_NOPTS_VALUE) {
            AVRational timebase = inputstate->stream->time_base;
            // 两帧之间的持续时间
            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(inputstate->stream->r_frame_rate);
            
            packet->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(timebase)*AV_TIME_BASE);
            packet->dts = packet->pts;
            packet->duration = (double)calc_duration / (double)(av_q2d(timebase)*AV_TIME_BASE);
            frame_index++;
        } 

        // 记录pts
        if (out_index == out_video_index) cur_video_pts = packet->pts;
        else cur_audio_pts = packet->pts;

        // 更新PTS/DTS
        packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);
        packet->pos = -1;
        packet->stream_index = out_index;

        // 写文件
        if (av_interleaved_write_frame(fmt_ctx, packet) < 0) {
            printf("failed to muxing packet\n");
            break;
        } 

        av_packet_unref(packet);
    }

    // 写文件尾
    av_write_trailer(fmt_ctx);

_Error:
    if (audio) AVState_Destroy(audio);
    if (video) AVState_Destroy(video);
    if (fmt_ctx->pb) avio_close(fmt_ctx->pb);
    if (fmt_ctx) avformat_free_context(fmt_ctx);
    if (packet)  av_packet_free(&packet);
}


/**
 * Entry point: mux an H.264 file and an AAC file into an MP4 file.
 *
 * Usage: prog <out.mp4> <in.h264> <in.aac>
 *
 * @return 0 after running the muxer, 1 when arguments are missing.
 */
int main(int argc, char const* argv[])
{
    /* All three paths are required; without this check muxer() would be
     * handed NULL pointers. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s <out.mp4> <in.h264> <in.aac>\n",
                argc > 0 ? argv[0] : "muxer");
        return 1;
    }
    muxer(argv[1], argv[2], argv[3]);
    return 0;
}

3、测试结果：

ffmpeg h264和aac封装为mp4文件

问题：文件大小相同，但在时间和比特率上有些许差异。

原来的testvideo/test.mp4文件：

ffmpeg h264和aac封装为mp4文件

得到的out.mp4文件

ffmpeg h264和aac封装为mp4文件

4、优化时间基转换过程

packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);

替换为

// 将 packet 中的各时间值从输入流封装格式时间基转换到输出流封装格式时间基
av_packet_rescale_ts(packet, inputstate->stream->time_base, out_stream->time_base);

优化效果：

ffmpeg h264和aac封装为mp4文件

ffmpeg h264和aac封装为mp4文件

1、I帧/P帧/B帧

2、DTS和PTS

H264码流与AAC码流复用合并为MP4文件

1、原料准备：

2、封装代码：

3、测试结果：

4、优化时间基转换过程

继续阅读

h.264 rtp打包

RTP RTSP H.264 实时视频

mp4解包成h264数据进行分析。

H264的Profile和Level

YUV编码为H264 H264封装为MP4

h264 Nalu 详解

【H264/AVC 句法和语义详解】(四)：通过学习"描述子"实现码流解析的第一步

浅析H.264技术发展

H.264 Profile对比简图h.264 profile：级别（Level）简图h.264 software encoder

《FFmpeg从入门到精通》读书笔记（四）

VS2005编译T264总结

音视频开发进阶｜第六讲：色彩和色彩空间·下篇

说一说TS码流里面的PCR

VS中引入并使用WebRTC库

压缩编码M-JPEG、MPEG4、H.264

音视频基础1：H264、H265、MPEG-4、VP8、VP9编码基础知识个人认知，程序员职业发展出路编码器发展史编码原理H264H265