#include "video.h"
// NOTE(review): the four system header names below were missing from the file
// as received (bare `#include` directives). They were chosen to cover the
// APIs this file calls - confirm against the original build.
#include <math.h>
#include <string.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include "log.h"

enum {
    MAX_OUTPUT_SIDE = 480,         // longest side of the encoded picture, in pixels
    VIDEO_TIME_BASE_DEN = 65535,   // denominator of the video encoder time base
    AUDIO_BIT_RATE = 40000,        // audio encoder bit rate
    AUDIO_MAX_SAMPLE_RATE = 44100  // upper bound for the output sample rate
};

AVFrame *out_frame = NULL;
struct SwsContext *sws_ctx = NULL;
AVStream *video_st = NULL, *audio_st = NULL;
AVFormatContext *oc = NULL;
AVOutputFormat *fmt = NULL;
AVPicture dst_picture;
uint8_t **dst_samples_data = NULL;
SwrContext *swr_ctx = NULL;
int current_n_out = 0;    // resampled samples staged in buff, not yet encoded
int current_in_buff = 0;  // bytes staged in buff, not yet encoded
uint8_t buff[4096 * 2];   // staging area between the resampler and the audio encoder

// Samples per channel that dst_samples_data was allocated for (0 = not allocated).
static int dst_samples_capacity = 0;

/* Returns the smaller of the two values. */
int min(int val1, int val2) {
    return val1 < val2 ? val1 : val2;
}

/*
 * Computes the output picture size: the longer side is reduced to
 * MAX_OUTPUT_SIDE and the other side is scaled by the same factor (rounded
 * up). Sources whose longer side already fits are left unchanged.
 */
static void limit_resolution(int src_width, int src_height, int *dst_width, int *dst_height) {
    *dst_width = src_width;
    *dst_height = src_height;
    if (src_width > src_height) {
        if (src_width > MAX_OUTPUT_SIDE) {
            float scale = src_width / (float)MAX_OUTPUT_SIDE;
            *dst_width = MAX_OUTPUT_SIDE;
            *dst_height = ceilf(src_height / scale);
        }
    } else if (src_height > MAX_OUTPUT_SIDE) {
        // Portrait/square input: the height is the side being limited, so the
        // height (not the width) decides whether scaling is needed.
        float scale = src_height / (float)MAX_OUTPUT_SIDE;
        *dst_height = MAX_OUTPUT_SIDE;
        *dst_width = ceilf(src_width / scale);
    }
}

/*
 * Creates the MPEG-4 video stream in oc, opens its encoder, allocates
 * out_frame/dst_picture and, when the source size or pixel format differs
 * from the output, the scaler context.
 * Returns 0 on success; on failure calls onError() and returns -1.
 */
static int setup_video_stream(AVCodecContext *video_dec_ctx, AVStream *src_video_stream, int bitr) {
    int dst_width, dst_height;
    limit_resolution(video_dec_ctx->width, video_dec_ctx->height, &dst_width, &dst_height);

    if (video_dec_ctx->height != dst_height || video_dec_ctx->width != dst_width || video_dec_ctx->pix_fmt != AV_PIX_FMT_YUV420P) {
        sws_ctx = sws_getContext(video_dec_ctx->width, video_dec_ctx->height, video_dec_ctx->pix_fmt,
                                 dst_width, dst_height, AV_PIX_FMT_YUV420P,
                                 SWS_BILINEAR, NULL, NULL, NULL);
        if (!sws_ctx) {
            LOGD("Could not initialize the conversion context\n");
            onError();
            return -1;
        }
    }

    AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
    if (!codec) {
        LOGD("Could not find encoder for '%s'\n", avcodec_get_name(AV_CODEC_ID_MPEG4));
        onError();
        return -1;
    }

    video_st = avformat_new_stream(oc, codec);
    if (!video_st) {
        LOGD("Could not allocate stream\n");
        onError();
        return -1;
    }
    video_st->id = oc->nb_streams - 1;
    av_dict_copy(&video_st->metadata, src_video_stream->metadata, 0);

    // A zero numerator or denominator would make fps zero, infinite or NaN and
    // the integer conversion below undefined, so reject it explicitly.
    const AVRational rate = src_video_stream->avg_frame_rate;
    if (rate.num <= 0 || rate.den <= 0) {
        LOGD("Invalid source frame rate %d/%d\n", rate.num, rate.den);
        onError();
        return -1;
    }
    const double fps = (double)rate.num / (double)rate.den;

    AVCodecContext *c = video_st->codec;
    c->codec_id = AV_CODEC_ID_MPEG4;
    c->bit_rate = bitr;
    c->width = dst_width;
    c->height = dst_height;
    // One frame lasts num/den seconds; num is derived from the same constant
    // as den so that den/num approximates the source frame rate.
    c->time_base.den = VIDEO_TIME_BASE_DEN;
    c->time_base.num = (int)floor(VIDEO_TIME_BASE_DEN / fps);
    if (c->time_base.num < 1) {
        c->time_base.num = 1;
    }
    c->gop_size = 12;
    c->pix_fmt = AV_PIX_FMT_YUV420P;
    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    int ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        LOGD("Could not open video codec: %s\n", av_err2str(ret));
        onError();
        return -1;
    }

    out_frame = avcodec_alloc_frame();
    if (!out_frame) {
        LOGD("Could not allocate video frame\n");
        onError();
        return -1;
    }

    ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
    if (ret < 0) {
        LOGD("Could not allocate picture: %s\n", av_err2str(ret));
        onError();
        return -1;
    }
    // Point the frame's planes/strides at the freshly allocated picture.
    *((AVPicture *)out_frame) = dst_picture;
    return 0;
}

/*
 * Creates the mono AAC audio stream in oc, initializes the resampler that
 * converts decoder output to the encoder's format, and opens the encoder.
 * Returns 0 on success; on failure calls onError() and returns -1.
 */
static int setup_audio_stream(AVCodecContext *audio_dec_ctx, AVStream *src_audio_stream) {
    AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec) {
        LOGD("Could not find encoder for '%s'\n", avcodec_get_name(AV_CODEC_ID_AAC));
        onError();
        return -1;
    }

    audio_st = avformat_new_stream(oc, codec);
    if (!audio_st) {
        LOGD("Could not allocate stream\n");
        onError();
        return -1;
    }
    audio_st->id = oc->nb_streams - 1;
    av_dict_copy(&audio_st->metadata, src_audio_stream->metadata, 0);

    AVCodecContext *c = audio_st->codec;
    c->sample_fmt = AV_SAMPLE_FMT_FLTP;
    c->bit_rate = AUDIO_BIT_RATE;
    c->sample_rate = min(audio_dec_ctx->sample_rate, AUDIO_MAX_SAMPLE_RATE);
    c->channels = 1;
    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }
    // NOTE(review): -2 is presumably FF_COMPLIANCE_EXPERIMENTAL, required to
    // open the encoder selected above - confirm against the FFmpeg version in use.
    c->strict_std_compliance = -2;

    swr_ctx = swr_alloc_set_opts(NULL,
                                 AV_CH_LAYOUT_MONO, c->sample_fmt, c->sample_rate,
                                 audio_dec_ctx->channel_layout, audio_dec_ctx->sample_fmt, audio_dec_ctx->sample_rate,
                                 0, NULL);
    if (!swr_ctx) {
        LOGD("Could not allocate resampler context\n");
        onError();
        return -1;
    }
    if (swr_init(swr_ctx) < 0) {
        LOGD("Failed to initialize the resampling context\n");
        onError();
        return -1;
    }

    int ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        LOGD("Could not open audio codec: %s\n", av_err2str(ret));
        onError();
        return -1;
    }
    return 0;
}

/*
 * Sets up the MP4 output: allocates the muxer context, opens dst_filename,
 * creates a video stream (when video_dec_ctx, src_video_stream and fmt_ctx are
 * all given) and/or an audio stream (when audio_dec_ctx and src_audio_stream
 * are given), then writes the container header.
 *
 * bitr is the video encoder bit rate. Container and stream metadata are
 * copied from the source.
 *
 * Returns 0 on success. On failure calls onError() and returns -1; state
 * created so far is left in the file-level globals and is released by
 * cleanup_out().
 */
int prepare_for_video_conversion(const char *dst_filename, AVCodecContext *video_dec_ctx, AVCodecContext *audio_dec_ctx, AVFormatContext *fmt_ctx, AVStream *src_video_stream, AVStream *src_audio_stream, int bitr) {
    if (!video_dec_ctx && !audio_dec_ctx) {
        onError();
        return -1;
    }

    avformat_alloc_output_context2(&oc, NULL, "mp4", dst_filename);
    if (!oc) {
        onError();
        return -1;
    }
    fmt = oc->oformat;

    if (fmt_ctx) {
        av_dict_copy(&oc->metadata, fmt_ctx->metadata, 0);
    }

    int ret = 0;
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&oc->pb, dst_filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            LOGD("Could not open '%s': %s\n", dst_filename, av_err2str(ret));
            onError();
            return -1;
        }
    }

    if (video_dec_ctx && src_video_stream && fmt_ctx) {
        if (setup_video_stream(video_dec_ctx, src_video_stream, bitr) < 0) {
            return -1;
        }
    }

    if (audio_dec_ctx && src_audio_stream) {
        if (setup_audio_stream(audio_dec_ctx, src_audio_stream) < 0) {
            return -1;
        }
    }

    if (oc->nb_streams == 0) {
        LOGD("No output streams were created\n");
        onError();
        return -1;
    }

    // The header is written regardless of which streams exist, so that
    // video-only input also produces a valid container.
    av_dump_format(oc, 0, dst_filename, 1);
    ret = avformat_write_header(oc, NULL);
    if (ret < 0) {
        LOGD("Error occurred when opening output file: %s\n", av_err2str(ret));
        onError();
        return -1;
    }

    if (out_frame) {
        out_frame->pts = 0;
    }
    return 0;
}

/*
 * Releases everything created by prepare_for_video_conversion() and the
 * write_* functions and resets the file-level state. Every pointer is cleared
 * after it is freed, so calling this more than once is harmless.
 */
void cleanup_out() {
    if (video_st) {
        avcodec_close(video_st->codec);
        // avpicture_alloc() made a single allocation referenced by data[0];
        // clear the whole struct so no plane pointer is left dangling.
        av_freep(&dst_picture.data[0]);
        memset(&dst_picture, 0, sizeof(dst_picture));
        if (out_frame) {
            av_free(out_frame);
            out_frame = NULL;
        }
        video_st = NULL;
    }

    if (audio_st) {
        avcodec_close(audio_st->codec);
        if (dst_samples_data) {
            // Free the sample buffer and the plane-pointer array that holds it.
            av_freep(&dst_samples_data[0]);
            av_freep(&dst_samples_data);
        }
        dst_samples_capacity = 0;
        audio_st = NULL;
    }

    if (oc) {
        if (fmt && !(fmt->flags & AVFMT_NOFILE)) {
            avio_close(oc->pb);
            oc->pb = NULL;
        }
        avformat_free_context(oc);
        oc = NULL;
    }
    fmt = NULL;

    if (sws_ctx) {
        sws_freeContext(sws_ctx);
        sws_ctx = NULL;
    }

    if (swr_ctx) {
        swr_free(&swr_ctx);
        swr_ctx = NULL;
    }

    current_n_out = 0;
    current_in_buff = 0;
}

/*
 * Encodes one decoded video frame and writes the resulting packet, if any,
 * to the output. The frame is scaled/converted through sws_ctx when one was
 * created; otherwise the source planes are used directly.
 * Returns 0 on success; on failure calls onError() and returns -1.
 */
int write_video_frame(AVFrame *src_frame) {
    if (!src_frame || !video_st || !out_frame) {
        LOGD("Video output is not initialized\n");
        onError();
        return -1;
    }

    int ret;
    if (sws_ctx) {
        ret = sws_scale(sws_ctx, (const uint8_t * const *)src_frame->data, src_frame->linesize,
                        0, src_frame->height, out_frame->data, out_frame->linesize);
        if (ret < 0) {
            LOGD("scale error: %s\n", av_err2str(ret));
            onError();
            return -1;
        }
    } else {
        // No conversion needed: borrow the source planes for this call.
        for (int i = 0; i < 4; i++) {
            out_frame->data[i] = src_frame->data[i];
            out_frame->linesize[i] = src_frame->linesize[i];
        }
    }

    AVPacket pkt = { 0 };
    int got_packet = 0;
    av_init_packet(&pkt);

    ret = avcodec_encode_video2(video_st->codec, &pkt, out_frame, &got_packet);
    if (ret < 0) {
        LOGD("Error encoding video frame: %s\n", av_err2str(ret));
        onError();
        return -1;
    }

    if (got_packet && pkt.size) {
        pkt.stream_index = video_st->index;
        ret = av_interleaved_write_frame(oc, &pkt);
        if (ret != 0) {
            LOGD("Error while writing video frame: %s\n", av_err2str(ret));
            onError();
            return -1;
        }
    }

    // Advance the presentation time by one encoder tick, expressed in the
    // stream's time base.
    out_frame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
    return 0;
}

/*
 * Encodes and writes audio staged in buff.
 *
 * While more than one encoder frame of samples is staged, one frame at a time
 * is encoded and written. When flush is non-zero, everything that is staged
 * is encoded, including a final short frame. Unconsumed bytes are moved to
 * the front of buff afterwards.
 *
 * Returns 0 on success (also when there is no audio stream); on failure calls
 * onError() and returns -1.
 */
int check_write_packet(int flush) {
    if (!audio_st) {
        return 0;
    }

    AVCodecContext *enc = audio_st->codec;
    const int frame_size = enc->frame_size;
    if (frame_size <= 0) {
        // A zero frame size would never reduce current_n_out below.
        LOGD("Audio encoder reports no frame size\n");
        onError();
        return -1;
    }

    const int frame_bytes = av_samples_get_buffer_size(NULL, enc->channels, frame_size, enc->sample_fmt, 1);
    if (frame_bytes <= 0) {
        LOGD("Could not compute audio frame size\n");
        onError();
        return -1;
    }

    int consumed = 0;  // bytes of buff already handed to the encoder

    while (current_n_out > frame_size || (flush && current_n_out)) {
        AVFrame *frame = avcodec_alloc_frame();
        if (!frame) {
            LOGD("Could not allocate audio frame\n");
            onError();
            return -1;
        }

        AVPacket pkt = { 0 };
        int got_packet = 0;
        int failed = 0;
        av_init_packet(&pkt);

        frame->nb_samples = min(frame_size, current_n_out);
        const int chunk_bytes = min(frame_bytes, current_in_buff);

        int ret = avcodec_fill_audio_frame(frame, enc->channels, enc->sample_fmt, buff + consumed, chunk_bytes, 1);
        if (ret < 0) {
            LOGD("Error fill frame: %s\n", av_err2str(ret));
            failed = 1;
        }

        if (!failed) {
            ret = avcodec_encode_audio2(enc, &pkt, frame, &got_packet);
            if (ret < 0) {
                LOGD("Error encoding audio frame: %s\n", av_err2str(ret));
                failed = 1;
            }
        }

        if (!failed && got_packet) {
            pkt.stream_index = audio_st->index;
            ret = av_interleaved_write_frame(oc, &pkt);
            if (ret != 0) {
                LOGD("Error while writing audio frame: %s\n", av_err2str(ret));
                failed = 1;
            }
        }

        if (!failed) {
            consumed += frame_bytes;
            current_n_out -= frame->nb_samples;
            current_in_buff -= chunk_bytes;
        }

        // The frame is released on every path, including failures.
        avcodec_free_frame(&frame);

        if (failed) {
            onError();
            return -1;
        }
    }

    if (current_in_buff != 0 && consumed != 0) {
        // Source and destination are in the same array; memmove is the
        // correct call for a shift inside one buffer.
        memmove(buff, buff + consumed, current_in_buff);
    }
    return 0;
}

/*
 * Resamples one decoded audio frame to the encoder's format, stages the
 * result in buff and encodes whatever full frames become available.
 *
 * src_codec is only consulted for the sample rate when src_frame does not
 * carry one.
 *
 * Returns 0 on success; on failure calls onError() and returns -1.
 */
int write_audio_frame(AVFrame *src_frame, AVCodecContext *src_codec) {
    if (!src_frame || !audio_st || !swr_ctx) {
        LOGD("Audio output is not initialized\n");
        onError();
        return -1;
    }

    AVCodecContext *enc = audio_st->codec;
    const int n_in = src_frame->nb_samples;

    int in_rate = src_frame->sample_rate;
    if (in_rate <= 0 && src_codec) {
        in_rate = src_codec->sample_rate;
    }
    if (in_rate <= 0) {
        LOGD("Unknown source sample rate\n");
        onError();
        return -1;
    }

    // Upper bound for the number of samples the resampler may produce.
    const double ratio = (double)enc->sample_rate / in_rate;
    int n_out = n_in * ratio + 32;
    int64_t delay = swr_get_delay(swr_ctx, enc->sample_rate);
    if (delay > 0) {
        n_out += delay;
    }

    // (Re)allocate the conversion buffer whenever this frame needs more room
    // than the current one provides; swr_convert() is told it may write n_out
    // samples, so the buffer must really be that large.
    if (!dst_samples_data || n_out > dst_samples_capacity) {
        if (dst_samples_data) {
            av_freep(&dst_samples_data[0]);
            av_freep(&dst_samples_data);
        }
        dst_samples_capacity = 0;
        int ret = av_samples_alloc_array_and_samples(&dst_samples_data, NULL, enc->channels, n_out, enc->sample_fmt, 0);
        if (ret < 0) {
            dst_samples_data = NULL;
            LOGD("Could not allocate destination samples\n");
            onError();
            return -1;
        }
        dst_samples_capacity = n_out;
    }

    const int converted = swr_convert(swr_ctx, dst_samples_data, n_out,
                                      (const uint8_t **)src_frame->extended_data, n_in);
    if (converted < 0) {
        LOGD("Error while converting\n");
        onError();
        return -1;
    }
    if (converted == 0) {
        // The resampler produced nothing for this input; not an error.
        return 0;
    }

    const int total_size = av_samples_get_buffer_size(NULL, enc->channels, converted, enc->sample_fmt, 1);
    if (total_size <= 0) {
        LOGD("Could not compute converted audio size\n");
        onError();
        return -1;
    }
    const int sample_bytes = total_size / converted;

    // Stage the converted audio in pieces that fit the free part of buff and
    // drain it through the encoder between pieces, so buff is never overrun.
    // When everything fits at once this is a single copy followed by a single
    // check_write_packet(0) call.
    const uint8_t *src = dst_samples_data[0];
    int remaining = total_size;
    while (remaining > 0) {
        int chunk = min((int)sizeof(buff) - current_in_buff, remaining);
        chunk -= chunk % sample_bytes;  // only whole samples
        if (chunk <= 0) {
            LOGD("Audio staging buffer is full\n");
            onError();
            return -1;
        }

        memcpy(buff + current_in_buff, src, chunk);
        current_in_buff += chunk;
        current_n_out += chunk / sample_bytes;
        src += chunk;
        remaining -= chunk;

        if (check_write_packet(0) < 0) {
            return -1;
        }
    }
    return 0;
}

/*
 * Finishes the conversion: encodes any audio still staged and writes the
 * container trailer. Does nothing for parts that were never set up.
 */
void post_video_conversion() {
    if (audio_st) {
        check_write_packet(1);
    }
    if (oc) {
        av_write_trailer(oc);
    }
}