#include "VideoStreamingPart.h"

#include "rtc_base/logging.h"
#include "rtc_base/third_party/base64/base64.h"
#include "api/video/i420_buffer.h"

#include "AVIOContextImpl.h"

// NOTE(review): the names of the three system headers that used to follow were
// lost when this file was mangled (only bare "#include" tokens survived). The
// list below covers every standard facility this translation unit uses
// directly; re-add any other header the original listed if it can be recovered.
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// NOTE(review): all template arguments in this file were stripped by the same
// mangling and have been reconstructed from how each value is used. The byte
// element type (uint8_t) and the project types AVIOContextImpl,
// AudioStreamingPart and AudioStreamingPart::StreamingPartChannel could not be
// derived from this file alone -- confirm them against VideoStreamingPart.h,
// AVIOContextImpl.h and AudioStreamingPart.h.

namespace tgcalls {

namespace {

// Move-only owner of an AVPacket obtained from av_packet_alloc().
class MediaDataPacket {
public:
    MediaDataPacket() : _packet(av_packet_alloc()) {
    }

    MediaDataPacket(MediaDataPacket &&other) noexcept : _packet(other._packet) {
        other._packet = nullptr;
    }

    ~MediaDataPacket() {
        if (_packet) {
            av_packet_free(&_packet);
        }
    }

    // Returns the owned packet; null after being moved from or if allocation failed.
    AVPacket *packet() {
        return _packet;
    }

private:
    AVPacket *_packet = nullptr;
};

// A demuxed packet together with the pts/dts captured when it was read.
class DecodableFrame {
public:
    DecodableFrame(MediaDataPacket packet, int64_t pts, int64_t dts) :
    _packet(std::move(packet)),
    _pts(pts),
    _dts(dts) {
    }

    ~DecodableFrame() {
    }

    MediaDataPacket &packet() {
        return _packet;
    }

    int64_t pts() {
        return _pts;
    }

    int64_t dts() {
        return _dts;
    }

private:
    MediaDataPacket _packet;
    int64_t _pts = 0;
    int64_t _dts = 0;
};

// Move-only owner of an AVFrame obtained from av_frame_alloc().
class Frame {
public:
    Frame() {
        _frame = av_frame_alloc();
    }

    Frame(Frame &&other) noexcept {
        _frame = other._frame;
        other._frame = nullptr;
    }

    ~Frame() {
        if (_frame) {
            // av_frame_free() drops the buffer references AND releases the
            // AVFrame itself. The previous av_frame_unref() only did the
            // former, leaking the struct allocated by av_frame_alloc().
            av_frame_free(&_frame);
        }
    }

    // Returns the owned frame; null after being moved from or if allocation failed.
    AVFrame *frame() {
        return _frame;
    }

    // Returns the frame's presentation time (frame pts * stream time base)
    // relative to the first frame seen. firstFramePts is an in/out
    // accumulator: while it is negative it is treated as "not set yet" and is
    // initialised from this frame.
    double pts(AVStream *stream, double &firstFramePts) {
        int64_t framePts = _frame->pts;
        double spf = av_q2d(stream->time_base);
        double value = ((double)framePts) * spf;

        if (firstFramePts < 0.0) {
            firstFramePts = value;
        }

        return value - firstFramePts;
    }

private:
    AVFrame *_frame = nullptr;
};

// One event record from the stream header, as decoded by readVideoStreamEvent().
struct VideoStreamEvent {
    int32_t offset = 0;
    std::string endpointId;
    int32_t rotation = 0;
    int32_t extra = 0;
};

// The header that precedes the media payload, as decoded by consumeVideoStreamInfo().
struct VideoStreamInfo {
    std::string container;
    int32_t activeMask = 0;
    std::vector<VideoStreamEvent> events;
};

// Reads 4 bytes at `offset` into an int32_t (host byte order, via memcpy) and
// advances `offset` by 4. Returns nullopt, leaving `offset` untouched, when
// fewer than 4 bytes remain.
absl::optional<int32_t> readInt32(std::vector<uint8_t> const &data, int &offset) {
    if (offset < 0 || static_cast<size_t>(offset) + 4 > data.size()) {
        return absl::nullopt;
    }

    int32_t value = 0;
    memcpy(&value, data.data() + offset, 4);
    offset += 4;

    return value;
}

// Reads `count` (1..4) bytes at `offset` into the first bytes of a zeroed
// int32_t and advances `offset` by `count`. Returns nullopt, leaving `offset`
// untouched, for any other count or when not enough bytes remain.
absl::optional<int32_t> readBytesAsInt32(std::vector<uint8_t> const &data, int &offset, int count) {
    if (count <= 0 || count > 4) {
        return absl::nullopt;
    }
    if (offset < 0 || static_cast<size_t>(offset) + static_cast<size_t>(count) > data.size()) {
        return absl::nullopt;
    }

    int32_t value = 0;
    memcpy(&value, data.data() + offset, count);
    offset += count;

    return value;
}

// Rounds numToRound up to the next multiple of `multiple`; a zero `multiple`
// returns the input unchanged.
int32_t roundUp(int32_t numToRound, int32_t multiple) {
    if (multiple == 0) {
        return numToRound;
    }

    int32_t remainder = numToRound % multiple;
    if (remainder == 0) {
        return numToRound;
    }

    return numToRound + multiple - remainder;
}

// Reads a length-prefixed string and advances `offset` past it and its padding.
//
// Layout handled here: a 1-byte prefix; if that prefix equals 254 the length
// is taken from the following 3 bytes, otherwise the prefix itself is the
// length. After the payload, `offset` is advanced so that prefix + payload
// occupy a multiple of 4 bytes. Returns nullopt if the buffer is too short.
// `offset` may already have been advanced past the prefix when nullopt is
// returned.
absl::optional<std::string> readSerializedString(std::vector<uint8_t> const &data, int &offset) {
    const auto prefix = readBytesAsInt32(data, offset, 1);
    if (!prefix) {
        return absl::nullopt;
    }

    int paddingBytes = 0;
    int length = 0;
    if (prefix.value() == 254) {
        const auto longLength = readBytesAsInt32(data, offset, 3);
        if (!longLength) {
            return absl::nullopt;
        }
        length = longLength.value();
        // The 4 header bytes are already aligned, so only the payload is padded.
        paddingBytes = roundUp(length, 4) - length;
    } else {
        length = prefix.value();
        // The single prefix byte counts towards the 4-byte alignment.
        paddingBytes = roundUp(length + 1, 4) - (length + 1);
    }

    if (static_cast<size_t>(offset) + static_cast<size_t>(length) > data.size()) {
        return absl::nullopt;
    }

    std::string result(data.data() + offset, data.data() + offset + length);

    offset += length;
    // May step past the end of the buffer; every reader re-checks bounds
    // before touching data again.
    offset += paddingBytes;

    return result;
}

// Reads one VideoStreamEvent (int32 offset, string endpointId, int32 rotation,
// int32 extra, in that order). Returns nullopt if any field cannot be read.
absl::optional<VideoStreamEvent> readVideoStreamEvent(std::vector<uint8_t> const &data, int &offset) {
    VideoStreamEvent event;

    if (const auto offsetValue = readInt32(data, offset)) {
        event.offset = offsetValue.value();
    } else {
        return absl::nullopt;
    }

    if (const auto endpointId = readSerializedString(data, offset)) {
        event.endpointId = endpointId.value();
    } else {
        return absl::nullopt;
    }

    if (const auto rotation = readInt32(data, offset)) {
        event.rotation = rotation.value();
    } else {
        return absl::nullopt;
    }

    if (const auto extra = readInt32(data, offset)) {
        event.extra = extra.value();
    } else {
        return absl::nullopt;
    }

    return event;
}

// Parses the header at the front of `data` and, on success, erases the parsed
// bytes so that `data` holds only what follows the header. On failure `data`
// is left unmodified and nullopt is returned.
absl::optional<VideoStreamInfo> consumeVideoStreamInfo(std::vector<uint8_t> &data) {
    int offset = 0;

    if (const auto signature = readInt32(data, offset)) {
        if (static_cast<uint32_t>(signature.value()) != 0xa12e810dU) {
            return absl::nullopt;
        }
    } else {
        return absl::nullopt;
    }

    VideoStreamInfo info;

    if (const auto container = readSerializedString(data, offset)) {
        info.container = container.value();
    } else {
        return absl::nullopt;
    }

    if (const auto activeMask = readInt32(data, offset)) {
        info.activeMask = activeMask.value();
    } else {
        return absl::nullopt;
    }

    if (const auto eventCount = readInt32(data, offset)) {
        if (eventCount.value() > 0) {
            // NOTE(review): exactly one event is parsed regardless of
            // eventCount. If a header ever carries more than one event, the
            // remaining event records stay at the front of `data` and are
            // handed to the demuxer as payload. Behaviour kept as-is; confirm
            // against the wire format whether this should loop eventCount times.
            if (const auto event = readVideoStreamEvent(data, offset)) {
                info.events.push_back(event.value());
            } else {
                return absl::nullopt;
            }
        } else {
            return absl::nullopt;
        }
    } else {
        return absl::nullopt;
    }

    data.erase(data.begin(), data.begin() + offset);

    return info;
}

} // namespace

// Demuxes and decodes the first video stream found in one in-memory container
// blob, handing out decoded frames one at a time through getNextFrame().
class VideoStreamingPartInternal {
public:
    // Opens `fileData` as a container of format `container` and prepares a
    // decoder for its first video stream. Any failure leaves the object in a
    // state where getNextFrame() yields no frames; nothing is thrown here.
    VideoStreamingPartInternal(std::string endpointId, webrtc::VideoRotation rotation, std::vector<uint8_t> &&fileData, std::string const &container) :
    _endpointId(std::move(endpointId)),
    _rotation(rotation) {
        _avIoContext = std::make_unique<AVIOContextImpl>(std::move(fileData));

        int ret = 0;

#if LIBAVFORMAT_VERSION_MAJOR >= 59
        const
#endif
        AVInputFormat *inputFormat = av_find_input_format(container.c_str());
        if (!inputFormat) {
            _didReadToEnd = true;
            return;
        }

        _inputFormatContext = avformat_alloc_context();
        if (!_inputFormatContext) {
            _didReadToEnd = true;
            return;
        }

        _inputFormatContext->pb = _avIoContext->getContext();

        // On failure avformat_open_input() frees the user-supplied context and
        // nulls the pointer, so there is nothing to release on this path.
        if ((ret = avformat_open_input(&_inputFormatContext, "", inputFormat, nullptr)) < 0) {
            _didReadToEnd = true;
            return;
        }

        if ((ret = avformat_find_stream_info(_inputFormatContext, nullptr)) < 0) {
            _didReadToEnd = true;

            avformat_close_input(&_inputFormatContext);
            _inputFormatContext = nullptr;
            return;
        }

        // Pick the first video stream; every other stream is ignored.
        AVCodecParameters *videoCodecParameters = nullptr;
        AVStream *videoStream = nullptr;
        for (unsigned int i = 0; i < _inputFormatContext->nb_streams; i++) {
            AVStream *inStream = _inputFormatContext->streams[i];

            AVCodecParameters *inCodecpar = inStream->codecpar;
            if (inCodecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
                continue;
            }
            videoCodecParameters = inCodecpar;
            videoStream = inStream;

            break;
        }

        if (videoCodecParameters && videoStream) {
            const AVCodec *codec = avcodec_find_decoder(videoCodecParameters->codec_id);
            if (codec) {
                _codecContext = avcodec_alloc_context3(codec);
                if (!_codecContext) {
                    // Allocation failed: previously this null pointer was
                    // passed straight into avcodec_parameters_to_context().
                    _didReadToEnd = true;
                } else {
                    ret = avcodec_parameters_to_context(_codecContext, videoCodecParameters);
                    if (ret < 0) {
                        _didReadToEnd = true;

                        avcodec_free_context(&_codecContext);
                        _codecContext = nullptr;
                    } else {
                        _codecContext->pkt_timebase = videoStream->time_base;

                        ret = avcodec_open2(_codecContext, codec, nullptr);
                        if (ret < 0) {
                            _didReadToEnd = true;

                            avcodec_free_context(&_codecContext);
                            _codecContext = nullptr;
                        } else {
                            _videoStream = videoStream;
                        }
                    }
                }
            }
        }
    }

    ~VideoStreamingPartInternal() {
        if (_codecContext) {
            // avcodec_free_context() closes the codec itself; the separate
            // (deprecated) avcodec_close() call is not needed.
            avcodec_free_context(&_codecContext);
        }
        if (_inputFormatContext) {
            avformat_close_input(&_inputFormatContext);
        }
    }

    std::string endpointId() {
        return _endpointId;
    }

    // Reads the next packet of any stream from the container. Returns nullopt
    // once reading has ended, when no container is open, or on a read error.
    absl::optional<MediaDataPacket> readPacket() {
        if (_didReadToEnd) {
            return absl::nullopt;
        }
        if (!_inputFormatContext) {
            return absl::nullopt;
        }

        MediaDataPacket packet;
        if (!packet.packet()) {
            // av_packet_alloc() failed; av_read_frame() must not be given null.
            return absl::nullopt;
        }

        int result = av_read_frame(_inputFormatContext, packet.packet());
        if (result < 0) {
            return absl::nullopt;
        }

        return packet;
    }

    // Returns the next packet that belongs to the selected video stream,
    // discarding packets of other streams. Returns nullptr when no more
    // packets can be read.
    std::shared_ptr<DecodableFrame> readNextDecodableFrame() {
        while (true) {
            absl::optional<MediaDataPacket> packet = readPacket();
            if (!packet) {
                return nullptr;
            }

            if (_videoStream && packet->packet()->stream_index == _videoStream->index) {
                // Capture the timestamps before ownership of the packet moves.
                const int64_t pts = packet->packet()->pts;
                const int64_t dts = packet->packet()->dts;
                return std::make_shared<DecodableFrame>(std::move(packet.value()), pts, dts);
            }
        }
    }

    // Copies the planes of the most recently decoded frame into a WebRTC I420
    // buffer and wraps it with this part's endpoint id, rotation, relative pts
    // and the current frame index. Returns nullopt if the copy yields no buffer.
    // NOTE(review): planes 0..2 are passed as Y/U/V without checking the
    // frame's pixel format -- presumably the decoder always outputs planar
    // 4:2:0 here; confirm.
    absl::optional<VideoStreamingPartFrame> convertCurrentFrame() {
        rtc::scoped_refptr<webrtc::I420Buffer> i420Buffer = webrtc::I420Buffer::Copy(
            _frame.frame()->width,
            _frame.frame()->height,
            _frame.frame()->data[0],
            _frame.frame()->linesize[0],
            _frame.frame()->data[1],
            _frame.frame()->linesize[1],
            _frame.frame()->data[2],
            _frame.frame()->linesize[2]
        );
        if (i420Buffer) {
            auto videoFrame = webrtc::VideoFrame::Builder()
                .set_video_frame_buffer(i420Buffer)
                .set_rotation(_rotation)
                .build();

            return VideoStreamingPartFrame(_endpointId, videoFrame, _frame.pts(_videoStream, _firstFramePts), _frameIndex);
        } else {
            return absl::nullopt;
        }
    }

    // Returns the next decoded frame, or nullopt when the part is exhausted or
    // decoding failed.
    //
    // While packets are available, each one is sent to the decoder and at most
    // one frame is received per packet. When the demuxer runs dry the decoder
    // is flushed once; the flushed frames are queued in _finalFrames and
    // handed out by subsequent iterations/calls.
    absl::optional<VideoStreamingPartFrame> getNextFrame() {
        if (!_codecContext || !_frame.frame()) {
            return {};
        }

        while (true) {
            if (_didReadToEnd) {
                if (!_finalFrames.empty()) {
                    auto frame = _finalFrames[0];
                    _finalFrames.erase(_finalFrames.begin());
                    return frame;
                } else {
                    break;
                }
            } else {
                const auto frame = readNextDecodableFrame();
                if (frame) {
                    const int sendStatus = avcodec_send_packet(_codecContext, frame->packet().packet());
                    if (sendStatus == 0) {
                        const int receiveStatus = avcodec_receive_frame(_codecContext, _frame.frame());
                        if (receiveStatus == 0) {
                            auto convertedFrame = convertCurrentFrame();
                            if (convertedFrame) {
                                _frameIndex++;
                                return convertedFrame;
                            }
                        } else if (receiveStatus == AVERROR(EAGAIN)) {
                            // more data needed
                        } else {
                            _didReadToEnd = true;
                            break;
                        }
                    } else {
                        _didReadToEnd = true;
                        return {};
                    }
                } else {
                    _didReadToEnd = true;

                    // A null packet puts the decoder into draining mode.
                    const int flushStatus = avcodec_send_packet(_codecContext, nullptr);
                    if (flushStatus == 0) {
                        while (true) {
                            const int receiveStatus = avcodec_receive_frame(_codecContext, _frame.frame());
                            if (receiveStatus == 0) {
                                auto convertedFrame = convertCurrentFrame();
                                if (convertedFrame) {
                                    _frameIndex++;
                                    _finalFrames.push_back(convertedFrame.value());
                                }
                            } else {
                                break;
                            }
                        }
                    }
                }
            }
        }

        return {};
    }

private:
    std::string _endpointId;
    webrtc::VideoRotation _rotation = webrtc::VideoRotation::kVideoRotation_0;

    std::unique_ptr<AVIOContextImpl> _avIoContext;

    AVFormatContext *_inputFormatContext = nullptr;
    AVCodecContext *_codecContext = nullptr;
    AVStream *_videoStream = nullptr; // points into _inputFormatContext; not owned
    Frame _frame;

    std::vector<VideoStreamingPartFrame> _finalFrames; // frames drained from the decoder at end of input

    int _frameIndex = 0;
    double _firstFramePts = -1.0; // negative means "no frame seen yet" (see Frame::pts)
    bool _didReadToEnd = false;
};

// Parses the header of a streaming part and builds one sub-decoder per event
// (audio or video, depending on contentType) over the matching payload slice.
class VideoStreamingPartState {
public:
    VideoStreamingPartState(std::vector<uint8_t> &&data, VideoStreamingPart::ContentType contentType) {
        _videoStreamInfo = consumeVideoStreamInfo(data);
        if (!_videoStreamInfo) {
            return;
        }

        // After consumeVideoStreamInfo() `data` holds only the payload. Event
        // i covers [events[i].offset, events[i + 1].offset), and the last
        // event runs to the end of the payload. Events with inconsistent
        // bounds are skipped.
        for (size_t i = 0; i < _videoStreamInfo->events.size(); i++) {
            if (_videoStreamInfo->events[i].offset < 0) {
                continue;
            }
            size_t endOffset = 0;
            if (i == _videoStreamInfo->events.size() - 1) {
                endOffset = data.size();
            } else {
                // A negative next offset wraps to a huge value and is rejected
                // by the endOffset > data.size() check below.
                endOffset = _videoStreamInfo->events[i + 1].offset;
            }
            if (endOffset <= static_cast<size_t>(_videoStreamInfo->events[i].offset)) {
                continue;
            }
            if (endOffset > data.size()) {
                continue;
            }
            std::vector<uint8_t> dataSlice(data.begin() + _videoStreamInfo->events[i].offset, data.begin() + endOffset);

            // Unknown rotation values fall back to no rotation.
            webrtc::VideoRotation rotation = webrtc::VideoRotation::kVideoRotation_0;
            switch (_videoStreamInfo->events[i].rotation) {
                case 0: {
                    rotation = webrtc::VideoRotation::kVideoRotation_0;
                    break;
                }
                case 90: {
                    rotation = webrtc::VideoRotation::kVideoRotation_90;
                    break;
                }
                case 180: {
                    rotation = webrtc::VideoRotation::kVideoRotation_180;
                    break;
                }
                case 270: {
                    rotation = webrtc::VideoRotation::kVideoRotation_270;
                    break;
                }
                default: {
                    break;
                }
            }

            switch (contentType) {
                case VideoStreamingPart::ContentType::Audio: {
                    auto part = std::make_unique<AudioStreamingPart>(std::move(dataSlice), _videoStreamInfo->container, true);
                    _parsedAudioParts.push_back(std::move(part));

                    break;
                }
                case VideoStreamingPart::ContentType::Video: {
                    auto part = std::make_unique<VideoStreamingPartInternal>(_videoStreamInfo->events[i].endpointId, rotation, std::move(dataSlice), _videoStreamInfo->container);
                    _parsedVideoParts.push_back(std::move(part));

                    break;
                }
                default: {
                    break;
                }
            }
        }
    }

    ~VideoStreamingPartState() {
    }

    // Returns the frame to show at `timestamp`: the last buffered frame whose
    // successor's pts is still greater than `timestamp`, or the newest
    // buffered frame once decoding is exhausted. Returns nullopt when no frame
    // is available at all.
    absl::optional<VideoStreamingPartFrame> getFrameAtRelativeTimestamp(double timestamp) {
        while (true) {
            // Drop frames that are already superseded at `timestamp`.
            while (_availableFrames.size() >= 2) {
                if (timestamp >= _availableFrames[1].pts) {
                    _availableFrames.erase(_availableFrames.begin());
                } else {
                    break;
                }
            }

            // Keep two frames buffered so the successor's pts can be compared;
            // an exhausted sub-part is removed and the next one is tried.
            if (_availableFrames.size() < 2) {
                if (!_parsedVideoParts.empty()) {
                    auto result = _parsedVideoParts[0]->getNextFrame();
                    if (result) {
                        _availableFrames.push_back(result.value());
                    } else {
                        _parsedVideoParts.erase(_parsedVideoParts.begin());
                    }
                    continue;
                }
            }

            if (!_availableFrames.empty()) {
                for (size_t i = 1; i < _availableFrames.size(); i++) {
                    if (timestamp < _availableFrames[i].pts) {
                        return _availableFrames[i - 1];
                    }
                }
                return _availableFrames[_availableFrames.size() - 1];
            } else {
                return absl::nullopt;
            }
        }
    }

    // Endpoint id of the video sub-part currently being decoded, if any.
    absl::optional<std::string> getActiveEndpointId() const {
        if (!_parsedVideoParts.empty()) {
            return _parsedVideoParts[0]->endpointId();
        } else {
            return absl::nullopt;
        }
    }

    // True while at least one video sub-part has not been exhausted.
    bool hasRemainingFrames() const {
        return !_parsedVideoParts.empty();
    }

    // Returns the first positive remaining duration reported by an audio
    // sub-part, discarding sub-parts that report <= 0; returns 0 when none remain.
    int getAudioRemainingMilliseconds() {
        while (!_parsedAudioParts.empty()) {
            auto firstPartResult = _parsedAudioParts[0]->getRemainingMilliseconds();
            if (firstPartResult <= 0) {
                _parsedAudioParts.erase(_parsedAudioParts.begin());
            } else {
                return firstPartResult;
            }
        }
        return 0;
    }

    // Returns the next non-empty result of get10msPerChannel() from the audio
    // sub-parts, discarding sub-parts that return nothing; returns an empty
    // vector when none remain.
    std::vector<AudioStreamingPart::StreamingPartChannel> getAudio10msPerChannel(AudioStreamingPartPersistentDecoder &persistentDecoder) {
        while (!_parsedAudioParts.empty()) {
            auto firstPartResult = _parsedAudioParts[0]->get10msPerChannel(persistentDecoder);
            if (firstPartResult.empty()) {
                _parsedAudioParts.erase(_parsedAudioParts.begin());
            } else {
                return firstPartResult;
            }
        }
        return {};
    }

private:
    absl::optional<VideoStreamInfo> _videoStreamInfo;
    std::vector<std::unique_ptr<VideoStreamingPartInternal>> _parsedVideoParts;
    std::vector<VideoStreamingPartFrame> _availableFrames;

    std::vector<std::unique_ptr<AudioStreamingPart>> _parsedAudioParts;
};

// Public facade. Every accessor forwards to the state object, which exists
// only when the part was constructed from non-empty data.
VideoStreamingPart::VideoStreamingPart(std::vector<uint8_t> &&data, VideoStreamingPart::ContentType contentType) {
    if (!data.empty()) {
        // _state is declared in VideoStreamingPart.h (outside this file), so
        // its raw-pointer ownership is kept as-is here.
        _state = new VideoStreamingPartState(std::move(data), contentType);
    }
}

VideoStreamingPart::~VideoStreamingPart() {
    // delete on a null pointer is a no-op, so no check is needed.
    delete _state;
}

absl::optional<VideoStreamingPartFrame> VideoStreamingPart::getFrameAtRelativeTimestamp(double timestamp) {
    return _state
        ? _state->getFrameAtRelativeTimestamp(timestamp)
        : absl::nullopt;
}

absl::optional<std::string> VideoStreamingPart::getActiveEndpointId() const {
    return _state
        ? _state->getActiveEndpointId()
        : absl::nullopt;
}

bool VideoStreamingPart::hasRemainingFrames() const {
    return _state
        ? _state->hasRemainingFrames()
        : false;
}

int VideoStreamingPart::getAudioRemainingMilliseconds() {
    return _state
        ? _state->getAudioRemainingMilliseconds()
        : 0;
}

std::vector<AudioStreamingPart::StreamingPartChannel> VideoStreamingPart::getAudio10msPerChannel(AudioStreamingPartPersistentDecoder &persistentDecoder) {
    return _state
        ? _state->getAudio10msPerChannel(persistentDecoder)
        : std::vector<AudioStreamingPart::StreamingPartChannel>();
}

} // namespace tgcalls