Nagram/TMessagesProj/jni/voip/tgcalls/group/VideoStreamingPart.cpp
2021-08-31 22:06:39 +03:00

660 lines
18 KiB
C++

#include "VideoStreamingPart.h"

#include "rtc_base/logging.h"
#include "rtc_base/third_party/base64/base64.h"
#include "api/video/i420_buffer.h"

extern "C" {
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
}

#include <cerrno>
#include <string>
#include <set>
#include <map>
namespace tgcalls {
namespace {
class AVIOContextImpl {
public:
AVIOContextImpl(std::vector<uint8_t> &&fileData) :
_fileData(std::move(fileData)) {
_buffer.resize(4 * 1024);
_context = avio_alloc_context(_buffer.data(), (int)_buffer.size(), 0, this, &AVIOContextImpl::read, NULL, &AVIOContextImpl::seek);
}
~AVIOContextImpl() {
av_free(_context);
}
static int read(void *opaque, unsigned char *buffer, int bufferSize) {
AVIOContextImpl *instance = static_cast<AVIOContextImpl *>(opaque);
int bytesToRead = std::min(bufferSize, ((int)instance->_fileData.size()) - instance->_fileReadPosition);
if (bytesToRead < 0) {
bytesToRead = 0;
}
if (bytesToRead > 0) {
memcpy(buffer, instance->_fileData.data() + instance->_fileReadPosition, bytesToRead);
instance->_fileReadPosition += bytesToRead;
return bytesToRead;
} else {
return AVERROR_EOF;
}
}
static int64_t seek(void *opaque, int64_t offset, int whence) {
AVIOContextImpl *instance = static_cast<AVIOContextImpl *>(opaque);
if (whence == 0x10000) {
return (int64_t)instance->_fileData.size();
} else {
int64_t seekOffset = std::min(offset, (int64_t)instance->_fileData.size());
if (seekOffset < 0) {
seekOffset = 0;
}
instance->_fileReadPosition = (int)seekOffset;
return seekOffset;
}
}
AVIOContext *getContext() {
return _context;
}
private:
std::vector<uint8_t> _fileData;
int _fileReadPosition = 0;
std::vector<uint8_t> _buffer;
AVIOContext *_context = nullptr;
};
// Move-only owner of an AVPacket obtained from av_packet_alloc().
class MediaDataPacket {
public:
    // Allocates a fresh packet.
    MediaDataPacket() {
        _packet = av_packet_alloc();
    }

    // Steals the packet from `other`, leaving it empty.
    MediaDataPacket(MediaDataPacket &&other) {
        _packet = other._packet;
        other._packet = nullptr;
    }

    // Releases the packet unless it has been moved away.
    ~MediaDataPacket() {
        if (_packet != nullptr) {
            av_packet_free(&_packet);
        }
    }

    // Borrowed pointer to the underlying packet (nullptr after a move).
    AVPacket *packet() {
        return _packet;
    }

private:
    AVPacket *_packet = nullptr;
};
// A demuxed packet bundled with the presentation/decoding timestamps that
// were captured for it.
class DecodableFrame {
public:
    // Takes ownership of `packet` and records its timestamps.
    DecodableFrame(MediaDataPacket packet, int64_t pts, int64_t dts) :
    _packet(std::move(packet)),
    _pts(pts),
    _dts(dts) {
    }

    ~DecodableFrame() = default;

    // The owned packet.
    MediaDataPacket &packet() {
        return _packet;
    }

    // Presentation timestamp passed at construction.
    int64_t pts() {
        return _pts;
    }

    // Decoding timestamp passed at construction.
    int64_t dts() {
        return _dts;
    }

private:
    MediaDataPacket _packet;
    int64_t _pts = 0;
    int64_t _dts = 0;
};
class Frame {
public:
Frame() {
_frame = av_frame_alloc();
}
Frame(Frame &&other) {
_frame = other._frame;
other._frame = nullptr;
}
~Frame() {
if (_frame) {
av_frame_unref(_frame);
}
}
AVFrame *frame() {
return _frame;
}
double pts(AVStream *stream) {
int64_t framePts = _frame->pts;
double spf = av_q2d(stream->time_base);
return ((double)framePts) * spf;
}
double duration(AVStream *stream) {
int64_t frameDuration = _frame->pkt_duration;
double spf = av_q2d(stream->time_base);
if (frameDuration != 0) {
return ((double)frameDuration) * spf;
} else {
return spf;
}
}
private:
AVFrame *_frame = nullptr;
};
// One event entry parsed from the stream header.
struct VideoStreamEvent {
    // Byte offset at which this event's media data starts, relative to the
    // payload that follows the header.
    int32_t offset{0};
    // Endpoint the media data belongs to.
    std::string endpointId{};
    // Rotation in degrees; 0, 90, 180 and 270 are recognised by the consumer.
    int32_t rotation{0};
    // Additional 32-bit field read from the header; not used in this file.
    int32_t extra{0};
};
// Header that precedes the media payload of a streaming part.
struct VideoStreamInfo {
    // Container format name; passed to av_find_input_format() by the consumer.
    std::string container{};
    // 32-bit mask read from the header; not interpreted in this file.
    int32_t activeMask{0};
    // Events listed in the header.
    std::vector<VideoStreamEvent> events{};
};
// Reads a 32-bit integer (host byte order) from `data` at `offset`.
// On success `offset` is advanced by four bytes; when fewer than four bytes
// remain, absl::nullopt is returned and `offset` is left untouched.
absl::optional<int32_t> readInt32(std::vector<uint8_t> const &data, int &offset) {
    const int endOffset = offset + 4;
    if (endOffset > data.size()) {
        return absl::nullopt;
    }

    int32_t result = 0;
    memcpy(&result, data.data() + offset, sizeof(result));
    offset = endOffset;
    return result;
}
// Reads `count` (1..4) bytes from `data` at `offset` into the low-order bytes
// of a zero-initialised 32-bit integer (host byte order).
//
// On success `offset` is advanced by `count`. Returns absl::nullopt, leaving
// `offset` untouched, when `count` is outside 1..4, when `offset` is negative
// or when fewer than `count` bytes remain.
//
// The result is a full int32_t: returning uint8_t would silently drop the
// upper bytes of multi-byte values such as the 3-byte string length.
absl::optional<int32_t> readBytesAsInt32(std::vector<uint8_t> const &data, int &offset, int count) {
    if (count <= 0 || count > 4) {
        return absl::nullopt;
    }
    if (offset < 0 || static_cast<size_t>(offset) + static_cast<size_t>(count) > data.size()) {
        return absl::nullopt;
    }

    int32_t value = 0;
    memcpy(&value, data.data() + offset, static_cast<size_t>(count));
    offset += count;
    return value;
}
// Rounds `numToRound` up to the next multiple of `multiple`.
// The value is returned unchanged when `multiple` is zero or when it is
// already an exact multiple.
int32_t roundUp(int32_t numToRound, int32_t multiple) {
    const int32_t excess = (multiple != 0) ? (numToRound % multiple) : 0;
    return (excess != 0) ? (numToRound + multiple - excess) : numToRound;
}
// Reads a length-prefixed string from `data` at `offset`.
//
// The first byte is either the string length or the marker 254, in which case
// the length is taken from the following three bytes. After the string bytes,
// `offset` is additionally advanced by the padding that rounds the encoded
// size up to a multiple of four.
// Returns absl::nullopt when a length prefix or the string bytes do not fit
// into `data`.
absl::optional<std::string> readSerializedString(std::vector<uint8_t> const &data, int &offset) {
    const auto marker = readBytesAsInt32(data, offset, 1);
    if (!marker) {
        return absl::nullopt;
    }

    int length = 0;
    int paddingBytes = 0;
    if (marker.value() == 254) {
        // Long form: the length follows in three bytes.
        const auto longLength = readBytesAsInt32(data, offset, 3);
        if (!longLength) {
            return absl::nullopt;
        }
        length = longLength.value();
        paddingBytes = roundUp(length, 4) - length;
    } else {
        // Short form: the marker byte itself is the length and counts towards
        // the padded size.
        length = marker.value();
        paddingBytes = roundUp(length + 1, 4) - (length + 1);
    }

    if (offset + length > data.size()) {
        return absl::nullopt;
    }

    const uint8_t *first = data.data() + offset;
    std::string result(first, first + length);

    offset += length;
    offset += paddingBytes;

    return result;
}
// Parses one event from `data` at `offset`: a 32-bit offset, a serialized
// endpoint id string, a 32-bit rotation and a 32-bit extra field, in that
// order. Returns absl::nullopt as soon as any field cannot be read.
absl::optional<VideoStreamEvent> readVideoStreamEvent(std::vector<uint8_t> const &data, int &offset) {
    const auto eventOffset = readInt32(data, offset);
    if (!eventOffset) {
        return absl::nullopt;
    }

    const auto endpoint = readSerializedString(data, offset);
    if (!endpoint) {
        return absl::nullopt;
    }

    const auto rotationValue = readInt32(data, offset);
    if (!rotationValue) {
        return absl::nullopt;
    }

    const auto extraValue = readInt32(data, offset);
    if (!extraValue) {
        return absl::nullopt;
    }

    VideoStreamEvent event;
    event.offset = eventOffset.value();
    event.endpointId = endpoint.value();
    event.rotation = rotationValue.value();
    event.extra = extraValue.value();
    return event;
}
// Parses the stream header at the front of `data` and, on success, removes the
// header bytes from `data` so that only the media payload remains.
//
// Layout read here: 32-bit signature (0xa12e810d), serialized container
// string, 32-bit active mask, 32-bit event count, then that many events.
//
// Returns absl::nullopt (leaving `data` unmodified) when the signature does
// not match, a field is truncated, or the event count is not positive.
absl::optional<VideoStreamInfo> consumeVideoStreamInfo(std::vector<uint8_t> &data) {
    constexpr uint32_t kSignature = 0xa12e810d;

    int offset = 0;

    const auto signature = readInt32(data, offset);
    if (!signature || static_cast<uint32_t>(signature.value()) != kSignature) {
        return absl::nullopt;
    }

    VideoStreamInfo info;

    const auto container = readSerializedString(data, offset);
    if (!container) {
        return absl::nullopt;
    }
    info.container = container.value();

    const auto activeMask = readInt32(data, offset);
    if (!activeMask) {
        return absl::nullopt;
    }
    info.activeMask = activeMask.value();

    const auto eventCount = readInt32(data, offset);
    if (!eventCount || eventCount.value() <= 0) {
        return absl::nullopt;
    }

    // Read every announced event. Stopping after the first one would leave the
    // remaining event records at the front of the media payload.
    // The count is untrusted, so nothing is pre-allocated from it; the loop
    // ends on the first event that does not fit into `data`.
    for (int32_t i = 0; i < eventCount.value(); i++) {
        const auto event = readVideoStreamEvent(data, offset);
        if (!event) {
            return absl::nullopt;
        }
        info.events.push_back(event.value());
    }

    data.erase(data.begin(), data.begin() + offset);

    return info;
}
}
// Demuxes and decodes the video stream of one in-memory media segment and
// hands out its frames one at a time.
class VideoStreamingPartInternal {
public:
    // Opens `fileData` as a `container`-formatted input and prepares a decoder
    // for its first video stream.
    //
    // `endpointId` and `rotation` are attached to every frame produced.
    // Any failure leaves the object in a state where getNextFrame() yields no
    // frames; nothing is reported to the caller.
    VideoStreamingPartInternal(std::string endpointId, webrtc::VideoRotation rotation, std::vector<uint8_t> &&fileData, std::string const &container) :
    _endpointId(std::move(endpointId)),
    _rotation(rotation) {
        _avIoContext = std::make_unique<AVIOContextImpl>(std::move(fileData));
        if (!_avIoContext->getContext()) {
            _didReadToEnd = true;
            return;
        }

        int ret = 0;

        AVInputFormat *inputFormat = av_find_input_format(container.c_str());
        if (!inputFormat) {
            _didReadToEnd = true;
            return;
        }

        _inputFormatContext = avformat_alloc_context();
        if (!_inputFormatContext) {
            _didReadToEnd = true;
            return;
        }

        _inputFormatContext->pb = _avIoContext->getContext();

        // On failure avformat_open_input() frees the context and resets the
        // pointer, so there is nothing to clean up here.
        if ((ret = avformat_open_input(&_inputFormatContext, "", inputFormat, nullptr)) < 0) {
            _didReadToEnd = true;
            return;
        }

        if ((ret = avformat_find_stream_info(_inputFormatContext, nullptr)) < 0) {
            _didReadToEnd = true;

            avformat_close_input(&_inputFormatContext);
            _inputFormatContext = nullptr;
            return;
        }

        // Pick the first video stream.
        AVCodecParameters *videoCodecParameters = nullptr;
        AVStream *videoStream = nullptr;
        for (unsigned int i = 0; i < _inputFormatContext->nb_streams; i++) {
            AVStream *inStream = _inputFormatContext->streams[i];

            AVCodecParameters *inCodecpar = inStream->codecpar;
            if (inCodecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
                continue;
            }
            videoCodecParameters = inCodecpar;
            videoStream = inStream;

            break;
        }

        if (!videoCodecParameters || !videoStream) {
            return;
        }

        AVCodec *codec = avcodec_find_decoder(videoCodecParameters->codec_id);
        if (!codec) {
            return;
        }

        _codecContext = avcodec_alloc_context3(codec);
        if (!_codecContext) {
            _didReadToEnd = true;
            return;
        }

        ret = avcodec_parameters_to_context(_codecContext, videoCodecParameters);
        if (ret < 0) {
            _didReadToEnd = true;

            avcodec_free_context(&_codecContext);
            _codecContext = nullptr;
            return;
        }

        _codecContext->pkt_timebase = videoStream->time_base;

        ret = avcodec_open2(_codecContext, codec, nullptr);
        if (ret < 0) {
            _didReadToEnd = true;

            avcodec_free_context(&_codecContext);
            _codecContext = nullptr;
            return;
        }

        _videoStream = videoStream;
    }

    ~VideoStreamingPartInternal() {
        if (_codecContext) {
            // avcodec_free_context() also closes the codec.
            avcodec_free_context(&_codecContext);
        }
        if (_inputFormatContext) {
            avformat_close_input(&_inputFormatContext);
        }
        // _avIoContext is destroyed afterwards as a member, i.e. after the
        // format context that was reading through it has been closed.
    }

    // Endpoint id supplied at construction.
    std::string endpointId() {
        return _endpointId;
    }

    // Reads the next packet of any stream from the container.
    // Returns absl::nullopt when the input is exhausted, unreadable or was
    // never opened.
    absl::optional<MediaDataPacket> readPacket() {
        if (_didReadToEnd) {
            return absl::nullopt;
        }
        if (!_inputFormatContext) {
            return absl::nullopt;
        }

        MediaDataPacket packet;
        int result = av_read_frame(_inputFormatContext, packet.packet());
        if (result < 0) {
            return absl::nullopt;
        }

        return packet;
    }

    // Reads packets until one belonging to the selected video stream is found
    // and returns it; packets of other streams are discarded.
    // Returns nullptr when no more packets can be read.
    std::shared_ptr<DecodableFrame> readNextDecodableFrame() {
        while (true) {
            absl::optional<MediaDataPacket> packet = readPacket();
            if (!packet) {
                return nullptr;
            }
            if (_videoStream && packet->packet()->stream_index == _videoStream->index) {
                // Capture the timestamps before ownership of the packet moves.
                const int64_t pts = packet->packet()->pts;
                const int64_t dts = packet->packet()->dts;
                return std::make_shared<DecodableFrame>(std::move(packet.value()), pts, dts);
            }
        }
    }

    // Copies the most recently decoded frame into an I420 buffer and wraps it
    // with the endpoint id, rotation, timing and current frame index.
    // NOTE(review): the three planes are passed to I420Buffer::Copy without
    // inspecting the frame's pixel format, so this presumably expects the
    // decoder to output planar YUV 4:2:0 - confirm.
    absl::optional<VideoStreamingPartFrame> convertCurrentFrame() {
        AVFrame *decoded = _frame.frame();
        rtc::scoped_refptr<webrtc::I420Buffer> i420Buffer = webrtc::I420Buffer::Copy(
            decoded->width,
            decoded->height,
            decoded->data[0],
            decoded->linesize[0],
            decoded->data[1],
            decoded->linesize[1],
            decoded->data[2],
            decoded->linesize[2]
        );
        if (!i420Buffer) {
            return absl::nullopt;
        }

        auto videoFrame = webrtc::VideoFrame::Builder()
            .set_video_frame_buffer(i420Buffer)
            .set_rotation(_rotation)
            .build();

        return VideoStreamingPartFrame(_endpointId, videoFrame, _frame.pts(_videoStream), _frame.duration(_videoStream), _frameIndex);
    }

    // Returns the next decoded frame, or an empty optional once the segment is
    // exhausted or decoding failed.
    //
    // When the input ends, the decoder is drained once and the frames it was
    // still holding are queued; later calls return them in order.
    absl::optional<VideoStreamingPartFrame> getNextFrame() {
        if (!_codecContext) {
            return {};
        }

        while (true) {
            if (_didReadToEnd) {
                if (_finalFrames.empty()) {
                    break;
                }
                auto frame = _finalFrames[0];
                _finalFrames.erase(_finalFrames.begin());
                return frame;
            }

            const auto frame = readNextDecodableFrame();
            if (!frame) {
                // End of input: switch the decoder to draining mode and
                // collect everything it still buffers.
                _didReadToEnd = true;
                const int drainStatus = avcodec_send_packet(_codecContext, nullptr);
                if (drainStatus == 0) {
                    while (avcodec_receive_frame(_codecContext, _frame.frame()) == 0) {
                        auto convertedFrame = convertCurrentFrame();
                        if (convertedFrame) {
                            _frameIndex++;
                            _finalFrames.push_back(convertedFrame.value());
                        }
                    }
                }
                continue;
            }

            const int sendStatus = avcodec_send_packet(_codecContext, frame->packet().packet());
            if (sendStatus != 0) {
                _didReadToEnd = true;
                return {};
            }

            const int receiveStatus = avcodec_receive_frame(_codecContext, _frame.frame());
            if (receiveStatus == 0) {
                auto convertedFrame = convertCurrentFrame();
                if (convertedFrame) {
                    _frameIndex++;
                    return convertedFrame;
                }
            } else if (receiveStatus == AVERROR(EAGAIN)) {
                // The decoder needs more input before it can output a frame.
                // The numeric value of EAGAIN is platform specific (35 on
                // Darwin, 11 on Linux/Android), so it must not be hard-coded.
            } else {
                _didReadToEnd = true;
                break;
            }
        }

        return {};
    }

private:
    std::string _endpointId;
    webrtc::VideoRotation _rotation = webrtc::VideoRotation::kVideoRotation_0;

    std::unique_ptr<AVIOContextImpl> _avIoContext;

    AVFormatContext *_inputFormatContext = nullptr;
    AVCodecContext *_codecContext = nullptr;
    // Points into _inputFormatContext; not owned.
    AVStream *_videoStream = nullptr;
    // Reused as the decoder output for every frame.
    Frame _frame;

    // Frames drained from the decoder after the input ended.
    std::vector<VideoStreamingPartFrame> _finalFrames;

    int _frameIndex = 0;
    bool _didReadToEnd = false;
};
class VideoStreamingPartState {
public:
VideoStreamingPartState(std::vector<uint8_t> &&data) {
_videoStreamInfo = consumeVideoStreamInfo(data);
if (!_videoStreamInfo) {
return;
}
for (size_t i = 0; i < _videoStreamInfo->events.size(); i++) {
std::vector<uint8_t> dataSlice(data.begin() + _videoStreamInfo->events[i].offset, i == (_videoStreamInfo->events.size() - 1) ? data.end() : (data.begin() + _videoStreamInfo->events[i + 1].offset));
webrtc::VideoRotation rotation = webrtc::VideoRotation::kVideoRotation_0;
switch (_videoStreamInfo->events[i].rotation) {
case 0: {
rotation = webrtc::VideoRotation::kVideoRotation_0;
break;
}
case 90: {
rotation = webrtc::VideoRotation::kVideoRotation_90;
break;
}
case 180: {
rotation = webrtc::VideoRotation::kVideoRotation_180;
break;
}
case 270: {
rotation = webrtc::VideoRotation::kVideoRotation_270;
break;
}
default: {
break;
}
}
auto part = std::make_unique<VideoStreamingPartInternal>(_videoStreamInfo->events[i].endpointId, rotation, std::move(dataSlice), _videoStreamInfo->container);
_parsedParts.push_back(std::move(part));
}
}
~VideoStreamingPartState() {
}
absl::optional<VideoStreamingPartFrame> getFrameAtRelativeTimestamp(double timestamp) {
while (true) {
if (!_currentFrame) {
if (!_parsedParts.empty()) {
auto result = _parsedParts[0]->getNextFrame();
if (result) {
_currentFrame = result;
_relativeTimestamp += result->duration;
} else {
_parsedParts.erase(_parsedParts.begin());
continue;
}
}
}
if (_currentFrame) {
if (timestamp <= _relativeTimestamp) {
return _currentFrame;
} else {
_currentFrame = absl::nullopt;
}
} else {
return absl::nullopt;
}
}
}
absl::optional<std::string> getActiveEndpointId() const {
if (!_parsedParts.empty()) {
return _parsedParts[0]->endpointId();
} else {
return absl::nullopt;
}
}
private:
absl::optional<VideoStreamInfo> _videoStreamInfo;
std::vector<std::unique_ptr<VideoStreamingPartInternal>> _parsedParts;
absl::optional<VideoStreamingPartFrame> _currentFrame;
double _relativeTimestamp = 0.0;
};
// Creates the parsing/decoding state for `data`. Empty input creates no state,
// so every query on the object then returns absl::nullopt.
VideoStreamingPart::VideoStreamingPart(std::vector<uint8_t> &&data) {
    if (data.empty()) {
        return;
    }
    _state = new VideoStreamingPartState(std::move(data));
}
// Destroys the state created by the constructor, if any.
VideoStreamingPart::~VideoStreamingPart() {
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete _state;
}
// Forwards to the internal state; returns absl::nullopt when the part was
// constructed from empty data.
absl::optional<VideoStreamingPartFrame> VideoStreamingPart::getFrameAtRelativeTimestamp(double timestamp) {
    if (!_state) {
        return absl::nullopt;
    }
    return _state->getFrameAtRelativeTimestamp(timestamp);
}
// Forwards to the internal state; returns absl::nullopt when the part was
// constructed from empty data.
absl::optional<std::string> VideoStreamingPart::getActiveEndpointId() const {
    if (!_state) {
        return absl::nullopt;
    }
    return _state->getActiveEndpointId();
}
}