688 lines
20 KiB
C++
688 lines
20 KiB
C++
#include "VideoStreamingPart.h"
|
|
|
|
#include "rtc_base/logging.h"
|
|
#include "rtc_base/third_party/base64/base64.h"
|
|
#include "api/video/i420_buffer.h"
|
|
|
|
#include "AVIOContextImpl.h"
|
|
|
|
#include <string>
|
|
#include <set>
|
|
#include <map>
|
|
|
|
namespace tgcalls {
|
|
|
|
namespace {
|
|
|
|
class MediaDataPacket {
|
|
public:
|
|
MediaDataPacket() : _packet(av_packet_alloc()) {
|
|
}
|
|
|
|
MediaDataPacket(MediaDataPacket &&other) : _packet(other._packet) {
|
|
other._packet = nullptr;
|
|
}
|
|
|
|
~MediaDataPacket() {
|
|
if (_packet) {
|
|
av_packet_free(&_packet);
|
|
}
|
|
}
|
|
|
|
AVPacket *packet() {
|
|
return _packet;
|
|
}
|
|
|
|
private:
|
|
AVPacket *_packet = nullptr;
|
|
};
|
|
|
|
class DecodableFrame {
|
|
public:
|
|
DecodableFrame(MediaDataPacket packet, int64_t pts, int64_t dts):
|
|
_packet(std::move(packet)),
|
|
_pts(pts),
|
|
_dts(dts) {
|
|
}
|
|
|
|
~DecodableFrame() {
|
|
}
|
|
|
|
MediaDataPacket &packet() {
|
|
return _packet;
|
|
}
|
|
|
|
int64_t pts() {
|
|
return _pts;
|
|
}
|
|
|
|
int64_t dts() {
|
|
return _dts;
|
|
}
|
|
|
|
private:
|
|
MediaDataPacket _packet;
|
|
int64_t _pts = 0;
|
|
int64_t _dts = 0;
|
|
};
|
|
|
|
class Frame {
|
|
public:
|
|
Frame() {
|
|
_frame = av_frame_alloc();
|
|
}
|
|
|
|
Frame(Frame &&other) {
|
|
_frame = other._frame;
|
|
other._frame = nullptr;
|
|
}
|
|
|
|
~Frame() {
|
|
if (_frame) {
|
|
av_frame_unref(_frame);
|
|
}
|
|
}
|
|
|
|
AVFrame *frame() {
|
|
return _frame;
|
|
}
|
|
|
|
double pts(AVStream *stream, double &firstFramePts) {
|
|
int64_t framePts = _frame->pts;
|
|
double spf = av_q2d(stream->time_base);
|
|
double value = ((double)framePts) * spf;
|
|
|
|
if (firstFramePts < 0.0) {
|
|
firstFramePts = value;
|
|
}
|
|
|
|
return value - firstFramePts;
|
|
}
|
|
|
|
private:
|
|
AVFrame *_frame = nullptr;
|
|
};
|
|
|
|
// One event record parsed from the stream part header.
struct VideoStreamEvent {
    // Byte offset into the payload (after the header has been stripped)
    // at which this event's media data starts.
    int32_t offset{0};
    // Endpoint the media data belongs to.
    std::string endpointId{};
    // Rotation in degrees; 0, 90, 180 and 270 are recognised by the consumer.
    int32_t rotation{0};
    // Parsed from the header; not used elsewhere in this file.
    int32_t extra{0};
};
|
|
|
|
struct VideoStreamInfo {
|
|
std::string container;
|
|
int32_t activeMask = 0;
|
|
std::vector<VideoStreamEvent> events;
|
|
};
|
|
|
|
// Reads a 4-byte integer from `data` at `offset` (host byte order, raw
// memcpy) and advances `offset` past it.
//
// Returns absl::nullopt, leaving `offset` untouched, when `offset` is
// negative or fewer than four bytes remain.
absl::optional<int32_t> readInt32(std::vector<uint8_t> const &data, int &offset) {
    if (offset < 0) {
        return absl::nullopt;
    }

    // Compare in size_t and subtract rather than add, so neither a
    // signed/unsigned mix-up nor an overflow of `offset + 4` can let an
    // out-of-range read through.
    const size_t start = static_cast<size_t>(offset);
    if (start > data.size() || data.size() - start < sizeof(int32_t)) {
        return absl::nullopt;
    }

    int32_t value = 0;
    memcpy(&value, data.data() + start, sizeof(value));
    offset += 4;

    return value;
}
|
|
|
|
// Reads `count` (1..4) bytes from `data` at `offset` into the low-address
// bytes of a zero-initialised int32_t and advances `offset` by `count`.
// NOTE(review): this yields the little-endian value only on a little-endian
// host — confirm no big-endian targets are built.
//
// Returns absl::nullopt, leaving `offset` untouched, when `count` is outside
// 1..4, `offset` is negative, or fewer than `count` bytes remain.
//
// The result is a full int32_t: returning uint8_t (as before) silently
// truncated multi-byte values, e.g. the 3-byte length of long strings.
absl::optional<int32_t> readBytesAsInt32(std::vector<uint8_t> const &data, int &offset, int count) {
    if (count <= 0 || count > 4) {
        return absl::nullopt;
    }
    if (offset < 0) {
        return absl::nullopt;
    }

    const size_t start = static_cast<size_t>(offset);
    const size_t length = static_cast<size_t>(count);
    if (start > data.size() || data.size() - start < length) {
        return absl::nullopt;
    }

    int32_t value = 0;
    memcpy(&value, data.data() + start, length);
    offset += count;

    return value;
}
|
|
|
|
// Rounds `numToRound` up to the next multiple of `multiple`.
// A `multiple` of 0 and values that are already aligned are returned as-is.
int32_t roundUp(int32_t numToRound, int32_t multiple) {
    if (multiple == 0) {
        return numToRound;
    }

    const int32_t excess = numToRound % multiple;
    return (excess == 0) ? numToRound : (numToRound + multiple - excess);
}
|
|
|
|
// Reads a length-prefixed string from `data` at `offset` and advances
// `offset` past the string and its padding.
//
// The first byte is either the length itself, or the marker 254, in which
// case the length is taken from the following three bytes. Padding is
// computed so that prefix + payload ends on a 4-byte boundary.
//
// Returns absl::nullopt when a prefix cannot be read or the payload would
// run past the end of `data`.
absl::optional<std::string> readSerializedString(std::vector<uint8_t> const &data, int &offset) {
    const auto marker = readBytesAsInt32(data, offset, 1);
    if (!marker) {
        return absl::nullopt;
    }

    int length = 0;
    int paddingBytes = 0;
    if (marker.value() == 254) {
        const auto longLength = readBytesAsInt32(data, offset, 3);
        if (!longLength) {
            return absl::nullopt;
        }
        length = longLength.value();
        // The 4-byte prefix is already aligned; only the payload is padded.
        paddingBytes = roundUp(length, 4) - length;
    } else {
        length = marker.value();
        // The 1-byte prefix counts towards the alignment.
        paddingBytes = roundUp(length + 1, 4) - (length + 1);
    }

    if (offset + length > data.size()) {
        return absl::nullopt;
    }

    const uint8_t *first = data.data() + offset;
    std::string result(first, first + length);

    offset += length;
    offset += paddingBytes;

    return result;
}
|
|
|
|
// Parses one event record from `data` at `offset`: int32 offset, serialized
// endpoint id string, int32 rotation, int32 extra — in that order.
// `offset` is advanced past every field that was read successfully.
// Returns absl::nullopt as soon as any field cannot be read.
absl::optional<VideoStreamEvent> readVideoStreamEvent(std::vector<uint8_t> const &data, int &offset) {
    VideoStreamEvent event;

    const auto offsetValue = readInt32(data, offset);
    if (!offsetValue) {
        return absl::nullopt;
    }
    event.offset = offsetValue.value();

    const auto endpointId = readSerializedString(data, offset);
    if (!endpointId) {
        return absl::nullopt;
    }
    event.endpointId = endpointId.value();

    const auto rotation = readInt32(data, offset);
    if (!rotation) {
        return absl::nullopt;
    }
    event.rotation = rotation.value();

    const auto extra = readInt32(data, offset);
    if (!extra) {
        return absl::nullopt;
    }
    event.extra = extra.value();

    return event;
}
|
|
|
|
// Parses the stream part header at the start of `data` and, on success,
// removes the header bytes from `data` so that only the payload remains.
//
// Header layout as read here: int32 signature (0xa12e810d), serialized
// container name, int32 active mask, int32 event count, then that many
// event records.
//
// Returns absl::nullopt (leaving `data` unmodified) when the signature does
// not match, any field cannot be read, or the event count is not positive.
absl::optional<VideoStreamInfo> consumeVideoStreamInfo(std::vector<uint8_t> &data) {
    int offset = 0;

    const auto signature = readInt32(data, offset);
    if (!signature || static_cast<uint32_t>(signature.value()) != 0xa12e810dU) {
        return absl::nullopt;
    }

    VideoStreamInfo info;

    const auto container = readSerializedString(data, offset);
    if (!container) {
        return absl::nullopt;
    }
    info.container = container.value();

    const auto activeMask = readInt32(data, offset);
    if (!activeMask) {
        return absl::nullopt;
    }
    info.activeMask = activeMask.value();

    const auto eventCount = readInt32(data, offset);
    if (!eventCount || eventCount.value() <= 0) {
        return absl::nullopt;
    }

    // Read every declared event. Parsing only the first one would leave the
    // remaining event records at the head of the payload and shift all data
    // offsets. No reserve() on purpose: the count is untrusted, and a bogus
    // value simply makes a read fail once the input is exhausted.
    for (int32_t i = 0; i < eventCount.value(); i++) {
        const auto event = readVideoStreamEvent(data, offset);
        if (!event) {
            return absl::nullopt;
        }
        info.events.push_back(event.value());
    }

    // Defensive: never erase past the end of the buffer.
    if (offset < 0 || static_cast<size_t>(offset) > data.size()) {
        return absl::nullopt;
    }
    data.erase(data.begin(), data.begin() + offset);

    return info;
}
|
|
|
|
}
|
|
|
|
class VideoStreamingPartInternal {
|
|
public:
|
|
VideoStreamingPartInternal(std::string endpointId, webrtc::VideoRotation rotation, std::vector<uint8_t> &&fileData, std::string const &container) :
|
|
_endpointId(endpointId),
|
|
_rotation(rotation) {
|
|
_avIoContext = std::make_unique<AVIOContextImpl>(std::move(fileData));
|
|
|
|
int ret = 0;
|
|
|
|
#if LIBAVFORMAT_VERSION_MAJOR >= 59
|
|
const
|
|
#endif
|
|
AVInputFormat *inputFormat = av_find_input_format(container.c_str());
|
|
if (!inputFormat) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
_inputFormatContext = avformat_alloc_context();
|
|
if (!_inputFormatContext) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
_inputFormatContext->pb = _avIoContext->getContext();
|
|
|
|
if ((ret = avformat_open_input(&_inputFormatContext, "", inputFormat, nullptr)) < 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
if ((ret = avformat_find_stream_info(_inputFormatContext, nullptr)) < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avformat_close_input(&_inputFormatContext);
|
|
_inputFormatContext = nullptr;
|
|
return;
|
|
}
|
|
|
|
AVCodecParameters *videoCodecParameters = nullptr;
|
|
AVStream *videoStream = nullptr;
|
|
for (int i = 0; i < _inputFormatContext->nb_streams; i++) {
|
|
AVStream *inStream = _inputFormatContext->streams[i];
|
|
|
|
AVCodecParameters *inCodecpar = inStream->codecpar;
|
|
if (inCodecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
|
|
continue;
|
|
}
|
|
videoCodecParameters = inCodecpar;
|
|
videoStream = inStream;
|
|
|
|
break;
|
|
}
|
|
|
|
if (videoCodecParameters && videoStream) {
|
|
const AVCodec *codec = avcodec_find_decoder(videoCodecParameters->codec_id);
|
|
if (codec) {
|
|
_codecContext = avcodec_alloc_context3(codec);
|
|
ret = avcodec_parameters_to_context(_codecContext, videoCodecParameters);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avcodec_free_context(&_codecContext);
|
|
_codecContext = nullptr;
|
|
} else {
|
|
_codecContext->pkt_timebase = videoStream->time_base;
|
|
|
|
ret = avcodec_open2(_codecContext, codec, nullptr);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avcodec_free_context(&_codecContext);
|
|
_codecContext = nullptr;
|
|
} else {
|
|
_videoStream = videoStream;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
~VideoStreamingPartInternal() {
|
|
if (_codecContext) {
|
|
avcodec_close(_codecContext);
|
|
avcodec_free_context(&_codecContext);
|
|
}
|
|
if (_inputFormatContext) {
|
|
avformat_close_input(&_inputFormatContext);
|
|
}
|
|
}
|
|
|
|
std::string endpointId() {
|
|
return _endpointId;
|
|
}
|
|
|
|
absl::optional<MediaDataPacket> readPacket() {
|
|
if (_didReadToEnd) {
|
|
return absl::nullopt;
|
|
}
|
|
if (!_inputFormatContext) {
|
|
return absl::nullopt;
|
|
}
|
|
|
|
MediaDataPacket packet;
|
|
int result = av_read_frame(_inputFormatContext, packet.packet());
|
|
if (result < 0) {
|
|
return absl::nullopt;
|
|
}
|
|
|
|
return packet;
|
|
}
|
|
|
|
std::shared_ptr<DecodableFrame> readNextDecodableFrame() {
|
|
while (true) {
|
|
absl::optional<MediaDataPacket> packet = readPacket();
|
|
if (packet) {
|
|
if (_videoStream && packet->packet()->stream_index == _videoStream->index) {
|
|
return std::make_shared<DecodableFrame>(std::move(packet.value()), packet->packet()->pts, packet->packet()->dts);
|
|
}
|
|
} else {
|
|
return nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
absl::optional<VideoStreamingPartFrame> convertCurrentFrame() {
|
|
rtc::scoped_refptr<webrtc::I420Buffer> i420Buffer = webrtc::I420Buffer::Copy(
|
|
_frame.frame()->width,
|
|
_frame.frame()->height,
|
|
_frame.frame()->data[0],
|
|
_frame.frame()->linesize[0],
|
|
_frame.frame()->data[1],
|
|
_frame.frame()->linesize[1],
|
|
_frame.frame()->data[2],
|
|
_frame.frame()->linesize[2]
|
|
);
|
|
if (i420Buffer) {
|
|
auto videoFrame = webrtc::VideoFrame::Builder()
|
|
.set_video_frame_buffer(i420Buffer)
|
|
.set_rotation(_rotation)
|
|
.build();
|
|
|
|
return VideoStreamingPartFrame(_endpointId, videoFrame, _frame.pts(_videoStream, _firstFramePts), _frameIndex);
|
|
} else {
|
|
return absl::nullopt;
|
|
}
|
|
}
|
|
|
|
absl::optional<VideoStreamingPartFrame> getNextFrame() {
|
|
if (!_codecContext) {
|
|
return {};
|
|
}
|
|
|
|
while (true) {
|
|
if (_didReadToEnd) {
|
|
if (!_finalFrames.empty()) {
|
|
auto frame = _finalFrames[0];
|
|
_finalFrames.erase(_finalFrames.begin());
|
|
return frame;
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
const auto frame = readNextDecodableFrame();
|
|
if (frame) {
|
|
auto status = avcodec_send_packet(_codecContext, frame->packet().packet());
|
|
if (status == 0) {
|
|
auto status = avcodec_receive_frame(_codecContext, _frame.frame());
|
|
if (status == 0) {
|
|
auto convertedFrame = convertCurrentFrame();
|
|
if (convertedFrame) {
|
|
_frameIndex++;
|
|
return convertedFrame;
|
|
}
|
|
} else if (status == AVERROR(EAGAIN)) {
|
|
// more data needed
|
|
} else {
|
|
_didReadToEnd = true;
|
|
break;
|
|
}
|
|
} else {
|
|
_didReadToEnd = true;
|
|
return {};
|
|
}
|
|
} else {
|
|
_didReadToEnd = true;
|
|
int status = avcodec_send_packet(_codecContext, nullptr);
|
|
if (status == 0) {
|
|
while (true) {
|
|
auto status = avcodec_receive_frame(_codecContext, _frame.frame());
|
|
if (status == 0) {
|
|
auto convertedFrame = convertCurrentFrame();
|
|
if (convertedFrame) {
|
|
_frameIndex++;
|
|
_finalFrames.push_back(convertedFrame.value());
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return {};
|
|
}
|
|
|
|
private:
|
|
std::string _endpointId;
|
|
webrtc::VideoRotation _rotation = webrtc::VideoRotation::kVideoRotation_0;
|
|
|
|
std::unique_ptr<AVIOContextImpl> _avIoContext;
|
|
|
|
AVFormatContext *_inputFormatContext = nullptr;
|
|
AVCodecContext *_codecContext = nullptr;
|
|
AVStream *_videoStream = nullptr;
|
|
Frame _frame;
|
|
|
|
std::vector<VideoStreamingPartFrame> _finalFrames;
|
|
|
|
int _frameIndex = 0;
|
|
double _firstFramePts = -1.0;
|
|
bool _didReadToEnd = false;
|
|
};
|
|
|
|
class VideoStreamingPartState {
|
|
public:
|
|
VideoStreamingPartState(std::vector<uint8_t> &&data, VideoStreamingPart::ContentType contentType) {
|
|
_videoStreamInfo = consumeVideoStreamInfo(data);
|
|
if (!_videoStreamInfo) {
|
|
return;
|
|
}
|
|
|
|
for (size_t i = 0; i < _videoStreamInfo->events.size(); i++) {
|
|
if (_videoStreamInfo->events[i].offset < 0) {
|
|
continue;
|
|
}
|
|
size_t endOffset = 0;
|
|
if (i == _videoStreamInfo->events.size() - 1) {
|
|
endOffset = data.size();
|
|
} else {
|
|
endOffset = _videoStreamInfo->events[i + 1].offset;
|
|
}
|
|
if (endOffset <= _videoStreamInfo->events[i].offset) {
|
|
continue;
|
|
}
|
|
if (endOffset > data.size()) {
|
|
continue;
|
|
}
|
|
std::vector<uint8_t> dataSlice(data.begin() + _videoStreamInfo->events[i].offset, data.begin() + endOffset);
|
|
webrtc::VideoRotation rotation = webrtc::VideoRotation::kVideoRotation_0;
|
|
switch (_videoStreamInfo->events[i].rotation) {
|
|
case 0: {
|
|
rotation = webrtc::VideoRotation::kVideoRotation_0;
|
|
break;
|
|
}
|
|
case 90: {
|
|
rotation = webrtc::VideoRotation::kVideoRotation_90;
|
|
break;
|
|
}
|
|
case 180: {
|
|
rotation = webrtc::VideoRotation::kVideoRotation_180;
|
|
break;
|
|
}
|
|
case 270: {
|
|
rotation = webrtc::VideoRotation::kVideoRotation_270;
|
|
break;
|
|
}
|
|
default: {
|
|
break;
|
|
}
|
|
}
|
|
|
|
switch (contentType) {
|
|
case VideoStreamingPart::ContentType::Audio: {
|
|
auto part = std::make_unique<AudioStreamingPart>(std::move(dataSlice), _videoStreamInfo->container, true);
|
|
_parsedAudioParts.push_back(std::move(part));
|
|
|
|
break;
|
|
}
|
|
case VideoStreamingPart::ContentType::Video: {
|
|
auto part = std::make_unique<VideoStreamingPartInternal>(_videoStreamInfo->events[i].endpointId, rotation, std::move(dataSlice), _videoStreamInfo->container);
|
|
_parsedVideoParts.push_back(std::move(part));
|
|
|
|
break;
|
|
}
|
|
default: {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
~VideoStreamingPartState() {
|
|
}
|
|
|
|
absl::optional<VideoStreamingPartFrame> getFrameAtRelativeTimestamp(double timestamp) {
|
|
while (true) {
|
|
while (_availableFrames.size() >= 2) {
|
|
if (timestamp >= _availableFrames[1].pts) {
|
|
_availableFrames.erase(_availableFrames.begin());
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (_availableFrames.size() < 2) {
|
|
if (!_parsedVideoParts.empty()) {
|
|
auto result = _parsedVideoParts[0]->getNextFrame();
|
|
if (result) {
|
|
_availableFrames.push_back(result.value());
|
|
} else {
|
|
_parsedVideoParts.erase(_parsedVideoParts.begin());
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (!_availableFrames.empty()) {
|
|
for (size_t i = 1; i < _availableFrames.size(); i++) {
|
|
if (timestamp < _availableFrames[i].pts) {
|
|
return _availableFrames[i - 1];
|
|
}
|
|
}
|
|
return _availableFrames[_availableFrames.size() - 1];
|
|
} else {
|
|
return absl::nullopt;
|
|
}
|
|
}
|
|
}
|
|
|
|
absl::optional<std::string> getActiveEndpointId() const {
|
|
if (!_parsedVideoParts.empty()) {
|
|
return _parsedVideoParts[0]->endpointId();
|
|
} else {
|
|
return absl::nullopt;
|
|
}
|
|
}
|
|
|
|
bool hasRemainingFrames() const {
|
|
return !_parsedVideoParts.empty();
|
|
}
|
|
|
|
int getAudioRemainingMilliseconds() {
|
|
while (!_parsedAudioParts.empty()) {
|
|
auto firstPartResult = _parsedAudioParts[0]->getRemainingMilliseconds();
|
|
if (firstPartResult <= 0) {
|
|
_parsedAudioParts.erase(_parsedAudioParts.begin());
|
|
} else {
|
|
return firstPartResult;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
std::vector<AudioStreamingPart::StreamingPartChannel> getAudio10msPerChannel(AudioStreamingPartPersistentDecoder &persistentDecoder) {
|
|
while (!_parsedAudioParts.empty()) {
|
|
auto firstPartResult = _parsedAudioParts[0]->get10msPerChannel(persistentDecoder);
|
|
if (firstPartResult.empty()) {
|
|
_parsedAudioParts.erase(_parsedAudioParts.begin());
|
|
} else {
|
|
return firstPartResult;
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
private:
|
|
absl::optional<VideoStreamInfo> _videoStreamInfo;
|
|
std::vector<std::unique_ptr<VideoStreamingPartInternal>> _parsedVideoParts;
|
|
std::vector<VideoStreamingPartFrame> _availableFrames;
|
|
|
|
std::vector<std::unique_ptr<AudioStreamingPart>> _parsedAudioParts;
|
|
};
|
|
|
|
// Builds the parsing/decoding state for `data`. Empty input creates no
// state, in which case `_state` is left as it was.
VideoStreamingPart::VideoStreamingPart(std::vector<uint8_t> &&data, VideoStreamingPart::ContentType contentType) {
    if (data.empty()) {
        return;
    }
    _state = new VideoStreamingPartState(std::move(data), contentType);
}
|
|
|
|
// Releases the state created by the constructor, if any.
VideoStreamingPart::~VideoStreamingPart() {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete _state;
}
|
|
|
|
// Forwards to the state object; yields absl::nullopt when there is no state.
absl::optional<VideoStreamingPartFrame> VideoStreamingPart::getFrameAtRelativeTimestamp(double timestamp) {
    if (!_state) {
        return absl::nullopt;
    }
    return _state->getFrameAtRelativeTimestamp(timestamp);
}
|
|
|
|
// Forwards to the state object; yields absl::nullopt when there is no state.
absl::optional<std::string> VideoStreamingPart::getActiveEndpointId() const {
    if (!_state) {
        return absl::nullopt;
    }
    return _state->getActiveEndpointId();
}
|
|
|
|
bool VideoStreamingPart::hasRemainingFrames() const {
|
|
return _state
|
|
? _state->hasRemainingFrames()
|
|
: false;
|
|
}
|
|
|
|
int VideoStreamingPart::getAudioRemainingMilliseconds() {
|
|
return _state
|
|
? _state->getAudioRemainingMilliseconds()
|
|
: 0;
|
|
}
|
|
// Forwards to the state object; yields an empty vector when there is no state.
std::vector<AudioStreamingPart::StreamingPartChannel> VideoStreamingPart::getAudio10msPerChannel(AudioStreamingPartPersistentDecoder &persistentDecoder) {
    if (!_state) {
        return std::vector<AudioStreamingPart::StreamingPartChannel>();
    }
    return _state->getAudio10msPerChannel(persistentDecoder);
}
|
|
|
|
}
|