From 0189c5d825a6754d88acb4fc2434d9faff03f63b Mon Sep 17 00:00:00 2001
From: Qiu Jianlin
Date: Mon, 18 Sep 2023 16:45:33 +0800
Subject: [PATCH] Experimental support for decoding into YUV444P by SW FFmpeg
 decoders.

Applications must call GlobalConfiguration::SetRangeExtensionEnabled(true)
to allow OWT to switch to the SW HEVC decoder. By default, range extension
decoding of HEVC is disabled.
---
 talk/owt/BUILD.gn                             |   2 +
 talk/owt/sdk/base/globalconfiguration.cc      |   1 +
 .../base/peerconnectiondependencyfactory.cc   |   3 +
 talk/owt/sdk/base/webrtcvideorendererimpl.cc  |  30 +-
 talk/owt/sdk/base/win/d3d11_video_decoder.cc  |  33 +-
 .../base/win/externalvideodecoderfactory.cc   |  10 +-
 .../base/win/externalvideodecoderfactory.h    |   1 +
 talk/owt/sdk/base/win/ffmpeg_decoder_impl.cc  | 570 ++++++++++++++++++
 talk/owt/sdk/base/win/ffmpeg_decoder_impl.h   | 102 ++++
 .../cpp/owt/base/globalconfiguration.h        |  11 +
 .../cpp/owt/base/videorendererinterface.h     |   3 +-
 11 files changed, 760 insertions(+), 6 deletions(-)
 create mode 100644 talk/owt/sdk/base/win/ffmpeg_decoder_impl.cc
 create mode 100644 talk/owt/sdk/base/win/ffmpeg_decoder_impl.h

diff --git a/talk/owt/BUILD.gn b/talk/owt/BUILD.gn
index 22da5d5ab..98989d041 100644
--- a/talk/owt/BUILD.gn
+++ b/talk/owt/BUILD.gn
@@ -374,6 +374,8 @@ static_library("owt_sdk_base") {
     sources += [
       "sdk/base/win/d3d11_video_decoder.cc",
       "sdk/base/win/d3d11_video_decoder.h",
+      "sdk/base/win/ffmpeg_decoder_impl.cc",
+      "sdk/base/win/ffmpeg_decoder_impl.h",
     ]
   }
   defines += [ "OWT_USE_FFMPEG" ]
diff --git a/talk/owt/sdk/base/globalconfiguration.cc b/talk/owt/sdk/base/globalconfiguration.cc
index 299c79493..6cab14129 100644
--- a/talk/owt/sdk/base/globalconfiguration.cc
+++ b/talk/owt/sdk/base/globalconfiguration.cc
@@ -12,6 +12,7 @@ bool GlobalConfiguration::hardware_acceleration_enabled_ = true;
 ID3D11Device* GlobalConfiguration::d3d11_decoding_device_ = nullptr;
 #endif
 bool GlobalConfiguration::flex_fec_enabled_ = false;
+bool GlobalConfiguration::range_extension_enabled_ = false;
 int GlobalConfiguration::link_mtu_ = 0;  // not set;
 int GlobalConfiguration::min_port_ = 0;  // not set;
 int GlobalConfiguration::max_port_ = 0;  // not set;
diff --git a/talk/owt/sdk/base/peerconnectiondependencyfactory.cc b/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
index 6f26d32b2..af9a77d3d 100644
--- a/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
+++ b/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
@@ -149,6 +149,9 @@ void PeerConnectionDependencyFactory::
   if (GlobalConfiguration::GetFlexFecEnabled()) {
     field_trial_ += "OWT-FlexFEC/Enabled/";
   }
+  if (GlobalConfiguration::GetRangeExtensionEnabled()) {
+    field_trial_ += "OWT-RangeExtension/Enabled/";
+  }
   int delay_bwe_weight = GlobalConfiguration::GetDelayBasedBweWeight();
   field_trial_ +=
       "OWT-DelayBweWeight/" + std::to_string(delay_bwe_weight) + "/";
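Usage note (not part of the diff): the factory above only assembles the field-trial string; the opt-in comes from the embedding application. A minimal sketch of the intended call order, using the API this patch adds (the include path is an assumption based on the SDK layout):

  #include "owt/base/globalconfiguration.h"

  void EnableHevcRangeExtension() {
    // Must run before the first peer connection is created, because the
    // field-trial string is assembled in PeerConnectionDependencyFactory.
    // This routes HEVC decoding to the FFmpeg SW decoder (YUV444P capable)
    // instead of the D3D11 HW path.
    owt::base::GlobalConfiguration::SetRangeExtensionEnabled(true);
  }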
diff --git a/talk/owt/sdk/base/webrtcvideorendererimpl.cc b/talk/owt/sdk/base/webrtcvideorendererimpl.cc
index 00c55de16..d065bf081 100644
--- a/talk/owt/sdk/base/webrtcvideorendererimpl.cc
+++ b/talk/owt/sdk/base/webrtcvideorendererimpl.cc
@@ -9,6 +9,7 @@
 #include
 #include
 #endif
+#include "rtc_base/logging.h"
 #include "talk/owt/sdk/base/nativehandlebuffer.h"
 #include "talk/owt/sdk/base/webrtcvideorendererimpl.h"
 #if defined(WEBRTC_WIN)
@@ -71,13 +72,18 @@ void WebrtcVideoRendererImpl::OnFrame(const webrtc::VideoFrame& frame) {
         (uint8_t*)render_ptr, resolution, VideoBufferType::kD3D11});
     renderer_.RenderFrame(std::move(video_buffer));
+    return;
 #else
     return;
 #endif
   }
+
+  // Non-native buffer. Only when both the buffer and the renderer are I444
+  // do we make a direct copy; otherwise we convert to the renderer's type.
   VideoRendererType renderer_type = renderer_.Type();
   if (renderer_type != VideoRendererType::kI420 &&
-      renderer_type != VideoRendererType::kARGB)
+      renderer_type != VideoRendererType::kARGB &&
+      renderer_type != VideoRendererType::kI444)
     return;
   Resolution resolution(frame.width(), frame.height());
   if (renderer_type == VideoRendererType::kARGB) {
@@ -87,6 +93,28 @@ void WebrtcVideoRendererImpl::OnFrame(const webrtc::VideoFrame& frame) {
     std::unique_ptr<VideoBuffer> video_buffer(
         new VideoBuffer{buffer, resolution, VideoBufferType::kARGB});
     renderer_.RenderFrame(std::move(video_buffer));
+  } else if (renderer_type == VideoRendererType::kI444 &&
+             frame.video_frame_buffer()->type() ==
+                 webrtc::VideoFrameBuffer::Type::kI444) {
+    // Assumes stride equals width (this may not always hold).
+    uint8_t* buffer = new uint8_t[resolution.width * resolution.height * 3];
+    rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+        frame.video_frame_buffer();
+    const webrtc::PlanarYuv8Buffer* planar_yuv_buffer =
+        reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(
+            frame_buffer->GetI444());
+    size_t data_ptr = 0;
+    memcpy(buffer, planar_yuv_buffer->DataY(),
+           planar_yuv_buffer->StrideY() * planar_yuv_buffer->height());
+    data_ptr += planar_yuv_buffer->StrideY() * planar_yuv_buffer->height();
+    memcpy(buffer + data_ptr, planar_yuv_buffer->DataU(),
+           planar_yuv_buffer->StrideU() * planar_yuv_buffer->height());
+    data_ptr += planar_yuv_buffer->StrideU() * planar_yuv_buffer->height();
+    memcpy(buffer + data_ptr, planar_yuv_buffer->DataV(),
+           planar_yuv_buffer->StrideV() * planar_yuv_buffer->height());
+    std::unique_ptr<VideoBuffer> video_buffer(
+        new VideoBuffer{buffer, resolution, VideoBufferType::kI444});
+    renderer_.RenderFrame(std::move(video_buffer));
   } else {
     uint8_t* buffer = new uint8_t[resolution.width * resolution.height * 3 / 2];
     webrtc::ConvertFromI420(frame, webrtc::VideoType::kI420, 0,
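The direct-copy branch above assumes tightly packed planes (stride equal to width). If a decoder ever hands out padded I444 buffers, a stride-aware copy is needed; a sketch using libyuv::CopyPlane (PackI444 is a hypothetical helper, not part of this patch):

  #include "third_party/libyuv/include/libyuv/planar_functions.h"

  // Packs a possibly padded I444 buffer into a tight width*height*3 blob,
  // copying row by row so source strides larger than width are handled.
  void PackI444(const webrtc::PlanarYuv8Buffer* src, uint8_t* dst, int width,
                int height) {
    libyuv::CopyPlane(src->DataY(), src->StrideY(), dst, width, width, height);
    dst += width * height;
    libyuv::CopyPlane(src->DataU(), src->StrideU(), dst, width, width, height);
    dst += width * height;
    libyuv::CopyPlane(src->DataV(), src->StrideV(), dst, width, width, height);
  }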
diff --git a/talk/owt/sdk/base/win/d3d11_video_decoder.cc b/talk/owt/sdk/base/win/d3d11_video_decoder.cc
index 4df7d0849..6c1611a9e 100644
--- a/talk/owt/sdk/base/win/d3d11_video_decoder.cc
+++ b/talk/owt/sdk/base/win/d3d11_video_decoder.cc
@@ -310,11 +310,24 @@ int32_t D3D11VideoDecoder::Decode(const webrtc::EncodedImage& input_image,
       goto fail;
     }
 
+    if (!frame) {
+      RTC_LOG(LS_ERROR) << "Failed to decode current frame.";
+      goto fail;
+    }
+
+    int width = frame->width;
+    int height = frame->height;
+
+    VideoFrame* input_frame =
+        static_cast<VideoFrame*>(av_buffer_get_opaque(frame->buf[0]));
+    RTC_DCHECK(input_frame);
+
+    rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+        input_frame->video_frame_buffer();
+
     // We get one frame from the decoder.
-    if (frame != nullptr && frame->format == hw_pix_fmt) {
+    if (frame->format != AV_PIX_FMT_YUV444P) {
       ID3D11Texture2D* texture = (ID3D11Texture2D*)frame->data[0];
-      int width = frame->width;
-      int height = frame->height;
       int index = (intptr_t)frame->data[1];
 
       D3D11_TEXTURE2D_DESC texture_desc;
@@ -380,6 +393,20 @@ int32_t D3D11VideoDecoder::Decode(const webrtc::EncodedImage& input_image,
       }
 
       av_frame_free(&frame);
+    } else {  // YUV444P, which will be a software frame.
+      rtc::scoped_refptr<webrtc::VideoFrameBuffer> dst_buffer;
+      dst_buffer =
+          WrapI444Buffer(width, height, frame->data[0], frame->linesize[0],
+                         frame->data[1], frame->linesize[1], frame->data[2],
+                         frame->linesize[2], [frame_buffer] {});
+      webrtc::VideoFrame decoded_frame(dst_buffer, input_image.Timestamp(), 0,
+                                       webrtc::kVideoRotation_0);
+      decoded_frame.set_ntp_time_ms(input_image.ntp_time_ms_);
+      decoded_frame.set_timestamp(input_image.Timestamp());
+      if (decoded_image_callback_) {
+        decoded_image_callback_->Decoded(decoded_frame);
+      }
+      av_frame_free(&frame);
     }
   }
   return WEBRTC_VIDEO_CODEC_OK;
diff --git a/talk/owt/sdk/base/win/externalvideodecoderfactory.cc b/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
index 61cb1db16..c606d3dc7 100644
--- a/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
+++ b/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
@@ -13,10 +13,12 @@
 #include "talk/owt/sdk/base/codecutils.h"
 #ifdef OWT_USE_FFMPEG
 #include "talk/owt/sdk/base/win/d3d11_video_decoder.h"
+#include "talk/owt/sdk/base/win/ffmpeg_decoder_impl.h"
 #endif
 #ifdef OWT_USE_MSDK
 #include "talk/owt/sdk/base/win/msdkvideodecoder.h"
 #endif
+#include "system_wrappers/include/field_trial.h"
 #include "webrtc/rtc_base/checks.h"
 #include "webrtc/rtc_base/logging.h"
 
@@ -24,6 +26,8 @@ namespace owt {
 namespace base {
 ExternalVideoDecoderFactory::ExternalVideoDecoderFactory(ID3D11Device* d3d11_device_external) {
+  range_extension_enabled_ =
+      webrtc::field_trial::IsEnabled("OWT-RangeExtension");
   supported_codec_types_.clear();
 
   bool is_vp8_hw_supported = false, is_vp9_hw_supported = false;
@@ -109,7 +113,11 @@ ExternalVideoDecoderFactory::CreateVideoDecoder(
   }
   if (vp8_hw || vp9_hw || h264_hw || h265_hw || av1_hw) {
 #if defined(OWT_USE_FFMPEG)
-    return owt::base::D3D11VideoDecoder::Create(cricket::VideoCodec(format));
+    if (range_extension_enabled_) {
+      return std::make_unique<FFMpegDecoderImpl>();
+    } else {
+      return owt::base::D3D11VideoDecoder::Create(cricket::VideoCodec(format));
+    }
 #endif
 #if defined(OWT_USE_MSDK)
     return owt::base::MSDKVideoDecoder::Create(cricket::VideoCodec(format));
diff --git a/talk/owt/sdk/base/win/externalvideodecoderfactory.h b/talk/owt/sdk/base/win/externalvideodecoderfactory.h
index 490170025..9be33146e 100644
--- a/talk/owt/sdk/base/win/externalvideodecoderfactory.h
+++ b/talk/owt/sdk/base/win/externalvideodecoderfactory.h
@@ -35,6 +35,7 @@ class ExternalVideoDecoderFactory : public webrtc::VideoDecoderFactory {
  private:
   std::vector<webrtc::VideoCodecType> supported_codec_types_;
   ID3D11Device* external_device_ = nullptr;
+  bool range_extension_enabled_ = false;
 };
 }  // namespace base
 }  // namespace owt
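A note on lifetime in the YUV444P branch above: WrapI444Buffer does not copy the plane data, so the seemingly empty lambda `[frame_buffer] {}` is doing real work. The captured scoped_refptr is held until WebRTC drops the last reference to the wrapped buffer and invokes the callback. A sketch of the pattern, assuming WrapI444Buffer's callback runs when the wrapper is destroyed:

  // Capturing `frame_buffer` by value pins the decoder-owned planes for as
  // long as the wrapped I444 buffer is referenced anywhere in the pipeline.
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> backing =
      input_frame->video_frame_buffer();
  auto wrapped = webrtc::WrapI444Buffer(
      width, height, frame->data[0], frame->linesize[0], frame->data[1],
      frame->linesize[1], frame->data[2], frame->linesize[2],
      /*no_longer_used=*/[backing] {});  // releases `backing` when run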
"rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#include "webrtc/system_wrappers/include/clock.h" + +#include "talk/owt/sdk/base/nativehandlebuffer.h" + +namespace owt { +namespace base { + +namespace { + +constexpr std::array kPixelFormatsSupported = { + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P}; +constexpr size_t kYPlaneIndex = 0; +constexpr size_t kUPlaneIndex = 1; +constexpr size_t kVPlaneIndex = 2; + +struct ScopedPtrAVFreePacket { + void operator()(AVPacket* packet) { av_packet_free(&packet); } +}; +typedef std::unique_ptr ScopedAVPacket; + +ScopedAVPacket MakeScopedAVPacket() { + ScopedAVPacket packet(av_packet_alloc()); + return packet; +} + +} // namespace + +int FFMpegDecoderImpl::AVGetBuffer2(AVCodecContext* context, + AVFrame* av_frame, + int flags) { + // Set in `Configure`. + FFMpegDecoderImpl* decoder = static_cast(context->opaque); + // DCHECK values set in `Configure`. + RTC_DCHECK(decoder); + // Necessary capability to be allowed to provide our own buffers. + RTC_DCHECK(context->codec->capabilities | AV_CODEC_CAP_DR1); + + auto pixelFormatSupported = std::find_if( + kPixelFormatsSupported.begin(), kPixelFormatsSupported.end(), + [context](AVPixelFormat format) { return context->pix_fmt == format; }); + + RTC_CHECK(pixelFormatSupported != kPixelFormatsSupported.end()); + + // `av_frame->width` and `av_frame->height` are set by FFmpeg. These are the + // actual image's dimensions and may be different from `context->width` and + // `context->coded_width` due to reordering. + int width = av_frame->width; + int height = av_frame->height; + // See `lowres`, if used the decoder scales the image by 1/2^(lowres). This + // has implications on which resolutions are valid, but we don't use it. + RTC_CHECK_EQ(context->lowres, 0); + // Adjust the `width` and `height` to values acceptable by the decoder. + // Without this, FFmpeg may overflow the buffer. If modified, `width` and/or + // `height` are larger than the actual image and the image has to be cropped + // (top-left corner) after decoding to avoid visible borders to the right and + // bottom of the actual image. + avcodec_align_dimensions(context, &width, &height); + + RTC_CHECK_GE(width, 0); + RTC_CHECK_GE(height, 0); + int ret = av_image_check_size(static_cast(width), + static_cast(height), 0, nullptr); + if (ret < 0) { + RTC_LOG(LS_ERROR) << "Invalid picture size " << width << "x" << height; + decoder->ReportError(); + return ret; + } + + // The video frame is stored in `frame_buffer`. `av_frame` is FFmpeg's version + // of a video frame and will be set up to reference `frame_buffer`'s data. + rtc::scoped_refptr frame_buffer; + rtc::scoped_refptr i444_buffer; + rtc::scoped_refptr i420_buffer; + + // TODO:We only support 8bpp formats. If 10b/12b-444 is going to be supported, + // will need to update this. + int bytes_per_pixel = 1; + switch (context->pix_fmt) { + case AV_PIX_FMT_YUV420P: + i420_buffer = + decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. 
+void FFMpegDecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
+  // The buffer pool recycles the buffer used by `video_frame` when there are
+  // no more references to it. `video_frame` is a thin buffer holder and is
+  // not recycled.
+  webrtc::VideoFrame* video_frame = static_cast<webrtc::VideoFrame*>(opaque);
+  delete video_frame;
+}
+
+FFMpegDecoderImpl::FFMpegDecoderImpl()
+    : ffmpeg_buffer_pool_(true),
+      decoded_image_callback_(nullptr),
+      has_reported_init_(false),
+      has_reported_error_(false),
+      clock_(webrtc::Clock::GetRealTimeClock()) {
+  surface_handle_.reset(new D3D11VAHandle());
+}
+
+FFMpegDecoderImpl::~FFMpegDecoderImpl() {
+  Release();
+}
+
+bool FFMpegDecoderImpl::Configure(
+    const webrtc::VideoDecoder::Settings& settings) {
+  ReportInit();
+  if (settings.codec_type() != webrtc::kVideoCodecH265) {
+    RTC_LOG(LS_ERROR) << "FFmpegDecoder only supports the H265 codec.";
+    ReportError();
+    return false;
+  }
+
+  // Release necessary in case of re-initializing.
+  int32_t ret = Release();
+  if (ret != WEBRTC_VIDEO_CODEC_OK) {
+    ReportError();
+    return false;
+  }
+  RTC_DCHECK(!av_context_);
+
+  // Initialize AVCodecContext.
+  av_context_.reset(avcodec_alloc_context3(nullptr));
+
+  av_context_->codec_type = AVMEDIA_TYPE_VIDEO;
+  av_context_->codec_id = AV_CODEC_ID_H265;
+  const webrtc::RenderResolution& resolution =
+      settings.max_render_resolution();
+  if (resolution.Valid()) {
+    av_context_->coded_width = resolution.Width();
+    av_context_->coded_height = resolution.Height();
+  }
+  av_context_->extradata = nullptr;
+  av_context_->extradata_size = 0;
+
+  // If this is ever increased, look at `av_context_->thread_safe_callbacks`
+  // and make it possible to disable the thread checker in the frame buffer
+  // pool.
+  av_context_->thread_count = 1;
+  av_context_->thread_type = FF_THREAD_SLICE;
+
+  // Function used by FFmpeg to get buffers to store decoded frames in.
+  av_context_->get_buffer2 = AVGetBuffer2;
+  // `get_buffer2` is called with the context; there `opaque` can be used to
+  // get a pointer to `this`.
+  av_context_->opaque = this;
+
+  const AVCodec* codec = avcodec_find_decoder(av_context_->codec_id);
+  if (!codec) {
+    // This is an indication that FFmpeg has not been initialized or it has
+    // not been compiled/initialized with the correct set of codecs.
+    RTC_LOG(LS_ERROR) << "FFmpeg H.265 decoder not found.";
+    Release();
+    ReportError();
+    return false;
+  }
+  int res = avcodec_open2(av_context_.get(), codec, nullptr);
+  if (res < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_open2 error: " << res;
+    Release();
+    ReportError();
+    return false;
+  }
+
+  av_frame_.reset(av_frame_alloc());
+
+  if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) {
+    if (!ffmpeg_buffer_pool_.Resize(*buffer_pool_size)) {
+      return false;
+    }
+  }
+  HRESULT hr;
+  UINT creation_flags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
+  m_padapter_ = nullptr;
+  static D3D_FEATURE_LEVEL feature_levels[] = {
+      D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1,
+      D3D_FEATURE_LEVEL_10_0};
+
+  D3D_FEATURE_LEVEL feature_levels_out;
+
+  hr = D3D11CreateDevice(
+      nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, creation_flags,
+      feature_levels, sizeof(feature_levels) / sizeof(feature_levels[0]),
+      D3D11_SDK_VERSION, &d3d11_device_, &feature_levels_out,
+      &d3d11_device_context_);
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "Failed to create D3D11 device for decode output.";
+    return false;
+  }
+
+  if (d3d11_device_) {
+    hr = d3d11_device_->QueryInterface(__uuidof(ID3D11VideoDevice),
+                                       (void**)&d3d11_video_device_);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR) << "Failed to get video device from D3D11 device.";
+      return false;
+    }
+  }
+  if (d3d11_device_context_) {
+    hr = d3d11_device_context_->QueryInterface(__uuidof(ID3D11VideoContext),
+                                               (void**)&d3d11_video_context_);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR)
+          << "Failed to get video context from D3D11 device context.";
+      return false;
+    }
+  }
+  return true;
+}
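Configure() sizes ffmpeg_buffer_pool_ from the decoder settings; a short sketch of the pool behavior the decoder relies on (assuming WebRTC's VideoFrameBufferPool API, the same one the patch calls):

  #include "api/video/i444_buffer.h"
  #include "common_video/include/video_frame_buffer_pool.h"

  void PoolSketch() {
    webrtc::VideoFrameBufferPool pool(/*zero_initialize=*/true);
    rtc::scoped_refptr<webrtc::I444Buffer> buf =
        pool.CreateI444Buffer(640, 360);
    // ... write planes via buf->MutableDataY()/U()/V() ...
    buf = nullptr;  // dropping the last outside ref makes the slot reusable,
                    // so steady-state decoding does not allocate per frame
  }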
+int32_t FFMpegDecoderImpl::Release() {
+  av_context_.reset();
+  av_frame_.reset();
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int32_t FFMpegDecoderImpl::RegisterDecodeCompleteCallback(
+    webrtc::DecodedImageCallback* callback) {
+  decoded_image_callback_ = callback;
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+void FFMpegDecoderImpl::CreateStagingTextureIfNeeded(int width, int height) {
+  HRESULT hr = S_OK;
+  D3D11_TEXTURE2D_DESC desc = {0};
+  if (staging_texture_) {
+    D3D11_TEXTURE2D_DESC desc = {0};
+    staging_texture_->GetDesc(&desc);
+    if (desc.Width != (unsigned int)width ||
+        desc.Height != (unsigned int)height) {
+      staging_texture_.Release();
+    } else {
+      goto output;
+    }
+  }
+
+  desc.Width = (unsigned int)width;
+  desc.Height = (unsigned int)height;
+  desc.MipLevels = 1;
+  desc.ArraySize = 1;
+  desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+  desc.SampleDesc.Count = 1;
+  desc.SampleDesc.Quality = 0;
+  desc.Usage = D3D11_USAGE_STAGING;
+  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
+  desc.MiscFlags = 0;
+  desc.BindFlags = 0;
+
+  d3d11_device_->CreateTexture2D(&desc, nullptr, &staging_texture_);
+
+output:
+  D3D11_TEXTURE2D_DESC output_desc = {0};
+  staging_texture_->GetDesc(&output_desc);
+  if (output_texture_) {
+    D3D11_TEXTURE2D_DESC orig_desc = {0};
+    output_texture_->GetDesc(&orig_desc);
+    if (orig_desc.Width != (unsigned int)width ||
+        orig_desc.Height != (unsigned int)height) {
+      output_texture_.Release();
+    } else {
+      return;
+    }
+  }
+  output_desc.Usage = D3D11_USAGE_DEFAULT;
+  output_desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
+  output_desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+  d3d11_device_->CreateTexture2D(&output_desc, nullptr, &output_texture_);
+
+  return;
+}
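The two textures created above exist because D3D11 separates CPU and GPU access: only a STAGING texture can be mapped for CPU writes, while sharing with a renderer requires a DEFAULT-usage texture. A hypothetical helper showing the same upload pattern Decode() performs (names are illustrative, not part of this patch):

  #include <d3d11.h>
  #include <cstring>

  void UploadArgb(ID3D11DeviceContext* ctx, ID3D11Texture2D* staging,
                  ID3D11Texture2D* output, const uint8_t* argb, int width,
                  int height) {
    D3D11_MAPPED_SUBRESOURCE mapped = {};
    if (FAILED(ctx->Map(staging, 0, D3D11_MAP_WRITE, 0, &mapped)))
      return;
    // Copy row by row: the mapped texture's RowPitch may exceed width * 4.
    for (int y = 0; y < height; ++y)
      memcpy(static_cast<uint8_t*>(mapped.pData) + y * mapped.RowPitch,
             argb + y * width * 4, width * 4);
    ctx->Unmap(staging, 0);
    ctx->CopyResource(output, staging);  // GPU copy into the shared texture
  }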
+int32_t FFMpegDecoderImpl::Decode(const webrtc::EncodedImage& input_image,
+                                  bool /*missing_frames*/,
+                                  int64_t /*render_time_ms*/) {
+  if (!IsInitialized()) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (!decoded_image_callback_) {
+    RTC_LOG(LS_WARNING)
+        << "Configure() has been called, but a callback function "
+           "has not been set with RegisterDecodeCompleteCallback()";
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (!input_image.data() || !input_image.size()) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  int64_t decode_start_time = clock_->CurrentTime().ms_or(0);
+
+  ScopedAVPacket packet = MakeScopedAVPacket();
+  if (!packet) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  // `packet->data` has a non-const type, but isn't modified by
+  // `avcodec_send_packet`.
+  packet->data = const_cast<uint8_t*>(input_image.data());
+  if (input_image.size() >
+      static_cast<size_t>(std::numeric_limits<int>::max())) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  packet->size = static_cast<int>(input_image.size());
+  int64_t frame_timestamp_us = input_image.ntp_time_ms_ * 1000;  // ms -> μs
+  av_context_->reordered_opaque = frame_timestamp_us;
+
+  int result = avcodec_send_packet(av_context_.get(), packet.get());
+
+  if (result < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_send_packet error: " << result;
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  result = avcodec_receive_frame(av_context_.get(), av_frame_.get());
+  if (result < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_receive_frame error: " << result;
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  // We don't expect reordering. Decoded frame timestamp should match
+  // the input one.
+  RTC_DCHECK_EQ(av_frame_->reordered_opaque, frame_timestamp_us);
+
+  // Maybe it is possible to get QP directly from FFmpeg.
+  h265_bitstream_parser_.ParseBitstream(input_image);
+  absl::optional<int> qp = h265_bitstream_parser_.GetLastSliceQp();
+
+  // Obtain the `video_frame` containing the decoded image.
+  webrtc::VideoFrame* input_frame = static_cast<webrtc::VideoFrame*>(
+      av_buffer_get_opaque(av_frame_->buf[0]));
+  RTC_DCHECK(input_frame);
+  rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+      input_frame->video_frame_buffer();
+
+  // Instantiate the planar YUV buffer according to the video frame buffer
+  // type.
+  const webrtc::PlanarYuvBuffer* planar_yuv_buffer = nullptr;
+  const webrtc::PlanarYuv8Buffer* planar_yuv8_buffer = nullptr;
+  const webrtc::PlanarYuv16BBuffer* planar_yuv16_buffer = nullptr;
+  webrtc::VideoFrameBuffer::Type video_frame_buffer_type =
+      frame_buffer->type();
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI420:
+      planar_yuv_buffer = frame_buffer->GetI420();
+      planar_yuv8_buffer =
+          reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer);
+      break;
+    case webrtc::VideoFrameBuffer::Type::kI444:
+      planar_yuv_buffer = frame_buffer->GetI444();
+      planar_yuv8_buffer =
+          reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer);
+      break;
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  // When needed, FFmpeg applies cropping by moving plane pointers and
+  // adjusting frame width/height. Ensure that cropped buffers lie within the
+  // allocated memory.
+  RTC_DCHECK_LE(av_frame_->width, planar_yuv_buffer->width());
+  RTC_DCHECK_LE(av_frame_->height, planar_yuv_buffer->height());
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI420:
+    case webrtc::VideoFrameBuffer::Type::kI444: {
+      RTC_DCHECK_GE(av_frame_->data[kYPlaneIndex],
+                    planar_yuv8_buffer->DataY());
+      RTC_DCHECK_LE(
+          av_frame_->data[kYPlaneIndex] +
+              av_frame_->linesize[kYPlaneIndex] * av_frame_->height,
+          planar_yuv8_buffer->DataY() +
+              planar_yuv8_buffer->StrideY() * planar_yuv8_buffer->height());
+      RTC_DCHECK_GE(av_frame_->data[kUPlaneIndex],
+                    planar_yuv8_buffer->DataU());
+      RTC_DCHECK_LE(
+          av_frame_->data[kUPlaneIndex] +
+              av_frame_->linesize[kUPlaneIndex] *
+                  planar_yuv8_buffer->ChromaHeight(),
+          planar_yuv8_buffer->DataU() +
+              planar_yuv8_buffer->StrideU() *
+                  planar_yuv8_buffer->ChromaHeight());
+      RTC_DCHECK_GE(av_frame_->data[kVPlaneIndex],
+                    planar_yuv8_buffer->DataV());
+      RTC_DCHECK_LE(
+          av_frame_->data[kVPlaneIndex] +
+              av_frame_->linesize[kVPlaneIndex] *
+                  planar_yuv8_buffer->ChromaHeight(),
+          planar_yuv8_buffer->DataV() +
+              planar_yuv8_buffer->StrideV() *
+                  planar_yuv8_buffer->ChromaHeight());
+      break;
+    }
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  CreateStagingTextureIfNeeded(av_frame_->width, av_frame_->height);
+  D3D11_MAPPED_SUBRESOURCE sub_resource = {0};
+  HRESULT hr = d3d11_device_context_->Map(staging_texture_, 0,
+                                          D3D11_MAP_READ_WRITE, 0,
+                                          &sub_resource);
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "Failed to map texture.";
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI444:
+      libyuv::I444ToARGB(
+          av_frame_->data[kYPlaneIndex], av_frame_->linesize[kYPlaneIndex],
+          av_frame_->data[kUPlaneIndex], av_frame_->linesize[kUPlaneIndex],
+          av_frame_->data[kVPlaneIndex], av_frame_->linesize[kVPlaneIndex],
+          static_cast<uint8_t*>(sub_resource.pData), sub_resource.RowPitch,
+          av_frame_->width, av_frame_->height);
+      break;
+    case webrtc::VideoFrameBuffer::Type::kI420:
+      libyuv::I420ToARGB(
+          av_frame_->data[kYPlaneIndex], av_frame_->linesize[kYPlaneIndex],
+          av_frame_->data[kUPlaneIndex], av_frame_->linesize[kUPlaneIndex],
+          av_frame_->data[kVPlaneIndex], av_frame_->linesize[kVPlaneIndex],
+          static_cast<uint8_t*>(sub_resource.pData), sub_resource.RowPitch,
+          av_frame_->width, av_frame_->height);
+      break;
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " conversion is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  d3d11_device_context_->Unmap(staging_texture_, 0);
+
+  d3d11_device_context_->CopyResource(output_texture_, staging_texture_);
+
+  surface_handle_->texture = output_texture_.p;
+  surface_handle_->d3d11_device = d3d11_device_.p;
+  surface_handle_->d3d11_video_device = d3d11_video_device_.p;
+  surface_handle_->context = d3d11_video_context_.p;
+  surface_handle_->array_index = 0;
+  surface_handle_->side_data_size = 0;
+  surface_handle_->cursor_data_size = 0;
+  surface_handle_->decode_start = decode_start_time;
+  surface_handle_->decode_end = clock_->CurrentTime().ms_or(0);
+  surface_handle_->start_duration = input_image.bwe_stats_.start_duration_;
+  surface_handle_->last_duration = input_image.bwe_stats_.last_duration_;
+  surface_handle_->packet_loss = input_image.bwe_stats_.packets_lost_;
+  surface_handle_->frame_size = input_image.size();
+  rtc::scoped_refptr<NativeHandleBuffer> buffer =
+      rtc::make_ref_counted<NativeHandleBuffer>(
+          (void*)surface_handle_.get(), av_frame_->width, av_frame_->height);
+  webrtc::VideoFrame decoded_frame(buffer, input_image.Timestamp(), 0,
+                                   webrtc::kVideoRotation_0);
+  decoded_frame.set_ntp_time_ms(input_image.ntp_time_ms_);
+  decoded_frame.set_timestamp(input_image.Timestamp());
+  decoded_image_callback_->Decoded(decoded_frame, absl::nullopt, qp);
+
+  // Stop referencing it, possibly freeing `input_frame`.
+  av_frame_unref(av_frame_.get());
+  input_frame = nullptr;
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
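Note that Decode() above assumes strict one-in/one-out: any error from avcodec_receive_frame(), including AVERROR(EAGAIN), is reported as a failure. That holds with thread_count = 1 and no reordering, but the canonical FFmpeg consumption loop is more tolerant. For contrast, a self-contained sketch (illustrative, not part of this patch):

  extern "C" {
  #include "third_party/ffmpeg/libavcodec/avcodec.h"
  }

  // Returns 0 on success or drain, a negative AVERROR on real failure.
  static int DecodePacket(AVCodecContext* ctx, const AVPacket* pkt,
                          AVFrame* frame) {
    int err = avcodec_send_packet(ctx, pkt);
    if (err < 0)
      return err;
    for (;;) {
      err = avcodec_receive_frame(ctx, frame);
      if (err == AVERROR(EAGAIN) || err == AVERROR_EOF)
        return 0;    // decoder needs more input / is fully drained
      if (err < 0)
        return err;  // genuine decode error
      // ... hand `frame` to the consumer ...
      av_frame_unref(frame);
    }
  }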
+const char* FFMpegDecoderImpl::ImplementationName() const {
+  return "FFmpegDecoder";
+}
+
+bool FFMpegDecoderImpl::IsInitialized() const {
+  return av_context_ != nullptr;
+}
+
+void FFMpegDecoderImpl::ReportInit() {
+  if (has_reported_init_)
+    return;
+  has_reported_init_ = true;
+}
+
+void FFMpegDecoderImpl::ReportError() {
+  if (has_reported_error_)
+    return;
+  has_reported_error_ = true;
+}
+
+}  // namespace base
+}  // namespace owt
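One layout detail worth recording: libyuv's "ARGB" formats store bytes as B, G, R, A in memory ("ARGB" read as a little-endian 32-bit word), which is exactly the DXGI_FORMAT_B8G8R8A8_UNORM used for the staging texture, so no channel swizzle is needed. A tiny sanity check (illustrative values, approximately pure red in BT.601):

  #include "third_party/libyuv/include/libyuv/convert_argb.h"

  void PixelOrderCheck() {
    uint8_t y = 81, u = 90, v = 240;  // ~pure red in BT.601
    uint8_t argb[4] = {0, 0, 0, 0};
    libyuv::I444ToARGB(&y, 1, &u, 1, &v, 1, argb, 4, 1, 1);
    // In memory: argb[0] = B (~0), argb[1] = G (~0), argb[2] = R (~255),
    // argb[3] = A (255), matching DXGI_FORMAT_B8G8R8A8_UNORM.
  }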
diff --git a/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h b/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h
new file mode 100644
index 000000000..3bf2605ce
--- /dev/null
+++ b/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h
@@ -0,0 +1,102 @@
+// Copyright (C) <2023> Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
+#define OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
+
+#include <atlbase.h>
+
+#include <d3d11.h>
+#include <dxgi1_2.h>
+#include <memory>
+
+#include "modules/video_coding/codecs/h265/include/h265_globals.h"
+
+extern "C" {
+#include "third_party/ffmpeg/libavcodec/avcodec.h"
+}  // extern "C"
+
+#include "api/video_codecs/video_decoder.h"
+#include "common_video/h265/h265_bitstream_parser.h"
+#include "common_video/include/video_frame_buffer_pool.h"
+
+#include "talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h"
+
+namespace webrtc {
+class Clock;
+}
+
+namespace owt {
+namespace base {
+
+struct AVCodecContextDeleter {
+  void operator()(AVCodecContext* ptr) const { avcodec_free_context(&ptr); }
+};
+struct AVFrameDeleter {
+  void operator()(AVFrame* ptr) const { av_frame_free(&ptr); }
+};
+
+class FFMpegDecoderImpl : public webrtc::VideoDecoder {
+ public:
+  FFMpegDecoderImpl();
+  ~FFMpegDecoderImpl() override;
+
+  bool Configure(const webrtc::VideoDecoder::Settings& settings) override;
+  int32_t Release() override;
+
+  int32_t RegisterDecodeCompleteCallback(
+      webrtc::DecodedImageCallback* callback) override;
+
+  // `missing_frames` and `render_time_ms` are ignored.
+  int32_t Decode(const webrtc::EncodedImage& input_image,
+                 bool /*missing_frames*/,
+                 int64_t render_time_ms = -1) override;
+
+  const char* ImplementationName() const override;
+
+ private:
+  // Called by FFmpeg when it needs a frame buffer to store decoded frames in.
+  // The `VideoFrame`s returned by FFmpeg at `Decode` originate from here.
+  // Their buffers are reference counted and freed by FFmpeg using
+  // `AVFreeBuffer2`.
+  static int AVGetBuffer2(AVCodecContext* context,
+                          AVFrame* av_frame,
+                          int flags);
+  // Called by FFmpeg when it is done with a video frame, see `AVGetBuffer2`.
+  static void AVFreeBuffer2(void* opaque, uint8_t* data);
+
+  void CreateStagingTextureIfNeeded(int width, int height);
+
+  bool IsInitialized() const;
+
+  // Reports statistics with histograms.
+  void ReportInit();
+  void ReportError();
+
+  // Used by FFmpeg via `AVGetBuffer2()` to allocate I420/I444 images.
+  webrtc::VideoFrameBufferPool ffmpeg_buffer_pool_;
+  std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
+  std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_;
+  CComPtr<ID3D11Device> d3d11_device_;
+  CComPtr<ID3D11DeviceContext> d3d11_device_context_;
+  CComPtr<ID3D11VideoDevice> d3d11_video_device_;
+  CComPtr<ID3D11VideoContext> d3d11_video_context_;
+  CComQIPtr<IDXGIAdapter> m_padapter_;
+  CComPtr<IDXGIFactory2> m_pdxgi_factory_;
+  CComPtr<ID3D11Texture2D> staging_texture_;
+  CComPtr<ID3D11Texture2D> output_texture_;
+  std::unique_ptr<D3D11VAHandle> surface_handle_;
+
+  webrtc::DecodedImageCallback* decoded_image_callback_;
+
+  bool has_reported_init_;
+  bool has_reported_error_;
+
+  webrtc::H265BitstreamParser h265_bitstream_parser_;
+  webrtc::Clock* clock_ = nullptr;
+};
+
+}  // namespace base
+}  // namespace owt
+
+#endif  // OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
\ No newline at end of file
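The deleter structs above are what make the raw FFmpeg allocations safe to hold in unique_ptr; a usage sketch (assuming the deleters declared in this header):

  std::unique_ptr<AVCodecContext, AVCodecContextDeleter> ctx(
      avcodec_alloc_context3(nullptr));  // freed via avcodec_free_context()
  std::unique_ptr<AVFrame, AVFrameDeleter> frame(av_frame_alloc());
  // Both are released automatically in reverse declaration order, exactly
  // what FFMpegDecoderImpl::Release() triggers by resetting the members.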
diff --git a/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h b/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
index 2b8dc29fa..15035918b 100644
--- a/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
+++ b/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
@@ -96,6 +96,14 @@ class OWT_EXPORT GlobalConfiguration {
     flex_fec_enabled_ = enabled;
   }
 
+  /**
+   @brief This enables decoding of HEVC range extension video. Disabled by
+   default. When enabled, the FFmpeg software decoder is used for HEVC.
+  */
+  static void SetRangeExtensionEnabled(bool enabled) {
+    range_extension_enabled_ = enabled;
+  }
+
   /**
    @brief Set the global bitrate limits applied to external BWE. If any of
    the value is set to 0, will use the stack default for that.
@@ -311,6 +319,9 @@ class OWT_EXPORT GlobalConfiguration {
   }
   static bool flex_fec_enabled_;
 
+  static bool GetRangeExtensionEnabled() { return range_extension_enabled_; }
+  static bool range_extension_enabled_;
+
   static void GetBweRateLimits(int& start_bitrate_kbps,
                                int& min_bitrate_kbps,
                                int& max_bitrate_kbps) {
diff --git a/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h b/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
index a597108ec..d353e95ca 100755
--- a/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
+++ b/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
@@ -21,14 +21,15 @@ enum class VideoBufferType {
   kI420,
   kARGB,
   kD3D11,  // Format self-described.
+  kI444,
 };
 enum class VideoRendererType {
   kI420,
   kARGB,
   kD3D11,  // Format self-described.
+  kI444,
 };
-
 #if defined(WEBRTC_WIN)
 struct OWT_EXPORT D3D11ImageHandle {
   ID3D11Device* d3d11_device;
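Finally, a hypothetical application-side sink for the new kI444 buffer type, matching the packed plane layout produced by WebrtcVideoRendererImpl above (interface and member names as used elsewhere in this patch; the payload is three consecutive width*height planes):

  #include "owt/base/videorendererinterface.h"

  class I444Renderer : public owt::base::VideoRendererInterface {
   public:
    void RenderFrame(std::unique_ptr<owt::base::VideoBuffer> buffer) override {
      if (buffer->type != owt::base::VideoBufferType::kI444)
        return;
      const size_t plane_size =
          buffer->resolution.width * buffer->resolution.height;
      const uint8_t* y = buffer->buffer;  // plane 0
      const uint8_t* u = y + plane_size;  // plane 1
      const uint8_t* v = u + plane_size;  // plane 2
      // ... feed y/u/v to the application's display path ...
    }
    owt::base::VideoRendererType Type() override {
      return owt::base::VideoRendererType::kI444;
    }
  };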