From 0189c5d825a6754d88acb4fc2434d9faff03f63b Mon Sep 17 00:00:00 2001
From: Qiu Jianlin
Date: Mon, 18 Sep 2023 16:45:33 +0800
Subject: [PATCH] Experimental support for decoding into YUV444P by SW FFmpeg
 decoders.

Applications must call GlobalConfiguration::SetRangeExtensionEnabled(true)
to allow OWT to switch to the SW HEVC decoder. By default, range extension
decoding of HEVC is disabled.
---
 talk/owt/BUILD.gn                             |   2 +
 talk/owt/sdk/base/globalconfiguration.cc      |   1 +
 .../base/peerconnectiondependencyfactory.cc   |   3 +
 talk/owt/sdk/base/webrtcvideorendererimpl.cc  |  30 +-
 talk/owt/sdk/base/win/d3d11_video_decoder.cc  |  33 +-
 .../base/win/externalvideodecoderfactory.cc   |  10 +-
 .../base/win/externalvideodecoderfactory.h    |   1 +
 talk/owt/sdk/base/win/ffmpeg_decoder_impl.cc  | 570 ++++++++++++++++++
 talk/owt/sdk/base/win/ffmpeg_decoder_impl.h   | 102 ++++
 .../cpp/owt/base/globalconfiguration.h        |  11 +
 .../cpp/owt/base/videorendererinterface.h     |   3 +-
 11 files changed, 760 insertions(+), 6 deletions(-)
 create mode 100644 talk/owt/sdk/base/win/ffmpeg_decoder_impl.cc
 create mode 100644 talk/owt/sdk/base/win/ffmpeg_decoder_impl.h

diff --git a/talk/owt/BUILD.gn b/talk/owt/BUILD.gn
index 22da5d5ab..98989d041 100644
--- a/talk/owt/BUILD.gn
+++ b/talk/owt/BUILD.gn
@@ -374,6 +374,8 @@ static_library("owt_sdk_base") {
     sources += [
       "sdk/base/win/d3d11_video_decoder.cc",
       "sdk/base/win/d3d11_video_decoder.h",
+      "sdk/base/win/ffmpeg_decoder_impl.cc",
+      "sdk/base/win/ffmpeg_decoder_impl.h",
     ]
   }
   defines += [ "OWT_USE_FFMPEG" ]
diff --git a/talk/owt/sdk/base/globalconfiguration.cc b/talk/owt/sdk/base/globalconfiguration.cc
index 299c79493..6cab14129 100644
--- a/talk/owt/sdk/base/globalconfiguration.cc
+++ b/talk/owt/sdk/base/globalconfiguration.cc
@@ -12,6 +12,7 @@ bool GlobalConfiguration::hardware_acceleration_enabled_ = true;
 ID3D11Device* GlobalConfiguration::d3d11_decoding_device_ = nullptr;
 #endif
 bool GlobalConfiguration::flex_fec_enabled_ = false;
+bool GlobalConfiguration::range_extension_enabled_ = false;
 int GlobalConfiguration::link_mtu_ = 0;  // not set;
 int GlobalConfiguration::min_port_ = 0;  // not set;
 int GlobalConfiguration::max_port_ = 0;  // not set;
diff --git a/talk/owt/sdk/base/peerconnectiondependencyfactory.cc b/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
index 6f26d32b2..af9a77d3d 100644
--- a/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
+++ b/talk/owt/sdk/base/peerconnectiondependencyfactory.cc
@@ -149,6 +149,9 @@ void PeerConnectionDependencyFactory::
   if (GlobalConfiguration::GetFlexFecEnabled()) {
     field_trial_ += "OWT-FlexFEC/Enabled/";
   }
+  if (GlobalConfiguration::GetRangeExtensionEnabled()) {
+    field_trial_ += "OWT-RangeExtension/Enabled/";
+  }
   int delay_bwe_weight = GlobalConfiguration::GetDelayBasedBweWeight();
   field_trial_ +=
       "OWT-DelayBweWeight/" + std::to_string(delay_bwe_weight) + "/";
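Usage note (not part of the diff): the factory above only assembles the field-trial string; the opt-in comes from the embedding application. A minimal sketch of the intended call order, using the API this patch adds (the include path is an assumption based on the SDK layout):

  #include "owt/base/globalconfiguration.h"

  void EnableHevcRangeExtension() {
    // Must run before the first peer connection is created, because the
    // field-trial string is assembled in PeerConnectionDependencyFactory.
    // This routes HEVC decoding to the FFmpeg SW decoder (YUV444P capable)
    // instead of the D3D11 HW path.
    owt::base::GlobalConfiguration::SetRangeExtensionEnabled(true);
  }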
diff --git a/talk/owt/sdk/base/webrtcvideorendererimpl.cc b/talk/owt/sdk/base/webrtcvideorendererimpl.cc
index 00c55de16..d065bf081 100644
--- a/talk/owt/sdk/base/webrtcvideorendererimpl.cc
+++ b/talk/owt/sdk/base/webrtcvideorendererimpl.cc
@@ -9,6 +9,7 @@
 #include
 #include
 #endif
+#include "rtc_base/logging.h"
 #include "talk/owt/sdk/base/nativehandlebuffer.h"
 #include "talk/owt/sdk/base/webrtcvideorendererimpl.h"
 #if defined(WEBRTC_WIN)
@@ -71,13 +72,18 @@ void WebrtcVideoRendererImpl::OnFrame(const webrtc::VideoFrame& frame) {
         (uint8_t*)render_ptr, resolution, VideoBufferType::kD3D11});
     renderer_.RenderFrame(std::move(video_buffer));
+    return;
 #else
     return;
 #endif
   }
+
+  // Non-native buffer. Only when both the buffer and the renderer are I444
+  // do we make a direct copy; otherwise we convert to the renderer's type.
   VideoRendererType renderer_type = renderer_.Type();
   if (renderer_type != VideoRendererType::kI420 &&
-      renderer_type != VideoRendererType::kARGB)
+      renderer_type != VideoRendererType::kARGB &&
+      renderer_type != VideoRendererType::kI444)
     return;
   Resolution resolution(frame.width(), frame.height());
   if (renderer_type == VideoRendererType::kARGB) {
@@ -87,6 +93,28 @@ void WebrtcVideoRendererImpl::OnFrame(const webrtc::VideoFrame& frame) {
     std::unique_ptr<VideoBuffer> video_buffer(
         new VideoBuffer{buffer, resolution, VideoBufferType::kARGB});
     renderer_.RenderFrame(std::move(video_buffer));
+  } else if (renderer_type == VideoRendererType::kI444 &&
+             frame.video_frame_buffer()->type() ==
+                 webrtc::VideoFrameBuffer::Type::kI444) {
+    // Assumes stride equals width (this may not always hold).
+    uint8_t* buffer = new uint8_t[resolution.width * resolution.height * 3];
+    rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+        frame.video_frame_buffer();
+    const webrtc::PlanarYuv8Buffer* planar_yuv_buffer =
+        reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(
+            frame_buffer->GetI444());
+    size_t data_ptr = 0;
+    memcpy(buffer, planar_yuv_buffer->DataY(),
+           planar_yuv_buffer->StrideY() * planar_yuv_buffer->height());
+    data_ptr += planar_yuv_buffer->StrideY() * planar_yuv_buffer->height();
+    memcpy(buffer + data_ptr, planar_yuv_buffer->DataU(),
+           planar_yuv_buffer->StrideU() * planar_yuv_buffer->height());
+    data_ptr += planar_yuv_buffer->StrideU() * planar_yuv_buffer->height();
+    memcpy(buffer + data_ptr, planar_yuv_buffer->DataV(),
+           planar_yuv_buffer->StrideV() * planar_yuv_buffer->height());
+    std::unique_ptr<VideoBuffer> video_buffer(
+        new VideoBuffer{buffer, resolution, VideoBufferType::kI444});
+    renderer_.RenderFrame(std::move(video_buffer));
   } else {
     uint8_t* buffer = new uint8_t[resolution.width * resolution.height * 3 / 2];
     webrtc::ConvertFromI420(frame, webrtc::VideoType::kI420, 0,
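The direct-copy branch above assumes tightly packed planes (stride equal to width). If a decoder ever hands out padded I444 buffers, a stride-aware copy is needed; a sketch using libyuv::CopyPlane (PackI444 is a hypothetical helper, not part of this patch):

  #include "third_party/libyuv/include/libyuv/planar_functions.h"

  // Packs a possibly padded I444 buffer into a tight width*height*3 blob,
  // copying row by row so source strides larger than width are handled.
  void PackI444(const webrtc::PlanarYuv8Buffer* src, uint8_t* dst, int width,
                int height) {
    libyuv::CopyPlane(src->DataY(), src->StrideY(), dst, width, width, height);
    dst += width * height;
    libyuv::CopyPlane(src->DataU(), src->StrideU(), dst, width, width, height);
    dst += width * height;
    libyuv::CopyPlane(src->DataV(), src->StrideV(), dst, width, width, height);
  }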
diff --git a/talk/owt/sdk/base/win/d3d11_video_decoder.cc b/talk/owt/sdk/base/win/d3d11_video_decoder.cc
index 4df7d0849..6c1611a9e 100644
--- a/talk/owt/sdk/base/win/d3d11_video_decoder.cc
+++ b/talk/owt/sdk/base/win/d3d11_video_decoder.cc
@@ -310,11 +310,24 @@ int32_t D3D11VideoDecoder::Decode(const webrtc::EncodedImage& input_image,
       goto fail;
     }
 
+    if (!frame) {
+      RTC_LOG(LS_ERROR) << "Failed to decode current frame.";
+      goto fail;
+    }
+
+    int width = frame->width;
+    int height = frame->height;
+
+    VideoFrame* input_frame =
+        static_cast<VideoFrame*>(av_buffer_get_opaque(frame->buf[0]));
+    RTC_DCHECK(input_frame);
+
+    rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+        input_frame->video_frame_buffer();
+
     // We get one frame from the decoder.
-    if (frame != nullptr && frame->format == hw_pix_fmt) {
+    if (frame->format != AV_PIX_FMT_YUV444P) {
       ID3D11Texture2D* texture = (ID3D11Texture2D*)frame->data[0];
-      int width = frame->width;
-      int height = frame->height;
       int index = (intptr_t)frame->data[1];
 
       D3D11_TEXTURE2D_DESC texture_desc;
@@ -380,6 +393,20 @@ int32_t D3D11VideoDecoder::Decode(const webrtc::EncodedImage& input_image,
       }
 
       av_frame_free(&frame);
+    } else {  // YUV444P, which will be a software frame.
+      rtc::scoped_refptr<webrtc::VideoFrameBuffer> dst_buffer;
+      dst_buffer =
+          WrapI444Buffer(width, height, frame->data[0], frame->linesize[0],
+                         frame->data[1], frame->linesize[1], frame->data[2],
+                         frame->linesize[2], [frame_buffer] {});
+      webrtc::VideoFrame decoded_frame(dst_buffer, input_image.Timestamp(), 0,
+                                       webrtc::kVideoRotation_0);
+      decoded_frame.set_ntp_time_ms(input_image.ntp_time_ms_);
+      decoded_frame.set_timestamp(input_image.Timestamp());
+      if (decoded_image_callback_) {
+        decoded_image_callback_->Decoded(decoded_frame);
+      }
+      av_frame_free(&frame);
     }
   }
   return WEBRTC_VIDEO_CODEC_OK;
diff --git a/talk/owt/sdk/base/win/externalvideodecoderfactory.cc b/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
index 61cb1db16..c606d3dc7 100644
--- a/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
+++ b/talk/owt/sdk/base/win/externalvideodecoderfactory.cc
@@ -13,10 +13,12 @@
 #include "talk/owt/sdk/base/codecutils.h"
 #ifdef OWT_USE_FFMPEG
 #include "talk/owt/sdk/base/win/d3d11_video_decoder.h"
+#include "talk/owt/sdk/base/win/ffmpeg_decoder_impl.h"
 #endif
 #ifdef OWT_USE_MSDK
 #include "talk/owt/sdk/base/win/msdkvideodecoder.h"
 #endif
+#include "system_wrappers/include/field_trial.h"
 #include "webrtc/rtc_base/checks.h"
 #include "webrtc/rtc_base/logging.h"
 
@@ -24,6 +26,8 @@ namespace owt {
 namespace base {
 ExternalVideoDecoderFactory::ExternalVideoDecoderFactory(ID3D11Device* d3d11_device_external) {
+  range_extension_enabled_ =
+      webrtc::field_trial::IsEnabled("OWT-RangeExtension");
   supported_codec_types_.clear();
 
   bool is_vp8_hw_supported = false, is_vp9_hw_supported = false;
@@ -109,7 +113,11 @@ ExternalVideoDecoderFactory::CreateVideoDecoder(
   }
   if (vp8_hw || vp9_hw || h264_hw || h265_hw || av1_hw) {
 #if defined(OWT_USE_FFMPEG)
-    return owt::base::D3D11VideoDecoder::Create(cricket::VideoCodec(format));
+    if (range_extension_enabled_) {
+      return std::make_unique<FFMpegDecoderImpl>();
+    } else {
+      return owt::base::D3D11VideoDecoder::Create(cricket::VideoCodec(format));
+    }
 #endif
 #if defined(OWT_USE_MSDK)
     return owt::base::MSDKVideoDecoder::Create(cricket::VideoCodec(format));
diff --git a/talk/owt/sdk/base/win/externalvideodecoderfactory.h b/talk/owt/sdk/base/win/externalvideodecoderfactory.h
index 490170025..9be33146e 100644
--- a/talk/owt/sdk/base/win/externalvideodecoderfactory.h
+++ b/talk/owt/sdk/base/win/externalvideodecoderfactory.h
@@ -35,6 +35,7 @@ class ExternalVideoDecoderFactory : public webrtc::VideoDecoderFactory {
  private:
   std::vector<webrtc::VideoCodecType> supported_codec_types_;
   ID3D11Device* external_device_ = nullptr;
+  bool range_extension_enabled_ = false;
 };
 }  // namespace base
 }  // namespace owt
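A note on lifetime in the YUV444P branch above: WrapI444Buffer does not copy the plane data, so the seemingly empty lambda `[frame_buffer] {}` is doing real work. The captured scoped_refptr is held until WebRTC drops the last reference to the wrapped buffer and invokes the callback. A sketch of the pattern, assuming WrapI444Buffer's callback runs when the wrapper is destroyed:

  // Capturing `frame_buffer` by value pins the decoder-owned planes for as
  // long as the wrapped I444 buffer is referenced anywhere in the pipeline.
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> backing =
      input_frame->video_frame_buffer();
  auto wrapped = webrtc::WrapI444Buffer(
      width, height, frame->data[0], frame->linesize[0], frame->data[1],
      frame->linesize[1], frame->data[2], frame->linesize[2],
      /*no_longer_used=*/[backing] {});  // releases `backing` when run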
"rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#include "webrtc/system_wrappers/include/clock.h" + +#include "talk/owt/sdk/base/nativehandlebuffer.h" + +namespace owt { +namespace base { + +namespace { + +constexpr std::array kPixelFormatsSupported = { + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P}; +constexpr size_t kYPlaneIndex = 0; +constexpr size_t kUPlaneIndex = 1; +constexpr size_t kVPlaneIndex = 2; + +struct ScopedPtrAVFreePacket { + void operator()(AVPacket* packet) { av_packet_free(&packet); } +}; +typedef std::unique_ptr ScopedAVPacket; + +ScopedAVPacket MakeScopedAVPacket() { + ScopedAVPacket packet(av_packet_alloc()); + return packet; +} + +} // namespace + +int FFMpegDecoderImpl::AVGetBuffer2(AVCodecContext* context, + AVFrame* av_frame, + int flags) { + // Set in `Configure`. + FFMpegDecoderImpl* decoder = static_cast(context->opaque); + // DCHECK values set in `Configure`. + RTC_DCHECK(decoder); + // Necessary capability to be allowed to provide our own buffers. + RTC_DCHECK(context->codec->capabilities | AV_CODEC_CAP_DR1); + + auto pixelFormatSupported = std::find_if( + kPixelFormatsSupported.begin(), kPixelFormatsSupported.end(), + [context](AVPixelFormat format) { return context->pix_fmt == format; }); + + RTC_CHECK(pixelFormatSupported != kPixelFormatsSupported.end()); + + // `av_frame->width` and `av_frame->height` are set by FFmpeg. These are the + // actual image's dimensions and may be different from `context->width` and + // `context->coded_width` due to reordering. + int width = av_frame->width; + int height = av_frame->height; + // See `lowres`, if used the decoder scales the image by 1/2^(lowres). This + // has implications on which resolutions are valid, but we don't use it. + RTC_CHECK_EQ(context->lowres, 0); + // Adjust the `width` and `height` to values acceptable by the decoder. + // Without this, FFmpeg may overflow the buffer. If modified, `width` and/or + // `height` are larger than the actual image and the image has to be cropped + // (top-left corner) after decoding to avoid visible borders to the right and + // bottom of the actual image. + avcodec_align_dimensions(context, &width, &height); + + RTC_CHECK_GE(width, 0); + RTC_CHECK_GE(height, 0); + int ret = av_image_check_size(static_cast(width), + static_cast(height), 0, nullptr); + if (ret < 0) { + RTC_LOG(LS_ERROR) << "Invalid picture size " << width << "x" << height; + decoder->ReportError(); + return ret; + } + + // The video frame is stored in `frame_buffer`. `av_frame` is FFmpeg's version + // of a video frame and will be set up to reference `frame_buffer`'s data. + rtc::scoped_refptr frame_buffer; + rtc::scoped_refptr i444_buffer; + rtc::scoped_refptr i420_buffer; + + // TODO:We only support 8bpp formats. If 10b/12b-444 is going to be supported, + // will need to update this. + int bytes_per_pixel = 1; + switch (context->pix_fmt) { + case AV_PIX_FMT_YUV420P: + i420_buffer = + decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. 
+void FFMpegDecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
+  // The buffer pool recycles the buffer used by `video_frame` when there are
+  // no more references to it. `video_frame` is a thin buffer holder and is
+  // not recycled.
+  webrtc::VideoFrame* video_frame = static_cast<webrtc::VideoFrame*>(opaque);
+  delete video_frame;
+}
+
+FFMpegDecoderImpl::FFMpegDecoderImpl()
+    : ffmpeg_buffer_pool_(true),
+      decoded_image_callback_(nullptr),
+      has_reported_init_(false),
+      has_reported_error_(false),
+      clock_(webrtc::Clock::GetRealTimeClock()) {
+  surface_handle_.reset(new D3D11VAHandle());
+}
+
+FFMpegDecoderImpl::~FFMpegDecoderImpl() {
+  Release();
+}
+
+bool FFMpegDecoderImpl::Configure(
+    const webrtc::VideoDecoder::Settings& settings) {
+  ReportInit();
+  if (settings.codec_type() != webrtc::kVideoCodecH265) {
+    RTC_LOG(LS_ERROR) << "FFmpegDecoder only supports the H265 codec.";
+    ReportError();
+    return false;
+  }
+
+  // Release necessary in case of re-initializing.
+  int32_t ret = Release();
+  if (ret != WEBRTC_VIDEO_CODEC_OK) {
+    ReportError();
+    return false;
+  }
+  RTC_DCHECK(!av_context_);
+
+  // Initialize AVCodecContext.
+  av_context_.reset(avcodec_alloc_context3(nullptr));
+
+  av_context_->codec_type = AVMEDIA_TYPE_VIDEO;
+  av_context_->codec_id = AV_CODEC_ID_H265;
+  const webrtc::RenderResolution& resolution =
+      settings.max_render_resolution();
+  if (resolution.Valid()) {
+    av_context_->coded_width = resolution.Width();
+    av_context_->coded_height = resolution.Height();
+  }
+  av_context_->extradata = nullptr;
+  av_context_->extradata_size = 0;
+
+  // If this is ever increased, look at `av_context_->thread_safe_callbacks`
+  // and make it possible to disable the thread checker in the frame buffer
+  // pool.
+  av_context_->thread_count = 1;
+  av_context_->thread_type = FF_THREAD_SLICE;
+
+  // Function used by FFmpeg to get buffers to store decoded frames in.
+  av_context_->get_buffer2 = AVGetBuffer2;
+  // `get_buffer2` is called with the context; there `opaque` can be used to
+  // get a pointer to `this`.
+  av_context_->opaque = this;
+
+  const AVCodec* codec = avcodec_find_decoder(av_context_->codec_id);
+  if (!codec) {
+    // This is an indication that FFmpeg has not been initialized or it has
+    // not been compiled/initialized with the correct set of codecs.
+    RTC_LOG(LS_ERROR) << "FFmpeg H.265 decoder not found.";
+    Release();
+    ReportError();
+    return false;
+  }
+  int res = avcodec_open2(av_context_.get(), codec, nullptr);
+  if (res < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_open2 error: " << res;
+    Release();
+    ReportError();
+    return false;
+  }
+
+  av_frame_.reset(av_frame_alloc());
+
+  if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) {
+    if (!ffmpeg_buffer_pool_.Resize(*buffer_pool_size)) {
+      return false;
+    }
+  }
+  HRESULT hr;
+  UINT creation_flags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
+  m_padapter_ = nullptr;
+  static D3D_FEATURE_LEVEL feature_levels[] = {
+      D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1,
+      D3D_FEATURE_LEVEL_10_0};
+
+  D3D_FEATURE_LEVEL feature_levels_out;
+
+  hr = D3D11CreateDevice(
+      nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, creation_flags,
+      feature_levels, sizeof(feature_levels) / sizeof(feature_levels[0]),
+      D3D11_SDK_VERSION, &d3d11_device_, &feature_levels_out,
+      &d3d11_device_context_);
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "Failed to create D3D11 device for decode output.";
+    return false;
+  }
+
+  if (d3d11_device_) {
+    hr = d3d11_device_->QueryInterface(__uuidof(ID3D11VideoDevice),
+                                       (void**)&d3d11_video_device_);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR) << "Failed to get video device from D3D11 device.";
+      return false;
+    }
+  }
+  if (d3d11_device_context_) {
+    hr = d3d11_device_context_->QueryInterface(__uuidof(ID3D11VideoContext),
+                                               (void**)&d3d11_video_context_);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR)
+          << "Failed to get video context from D3D11 device context.";
+      return false;
+    }
+  }
+  return true;
+}
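Configure() sizes ffmpeg_buffer_pool_ from the decoder settings; a short sketch of the pool behavior the decoder relies on (assuming WebRTC's VideoFrameBufferPool API, the same one the patch calls):

  #include "api/video/i444_buffer.h"
  #include "common_video/include/video_frame_buffer_pool.h"

  void PoolSketch() {
    webrtc::VideoFrameBufferPool pool(/*zero_initialize=*/true);
    rtc::scoped_refptr<webrtc::I444Buffer> buf =
        pool.CreateI444Buffer(640, 360);
    // ... write planes via buf->MutableDataY()/U()/V() ...
    buf = nullptr;  // dropping the last outside ref makes the slot reusable,
                    // so steady-state decoding does not allocate per frame
  }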
+int32_t FFMpegDecoderImpl::Release() {
+  av_context_.reset();
+  av_frame_.reset();
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int32_t FFMpegDecoderImpl::RegisterDecodeCompleteCallback(
+    webrtc::DecodedImageCallback* callback) {
+  decoded_image_callback_ = callback;
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+void FFMpegDecoderImpl::CreateStagingTextureIfNeeded(int width, int height) {
+  HRESULT hr = S_OK;
+  D3D11_TEXTURE2D_DESC desc = {0};
+  if (staging_texture_) {
+    D3D11_TEXTURE2D_DESC desc = {0};
+    staging_texture_->GetDesc(&desc);
+    if (desc.Width != (unsigned int)width ||
+        desc.Height != (unsigned int)height) {
+      staging_texture_.Release();
+    } else {
+      goto output;
+    }
+  }
+
+  desc.Width = (unsigned int)width;
+  desc.Height = (unsigned int)height;
+  desc.MipLevels = 1;
+  desc.ArraySize = 1;
+  desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+  desc.SampleDesc.Count = 1;
+  desc.SampleDesc.Quality = 0;
+  desc.Usage = D3D11_USAGE_STAGING;
+  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
+  desc.MiscFlags = 0;
+  desc.BindFlags = 0;
+
+  d3d11_device_->CreateTexture2D(&desc, nullptr, &staging_texture_);
+
+output:
+  D3D11_TEXTURE2D_DESC output_desc = {0};
+  staging_texture_->GetDesc(&output_desc);
+  if (output_texture_) {
+    D3D11_TEXTURE2D_DESC orig_desc = {0};
+    output_texture_->GetDesc(&orig_desc);
+    if (orig_desc.Width != (unsigned int)width ||
+        orig_desc.Height != (unsigned int)height) {
+      output_texture_.Release();
+    } else {
+      return;
+    }
+  }
+  output_desc.Usage = D3D11_USAGE_DEFAULT;
+  output_desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
+  output_desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+  d3d11_device_->CreateTexture2D(&output_desc, nullptr, &output_texture_);
+
+  return;
+}
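The two textures created above exist because D3D11 separates CPU and GPU access: only a STAGING texture can be mapped for CPU writes, while sharing with a renderer requires a DEFAULT-usage texture. A hypothetical helper showing the same upload pattern Decode() performs (names are illustrative, not part of this patch):

  #include <d3d11.h>
  #include <cstring>

  void UploadArgb(ID3D11DeviceContext* ctx, ID3D11Texture2D* staging,
                  ID3D11Texture2D* output, const uint8_t* argb, int width,
                  int height) {
    D3D11_MAPPED_SUBRESOURCE mapped = {};
    if (FAILED(ctx->Map(staging, 0, D3D11_MAP_WRITE, 0, &mapped)))
      return;
    // Copy row by row: the mapped texture's RowPitch may exceed width * 4.
    for (int y = 0; y < height; ++y)
      memcpy(static_cast<uint8_t*>(mapped.pData) + y * mapped.RowPitch,
             argb + y * width * 4, width * 4);
    ctx->Unmap(staging, 0);
    ctx->CopyResource(output, staging);  // GPU copy into the shared texture
  }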
+int32_t FFMpegDecoderImpl::Decode(const webrtc::EncodedImage& input_image,
+                                  bool /*missing_frames*/,
+                                  int64_t /*render_time_ms*/) {
+  if (!IsInitialized()) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (!decoded_image_callback_) {
+    RTC_LOG(LS_WARNING)
+        << "Configure() has been called, but a callback function "
+           "has not been set with RegisterDecodeCompleteCallback()";
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (!input_image.data() || !input_image.size()) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  int64_t decode_start_time = clock_->CurrentTime().ms_or(0);
+
+  ScopedAVPacket packet = MakeScopedAVPacket();
+  if (!packet) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  // `packet->data` has a non-const type, but isn't modified by
+  // `avcodec_send_packet`.
+  packet->data = const_cast<uint8_t*>(input_image.data());
+  if (input_image.size() >
+      static_cast<size_t>(std::numeric_limits<int>::max())) {
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  packet->size = static_cast<int>(input_image.size());
+  int64_t frame_timestamp_us = input_image.ntp_time_ms_ * 1000;  // ms -> μs
+  av_context_->reordered_opaque = frame_timestamp_us;
+
+  int result = avcodec_send_packet(av_context_.get(), packet.get());
+
+  if (result < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_send_packet error: " << result;
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  result = avcodec_receive_frame(av_context_.get(), av_frame_.get());
+  if (result < 0) {
+    RTC_LOG(LS_ERROR) << "avcodec_receive_frame error: " << result;
+    ReportError();
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  // We don't expect reordering. Decoded frame timestamp should match
+  // the input one.
+  RTC_DCHECK_EQ(av_frame_->reordered_opaque, frame_timestamp_us);
+
+  // Maybe it is possible to get QP directly from FFmpeg.
+  h265_bitstream_parser_.ParseBitstream(input_image);
+  absl::optional<int> qp = h265_bitstream_parser_.GetLastSliceQp();
+
+  // Obtain the `video_frame` containing the decoded image.
+  webrtc::VideoFrame* input_frame = static_cast<webrtc::VideoFrame*>(
+      av_buffer_get_opaque(av_frame_->buf[0]));
+  RTC_DCHECK(input_frame);
+  rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer =
+      input_frame->video_frame_buffer();
+
+  // Instantiate the planar YUV buffer according to the video frame buffer
+  // type.
+  const webrtc::PlanarYuvBuffer* planar_yuv_buffer = nullptr;
+  const webrtc::PlanarYuv8Buffer* planar_yuv8_buffer = nullptr;
+  const webrtc::PlanarYuv16BBuffer* planar_yuv16_buffer = nullptr;
+  webrtc::VideoFrameBuffer::Type video_frame_buffer_type =
+      frame_buffer->type();
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI420:
+      planar_yuv_buffer = frame_buffer->GetI420();
+      planar_yuv8_buffer =
+          reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer);
+      break;
+    case webrtc::VideoFrameBuffer::Type::kI444:
+      planar_yuv_buffer = frame_buffer->GetI444();
+      planar_yuv8_buffer =
+          reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer);
+      break;
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  // When needed, FFmpeg applies cropping by moving plane pointers and
+  // adjusting frame width/height. Ensure that cropped buffers lie within the
+  // allocated memory.
+  RTC_DCHECK_LE(av_frame_->width, planar_yuv_buffer->width());
+  RTC_DCHECK_LE(av_frame_->height, planar_yuv_buffer->height());
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI420:
+    case webrtc::VideoFrameBuffer::Type::kI444: {
+      RTC_DCHECK_GE(av_frame_->data[kYPlaneIndex],
+                    planar_yuv8_buffer->DataY());
+      RTC_DCHECK_LE(
+          av_frame_->data[kYPlaneIndex] +
+              av_frame_->linesize[kYPlaneIndex] * av_frame_->height,
+          planar_yuv8_buffer->DataY() +
+              planar_yuv8_buffer->StrideY() * planar_yuv8_buffer->height());
+      RTC_DCHECK_GE(av_frame_->data[kUPlaneIndex],
+                    planar_yuv8_buffer->DataU());
+      RTC_DCHECK_LE(
+          av_frame_->data[kUPlaneIndex] +
+              av_frame_->linesize[kUPlaneIndex] *
+                  planar_yuv8_buffer->ChromaHeight(),
+          planar_yuv8_buffer->DataU() +
+              planar_yuv8_buffer->StrideU() *
+                  planar_yuv8_buffer->ChromaHeight());
+      RTC_DCHECK_GE(av_frame_->data[kVPlaneIndex],
+                    planar_yuv8_buffer->DataV());
+      RTC_DCHECK_LE(
+          av_frame_->data[kVPlaneIndex] +
+              av_frame_->linesize[kVPlaneIndex] *
+                  planar_yuv8_buffer->ChromaHeight(),
+          planar_yuv8_buffer->DataV() +
+              planar_yuv8_buffer->StrideV() *
+                  planar_yuv8_buffer->ChromaHeight());
+      break;
+    }
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  CreateStagingTextureIfNeeded(av_frame_->width, av_frame_->height);
+  D3D11_MAPPED_SUBRESOURCE sub_resource = {0};
+  HRESULT hr = d3d11_device_context_->Map(staging_texture_, 0,
+                                          D3D11_MAP_READ_WRITE, 0,
+                                          &sub_resource);
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "Failed to map texture.";
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  switch (video_frame_buffer_type) {
+    case webrtc::VideoFrameBuffer::Type::kI444:
+      libyuv::I444ToARGB(
+          av_frame_->data[kYPlaneIndex], av_frame_->linesize[kYPlaneIndex],
+          av_frame_->data[kUPlaneIndex], av_frame_->linesize[kUPlaneIndex],
+          av_frame_->data[kVPlaneIndex], av_frame_->linesize[kVPlaneIndex],
+          static_cast<uint8_t*>(sub_resource.pData), sub_resource.RowPitch,
+          av_frame_->width, av_frame_->height);
+      break;
+    case webrtc::VideoFrameBuffer::Type::kI420:
+      libyuv::I420ToARGB(
+          av_frame_->data[kYPlaneIndex], av_frame_->linesize[kYPlaneIndex],
+          av_frame_->data[kUPlaneIndex], av_frame_->linesize[kUPlaneIndex],
+          av_frame_->data[kVPlaneIndex], av_frame_->linesize[kVPlaneIndex],
+          static_cast<uint8_t*>(sub_resource.pData), sub_resource.RowPitch,
+          av_frame_->width, av_frame_->height);
+      break;
+    default:
+      RTC_LOG(LS_ERROR) << "frame_buffer type: "
+                        << static_cast<int>(video_frame_buffer_type)
+                        << " conversion is not supported!";
+      ReportError();
+      return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  d3d11_device_context_->Unmap(staging_texture_, 0);
+
+  d3d11_device_context_->CopyResource(output_texture_, staging_texture_);
+
+  surface_handle_->texture = output_texture_.p;
+  surface_handle_->d3d11_device = d3d11_device_.p;
+  surface_handle_->d3d11_video_device = d3d11_video_device_.p;
+  surface_handle_->context = d3d11_video_context_.p;
+  surface_handle_->array_index = 0;
+  surface_handle_->side_data_size = 0;
+  surface_handle_->cursor_data_size = 0;
+  surface_handle_->decode_start = decode_start_time;
+  surface_handle_->decode_end = clock_->CurrentTime().ms_or(0);
+  surface_handle_->start_duration = input_image.bwe_stats_.start_duration_;
+  surface_handle_->last_duration = input_image.bwe_stats_.last_duration_;
+  surface_handle_->packet_loss = input_image.bwe_stats_.packets_lost_;
+  surface_handle_->frame_size = input_image.size();
+  rtc::scoped_refptr<NativeHandleBuffer> buffer =
+      rtc::make_ref_counted<NativeHandleBuffer>(
+          (void*)surface_handle_.get(), av_frame_->width, av_frame_->height);
+  webrtc::VideoFrame decoded_frame(buffer, input_image.Timestamp(), 0,
+                                   webrtc::kVideoRotation_0);
+  decoded_frame.set_ntp_time_ms(input_image.ntp_time_ms_);
+  decoded_frame.set_timestamp(input_image.Timestamp());
+  decoded_image_callback_->Decoded(decoded_frame, absl::nullopt, qp);
+
+  // Stop referencing it, possibly freeing `input_frame`.
+  av_frame_unref(av_frame_.get());
+  input_frame = nullptr;
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
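Note that Decode() above assumes strict one-in/one-out: any error from avcodec_receive_frame(), including AVERROR(EAGAIN), is reported as a failure. That holds with thread_count = 1 and no reordering, but the canonical FFmpeg consumption loop is more tolerant. For contrast, a self-contained sketch (illustrative, not part of this patch):

  extern "C" {
  #include "third_party/ffmpeg/libavcodec/avcodec.h"
  }

  // Returns 0 on success or drain, a negative AVERROR on real failure.
  static int DecodePacket(AVCodecContext* ctx, const AVPacket* pkt,
                          AVFrame* frame) {
    int err = avcodec_send_packet(ctx, pkt);
    if (err < 0)
      return err;
    for (;;) {
      err = avcodec_receive_frame(ctx, frame);
      if (err == AVERROR(EAGAIN) || err == AVERROR_EOF)
        return 0;    // decoder needs more input / is fully drained
      if (err < 0)
        return err;  // genuine decode error
      // ... hand `frame` to the consumer ...
      av_frame_unref(frame);
    }
  }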
+const char* FFMpegDecoderImpl::ImplementationName() const {
+  return "FFmpegDecoder";
+}
+
+bool FFMpegDecoderImpl::IsInitialized() const {
+  return av_context_ != nullptr;
+}
+
+void FFMpegDecoderImpl::ReportInit() {
+  if (has_reported_init_)
+    return;
+  has_reported_init_ = true;
+}
+
+void FFMpegDecoderImpl::ReportError() {
+  if (has_reported_error_)
+    return;
+  has_reported_error_ = true;
+}
+
+}  // namespace base
+}  // namespace owt
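One layout detail worth recording: libyuv's "ARGB" formats store bytes as B, G, R, A in memory ("ARGB" read as a little-endian 32-bit word), which is exactly the DXGI_FORMAT_B8G8R8A8_UNORM used for the staging texture, so no channel swizzle is needed. A tiny sanity check (illustrative values, approximately pure red in BT.601):

  #include "third_party/libyuv/include/libyuv/convert_argb.h"

  void PixelOrderCheck() {
    uint8_t y = 81, u = 90, v = 240;  // ~pure red in BT.601
    uint8_t argb[4] = {0, 0, 0, 0};
    libyuv::I444ToARGB(&y, 1, &u, 1, &v, 1, argb, 4, 1, 1);
    // In memory: argb[0] = B (~0), argb[1] = G (~0), argb[2] = R (~255),
    // argb[3] = A (255), matching DXGI_FORMAT_B8G8R8A8_UNORM.
  }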
diff --git a/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h b/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h
new file mode 100644
index 000000000..3bf2605ce
--- /dev/null
+++ b/talk/owt/sdk/base/win/ffmpeg_decoder_impl.h
@@ -0,0 +1,102 @@
+// Copyright (C) <2023> Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
+#define OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
+
+#include <atlbase.h>
+
+#include <d3d11.h>
+#include <dxgi1_2.h>
+#include <memory>
+
+#include "modules/video_coding/codecs/h265/include/h265_globals.h"
+
+extern "C" {
+#include "third_party/ffmpeg/libavcodec/avcodec.h"
+}  // extern "C"
+
+#include "api/video_codecs/video_decoder.h"
+#include "common_video/h265/h265_bitstream_parser.h"
+#include "common_video/include/video_frame_buffer_pool.h"
+
+#include "talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h"
+
+namespace webrtc {
+class Clock;
+}
+
+namespace owt {
+namespace base {
+
+struct AVCodecContextDeleter {
+  void operator()(AVCodecContext* ptr) const { avcodec_free_context(&ptr); }
+};
+struct AVFrameDeleter {
+  void operator()(AVFrame* ptr) const { av_frame_free(&ptr); }
+};
+
+class FFMpegDecoderImpl : public webrtc::VideoDecoder {
+ public:
+  FFMpegDecoderImpl();
+  ~FFMpegDecoderImpl() override;
+
+  bool Configure(const webrtc::VideoDecoder::Settings& settings) override;
+  int32_t Release() override;
+
+  int32_t RegisterDecodeCompleteCallback(
+      webrtc::DecodedImageCallback* callback) override;
+
+  // `missing_frames` and `render_time_ms` are ignored.
+  int32_t Decode(const webrtc::EncodedImage& input_image,
+                 bool /*missing_frames*/,
+                 int64_t render_time_ms = -1) override;
+
+  const char* ImplementationName() const override;
+
+ private:
+  // Called by FFmpeg when it needs a frame buffer to store decoded frames in.
+  // The `VideoFrame`s returned by FFmpeg at `Decode` originate from here.
+  // Their buffers are reference counted and freed by FFmpeg using
+  // `AVFreeBuffer2`.
+  static int AVGetBuffer2(AVCodecContext* context,
+                          AVFrame* av_frame,
+                          int flags);
+  // Called by FFmpeg when it is done with a video frame, see `AVGetBuffer2`.
+  static void AVFreeBuffer2(void* opaque, uint8_t* data);
+
+  void CreateStagingTextureIfNeeded(int width, int height);
+
+  bool IsInitialized() const;
+
+  // Reports statistics with histograms.
+  void ReportInit();
+  void ReportError();
+
+  // Used by FFmpeg via `AVGetBuffer2()` to allocate I420/I444 images.
+  webrtc::VideoFrameBufferPool ffmpeg_buffer_pool_;
+  std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
+  std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_;
+  CComPtr<ID3D11Device> d3d11_device_;
+  CComPtr<ID3D11DeviceContext> d3d11_device_context_;
+  CComPtr<ID3D11VideoDevice> d3d11_video_device_;
+  CComPtr<ID3D11VideoContext> d3d11_video_context_;
+  CComQIPtr<IDXGIAdapter> m_padapter_;
+  CComPtr<IDXGIFactory2> m_pdxgi_factory_;
+  CComPtr<ID3D11Texture2D> staging_texture_;
+  CComPtr<ID3D11Texture2D> output_texture_;
+  std::unique_ptr<D3D11VAHandle> surface_handle_;
+
+  webrtc::DecodedImageCallback* decoded_image_callback_;
+
+  bool has_reported_init_;
+  bool has_reported_error_;
+
+  webrtc::H265BitstreamParser h265_bitstream_parser_;
+  webrtc::Clock* clock_ = nullptr;
+};
+
+}  // namespace base
+}  // namespace owt
+
+#endif  // OWT_BASE_WIN_FFMPEG_DECODER_IMPL_H_
\ No newline at end of file
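The deleter structs above are what make the raw FFmpeg allocations safe to hold in unique_ptr; a usage sketch (assuming the deleters declared in this header):

  std::unique_ptr<AVCodecContext, AVCodecContextDeleter> ctx(
      avcodec_alloc_context3(nullptr));  // freed via avcodec_free_context()
  std::unique_ptr<AVFrame, AVFrameDeleter> frame(av_frame_alloc());
  // Both are released automatically in reverse declaration order, exactly
  // what FFMpegDecoderImpl::Release() triggers by resetting the members.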
diff --git a/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h b/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
index 2b8dc29fa..15035918b 100644
--- a/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
+++ b/talk/owt/sdk/include/cpp/owt/base/globalconfiguration.h
@@ -96,6 +96,14 @@ class OWT_EXPORT GlobalConfiguration {
     flex_fec_enabled_ = enabled;
   }
 
+  /**
+   @brief This enables decoding of HEVC range extension video. Disabled by
+   default. When enabled, the FFmpeg software decoder is used for HEVC.
+  */
+  static void SetRangeExtensionEnabled(bool enabled) {
+    range_extension_enabled_ = enabled;
+  }
+
   /**
    @brief Set the global bitrate limits applied to external BWE. If any of
    the value is set to 0, will use the stack default for that.
@@ -311,6 +319,9 @@ class OWT_EXPORT GlobalConfiguration {
   }
   static bool flex_fec_enabled_;
 
+  static bool GetRangeExtensionEnabled() { return range_extension_enabled_; }
+  static bool range_extension_enabled_;
+
   static void GetBweRateLimits(int& start_bitrate_kbps,
                                int& min_bitrate_kbps,
                                int& max_bitrate_kbps) {
diff --git a/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h b/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
index a597108ec..d353e95ca 100755
--- a/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
+++ b/talk/owt/sdk/include/cpp/owt/base/videorendererinterface.h
@@ -21,14 +21,15 @@ enum class VideoBufferType {
   kI420,
   kARGB,
   kD3D11,  // Format self-described.
+  kI444,
 };
 enum class VideoRendererType {
   kI420,
   kARGB,
   kD3D11,  // Format self-described.
+  kI444,
 };
-
 #if defined(WEBRTC_WIN)
 struct OWT_EXPORT D3D11ImageHandle {
   ID3D11Device* d3d11_device;
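Finally, a hypothetical application-side sink for the new kI444 buffer type, matching the packed plane layout produced by WebrtcVideoRendererImpl above (interface and member names as used elsewhere in this patch; the payload is three consecutive width*height planes):

  #include "owt/base/videorendererinterface.h"

  class I444Renderer : public owt::base::VideoRendererInterface {
   public:
    void RenderFrame(std::unique_ptr<owt::base::VideoBuffer> buffer) override {
      if (buffer->type != owt::base::VideoBufferType::kI444)
        return;
      const size_t plane_size =
          buffer->resolution.width * buffer->resolution.height;
      const uint8_t* y = buffer->buffer;  // plane 0
      const uint8_t* u = y + plane_size;  // plane 1
      const uint8_t* v = u + plane_size;  // plane 2
      // ... feed y/u/v to the application's display path ...
    }
    owt::base::VideoRendererType Type() override {
      return owt::base::VideoRendererType::kI444;
    }
  };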