Skip to content
Open
2 changes: 1 addition & 1 deletion src/nvenc/nvenc_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ namespace nvenc {
};

auto buffer_is_yuv444 = [&]() {
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
};

{
Expand Down
3 changes: 3 additions & 0 deletions src/nvenc/nvenc_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ namespace nvenc {
case platf::pix_fmt_e::ayuv:
return NV_ENC_BUFFER_FORMAT_AYUV;

case platf::pix_fmt_e::yuv444p:
return NV_ENC_BUFFER_FORMAT_YUV444;

case platf::pix_fmt_e::yuv444p16:
return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;

Expand Down
64 changes: 35 additions & 29 deletions src/nvhttp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,39 @@ namespace nvhttp {
return true;
}

/**
 * @brief Assemble the `SCM_*` codec-mode bitmask from the current encoder state.
 *
 * Reads `video::active_hevc_mode`, `video::active_av1_mode` and
 * `video::last_encoder_probe_supported_yuv444_for_codec` (index 0 is paired
 * with the H.264 flags, index 1 with HEVC, index 2 with AV1).
 *
 * @return Bitwise OR of every `SCM_*` flag whose condition holds; `SCM_H264`
 *         is always present.
 */
uint32_t get_codec_mode_flags() {
  const auto &yuv444_probe = video::last_encoder_probe_supported_yuv444_for_codec;
  const int hevc_mode = video::active_hevc_mode;
  const int av1_mode = video::active_av1_mode;

  // H.264 is advertised unconditionally; its 4:4:4 flag follows the probe result.
  uint32_t flags = SCM_H264;
  if (yuv444_probe[0]) {
    flags |= SCM_H264_HIGH8_444;
  }

  // HEVC: any mode >= 2 enables the base flag, plus 8-bit 4:4:4 when probed.
  if (hevc_mode >= 2) {
    flags |= SCM_HEVC;
    if (yuv444_probe[1]) {
      flags |= SCM_HEVC_REXT8_444;
    }
  }
  // Modes 3 and 5 add MAIN10; modes 4 and 5 add REXT10 4:4:4 (probe permitting).
  const bool hevc_main10 = (hevc_mode == 3 || hevc_mode == 5);
  const bool hevc_rext10 = (hevc_mode == 4 || hevc_mode == 5);
  if (hevc_main10) {
    flags |= SCM_HEVC_MAIN10;
  }
  if (hevc_rext10 && yuv444_probe[1]) {
    flags |= SCM_HEVC_REXT10_444;
  }

  // AV1 mirrors the HEVC rules, using probe index 2.
  if (av1_mode >= 2) {
    flags |= SCM_AV1_MAIN8;
    if (yuv444_probe[2]) {
      flags |= SCM_AV1_HIGH8_444;
    }
  }
  const bool av1_main10 = (av1_mode == 3 || av1_mode == 5);
  const bool av1_high10 = (av1_mode == 4 || av1_mode == 5);
  if (av1_main10) {
    flags |= SCM_AV1_MAIN10;
  }
  if (av1_high10 && yuv444_probe[2]) {
    flags |= SCM_AV1_HIGH10_444;
  }

  return flags;
}

template<class T>
void serverinfo(std::shared_ptr<typename SimpleWeb::ServerBase<T>::Response> response, std::shared_ptr<typename SimpleWeb::ServerBase<T>::Request> request) {
print_req<T>(request);
Expand Down Expand Up @@ -735,34 +768,7 @@ namespace nvhttp {
tree.put("root.LocalIP", net::addr_to_normalized_string(local_endpoint.address()));
}

uint32_t codec_mode_flags = SCM_H264;
if (video::last_encoder_probe_supported_yuv444_for_codec[0]) {
codec_mode_flags |= SCM_H264_HIGH8_444;
}
if (video::active_hevc_mode >= 2) {
codec_mode_flags |= SCM_HEVC;
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
codec_mode_flags |= SCM_HEVC_REXT8_444;
}
}
if (video::active_hevc_mode >= 3) {
codec_mode_flags |= SCM_HEVC_MAIN10;
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
codec_mode_flags |= SCM_HEVC_REXT10_444;
}
}
if (video::active_av1_mode >= 2) {
codec_mode_flags |= SCM_AV1_MAIN8;
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
codec_mode_flags |= SCM_AV1_HIGH8_444;
}
}
if (video::active_av1_mode >= 3) {
codec_mode_flags |= SCM_AV1_MAIN10;
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
codec_mode_flags |= SCM_AV1_HIGH10_444;
}
}
const uint32_t codec_mode_flags = get_codec_mode_flags();
tree.put("root.ServerCodecModeSupport", codec_mode_flags);

if (!config::nvhttp.external_ip.empty()) {
Expand Down Expand Up @@ -815,7 +821,7 @@ namespace nvhttp {
for (auto &proc : proc::proc.get_apps()) {
pt::ptree app;

app.put("IsHdrSupported"s, video::active_hevc_mode == 3 ? 1 : 0);
app.put("IsHdrSupported"s, video::active_hevc_mode >= 3 ? 1 : 0);
app.put("AppTitle"s, proc.name);
app.put("ID", proc.id);

Expand Down
2 changes: 2 additions & 0 deletions src/platform/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ namespace platf {
p010, ///< P010
ayuv, ///< AYUV
yuv444p16, ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4
yuv444p, ///< Planar 8-bit YUV 4:4:4
y410, ///< Y410
unknown ///< Unknown
};
Expand All @@ -259,6 +260,7 @@ namespace platf {
_CONVERT(p010);
_CONVERT(ayuv);
_CONVERT(yuv444p16);
_CONVERT(yuv444p);
_CONVERT(y410);
_CONVERT(unknown);
}
Expand Down
152 changes: 113 additions & 39 deletions src/platform/linux/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,10 @@ namespace cuda {
this->frame = frame;

auto hwframe_ctx = (AVHWFramesContext *) hw_frames_ctx->data;
if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv;

if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12 &&
hwframe_ctx->sw_format != AV_PIX_FMT_YUV444P) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P"sv;
return -1;
}

Expand All @@ -132,6 +134,8 @@ namespace cuda {
}
}

is_yuv444 = (hwframe_ctx->sw_format == AV_PIX_FMT_YUV444P);

auto cuda_ctx = (AVCUDADeviceContext *) hwframe_ctx->device_ctx->hwctx;

stream = make_stream();
Expand Down Expand Up @@ -178,7 +182,11 @@ namespace cuda {
return;
}

sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
if (is_yuv444) {
sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
} else {
sws.convert_nv12(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
}
}

cudaTextureObject_t tex_obj(const tex_t &tex) const {
Expand All @@ -194,13 +202,18 @@ namespace cuda {
// When height and width don't change, it's not necessary to use linear interpolation
bool linear_interpolation;

bool is_yuv444;

sws_t sws;
};

class cuda_ram_t: public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get());
if (is_yuv444) {
return sws.load_ram(img, tex.array) || sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(tex), stream.get());
}
return sws.load_ram(img, tex.array) || sws.convert_nv12(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get());
}

int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
Expand All @@ -224,7 +237,10 @@ namespace cuda {
class cuda_vram_t: public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *) &img)->tex), stream.get());
if (is_yuv444) {
return sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(((img_t *) &img)->tex), stream.get());
}
return sws.convert_nv12(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *) &img)->tex), stream.get());
}
};

Expand Down Expand Up @@ -274,6 +290,13 @@ namespace cuda {
return -1;
}

/**
 * @brief Groups the CUDA graphics resources registered against GL plane textures.
 *
 * Which members are populated depends on the output format: the YUV 4:4:4
 * path registers y_res, u_res and v_res, while the NV12 path registers
 * y_res and uv_res.
 */
struct cu_resources {
// Y plane texture (tex[0]); registered in both the NV12 and YUV 4:4:4 paths.
registered_resource_t y_res;
// U plane texture (yuv444 tex[1]); registered only in the YUV 4:4:4 path.
registered_resource_t u_res;
// V plane texture (yuv444 tex[2]); registered only in the YUV 4:4:4 path.
registered_resource_t v_res;
// UV plane texture (nv12 tex[1]); registered only in the NV12 path.
registered_resource_t uv_res;
};

class gl_cuda_vram_t: public platf::avcodec_encode_device_t {
public:
/**
Expand Down Expand Up @@ -335,28 +358,44 @@ namespace cuda {
this->hwframe.reset(frame);
this->frame = frame;

auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data;

if (hw_frames_ctx->sw_format != AV_PIX_FMT_NV12 &&
hw_frames_ctx->sw_format != AV_PIX_FMT_YUV444P) {
BOOST_LOG(error) << "cuda::gl_cuda_vram_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P"sv;
return -1;
}

if (!frame->buf[0]) {
if (av_hwframe_get_buffer(hw_frames_ctx_buf, frame, 0)) {
BOOST_LOG(error) << "Couldn't get hwframe for VAAPI"sv;
BOOST_LOG(error) << "Couldn't get hwframe for NVENC_GL"sv;
return -1;
}
}

auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data;
sw_format = hw_frames_ctx->sw_format;
is_yuv444 = (sw_format == AV_PIX_FMT_YUV444P);

auto nv12_opt = egl::create_target(frame->width, frame->height, sw_format);
if (!nv12_opt) {
return -1;
}

auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format);
auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format, is_yuv444);
if (!sws_opt) {
return -1;
}

this->sws = std::move(*sws_opt);
this->nv12 = std::move(*nv12_opt);

if (is_yuv444) {
auto yuv444_opt = egl::create_yuv444_target(frame->width, frame->height, sw_format);
if (!yuv444_opt) {
return -1;
}
this->yuv444 = std::move(*yuv444_opt);
} else {
auto nv12_opt = egl::create_nv12_target(frame->width, frame->height, sw_format);
if (!nv12_opt) {
return -1;
}
this->nv12 = std::move(*nv12_opt);
}

auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx->hwctx;

Expand All @@ -367,9 +406,14 @@ namespace cuda {

cuda_ctx->stream = stream.get();

CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture");

if (is_yuv444) {
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&cu_res.y_res, yuv444->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&cu_res.u_res, yuv444->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register U texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&cu_res.v_res, yuv444->tex[2], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register V texture");
} else {
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&cu_res.y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&cu_res.uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture");
}
return 0;
}

Expand Down Expand Up @@ -398,33 +442,61 @@ namespace cuda {
rgb = std::move(*rgb_opt);
}

// Perform the color conversion and scaling in GL
sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]);
sws.convert(nv12->buf);

auto fmt_desc = av_pix_fmt_desc_get(sw_format);

// Map the GL textures to read for CUDA
CUgraphicsResource resources[2] = {y_res.get(), uv_res.get()};
CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA");
sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], is_yuv444);

if (is_yuv444) {
// Perform the color conversion and scaling in GL
sws.convert_yuv444(yuv444->buf);

// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 2; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");
// Map the GL textures to read for CUDA
std::array<CUgraphicsResource, 3> resources = {{cu_res.y_res.get(), cu_res.u_res.get(), cu_res.v_res.get()}};
CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA");

cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0);
cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0);
// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 3; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step);
cpy.Height = frame->height;

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
}
// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA");

} else {
// Perform the color conversion and scaling in GL
sws.convert_nv12(nv12->buf);

// Map the GL textures to read for CUDA
std::array<CUgraphicsResource, 2> resources = {{cu_res.y_res.get(), cu_res.uv_res.get()}};
CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA");

// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 2; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");

cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0);
cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0);

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
}
// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA");
}

// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA");
return 0;
}

Expand All @@ -446,6 +518,7 @@ namespace cuda {

egl::sws_t sws;
egl::nv12_t nv12;
egl::yuv444_t yuv444;
AVPixelFormat sw_format;

int height;
Expand All @@ -454,11 +527,12 @@ namespace cuda {
std::uint64_t sequence;
egl::rgb_t rgb;

registered_resource_t y_res;
registered_resource_t uv_res;
cu_resources cu_res;

int offset_x;
int offset_y;

bool is_yuv444;
};

std::unique_ptr<platf::avcodec_encode_device_t> make_avcodec_encode_device(int width, int height, bool vram) {
Expand Down
Loading
Loading