libobs: NV12 textures only for active GPU encoders

Intel GPUs in particular are slow to copy NV12/P010 textures, so NV12
textures are now used only while a GPU encoder is active. CPU encoders
can use ordinary UNORM textures instead.
This commit is contained in:
jpark37 2022-03-02 22:19:51 -08:00 committed by Jim
parent 1c4148408f
commit c4fb34897e
3 changed files with 263 additions and 190 deletions

View file

@ -246,10 +246,15 @@ struct obs_task_info {
struct obs_core_video {
graphics_t *graphics;
gs_stagesurf_t *active_copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
gs_texture_t *convert_textures[NUM_CHANNELS];
#ifdef _WIN32
gs_stagesurf_t *copy_surfaces_encode[NUM_TEXTURES];
gs_texture_t *convert_textures_encode[NUM_CHANNELS];
#endif
gs_texture_t *render_texture;
gs_texture_t *output_texture;
gs_texture_t *convert_textures[NUM_CHANNELS];
bool texture_rendered;
bool textures_copied[NUM_TEXTURES];
bool texture_converted;

View file

@ -298,6 +298,7 @@ static void render_convert_plane(gs_effect_t *effect, gs_texture_t *target,
static const char *render_convert_texture_name = "render_convert_texture";
static void render_convert_texture(struct obs_core_video *video,
gs_texture_t *const *const convert_textures,
gs_texture_t *texture)
{
profile_start(render_convert_texture_name);
@ -322,28 +323,28 @@ static void render_convert_texture(struct obs_core_video *video,
gs_enable_blending(false);
if (video->convert_textures[0]) {
if (convert_textures[0]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec0, &vec0);
render_convert_plane(effect, video->convert_textures[0],
render_convert_plane(effect, convert_textures[0],
video->conversion_techs[0]);
if (video->convert_textures[1]) {
if (convert_textures[1]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec1, &vec1);
if (!video->convert_textures[2])
if (!convert_textures[2])
gs_effect_set_vec4(color_vec2, &vec2);
gs_effect_set_float(width_i, video->conversion_width_i);
render_convert_plane(effect, video->convert_textures[1],
render_convert_plane(effect, convert_textures[1],
video->conversion_techs[1]);
if (video->convert_textures[2]) {
if (convert_textures[2]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec2, &vec2);
gs_effect_set_float(width_i,
video->conversion_width_i);
render_convert_plane(
effect, video->convert_textures[2],
effect, convert_textures[2],
video->conversion_techs[2]);
}
}
@ -357,26 +358,32 @@ static void render_convert_texture(struct obs_core_video *video,
}
static const char *stage_output_texture_name = "stage_output_texture";
static inline void stage_output_texture(struct obs_core_video *video,
int cur_texture)
static inline void
stage_output_texture(struct obs_core_video *video, int cur_texture,
gs_texture_t *const *const convert_textures,
gs_stagesurf_t *const *const copy_surfaces,
size_t channel_count)
{
profile_start(stage_output_texture_name);
unmap_last_surface(video);
if (!video->gpu_conversion) {
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0];
if (copy)
gs_stagesurf_t *copy = copy_surfaces[0];
if (copy) {
gs_stage_texture(copy, video->output_texture);
video->active_copy_surfaces[cur_texture][0] = copy;
}
video->textures_copied[cur_texture] = true;
} else if (video->texture_converted) {
for (int i = 0; i < NUM_CHANNELS; i++) {
gs_stagesurf_t *copy =
video->copy_surfaces[cur_texture][i];
if (copy)
gs_stage_texture(copy,
video->convert_textures[i]);
for (int i = 0; i < channel_count; i++) {
gs_stagesurf_t *copy = copy_surfaces[i];
if (copy) {
gs_stage_texture(copy, convert_textures[i]);
video->active_copy_surfaces[cur_texture][i] =
copy;
}
}
video->textures_copied[cur_texture] = true;
@ -421,13 +428,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
* reason. otherwise, it goes to the 'duplicate' case above, which
* will ensure better performance. */
if (raw_active || vframe_info->count > 1) {
gs_copy_texture(tf.tex, video->convert_textures[0]);
gs_copy_texture(tf.tex, video->convert_textures_encode[0]);
} else {
gs_texture_t *tex = video->convert_textures[0];
gs_texture_t *tex_uv = video->convert_textures[1];
gs_texture_t *tex = video->convert_textures_encode[0];
gs_texture_t *tex_uv = video->convert_textures_encode[1];
video->convert_textures[0] = tf.tex;
video->convert_textures[1] = tf.tex_uv;
video->convert_textures_encode[0] = tf.tex;
video->convert_textures_encode[1] = tf.tex_uv;
tf.tex = tex;
tf.tex_uv = tex_uv;
@ -489,15 +496,24 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
render_main_texture(video);
if (raw_active || gpu_active) {
gs_texture_t *const *convert_textures = video->convert_textures;
gs_stagesurf_t *const *copy_surfaces =
video->copy_surfaces[cur_texture];
size_t channel_count = NUM_CHANNELS;
gs_texture_t *texture = render_output_texture(video);
#ifdef _WIN32
if (gpu_active)
if (gpu_active) {
convert_textures = video->convert_textures_encode;
copy_surfaces = video->copy_surfaces_encode;
channel_count = 1;
gs_flush();
}
#endif
if (video->gpu_conversion)
render_convert_texture(video, texture);
render_convert_texture(video, convert_textures,
texture);
#ifdef _WIN32
if (gpu_active) {
@ -507,7 +523,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
#endif
if (raw_active)
stage_output_texture(video, cur_texture);
stage_output_texture(video, cur_texture,
convert_textures, copy_surfaces,
channel_count);
}
gs_set_render_target(NULL, NULL);
@ -524,7 +542,7 @@ static inline bool download_frame(struct obs_core_video *video,
for (int channel = 0; channel < NUM_CHANNELS; ++channel) {
gs_stagesurf_t *surface =
video->copy_surfaces[prev_texture][channel];
video->active_copy_surfaces[prev_texture][channel];
if (surface) {
if (!gs_stagesurface_map(surface, &frame->data[channel],
&frame->linesize[channel]))
@ -561,108 +579,91 @@ static void set_gpu_converted_data(struct obs_core_video *video,
const struct video_data *input,
const struct video_output_info *info)
{
if (video->using_nv12_tex) {
switch (info->format) {
case VIDEO_FORMAT_I420: {
const uint32_t width = info->width;
const uint32_t height = info->height;
const uint8_t *const in_uv = set_gpu_converted_plane(
width, height, input->linesize[0], output->linesize[0],
input->data[0], output->data[0]);
set_gpu_converted_plane(width, height, input->linesize[0],
output->linesize[0], input->data[0],
output->data[0]);
const uint32_t width_d2 = width / 2;
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width, height_d2, input->linesize[0],
output->linesize[1], in_uv,
set_gpu_converted_plane(width_d2, height_d2, input->linesize[1],
output->linesize[1], input->data[1],
output->data[1]);
} else {
switch (info->format) {
case VIDEO_FORMAT_I420: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width_d2, height_d2, input->linesize[2],
output->linesize[2], input->data[2],
output->data[2]);
break;
}
case VIDEO_FORMAT_NV12: {
const uint32_t width = info->width;
const uint32_t height = info->height;
const uint32_t height_d2 = height / 2;
if (input->linesize[1]) {
set_gpu_converted_plane(width, height,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
const uint32_t width_d2 = width / 2;
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width_d2, height_d2,
input->linesize[1],
output->linesize[1],
input->data[1],
output->data[1]);
set_gpu_converted_plane(width_d2, height_d2,
input->linesize[2],
output->linesize[2],
input->data[2],
output->data[2]);
break;
}
case VIDEO_FORMAT_NV12: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width, height_d2,
input->linesize[1],
output->linesize[1],
input->data[1],
output->data[1]);
break;
}
case VIDEO_FORMAT_I444: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height,
} else {
const uint8_t *const in_uv = set_gpu_converted_plane(
width, height, input->linesize[0],
output->linesize[0], input->data[0],
output->data[0]);
set_gpu_converted_plane(width, height_d2,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
set_gpu_converted_plane(width, height,
input->linesize[1],
output->linesize[1],
input->data[1],
output->linesize[1], in_uv,
output->data[1]);
set_gpu_converted_plane(width, height,
input->linesize[2],
output->linesize[2],
input->data[2],
output->data[2]);
break;
}
case VIDEO_FORMAT_NONE:
case VIDEO_FORMAT_YVYU:
case VIDEO_FORMAT_YUY2:
case VIDEO_FORMAT_UYVY:
case VIDEO_FORMAT_RGBA:
case VIDEO_FORMAT_BGRA:
case VIDEO_FORMAT_BGRX:
case VIDEO_FORMAT_Y800:
case VIDEO_FORMAT_BGR3:
case VIDEO_FORMAT_I422:
case VIDEO_FORMAT_I40A:
case VIDEO_FORMAT_I42A:
case VIDEO_FORMAT_YUVA:
case VIDEO_FORMAT_AYUV:
/* unimplemented */
;
}
break;
}
case VIDEO_FORMAT_I444: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height, input->linesize[0],
output->linesize[0], input->data[0],
output->data[0]);
set_gpu_converted_plane(width, height, input->linesize[1],
output->linesize[1], input->data[1],
output->data[1]);
set_gpu_converted_plane(width, height, input->linesize[2],
output->linesize[2], input->data[2],
output->data[2]);
break;
}
case VIDEO_FORMAT_NONE:
case VIDEO_FORMAT_YVYU:
case VIDEO_FORMAT_YUY2:
case VIDEO_FORMAT_UYVY:
case VIDEO_FORMAT_RGBA:
case VIDEO_FORMAT_BGRA:
case VIDEO_FORMAT_BGRX:
case VIDEO_FORMAT_Y800:
case VIDEO_FORMAT_BGR3:
case VIDEO_FORMAT_I422:
case VIDEO_FORMAT_I40A:
case VIDEO_FORMAT_I42A:
case VIDEO_FORMAT_YUVA:
case VIDEO_FORMAT_AYUV:
/* unimplemented */
;
}
}

View file

@ -101,74 +101,99 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
else
blog(LOG_INFO, "NV12 texture support not available");
video->convert_textures[0] = NULL;
video->convert_textures[1] = NULL;
video->convert_textures[2] = NULL;
#ifdef _WIN32
video->convert_textures_encode[0] = NULL;
video->convert_textures_encode[1] = NULL;
video->convert_textures_encode[2] = NULL;
if (video->using_nv12_tex) {
gs_texture_create_nv12(&video->convert_textures[0],
&video->convert_textures[1],
ovi->output_width, ovi->output_height,
GS_RENDER_TARGET | GS_SHARED_KM_TEX);
} else {
#endif
video->convert_textures[0] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
const struct video_output_info *info =
video_output_get_info(video->video);
switch (info->format) {
case VIDEO_FORMAT_I420:
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[2] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8, 1, NULL, GS_RENDER_TARGET);
if (!video->convert_textures[2])
return false;
break;
case VIDEO_FORMAT_NV12:
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8G8, 1, NULL, GS_RENDER_TARGET);
break;
case VIDEO_FORMAT_I444:
video->convert_textures[1] = gs_texture_create(
ovi->output_width, ovi->output_height, GS_R8, 1,
NULL, GS_RENDER_TARGET);
video->convert_textures[2] = gs_texture_create(
ovi->output_width, ovi->output_height, GS_R8, 1,
NULL, GS_RENDER_TARGET);
if (!video->convert_textures[2])
return false;
break;
default:
break;
if (!gs_texture_create_nv12(
&video->convert_textures_encode[0],
&video->convert_textures_encode[1],
ovi->output_width, ovi->output_height,
GS_RENDER_TARGET | GS_SHARED_KM_TEX)) {
return false;
}
#ifdef _WIN32
}
#endif
if (!video->convert_textures[0])
return false;
if (!video->convert_textures[1])
return false;
bool success = true;
return true;
const struct video_output_info *info =
video_output_get_info(video->video);
switch (info->format) {
case VIDEO_FORMAT_I420:
video->convert_textures[0] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1,
NULL, GS_RENDER_TARGET);
video->convert_textures[2] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1,
NULL, GS_RENDER_TARGET);
if (!video->convert_textures[0] ||
!video->convert_textures[1] || !video->convert_textures[2])
success = false;
break;
case VIDEO_FORMAT_NV12:
video->convert_textures[0] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2, GS_R8G8,
1, NULL, GS_RENDER_TARGET);
if (!video->convert_textures[0] || !video->convert_textures[1])
success = false;
break;
case VIDEO_FORMAT_I444:
video->convert_textures[0] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[1] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[2] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
if (!video->convert_textures[0] ||
!video->convert_textures[1] || !video->convert_textures[2])
success = false;
}
if (!success) {
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->convert_textures[c]) {
gs_texture_destroy(video->convert_textures[c]);
video->convert_textures[c] = NULL;
}
#ifdef _WIN32
if (video->convert_textures_encode[c]) {
gs_texture_destroy(
video->convert_textures_encode[c]);
video->convert_textures_encode[c] = NULL;
}
#endif
}
}
return success;
}
static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
{
struct obs_core_video *video = &obs->video;
video->copy_surfaces[i][0] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_R8);
if (!video->copy_surfaces[i][0])
return false;
const struct video_output_info *info =
video_output_get_info(video->video);
switch (info->format) {
case VIDEO_FORMAT_I420:
video->copy_surfaces[i][0] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_R8);
if (!video->copy_surfaces[i][0])
return false;
video->copy_surfaces[i][1] = gs_stagesurface_create(
ovi->output_width / 2, ovi->output_height / 2, GS_R8);
if (!video->copy_surfaces[i][1])
@ -179,12 +204,20 @@ static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
return false;
break;
case VIDEO_FORMAT_NV12:
video->copy_surfaces[i][0] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_R8);
if (!video->copy_surfaces[i][0])
return false;
video->copy_surfaces[i][1] = gs_stagesurface_create(
ovi->output_width / 2, ovi->output_height / 2, GS_R8G8);
if (!video->copy_surfaces[i][1])
return false;
break;
case VIDEO_FORMAT_I444:
video->copy_surfaces[i][0] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_R8);
if (!video->copy_surfaces[i][0])
return false;
video->copy_surfaces[i][1] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_R8);
if (!video->copy_surfaces[i][1])
@ -205,48 +238,78 @@ static bool obs_init_textures(struct obs_video_info *ovi)
{
struct obs_core_video *video = &obs->video;
bool success = true;
for (size_t i = 0; i < NUM_TEXTURES; i++) {
#ifdef _WIN32
if (video->using_nv12_tex) {
video->copy_surfaces[i][0] =
video->copy_surfaces_encode[i] =
gs_stagesurface_create_nv12(ovi->output_width,
ovi->output_height);
if (!video->copy_surfaces[i][0])
return false;
} else {
#endif
if (video->gpu_conversion) {
if (!obs_init_gpu_copy_surfaces(ovi, i))
return false;
} else {
video->copy_surfaces[i][0] =
gs_stagesurface_create(
ovi->output_width,
ovi->output_height, GS_RGBA);
if (!video->copy_surfaces[i][0])
return false;
if (!video->copy_surfaces_encode[i]) {
success = false;
break;
}
#ifdef _WIN32
}
#endif
if (video->gpu_conversion) {
if (!obs_init_gpu_copy_surfaces(ovi, i)) {
success = false;
break;
}
} else {
video->copy_surfaces[i][0] = gs_stagesurface_create(
ovi->output_width, ovi->output_height, GS_RGBA);
if (!video->copy_surfaces[i][0]) {
success = false;
break;
}
}
}
video->render_texture = gs_texture_create(ovi->base_width,
ovi->base_height, GS_RGBA, 1,
NULL, GS_RENDER_TARGET);
if (!video->render_texture)
return false;
success = false;
video->output_texture = gs_texture_create(ovi->output_width,
ovi->output_height, GS_RGBA,
1, NULL, GS_RENDER_TARGET);
if (!video->output_texture)
return false;
success = false;
return true;
if (!success) {
for (size_t i = 0; i < NUM_TEXTURES; i++) {
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->copy_surfaces[i][c]) {
gs_stagesurface_destroy(
video->copy_surfaces[i][c]);
video->copy_surfaces[i][c] = NULL;
}
}
#ifdef _WIN32
if (video->copy_surfaces_encode[i]) {
gs_stagesurface_destroy(
video->copy_surfaces_encode[i]);
video->copy_surfaces_encode[i] = NULL;
}
#endif
}
if (video->render_texture) {
gs_texture_destroy(video->render_texture);
video->render_texture = NULL;
}
if (video->output_texture) {
gs_texture_destroy(video->output_texture);
video->output_texture = NULL;
}
}
return success;
}
gs_effect_t *obs_load_effect(gs_effect_t **effect, const char *file)
@ -484,6 +547,13 @@ static void obs_free_video(void)
video->copy_surfaces[i][c] = NULL;
}
}
#ifdef _WIN32
if (video->copy_surfaces_encode[i]) {
gs_stagesurface_destroy(
video->copy_surfaces_encode[i]);
video->copy_surfaces_encode[i] = NULL;
}
#endif
}
gs_texture_destroy(video->render_texture);
@ -493,16 +563,13 @@ static void obs_free_video(void)
gs_texture_destroy(video->convert_textures[c]);
video->convert_textures[c] = NULL;
}
}
for (size_t i = 0; i < NUM_TEXTURES; i++) {
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->copy_surfaces[i][c]) {
gs_stagesurface_destroy(
video->copy_surfaces[i][c]);
video->copy_surfaces[i][c] = NULL;
}
#ifdef _WIN32
if (video->convert_textures_encode[c]) {
gs_texture_destroy(
video->convert_textures_encode[c]);
video->convert_textures_encode[c] = NULL;
}
#endif
}
gs_texture_destroy(video->output_texture);