libobs: Separate textures for YUV input

The shaders to unpack YUV information from the same texture were rather
complicated. Breaking them up into separate textures makes the shaders
much simpler, and we can remove the PRECISION_OFFSET hack.

Performance also gets a nice boost on Intel for planar textures.

Intel GPA, SetStablePowerState, Intel HD Graphics 530, 1920x1080

UYVY: 473 us -> 457 us
YUY2: 492 us -> 422 us
YVYU: 491 us -> 441 us
I420: 1637 us -> 505 us
I422: 1644 us -> 482 us
I444: 1653 us -> 504 us
NV12: 1656 us -> 369 us
Y800 (limited): 270 us -> 277 us
Y800 (full): 263 us -> 289 us
RGB (limited): 341 us -> 411 us
BGR3 (limited): 512 us -> 509 us
BGR3 (full): 527 us -> 534 us
This commit is contained in:
jpark37 2019-08-09 20:43:14 -07:00
parent 164f731320
commit bdd8d64053
5 changed files with 404 additions and 347 deletions

View file

@ -19,23 +19,18 @@ uniform float width;
uniform float height;
uniform float width_i;
uniform float width_d2;
uniform float width_d2_i;
uniform float input_width_i_d2;
uniform float height_d2;
uniform float width_x2_i;
uniform int int_width;
uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;
uniform float4x4 color_matrix;
uniform float4 color_vec0;
uniform float4 color_vec1;
uniform float4 color_vec2;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float4 color_vec_y;
uniform float4 color_vec_u;
uniform float4 color_vec_v;
uniform texture2d image;
uniform texture2d image1;
uniform texture2d image2;
sampler_state def_sampler {
Filter = Linear;
@ -52,6 +47,11 @@ struct VertTexPos {
float4 pos : POSITION;
};
/* Vertex shader output carrying a clip-space position plus a three-component
 * coordinate. VSPosWide_Reverse fills pos_wide with
 * (width * u, width_d2 * u, height * v). */
struct VertPosWide {
float3 pos_wide : TEXCOORD0;
float4 pos : POSITION;
};
struct VertTexPosWide {
float3 uuv : TEXCOORD0;
float4 pos : POSITION;
@ -61,6 +61,10 @@ struct FragTex {
float2 uv : TEXCOORD0;
};
/* Pixel shader input matching the TEXCOORD0 member of VertPosWide.
 * PSPlanar422_Reverse reads pos_wide.xz as the texel position for the first
 * texture (image) and pos_wide.yz as the texel position for image1/image2. */
struct FragPosWide {
float3 pos_wide : TEXCOORD0;
};
/* Pixel shader input holding two horizontal texture coordinates that share
 * one vertical coordinate. The *_Wide pixel shaders sample at uuv.xz and
 * uuv.yz and average the two results. */
struct FragTexWide {
float3 uuv : TEXCOORD0;
};
@ -78,24 +82,7 @@ FragPos VSPos(uint id : VERTEXID)
return vert_out;
}
/* Vertex shader that derives both the clip-space position and the texture
 * coordinate from the vertex id alone.
 *
 * For ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1) and u is 0, 0, 2.
 * v is 0, 2, 0 when obs_glsl_compile is true and 1, -1, 1 otherwise, i.e. the
 * vertical direction is mirrored between the two cases. */
VertTexPos VSTexPos(uint id : VERTEXID)
{
	float id_hi = float(id >> 1);
	float id_lo = float(id & uint(1));

	float tex_u = id_hi * 2.0;
	float tex_v = obs_glsl_compile ? (id_lo * 2.0) : (1.0 - id_lo * 2.0);

	VertTexPos result;
	result.pos = float4(id_hi * 4.0 - 1.0, id_lo * 4.0 - 1.0, 0.0, 1.0);
	result.uv = float2(tex_u, tex_v);
	return result;
}
VertTexPosWide VSTexPosLeft(uint id : VERTEXID)
VertTexPosWide VSTexPos_Left(uint id : VERTEXID)
{
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
@ -108,20 +95,66 @@ VertTexPosWide VSTexPosLeft(uint id : VERTEXID)
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
VertTexPosWide vert_out;
vert_out.uuv.x = u_left;
vert_out.uuv.y = u_right;
vert_out.uuv.z = v;
vert_out.uuv = float3(u_left, u_right, v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
/* used to prevent internal GPU precision issues with fmod in particular */
#define PRECISION_OFFSET 0.2
/* Vertex shader that derives the clip-space position from the vertex id and
 * emits a texture coordinate scaled by (width_d2, height) instead of a
 * normalized one.
 *
 * For ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1). The unscaled
 * u is 0, 0, 2; the unscaled v is 0, 2, 0 when obs_glsl_compile is true and
 * 1, -1, 1 otherwise.
 * NOTE(review): the scaling presumably lets the pixel shader use uv directly
 * as a texel index into a half-width texture - confirm against callers. */
VertTexPos VSTexPosHalf_Reverse(uint id : VERTEXID)
{
	float id_hi = float(id >> 1);
	float id_lo = float(id & uint(1));

	float tex_u = id_hi * 2.0;
	float tex_v = obs_glsl_compile ? (id_lo * 2.0) : (1.0 - id_lo * 2.0);

	VertTexPos result;
	result.pos = float4(id_hi * 4.0 - 1.0, id_lo * 4.0 - 1.0, 0.0, 1.0);
	result.uv = float2(width_d2 * tex_u, height * tex_v);
	return result;
}
/* Vertex shader that derives the clip-space position from the vertex id and
 * emits a texture coordinate scaled by (width_d2, height_d2) instead of a
 * normalized one.
 *
 * For ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1). The unscaled
 * u is 0, 0, 2; the unscaled v is 0, 2, 0 when obs_glsl_compile is true and
 * 1, -1, 1 otherwise.
 * NOTE(review): the scaling presumably lets the pixel shader use uv directly
 * as a texel index into a half-width, half-height plane - confirm. */
VertTexPos VSTexPosHalfHalf_Reverse(uint id : VERTEXID)
{
	float id_hi = float(id >> 1);
	float id_lo = float(id & uint(1));

	float tex_u = id_hi * 2.0;
	float tex_v = obs_glsl_compile ? (id_lo * 2.0) : (1.0 - id_lo * 2.0);

	VertTexPos result;
	result.pos = float4(id_hi * 4.0 - 1.0, id_lo * 4.0 - 1.0, 0.0, 1.0);
	result.uv = float2(width_d2 * tex_u, height_d2 * tex_v);
	return result;
}
/* Vertex shader that derives the clip-space position from the vertex id and
 * emits a three-component coordinate: the horizontal value scaled by both
 * width (x) and width_d2 (y), and the vertical value scaled by height (z).
 *
 * For ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1). The unscaled
 * horizontal value is 0, 0, 2; the unscaled vertical value is 0, 2, 0 when
 * obs_glsl_compile is true and 1, -1, 1 otherwise. */
VertPosWide VSPosWide_Reverse(uint id : VERTEXID)
{
	float id_hi = float(id >> 1);
	float id_lo = float(id & uint(1));

	float tex_u = id_hi * 2.0;
	float tex_v = obs_glsl_compile ? (id_lo * 2.0) : (1.0 - id_lo * 2.0);

	VertPosWide result;
	result.pos = float4(id_hi * 4.0 - 1.0, id_lo * 4.0 - 1.0, 0.0, 1.0);
	result.pos_wide = float3(width * tex_u, width_d2 * tex_u, height * tex_v);
	return result;
}
float PS_Y(FragPos frag_in) : TARGET
{
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
float y = dot(color_vec0.xyz, rgb) + color_vec0.w;
return y;
}
@ -130,22 +163,22 @@ float2 PS_UV_Wide(FragTexWide frag_in) : TARGET
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
float u = dot(color_vec1.xyz, rgb) + color_vec1.w;
float v = dot(color_vec2.xyz, rgb) + color_vec2.w;
return float2(u, v);
}
float PS_U(FragTex frag_in) : TARGET
float PS_U(FragPos frag_in) : TARGET
{
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float u = dot(color_vec1.xyz, rgb) + color_vec1.w;
return u;
}
float PS_V(FragTex frag_in) : TARGET
float PS_V(FragPos frag_in) : TARGET
{
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float v = dot(color_vec2.xyz, rgb) + color_vec2.w;
return v;
}
@ -154,7 +187,7 @@ float PS_U_Wide(FragTexWide frag_in) : TARGET
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
float u = dot(color_vec1.xyz, rgb) + color_vec1.w;
return u;
}
@ -163,161 +196,138 @@ float PS_V_Wide(FragTexWide frag_in) : TARGET
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
float v = dot(color_vec2.xyz, rgb) + color_vec2.w;
return v;
}
float GetIntOffsetColor(int offset)
float3 YUV_to_RGB(float3 yuv)
{
return image.Load(int3(offset % int_input_width,
offset / int_input_width,
0)).r;
}
float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
{
float y = frag_in.uv.y;
float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
width_d2_i;
x += input_width_i_d2;
float4 texel = image.Sample(def_sampler, float2(x, y));
float3 yuv = float3(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos]);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float r = dot(color_vec0.xyz, yuv) + color_vec0.w;
float g = dot(color_vec1.xyz, yuv) + color_vec1.w;
float b = dot(color_vec2.xyz, yuv) + color_vec2.w;
return float3(r, g, b);
}
float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
float3 PSUYVY_Reverse(FragTex frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.yw;
float2 cbcr = y2uv.zx;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSPlanar422_Reverse(FragTex frag_in) : TARGET
float3 PSYUY2_Reverse(FragTex frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = y * (int_width / 2) + x / 2;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.zx;
float2 cbcr = y2uv.yw;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
float3 PSYVYU_Reverse(FragTex frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = y * int_width + x;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float4 y2uv = image.Load(int3(frag_in.uv.xy, 0));
float2 y01 = y2uv.zx;
float2 cbcr = y2uv.wy;
float leftover = frac(frag_in.uv.x);
float y = (leftover < 0.5) ? y01.x : y01.y;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSNV12_Reverse(FragTex frag_in) : TARGET
float3 PSPlanar420_Reverse(VertTexPos frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma = int_u_plane_offset + chroma_offset * 2;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma),
GetIntOffsetColor(chroma + 1)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
float y = image.Load(int3(frag_in.pos.xy, 0)).x;
int3 xy0_chroma = int3(frag_in.uv, 0);
float cb = image1.Load(xy0_chroma).x;
float cr = image2.Load(xy0_chroma).x;
float3 yuv = float3(y, cb, cr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSY800_Limited(FragTex frag_in) : TARGET
float3 PSPlanar422_Reverse(FragPosWide frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float limited = image.Load(int3(x, y, 0)).x;
float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
return float4(full, full, full, 1.0);
float y = image.Load(int3(frag_in.pos_wide.xz, 0)).x;
int3 xy0_chroma = int3(frag_in.pos_wide.yz, 0);
float cb = image1.Load(xy0_chroma).x;
float cr = image2.Load(xy0_chroma).x;
float3 yuv = float3(y, cb, cr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSY800_Full(FragTex frag_in) : TARGET
float3 PSPlanar444_Reverse(FragPos frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float3 full = image.Load(int3(x, y, 0)).xxx;
return float4(full, 1.0);
int3 xy0 = int3(frag_in.pos.xy, 0);
float y = image.Load(xy0).x;
float cb = image1.Load(xy0).x;
float cr = image2.Load(xy0).x;
float3 yuv = float3(y, cb, cr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 PSRGB_Limited(FragTex frag_in) : TARGET
float3 PSNV12_Reverse(VertTexPos frag_in) : TARGET
{
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float y = image.Load(int3(frag_in.pos.xy, 0)).x;
float2 cbcr = image1.Load(int3(frag_in.uv, 0)).xy;
float3 yuv = float3(y, cbcr);
float3 rgb = YUV_to_RGB(yuv);
return rgb;
}
float4 rgba = image.Load(int3(x, y, 0));
rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
/* Reads the x channel of the texel at the fragment position and rescales it
 * with (255/219) * value - (16/219), which maps 16/255 to 0 and 235/255 to 1.
 * The rescaled value is replicated into all three output channels. No clamp
 * is applied here, so inputs outside that interval yield values outside
 * [0, 1]. */
float3 PSY800_Limited(FragPos frag_in) : TARGET
{
	float luma = image.Load(int3(frag_in.pos.xy, 0)).x;
	float expanded = (255.0 / 219.0) * luma - (16.0 / 219.0);
	return float3(expanded, expanded, expanded);
}
/* Reads the x channel of the texel at the fragment position and returns it
 * unmodified in all three output channels. */
float3 PSY800_Full(FragPos frag_in) : TARGET
{
	return image.Load(int3(frag_in.pos.xy, 0)).xxx;
}
/* Reads the texel at the fragment position and rescales its rgb channels
 * with (255/219) * value - (16/219), which maps 16/255 to 0 and 235/255 to 1.
 * The alpha channel is passed through unchanged. No clamp is applied here. */
float4 PSRGB_Limited(FragPos frag_in) : TARGET
{
	float4 texel = image.Load(int3(frag_in.pos.xy, 0));
	float3 expanded = (255.0 / 219.0) * texel.rgb - (16.0 / 219.0);
	return float4(expanded, texel.a);
}
float4 PSBGR3_Limited(FragTex frag_in) : TARGET
float3 PSBGR3_Limited(FragPos frag_in) : TARGET
{
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float b = image.Load(int3(x - 1, y, 0)).x;
float x = frag_in.pos.x * 3.0;
float y = frag_in.pos.y;
float b = image.Load(int3(x - 1.0, y, 0)).x;
float g = image.Load(int3(x, y, 0)).x;
float r = image.Load(int3(x + 1, y, 0)).x;
float r = image.Load(int3(x + 1.0, y, 0)).x;
float3 rgb = float3(r, g, b);
rgb = saturate((rgb - (16.0 / 255.0)) * (255.0 / 219.0));
return float4(rgb, 1.0);
rgb = (255.0 / 219.0) * rgb - (16.0 / 219.0);
return rgb;
}
float4 PSBGR3_Full(FragTex frag_in) : TARGET
float3 PSBGR3_Full(FragPos frag_in) : TARGET
{
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float b = image.Load(int3(x - 1, y, 0)).x;
float x = frag_in.pos.x * 3.0;
float y = frag_in.pos.y;
float b = image.Load(int3(x - 1.0, y, 0)).x;
float g = image.Load(int3(x, y, 0)).x;
float r = image.Load(int3(x + 1, y, 0)).x;
float r = image.Load(int3(x + 1.0, y, 0)).x;
float3 rgb = float3(r, g, b);
return float4(rgb, 1.0);
return rgb;
}
technique Planar_Y
@ -333,7 +343,7 @@ technique Planar_U
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PS_U(frag_in);
}
}
@ -342,7 +352,7 @@ technique Planar_V
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PS_V(frag_in);
}
}
@ -351,7 +361,7 @@ technique Planar_U_Left
{
pass
{
vertex_shader = VSTexPosLeft(id);
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_U_Wide(frag_in);
}
}
@ -360,7 +370,7 @@ technique Planar_V_Left
{
pass
{
vertex_shader = VSTexPosLeft(id);
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_V_Wide(frag_in);
}
}
@ -378,7 +388,7 @@ technique NV12_UV
{
pass
{
vertex_shader = VSTexPosLeft(id);
vertex_shader = VSTexPos_Left(id);
pixel_shader = PS_UV_Wide(frag_in);
}
}
@ -387,8 +397,8 @@ technique UYVY_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSUYVY_Reverse(frag_in);
}
}
@ -396,8 +406,8 @@ technique YUY2_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSYUY2_Reverse(frag_in);
}
}
@ -405,8 +415,8 @@ technique YVYU_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
vertex_shader = VSTexPosHalf_Reverse(id);
pixel_shader = PSYVYU_Reverse(frag_in);
}
}
@ -414,7 +424,7 @@ technique I420_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSTexPosHalfHalf_Reverse(id);
pixel_shader = PSPlanar420_Reverse(frag_in);
}
}
@ -423,7 +433,7 @@ technique I422_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPosWide_Reverse(id);
pixel_shader = PSPlanar422_Reverse(frag_in);
}
}
@ -432,7 +442,7 @@ technique I444_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSPlanar444_Reverse(frag_in);
}
}
@ -441,7 +451,7 @@ technique NV12_Reverse
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSTexPosHalfHalf_Reverse(id);
pixel_shader = PSNV12_Reverse(frag_in);
}
}
@ -450,7 +460,7 @@ technique Y800_Limited
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSY800_Limited(frag_in);
}
}
@ -459,7 +469,7 @@ technique Y800_Full
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSY800_Full(frag_in);
}
}
@ -468,7 +478,7 @@ technique RGB_Limited
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSRGB_Limited(frag_in);
}
}
@ -477,7 +487,7 @@ technique BGR3_Limited
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSBGR3_Limited(frag_in);
}
}
@ -486,7 +496,7 @@ technique BGR3_Full
{
pass
{
vertex_shader = VSTexPos(id);
vertex_shader = VSPos(id);
pixel_shader = PSBGR3_Full(frag_in);
}
}

View file

@ -628,7 +628,7 @@ struct obs_source {
float balance;
/* async video data */
gs_texture_t *async_texture;
gs_texture_t *async_textures[MAX_AV_PLANES];
gs_texrender_t *async_texrender;
struct obs_source_frame *cur_async_frame;
bool async_gpu_conversion;
@ -636,8 +636,8 @@ struct obs_source {
bool async_full_range;
enum video_format async_cache_format;
bool async_cache_full_range;
enum gs_color_format async_texture_format;
int async_plane_offset[2];
enum gs_color_format async_texture_formats[MAX_AV_PLANES];
int async_channel_count;
bool async_flip;
bool async_active;
bool async_update_texture;
@ -651,15 +651,15 @@ struct obs_source {
uint32_t async_height;
uint32_t async_cache_width;
uint32_t async_cache_height;
uint32_t async_convert_width;
uint32_t async_convert_height;
uint32_t async_convert_width[MAX_AV_PLANES];
uint32_t async_convert_height[MAX_AV_PLANES];
/* async video deinterlacing */
uint64_t deinterlace_offset;
uint64_t deinterlace_frame_ts;
gs_effect_t *deinterlace_effect;
struct obs_source_frame *prev_async_frame;
gs_texture_t *async_prev_texture;
gs_texture_t *async_prev_textures[MAX_AV_PLANES];
gs_texrender_t *async_prev_texrender;
uint32_t deinterlace_half_duration;
enum obs_deinterlace_mode deinterlace_mode;
@ -797,6 +797,10 @@ extern struct obs_source_frame *filter_async_video(obs_source_t *source,
extern bool update_async_texture(struct obs_source *source,
const struct obs_source_frame *frame,
gs_texture_t *tex, gs_texrender_t *texrender);
extern bool update_async_textures(struct obs_source *source,
const struct obs_source_frame *frame,
gs_texture_t *tex[MAX_AV_PLANES],
gs_texrender_t *texrender);
extern bool set_async_texture_size(struct obs_source *source,
const struct obs_source_frame *frame);
extern void remove_async_frame(obs_source_t *source,

View file

@ -236,16 +236,18 @@ void set_deinterlace_texture_size(obs_source_t *source)
source->async_prev_texrender =
gs_texrender_create(GS_BGRX, GS_ZS_NONE);
source->async_prev_texture = gs_texture_create(
source->async_convert_width,
source->async_convert_height,
source->async_texture_format, 1, NULL, GS_DYNAMIC);
for (int c = 0; c < source->async_channel_count; c++)
source->async_prev_textures[c] = gs_texture_create(
source->async_convert_width[c],
source->async_convert_height[c],
source->async_texture_formats[c], 1, NULL,
GS_DYNAMIC);
} else {
enum gs_color_format format =
convert_video_format(source->async_format);
source->async_prev_texture = gs_texture_create(
source->async_prev_textures[0] = gs_texture_create(
source->async_width, source->async_height, format, 1,
NULL, GS_DYNAMIC);
}
@ -286,17 +288,20 @@ void deinterlace_update_async_video(obs_source_t *source)
if (frame) {
if (set_async_texture_size(source, frame)) {
update_async_texture(source, frame,
source->async_prev_texture,
source->async_prev_texrender);
update_async_textures(source, frame,
source->async_prev_textures,
source->async_prev_texrender);
}
obs_source_release_frame(source, frame);
} else if (updated) { /* swap cur/prev if no previous texture */
gs_texture_t *prev_tex = source->async_prev_texture;
source->async_prev_texture = source->async_texture;
source->async_texture = prev_tex;
for (size_t c = 0; c < MAX_AV_PLANES; c++) {
gs_texture_t *prev_tex = source->async_prev_textures[c];
source->async_prev_textures[c] =
source->async_textures[c];
source->async_textures[c] = prev_tex;
}
if (source->async_texrender) {
gs_texrender_t *prev = source->async_prev_texrender;
@ -358,11 +363,11 @@ void deinterlace_render(obs_source_t *s)
gs_texture_t *cur_tex =
s->async_texrender
? gs_texrender_get_texture(s->async_texrender)
: s->async_texture;
: s->async_textures[0];
gs_texture_t *prev_tex =
s->async_prev_texrender
? gs_texrender_get_texture(s->async_prev_texrender)
: s->async_prev_texture;
: s->async_prev_textures[0];
if (!cur_tex || !prev_tex || !s->async_width || !s->async_height)
return;
@ -407,10 +412,14 @@ static void enable_deinterlacing(obs_source_t *source,
static void disable_deinterlacing(obs_source_t *source)
{
obs_enter_graphics();
gs_texture_destroy(source->async_prev_texture);
gs_texture_destroy(source->async_prev_textures[0]);
gs_texture_destroy(source->async_prev_textures[1]);
gs_texture_destroy(source->async_prev_textures[2]);
gs_texrender_destroy(source->async_prev_texrender);
source->deinterlace_mode = OBS_DEINTERLACE_MODE_DISABLE;
source->async_prev_texture = NULL;
source->async_prev_textures[0] = NULL;
source->async_prev_textures[1] = NULL;
source->async_prev_textures[2] = NULL;
source->async_prev_texrender = NULL;
obs_leave_graphics();
}

View file

@ -578,10 +578,10 @@ void obs_source_destroy(struct obs_source *source)
gs_texrender_destroy(source->async_texrender);
if (source->async_prev_texrender)
gs_texrender_destroy(source->async_prev_texrender);
if (source->async_texture)
gs_texture_destroy(source->async_texture);
if (source->async_prev_texture)
gs_texture_destroy(source->async_prev_texture);
for (size_t c = 0; c < MAX_AV_PLANES; c++) {
gs_texture_destroy(source->async_textures[c]);
gs_texture_destroy(source->async_prev_textures[c]);
}
if (source->filter_texrender)
gs_texrender_destroy(source->filter_texrender);
gs_leave_context();
@ -1381,88 +1381,101 @@ static inline enum convert_type get_convert_type(enum video_format format,
static inline bool set_packed422_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width / 2;
source->async_convert_height = frame->height;
source->async_texture_format = GS_BGRA;
source->async_convert_width[0] = frame->width / 2;
source->async_convert_height[0] = frame->height;
source->async_texture_formats[0] = GS_BGRA;
source->async_channel_count = 1;
return true;
}
static inline bool set_planar444_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width;
source->async_convert_height = frame->height * 3;
source->async_texture_format = GS_R8;
source->async_plane_offset[0] = (int)(frame->data[1] - frame->data[0]);
source->async_plane_offset[1] = (int)(frame->data[2] - frame->data[0]);
source->async_convert_width[0] = frame->width;
source->async_convert_width[1] = frame->width;
source->async_convert_width[2] = frame->width;
source->async_convert_height[0] = frame->height;
source->async_convert_height[1] = frame->height;
source->async_convert_height[2] = frame->height;
source->async_texture_formats[0] = GS_R8;
source->async_texture_formats[1] = GS_R8;
source->async_texture_formats[2] = GS_R8;
source->async_channel_count = 3;
return true;
}
static inline bool set_planar420_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
uint32_t size = frame->width * frame->height;
size += size / 2;
source->async_convert_width = frame->width;
source->async_convert_height = size / frame->width;
source->async_texture_format = GS_R8;
source->async_plane_offset[0] = (int)(frame->data[1] - frame->data[0]);
source->async_plane_offset[1] = (int)(frame->data[2] - frame->data[0]);
source->async_convert_width[0] = frame->width;
source->async_convert_width[1] = frame->width / 2;
source->async_convert_width[2] = frame->width / 2;
source->async_convert_height[0] = frame->height;
source->async_convert_height[1] = frame->height / 2;
source->async_convert_height[2] = frame->height / 2;
source->async_texture_formats[0] = GS_R8;
source->async_texture_formats[1] = GS_R8;
source->async_texture_formats[2] = GS_R8;
source->async_channel_count = 3;
return true;
}
static inline bool set_planar422_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
uint32_t size = frame->width * frame->height;
size *= 2;
source->async_convert_width = frame->width;
source->async_convert_height = size / frame->width;
source->async_texture_format = GS_R8;
source->async_plane_offset[0] = (int)(frame->data[1] - frame->data[0]);
source->async_plane_offset[1] = (int)(frame->data[2] - frame->data[0]);
source->async_convert_width[0] = frame->width;
source->async_convert_width[1] = frame->width / 2;
source->async_convert_width[2] = frame->width / 2;
source->async_convert_height[0] = frame->height;
source->async_convert_height[1] = frame->height;
source->async_convert_height[2] = frame->height;
source->async_texture_formats[0] = GS_R8;
source->async_texture_formats[1] = GS_R8;
source->async_texture_formats[2] = GS_R8;
source->async_channel_count = 3;
return true;
}
static inline bool set_nv12_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
uint32_t size = frame->width * frame->height;
size += size / 2;
source->async_convert_width = frame->width;
source->async_convert_height = size / frame->width;
source->async_texture_format = GS_R8;
source->async_plane_offset[0] = (int)(frame->data[1] - frame->data[0]);
source->async_convert_width[0] = frame->width;
source->async_convert_width[1] = frame->width / 2;
source->async_convert_height[0] = frame->height;
source->async_convert_height[1] = frame->height / 2;
source->async_texture_formats[0] = GS_R8;
source->async_texture_formats[1] = GS_R8G8;
source->async_channel_count = 2;
return true;
}
static inline bool set_y800_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width;
source->async_convert_height = frame->height;
source->async_texture_format = GS_R8;
source->async_convert_width[0] = frame->width;
source->async_convert_height[0] = frame->height;
source->async_texture_formats[0] = GS_R8;
source->async_channel_count = 1;
return true;
}
static inline bool set_rgb_limited_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width;
source->async_convert_height = frame->height;
source->async_texture_format = convert_video_format(frame->format);
source->async_convert_width[0] = frame->width;
source->async_convert_height[0] = frame->height;
source->async_texture_formats[0] = convert_video_format(frame->format);
source->async_channel_count = 1;
return true;
}
static inline bool set_bgr3_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width * 3;
source->async_convert_height = frame->height;
source->async_texture_format = GS_R8;
source->async_convert_width[0] = frame->width * 3;
source->async_convert_height[0] = frame->height;
source->async_texture_formats[0] = GS_R8;
source->async_channel_count = 1;
return true;
}
@ -1521,38 +1534,36 @@ bool set_async_texture_size(struct obs_source *source,
gs_enter_context(obs->video.graphics);
gs_texture_destroy(source->async_texture);
gs_texture_destroy(source->async_prev_texture);
for (size_t c = 0; c < MAX_AV_PLANES; c++) {
gs_texture_destroy(source->async_textures[c]);
source->async_textures[c] = NULL;
gs_texture_destroy(source->async_prev_textures[c]);
source->async_prev_textures[c] = NULL;
}
gs_texrender_destroy(source->async_texrender);
gs_texrender_destroy(source->async_prev_texrender);
source->async_texture = NULL;
source->async_prev_texture = NULL;
source->async_texrender = NULL;
source->async_prev_texrender = NULL;
if (cur != CONVERT_NONE && init_gpu_conversion(source, frame)) {
source->async_gpu_conversion = true;
enum gs_color_format format =
(cur == CONVERT_RGB_LIMITED)
? convert_video_format(frame->format)
: GS_BGRX;
const enum gs_color_format format = convert_video_format(frame->format);
const bool async_gpu_conversion = (cur != CONVERT_NONE) &&
init_gpu_conversion(source, frame);
source->async_gpu_conversion = async_gpu_conversion;
if (async_gpu_conversion) {
source->async_texrender =
gs_texrender_create(format, GS_ZS_NONE);
source->async_texture = gs_texture_create(
source->async_convert_width,
source->async_convert_height,
source->async_texture_format, 1, NULL, GS_DYNAMIC);
for (int c = 0; c < source->async_channel_count; ++c)
source->async_textures[c] = gs_texture_create(
source->async_convert_width[c],
source->async_convert_height[c],
source->async_texture_formats[c], 1, NULL,
GS_DYNAMIC);
} else {
enum gs_color_format format =
convert_video_format(frame->format);
source->async_gpu_conversion = false;
source->async_texture = gs_texture_create(frame->width,
frame->height, format,
1, NULL, GS_DYNAMIC);
source->async_textures[0] =
gs_texture_create(frame->width, frame->height, format,
1, NULL, GS_DYNAMIC);
}
if (deinterlacing_enabled(source))
@ -1560,10 +1571,10 @@ bool set_async_texture_size(struct obs_source *source,
gs_leave_context();
return !!source->async_texture;
return source->async_textures[0] != NULL;
}
static void upload_raw_frame(gs_texture_t *tex,
static void upload_raw_frame(gs_texture_t *tex[MAX_AV_PLANES],
const struct obs_source_frame *frame)
{
switch (get_convert_type(frame->format, frame->full_range)) {
@ -1572,15 +1583,15 @@ static void upload_raw_frame(gs_texture_t *tex,
case CONVERT_800:
case CONVERT_RGB_LIMITED:
case CONVERT_BGR3:
gs_texture_set_image(tex, frame->data[0], frame->linesize[0],
false);
break;
case CONVERT_420:
case CONVERT_422:
case CONVERT_NV12:
case CONVERT_444:
gs_texture_set_image(tex, frame->data[0], frame->width, false);
for (size_t c = 0; c < MAX_AV_PLANES; c++) {
if (tex[c])
gs_texture_set_image(tex[c], frame->data[c],
frame->linesize[c], false);
}
break;
case CONVERT_NONE:
@ -1647,7 +1658,8 @@ static inline void set_eparami(gs_effect_t *effect, const char *name, int val)
/* Renders one async frame through the conversion effect into `texrender`.
 *
 * The technique is looked up on obs->video.conversion_effect using the name
 * returned by select_conversion_technique(frame->format, frame->full_range).
 * The draw is a single 3-vertex GS_TRIS call made with blending disabled;
 * blending is re-enabled before gs_texrender_end().
 *
 * NOTE(review): this text is diff-derived. It carries two parameter lists
 * (single `tex` and `tex[MAX_AV_PLANES]`), two bodies, and a hunk header
 * where lines were elided. The per-plane body returns the result of
 * gs_texrender_begin(); the other one returns false/true explicitly.
 * Confirm which lines are live before changing any code here.
 */
static bool update_async_texrender(struct obs_source *source,
const struct obs_source_frame *frame,
gs_texture_t *tex, gs_texrender_t *texrender)
gs_texture_t *tex[MAX_AV_PLANES],
gs_texrender_t *texrender)
{
GS_DEBUG_MARKER_BEGIN(GS_DEBUG_COLOR_CONVERT_FORMAT, "Convert Format");
@ -1658,66 +1670,88 @@ static bool update_async_texrender(struct obs_source *source,
uint32_t cx = source->async_width;
uint32_t cy = source->async_height;
float convert_width = (float)source->async_convert_width;
gs_effect_t *conv = obs->video.conversion_effect;
const char *tech_name =
select_conversion_technique(frame->format, frame->full_range);
gs_technique_t *tech = gs_effect_get_technique(conv, tech_name);
if (!gs_texrender_begin(texrender, cx, cy)) {
GS_DEBUG_MARKER_END();
return false;
const bool success = gs_texrender_begin(texrender, cx, cy);
if (success) {
gs_enable_blending(false);
gs_technique_begin(tech);
gs_technique_begin_pass(tech, 0);
/* Bind only the plane textures that are set: tex[0..2] go to the
 * effect parameters "image", "image1" and "image2" respectively. */
if (tex[0])
gs_effect_set_texture(
gs_effect_get_param_by_name(conv, "image"),
tex[0]);
if (tex[1])
gs_effect_set_texture(
gs_effect_get_param_by_name(conv, "image1"),
tex[1]);
if (tex[2])
gs_effect_set_texture(
gs_effect_get_param_by_name(conv, "image2"),
tex[2]);
/* Output dimensions plus values derived from them (halves and
 * 0.5 / width), all passed as floats. */
set_eparam(conv, "width", (float)cx);
set_eparam(conv, "height", (float)cy);
set_eparam(conv, "width_d2", (float)cx * 0.5f);
set_eparam(conv, "height_d2", (float)cy * 0.5f);
set_eparam(conv, "width_x2_i", 0.5f / (float)cx);
/* The first twelve color_matrix entries are uploaded as three vec4s,
 * four consecutive entries per vector. */
struct vec4 vec0, vec1, vec2;
vec4_set(&vec0, frame->color_matrix[0], frame->color_matrix[1],
frame->color_matrix[2], frame->color_matrix[3]);
vec4_set(&vec1, frame->color_matrix[4], frame->color_matrix[5],
frame->color_matrix[6], frame->color_matrix[7]);
vec4_set(&vec2, frame->color_matrix[8], frame->color_matrix[9],
frame->color_matrix[10], frame->color_matrix[11]);
gs_effect_set_vec4(
gs_effect_get_param_by_name(conv, "color_vec0"), &vec0);
gs_effect_set_vec4(
gs_effect_get_param_by_name(conv, "color_vec1"), &vec1);
gs_effect_set_vec4(
gs_effect_get_param_by_name(conv, "color_vec2"), &vec2);
/* Range min/max (three floats each) are uploaded only when the frame
 * is not full range. */
if (!frame->full_range) {
gs_eparam_t *min_param = gs_effect_get_param_by_name(
conv, "color_range_min");
gs_effect_set_val(min_param, frame->color_range_min,
sizeof(float) * 3);
gs_eparam_t *max_param = gs_effect_get_param_by_name(
conv, "color_range_max");
gs_effect_set_val(max_param, frame->color_range_max,
sizeof(float) * 3);
}
gs_draw(GS_TRIS, 0, 3);
gs_technique_end_pass(tech);
gs_technique_end(tech);
gs_enable_blending(true);
gs_texrender_end(texrender);
}
/* NOTE(review): from here to gs_texrender_end() the same sequence is
 * repeated with the single `tex` and a different parameter set
 * (width_d2_i, input_width_i_d2, int_* plane offsets, a 16-float
 * color_matrix). Presumably this is the pre-change side of the diff;
 * confirm before relying on it. */
gs_enable_blending(false);
gs_technique_begin(tech);
gs_technique_begin_pass(tech, 0);
gs_effect_set_texture(gs_effect_get_param_by_name(conv, "image"), tex);
set_eparam(conv, "width", (float)cx);
set_eparam(conv, "height", (float)cy);
set_eparam(conv, "width_d2", cx * 0.5f);
set_eparam(conv, "width_d2_i", 1.0f / (cx * 0.5f));
set_eparam(conv, "input_width_i_d2", (1.0f / convert_width) * 0.5f);
set_eparami(conv, "int_width", (int)cx);
set_eparami(conv, "int_input_width", (int)source->async_convert_width);
set_eparami(conv, "int_u_plane_offset",
(int)source->async_plane_offset[0]);
set_eparami(conv, "int_v_plane_offset",
(int)source->async_plane_offset[1]);
gs_effect_set_val(gs_effect_get_param_by_name(conv, "color_matrix"),
frame->color_matrix, sizeof(float) * 16);
if (!frame->full_range) {
gs_eparam_t *min_param =
gs_effect_get_param_by_name(conv, "color_range_min");
gs_effect_set_val(min_param, frame->color_range_min,
sizeof(float) * 3);
gs_eparam_t *max_param =
gs_effect_get_param_by_name(conv, "color_range_max");
gs_effect_set_val(max_param, frame->color_range_max,
sizeof(float) * 3);
}
gs_draw(GS_TRIS, 0, 3);
gs_technique_end_pass(tech);
gs_technique_end(tech);
gs_enable_blending(true);
gs_texrender_end(texrender);
GS_DEBUG_MARKER_END();
return true;
return success;
}
/* Single-texture entry point.
 *
 * Places `tex` in slot 0 of a MAX_AV_PLANES-sized array whose remaining
 * slots are NULL, then forwards to update_async_textures() and returns its
 * result unchanged.
 */
bool update_async_texture(struct obs_source *source,
			  const struct obs_source_frame *frame,
			  gs_texture_t *tex, gs_texrender_t *texrender)
{
	/* Elements without an explicit initializer are zero-initialized,
	 * so every plane after the first is a null pointer. */
	gs_texture_t *planes[MAX_AV_PLANES] = {tex};

	return update_async_textures(source, frame, planes, texrender);
}
bool update_async_textures(struct obs_source *source,
const struct obs_source_frame *frame,
gs_texture_t *tex[MAX_AV_PLANES],
gs_texrender_t *texrender)
{
enum convert_type type;
@ -1728,7 +1762,7 @@ bool update_async_texture(struct obs_source *source,
type = get_convert_type(frame->format, frame->full_range);
if (type == CONVERT_NONE) {
gs_texture_set_image(tex, frame->data[0], frame->linesize[0],
gs_texture_set_image(tex[0], frame->data[0], frame->linesize[0],
false);
return true;
}
@ -1739,7 +1773,7 @@ bool update_async_texture(struct obs_source *source,
static inline void obs_source_draw_texture(struct obs_source *source,
gs_effect_t *effect)
{
gs_texture_t *tex = source->async_texture;
gs_texture_t *tex = source->async_textures[0];
gs_eparam_t *param;
if (source->async_texrender)
@ -1790,9 +1824,9 @@ static void obs_source_update_async_video(obs_source_t *source)
}
if (source->async_update_texture) {
update_async_texture(source, frame,
source->async_texture,
source->async_texrender);
update_async_textures(source, frame,
source->async_textures,
source->async_texrender);
source->async_update_texture = false;
}
@ -1803,7 +1837,7 @@ static void obs_source_update_async_video(obs_source_t *source)
/* Draws the source's async video through obs_source_draw_async_texture(),
 * but only when the source has a texture set and async_active is true.
 *
 * NOTE(review): the two stacked `if` lines look like the before/after forms
 * of one check left by a diff (async_texture vs. async_textures[0]).
 * As written they nest, so both conditions must hold; confirm which
 * line is meant to be live.
 */
static inline void obs_source_render_async_video(obs_source_t *source)
{
if (source->async_texture && source->async_active)
if (source->async_textures[0] && source->async_active)
obs_source_draw_async_texture(source);
}
@ -2575,8 +2609,8 @@ obs_source_preload_video_internal(obs_source_t *source,
copy_frame_data(source->async_preload_frame, frame);
set_async_texture_size(source, source->async_preload_frame);
update_async_texture(source, source->async_preload_frame,
source->async_texture, source->async_texrender);
update_async_textures(source, source->async_preload_frame,
source->async_textures, source->async_texrender);
source->last_frame_ts = frame->timestamp;

View file

@ -293,37 +293,37 @@ static void render_convert_texture(struct obs_core_video *video,
profile_start(render_convert_texture_name);
gs_effect_t *effect = video->conversion_effect;
gs_eparam_t *color_vec_y =
gs_effect_get_param_by_name(effect, "color_vec_y");
gs_eparam_t *color_vec_u =
gs_effect_get_param_by_name(effect, "color_vec_u");
gs_eparam_t *color_vec_v =
gs_effect_get_param_by_name(effect, "color_vec_v");
gs_eparam_t *color_vec0 =
gs_effect_get_param_by_name(effect, "color_vec0");
gs_eparam_t *color_vec1 =
gs_effect_get_param_by_name(effect, "color_vec1");
gs_eparam_t *color_vec2 =
gs_effect_get_param_by_name(effect, "color_vec2");
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_eparam_t *width_i = gs_effect_get_param_by_name(effect, "width_i");
struct vec4 vec_y, vec_u, vec_v;
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
struct vec4 vec0, vec1, vec2;
vec4_set(&vec0, video->color_matrix[4], video->color_matrix[5],
video->color_matrix[6], video->color_matrix[7]);
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
vec4_set(&vec1, video->color_matrix[0], video->color_matrix[1],
video->color_matrix[2], video->color_matrix[3]);
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
vec4_set(&vec2, video->color_matrix[8], video->color_matrix[9],
video->color_matrix[10], video->color_matrix[11]);
gs_enable_blending(false);
if (video->convert_textures[0]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_y, &vec_y);
gs_effect_set_vec4(color_vec0, &vec0);
render_convert_plane(effect, texture,
video->convert_textures[0],
video->conversion_techs[0]);
if (video->convert_textures[1]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_u, &vec_u);
gs_effect_set_vec4(color_vec1, &vec1);
if (!video->convert_textures[2])
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_vec4(color_vec2, &vec2);
gs_effect_set_float(width_i, video->conversion_width_i);
render_convert_plane(effect, texture,
video->convert_textures[1],
@ -331,7 +331,7 @@ static void render_convert_texture(struct obs_core_video *video,
if (video->convert_textures[2]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_vec4(color_vec2, &vec2);
gs_effect_set_float(width_i,
video->conversion_width_i);
render_convert_plane(