This is an automated email from the git hooks/post-receive script.
pierov pushed a commit to branch geckoview-99.0.1-11.0-1 in repository tor-browser.
commit 8259e9ab200d8ce8cbe154ff3ee2bd6e4f3f3dac Author: Lee Salzman lsalzman@mozilla.com AuthorDate: Thu Mar 10 16:36:05 2022 +0000
Bug 1758736 - Support RG16 textures in SWGL. r=bradwerth a=dmeehan
Differential Revision: https://phabricator.services.mozilla.com/D140716 --- gfx/webrender_bindings/RenderTextureHostSWGL.cpp | 17 +++- gfx/wr/swgl/src/gl.cc | 6 ++ gfx/wr/swgl/src/gl_defs.h | 1 + gfx/wr/swgl/src/glsl.h | 2 +- gfx/wr/swgl/src/swgl_ext.h | 17 +++- gfx/wr/swgl/src/texture.h | 121 +++++++++++++++++++++++ gfx/wr/webrender/res/brush_yuv_image.glsl | 4 +- gfx/wr/webrender/res/composite.glsl | 6 +- gfx/wr/webrender/res/yuv.glsl | 65 +++++------- 9 files changed, 190 insertions(+), 49 deletions(-)
diff --git a/gfx/webrender_bindings/RenderTextureHostSWGL.cpp b/gfx/webrender_bindings/RenderTextureHostSWGL.cpp index 17f911fd28740..0e1b869d48f5b 100644 --- a/gfx/webrender_bindings/RenderTextureHostSWGL.cpp +++ b/gfx/webrender_bindings/RenderTextureHostSWGL.cpp @@ -55,8 +55,20 @@ bool RenderTextureHostSWGL::UpdatePlanes(RenderCompositor* aCompositor, } break; case gfx::SurfaceFormat::NV12: - MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_8); - internalFormat = i > 0 ? LOCAL_GL_RG8 : LOCAL_GL_R8; + switch (colorDepth) { + case gfx::ColorDepth::COLOR_8: + internalFormat = i > 0 ? LOCAL_GL_RG8 : LOCAL_GL_R8; + break; + case gfx::ColorDepth::COLOR_10: + case gfx::ColorDepth::COLOR_12: + case gfx::ColorDepth::COLOR_16: + internalFormat = i > 0 ? LOCAL_GL_RG16 : LOCAL_GL_R16; + break; + } + break; + case gfx::SurfaceFormat::P010: + MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_10); + internalFormat = i > 0 ? LOCAL_GL_RG16 : LOCAL_GL_R16; break; case gfx::SurfaceFormat::YUV422: MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_8); @@ -164,6 +176,7 @@ bool RenderTextureHostSWGL::LockSWGLCompositeSurface( switch (GetFormat()) { case gfx::SurfaceFormat::YUV: case gfx::SurfaceFormat::NV12: + case gfx::SurfaceFormat::P010: case gfx::SurfaceFormat::YUV422: { aInfo->yuv_planes = mPlanes.size(); auto colorSpace = GetYUVColorSpace(); diff --git a/gfx/wr/swgl/src/gl.cc b/gfx/wr/swgl/src/gl.cc index dcf2df547c9eb..2a06d3af5dcc1 100644 --- a/gfx/wr/swgl/src/gl.cc +++ b/gfx/wr/swgl/src/gl.cc @@ -256,6 +256,8 @@ static int bytes_for_internal_format(GLenum internal_format) { return 2; case GL_R16: return 2; + case GL_RG16: + return 4; default: debugf("internal format: %x\n", internal_format); assert(0); @@ -279,6 +281,8 @@ static TextureFormat gl_format_to_texture_format(int type) { return TextureFormat::RG8; case GL_R16: return TextureFormat::R16; + case GL_RG16: + return TextureFormat::RG16; case GL_RGB_RAW_422_APPLE: return TextureFormat::YUV422; default: @@ -1745,6 +1749,8 @@ 
GLenum internal_format_for_data(GLenum format, GLenum ty) { return GL_RGB_RAW_422_APPLE; } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) { return GL_R16; + } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) { + return GL_RG16; } else { debugf("unknown internal format for format %x, type %x\n", format, ty); assert(false); diff --git a/gfx/wr/swgl/src/gl_defs.h b/gfx/wr/swgl/src/gl_defs.h index 75eb6ca35981e..b60eaad0ece78 100644 --- a/gfx/wr/swgl/src/gl_defs.h +++ b/gfx/wr/swgl/src/gl_defs.h @@ -34,6 +34,7 @@ typedef intptr_t GLintptr; #define GL_RGBA8 0x8058 #define GL_R8 0x8229 #define GL_R16 0x822A +#define GL_RG16 0x822C #define GL_RGBA32I 0x8D82 #define GL_BGRA8 0x93A1 #define GL_RG8 0x822B diff --git a/gfx/wr/swgl/src/glsl.h b/gfx/wr/swgl/src/glsl.h index 3be1e49d1b2a1..3f6a59a0beacc 100644 --- a/gfx/wr/swgl/src/glsl.h +++ b/gfx/wr/swgl/src/glsl.h @@ -8,7 +8,7 @@
namespace glsl {
-enum TextureFormat { RGBA32F, RGBA32I, RGBA8, R8, RG8, R16, YUV422 }; +enum TextureFormat { RGBA32F, RGBA32I, RGBA8, R8, RG8, R16, RG16, YUV422 };
enum TextureFilter { NEAREST, LINEAR };
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h index d3dee3cb88bea..3c686dab262a4 100644 --- a/gfx/wr/swgl/src/swgl_ext.h +++ b/gfx/wr/swgl/src/swgl_ext.h @@ -1035,7 +1035,7 @@ template <typename S0, typename S1> static ALWAYS_INLINE PackedRGBA8 sampleYUV(S0 sampler0, ivec2 uv0, S1 sampler1, ivec2 uv1, const YUVMatrix& rgb_from_ycbcr, - UNUSED int rescaleFactor) { + int rescaleFactor) { switch (sampler1->format) { case TextureFormat::RG8: { assert(sampler0->format == TextureFormat::R8); @@ -1051,6 +1051,21 @@ static ALWAYS_INLINE PackedRGBA8 sampleYUV(S0 sampler0, ivec2 uv0, S1 sampler1, return convertYUV(rgb_from_ycbcr, y, lowHalf(planar.ba), highHalf(planar.rg)); } + case TextureFormat::RG16: { + assert(sampler0->format == TextureFormat::R16); + // The rescaling factor represents how many bits to add to renormalize the + // texture to 16 bits, and so the color depth is actually 16 minus the + // rescaling factor. + // Need to right shift the sample by the amount of bits over 8 it + // occupies. On output from textureLinearUnpackedR16, we have lost 1 bit + // of precision at the low end already, hence 1 is subtracted from the + // color depth. + int colorDepth = 16 - rescaleFactor; + int rescaleBits = (colorDepth - 1) - 8; + auto y = textureLinearUnpackedR16(sampler0, uv0) >> rescaleBits; + auto uv = textureLinearUnpackedRG16(sampler1, uv1) >> rescaleBits; + return rgb_from_ycbcr.convert(zip(y, y), uv); + } default: assert(false); return PackedRGBA8(0); diff --git a/gfx/wr/swgl/src/texture.h b/gfx/wr/swgl/src/texture.h index 8f6988887da48..3f7ed4a518e61 100644 --- a/gfx/wr/swgl/src/texture.h +++ b/gfx/wr/swgl/src/texture.h @@ -160,6 +160,21 @@ vec4 texelFetchR16(S sampler, ivec2 P) { return vec4(fetchOffsetsR16(sampler, offset), 0.0f, 0.0f, 1.0f); }
+template <typename S> +SI vec4 fetchOffsetsRG16(S sampler, I32 offset) { + U32 pixels = {sampler->buf[offset.x], sampler->buf[offset.y], + sampler->buf[offset.z], sampler->buf[offset.w]}; + Float r = cast(pixels & 0xFFFF) * (1.0f / 65535.0f); + Float g = cast(pixels >> 16) * (1.0f / 65535.0f); + return vec4(r, g, 0.0f, 1.0f); +} + +template <typename S> +vec4 texelFetchRG16(S sampler, ivec2 P) { + I32 offset = P.x + P.y * sampler->stride; + return fetchOffsetsRG16(sampler, offset); +} + SI vec4 fetchOffsetsFloat(const uint32_t* buf, I32 offset) { return pixel_float_to_vec4(*(Float*)&buf[offset.x], *(Float*)&buf[offset.y], *(Float*)&buf[offset.z], *(Float*)&buf[offset.w]); @@ -212,6 +227,8 @@ vec4 texelFetch(sampler2D sampler, ivec2 P, int lod) { return texelFetchRG8(sampler, P); case TextureFormat::R16: return texelFetchR16(sampler, P); + case TextureFormat::RG16: + return texelFetchRG16(sampler, P); case TextureFormat::YUV422: return texelFetchYUV422(sampler, P); default: @@ -301,6 +318,8 @@ vec4 texelFetch(sampler2DRect sampler, ivec2 P) { return texelFetchRG8(sampler, P); case TextureFormat::R16: return texelFetchR16(sampler, P); + case TextureFormat::RG16: + return texelFetchRG16(sampler, P); case TextureFormat::YUV422: return texelFetchYUV422(sampler, P); default: @@ -710,6 +729,104 @@ vec4 textureLinearR16(S sampler, vec2 P) { return vec4(r * (1.0f / 32767.0f), 0.0f, 0.0f, 1.0f); }
+// Samples RG16 texture with linear filtering and returns results packed as +// signed I16. One bit of precision is shifted away from the bottom end to +// accommodate the sign bit, so only 15 bits of precision is left. +template <typename S> +static inline V8<int16_t> textureLinearUnpackedRG16(S sampler, ivec2 i) { + assert(sampler->format == TextureFormat::RG16); + + ivec2 frac = i; + i >>= 7; + + I32 row0 = computeRow(sampler, i); + I32 row1 = row0 + computeNextRowOffset(sampler, i); + + I16 fracx = + CONVERT( + ((frac.x & (i.x >= 0)) | (i.x > int32_t(sampler->width) - 2)) & 0x7F, + I16) + << 8; + I16 fracy = computeFracY(frac) << 8; + + // Sample the 2x16 bit data for both rows + auto a0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.x]); + auto b0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.y]); + auto ab0 = CONVERT(combine(a0, b0) >> 1, V8<int16_t>); + auto c0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.z]); + auto d0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.w]); + auto cd0 = CONVERT(combine(c0, d0) >> 1, V8<int16_t>); + + auto a1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.x]); + auto b1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.y]); + auto ab1 = CONVERT(combine(a1, b1) >> 1, V8<int16_t>); + auto c1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.z]); + auto d1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.w]); + auto cd1 = CONVERT(combine(c1, d1) >> 1, V8<int16_t>); + + // The samples occupy 15 bits and the fraction occupies 15 bits, so that when + // they are multiplied together, the new scaled sample will fit in the high + // 14 bits of the result. It is left shifted once to make it 15 bits again + // for the final multiply. 
+#if USE_SSE2 + ab0 += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(ab1 - ab0, fracy.xxxxyyyy)) << 1; + cd0 += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(cd1 - cd0, fracy.zzzzwwww)) << 1; +#elif USE_NEON + // NEON has a convenient instruction that does both the multiply and the + // doubling, so doesn't need an extra shift. + ab0 += bit_cast<V8<int16_t>>(vqrdmulhq_s16(ab1 - ab0, fracy.xxxxyyyy)); + cd0 += bit_cast<V8<int16_t>>(vqrdmulhq_s16(cd1 - cd0, fracy.zzzzwwww)); +#else + ab0 += CONVERT((CONVERT(ab1 - ab0, V8<int32_t>) * + CONVERT(fracy.xxxxyyyy, V8<int32_t>)) >> + 16, + V8<int16_t>) + << 1; + cd0 += CONVERT((CONVERT(cd1 - cd0, V8<int32_t>) * + CONVERT(fracy.zzzzwwww, V8<int32_t>)) >> + 16, + V8<int16_t>) + << 1; +#endif + + // ab = a.rgRG,b.rgRG + // cd = c.rgRG,d.rgRG + // ... ac = a.rg,c.rg,a.RG,c.RG + // ... bd = b.rg,d.rg,b.RG,d.RG + auto ac = zip2Low(ab0, cd0); + auto bd = zip2High(ab0, cd0); + // a.rg,b.rg,c.rg,d.rg + // a.RG,b.RG,c.RG,d.RG + auto abcdl = zip2Low(ac, bd); + auto abcdh = zip2High(ac, bd); + // Blend columns +#if USE_SSE2 + abcdl += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(abcdh - abcdl, fracx.xxyyzzww)) + << 1; +#elif USE_NEON + abcdl += bit_cast<V8<int16_t>>(vqrdmulhq_s16(abcdh - abcdl, fracx.xxyyzzww)); +#else + abcdl += CONVERT((CONVERT(abcdh - abcdl, V8<int32_t>) * + CONVERT(fracx.xxyyzzww, V8<int32_t>)) >> + 16, + V8<int16_t>) + << 1; +#endif + + return abcdl; +} + +template <typename S> +vec4 textureLinearRG16(S sampler, vec2 P) { + assert(sampler->format == TextureFormat::RG16); + + ivec2 i(linearQuantize(P, 128, sampler)); + auto rg = bit_cast<V4<int32_t>>(textureLinearUnpackedRG16(sampler, i)); + auto r = cast(rg & 0xFFFF) * (1.0f / 32767.0f); + auto g = cast(rg >> 16) * (1.0f / 32767.0f); + return vec4(r, g, 0.0f, 1.0f); +} + using PackedRGBA32F = V16<float>; using WideRGBA32F = V16<float>;
@@ -854,6 +971,8 @@ SI vec4 texture(sampler2D sampler, vec2 P) { return textureLinearRG8(sampler, P); case TextureFormat::R16: return textureLinearR16(sampler, P); + case TextureFormat::RG16: + return textureLinearRG16(sampler, P); case TextureFormat::YUV422: return textureLinearYUV422(sampler, P); default: @@ -878,6 +997,8 @@ vec4 texture(sampler2DRect sampler, vec2 P) { return textureLinearRG8(sampler, P); case TextureFormat::R16: return textureLinearR16(sampler, P); + case TextureFormat::RG16: + return textureLinearRG16(sampler, P); case TextureFormat::YUV422: return textureLinearYUV422(sampler, P); default: diff --git a/gfx/wr/webrender/res/brush_yuv_image.glsl b/gfx/wr/webrender/res/brush_yuv_image.glsl index 9d4446320f35a..eb41ecb490fbc 100644 --- a/gfx/wr/webrender/res/brush_yuv_image.glsl +++ b/gfx/wr/webrender/res/brush_yuv_image.glsl @@ -55,9 +55,9 @@ void brush_vs( #ifdef SWGL_DRAW_SPAN // swgl_commitTextureLinearYUV needs to know the color space specifier and // also needs to know how many bits of scaling are required to normalize - // HDR textures. + // HDR textures. Note that MSB HDR formats don't need renormalization. vRescaleFactor = 0; - if (prim.channel_bit_depth > 8) { + if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) { vRescaleFactor = 16 - prim.channel_bit_depth; } // Since SWGL rescales filtered YUV values to 8bpc before yuv->rgb diff --git a/gfx/wr/webrender/res/composite.glsl b/gfx/wr/webrender/res/composite.glsl index 3cfb36de51c83..576199fe5c46e 100644 --- a/gfx/wr/webrender/res/composite.glsl +++ b/gfx/wr/webrender/res/composite.glsl @@ -92,9 +92,9 @@ void main(void) { #ifdef SWGL_DRAW_SPAN // swgl_commitTextureLinearYUV needs to know the color space specifier and // also needs to know how many bits of scaling are required to normalize - // HDR textures. + // HDR textures. Note that MSB HDR formats don't need renormalization. 
vRescaleFactor = 0; - if (prim.channel_bit_depth > 8) { + if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) { vRescaleFactor = 16 - prim.channel_bit_depth; } // Since SWGL rescales filtered YUV values to 8bpc before yuv->rgb @@ -208,7 +208,7 @@ void swgl_drawSpanRGBA8() { vYcbcrBias, vRgbFromDebiasedYcbcr, vRescaleFactor); - } else if (vYuvFormat.x == YUV_FORMAT_NV12) { + } else if (vYuvFormat.x == YUV_FORMAT_NV12 || vYuvFormat.x == YUV_FORMAT_P010) { swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y, sColor1, vUV_u, vUVBounds_u, vYcbcrBias, diff --git a/gfx/wr/webrender/res/yuv.glsl b/gfx/wr/webrender/res/yuv.glsl index 9fd3af875ef74..064ba3b8afaa4 100644 --- a/gfx/wr/webrender/res/yuv.glsl +++ b/gfx/wr/webrender/res/yuv.glsl @@ -77,75 +77,60 @@ struct YuvColorMatrixInfo {
// -
-vec4 yuv_channel_zero_one_identity(int bit_depth, int format) { - int channel_depth = 8; - if (bit_depth > 8) { - if (format == YUV_FORMAT_P010) { - // This is an msb format. - channel_depth = min(bit_depth, 16); - } else { - // For >8bpc, we get the low bits, not the high bits: - // 10bpc(1.0): 0b0000_0011_1111_1111 - channel_depth = 16; - } - } - - float all_ones_normalized = float((1 << bit_depth) - 1) / float((1 << channel_depth) - 1); +vec4 yuv_channel_zero_one_identity(int bit_depth, float channel_max) { + float all_ones_normalized = float((1 << bit_depth) - 1) / channel_max; return vec4(0.0, 0.0, all_ones_normalized, all_ones_normalized); }
-vec4 yuv_channel_zero_one_narrow_range(int bit_depth, int format) { +vec4 yuv_channel_zero_one_narrow_range(int bit_depth, float channel_max) { // Note: 512/1023 != 128/255 ivec4 zero_one_ints = ivec4(16, 128, 235, 240) << (bit_depth - 8); - - int channel_depth = 8; - if (bit_depth > 8) { - if (format == YUV_FORMAT_P010) { - // This is an msb format. - channel_depth = min(bit_depth, 16); - } else { - // For >8bpc, we get the low bits, not the high bits: - // 10bpc(1.0): 0b0000_0011_1111_1111 - channel_depth = 16; - } - } - - return vec4(zero_one_ints) / float((1 << channel_depth) - 1); + return vec4(zero_one_ints) / channel_max; }
-vec4 yuv_channel_zero_one_full_range(int bit_depth, int format) { - vec4 narrow = yuv_channel_zero_one_narrow_range(bit_depth, format); - vec4 identity = yuv_channel_zero_one_identity(bit_depth, format); - +vec4 yuv_channel_zero_one_full_range(int bit_depth, float channel_max) { + vec4 narrow = yuv_channel_zero_one_narrow_range(bit_depth, channel_max); + vec4 identity = yuv_channel_zero_one_identity(bit_depth, channel_max); return vec4(0.0, narrow.y, identity.z, identity.w); }
YuvColorSamplingInfo get_yuv_color_info(YuvPrimitive prim) { + float channel_max = 255.0; + if (prim.channel_bit_depth > 8) { + if (prim.yuv_format == YUV_FORMAT_P010) { + // This is an MSB format. + channel_max = float((1 << prim.channel_bit_depth) - 1); + } else { + // For >8bpc, we get the low bits, not the high bits: + // 10bpc(1.0): 0b0000_0011_1111_1111 + channel_max = 65535.0; + } + } if (prim.color_space == YUV_COLOR_SPACE_REC601_NARROW) { return YuvColorSamplingInfo(RgbFromYuv_Rec601, - yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max)); } else if (prim.color_space == YUV_COLOR_SPACE_REC601_FULL) { return YuvColorSamplingInfo(RgbFromYuv_Rec601, - yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC709_NARROW) { return YuvColorSamplingInfo(RgbFromYuv_Rec709, - yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max)); } else if (prim.color_space == YUV_COLOR_SPACE_REC709_FULL) { return YuvColorSamplingInfo(RgbFromYuv_Rec709, - yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC2020_NARROW) { return YuvColorSamplingInfo(RgbFromYuv_Rec2020, - yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max)); } else if (prim.color_space == YUV_COLOR_SPACE_REC2020_FULL) { return YuvColorSamplingInfo(RgbFromYuv_Rec2020, - yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else { // Identity return YuvColorSamplingInfo(RgbFromYuv_GbrIdentity, - yuv_channel_zero_one_identity(prim.channel_bit_depth, prim.yuv_format)); + yuv_channel_zero_one_identity(prim.channel_bit_depth, channel_max)); } }