diff --git a/modules/betsy/CrossPlatformSettings_piece_all.glsl b/modules/betsy/CrossPlatformSettings_piece_all.glsl deleted file mode 100644 index 001d8e63b23..00000000000 --- a/modules/betsy/CrossPlatformSettings_piece_all.glsl +++ /dev/null @@ -1,75 +0,0 @@ -#define min3(a, b, c) min(a, min(b, c)) -#define max3(a, b, c) max(a, max(b, c)) - -#define float2 vec2 -#define float3 vec3 -#define float4 vec4 - -#define int2 ivec2 -#define int3 ivec3 -#define int4 ivec4 - -#define uint2 uvec2 -#define uint3 uvec3 -#define uint4 uvec4 - -#define float2x2 mat2 -#define float3x3 mat3 -#define float4x4 mat4 -#define ogre_float4x3 mat3x4 - -#define ushort uint -#define ushort3 uint3 -#define ushort4 uint4 - -//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal -#define rshort int -#define rshort2 int2 -#define rint int -//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal -#define wshort2 int2 -#define wshort3 int3 - -#define toFloat3x3(x) mat3(x) -#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2) - -#define mul(x, y) ((x) * (y)) -#define saturate(x) clamp((x), 0.0, 1.0) -#define lerp mix -#define rsqrt inversesqrt -#define INLINE -#define NO_INTERPOLATION_PREFIX flat -#define NO_INTERPOLATION_SUFFIX - -#define PARAMS_ARG_DECL -#define PARAMS_ARG - -#define reversebits bitfieldReverse - -#define OGRE_Sample(tex, sampler, uv) texture(tex, uv) -#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod) -#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx)) -#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod) -#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod) -#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy) -#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy) -#define OGRE_ddx(val) dFdx(val) -#define OGRE_ddy(val) dFdy(val) -#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod) -#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod) -#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample) - -#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod) - -#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0) -#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1) -#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2) - -#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x - -#define OGRE_SAMPLER_ARG_DECL(samplerName) -#define OGRE_SAMPLER_ARG(samplerName) - -#define OGRE_Texture3D_float4 sampler3D -#define OGRE_OUT_REF(declType, variableName) out declType variableName -#define OGRE_INOUT_REF(declType, variableName) inout declType variableName diff --git a/modules/betsy/alpha_stitch.glsl b/modules/betsy/alpha_stitch.glsl index 4245b6419c7..fb38ff8505b 100644 --- a/modules/betsy/alpha_stitch.glsl +++ b/modules/betsy/alpha_stitch.glsl @@ -1,12 +1,10 @@ -// RGB and Alpha components of ETC2 RGBA are computed separately. +// RGB and Alpha components of ETC2 RGBA/DXT5 are computed separately. // This compute shader merely stitches them together to form the final result -// It's also used by RG11 driver to stitch two R11 into one RG11 +// It's also used by RG11/BC4 driver to stitch two R11/BC4 into one RG11/BC5 #[compute] #version 450 -#include "CrossPlatformSettings_piece_all.glsl" - layout(local_size_x = 8, // local_size_y = 8, // local_size_z = 1) in; @@ -16,8 +14,8 @@ layout(binding = 1) uniform usampler2D srcAlpha; layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture; void main() { - uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy; - uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy; + uvec2 rgbBlock = texelFetch(srcRGB, ivec2(gl_GlobalInvocationID.xy), 0).xy; + uvec2 alphaBlock = texelFetch(srcAlpha, ivec2(gl_GlobalInvocationID.xy), 0).xy; - imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy)); + imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), uvec4(rgbBlock.xy, alphaBlock.xy)); } diff --git a/modules/betsy/bc1.glsl b/modules/betsy/bc1.glsl index 6b6346aacf3..36dbbfb4d97 100644 --- a/modules/betsy/bc1.glsl +++ b/modules/betsy/bc1.glsl @@ -6,7 +6,7 @@ dithered = "#define BC1_DITHER"; #[compute] #version 450 -#include "CrossPlatformSettings_piece_all.glsl" +#VERSION_DEFINES #define FLT_MAX 340282346638528859811704183484516925440.0f @@ -14,8 +14,8 @@ layout(binding = 0) uniform sampler2D srcTex; layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; layout(std430, binding = 2) readonly restrict buffer globalBuffer { - float2 c_oMatch5[256]; - float2 c_oMatch6[256]; + vec2 c_oMatch5[256]; + vec2 c_oMatch6[256]; }; layout(push_constant, std430) uniform Params { @@ -28,14 +28,14 @@ layout(local_size_x = 8, // local_size_y = 8, // local_size_z = 1) in; -float3 rgb565to888(float rgb565) { - float3 retVal; +vec3 rgb565to888(float rgb565) { + vec3 retVal; retVal.x = floor(rgb565 / 2048.0f); retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f); retVal.z = floor(mod(rgb565, 32.0f)); // This is the correct 565 to 888 conversion: - // rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f ) + // rgb = floor( rgb * ( 255.0f / vec3( 31.0f, 63.0f, 31.0f ) ) + 0.5f ) // // However stb_dxt follows a different one: // rb = floor( rb * ( 256 / 32 + 8 / 32 ) ); @@ -52,10 +52,10 @@ float3 rgb565to888(float rgb565) { // Perhaps when we make 888 -> 565 -> 888 it doesn't matter // because they end up mapping to the original number - return floor(retVal * float3(8.25f, 4.0625f, 8.25f)); + return floor(retVal * vec3(8.25f, 4.0625f, 8.25f)); } -float rgb888to565(float3 rgbValue) { +float rgb888to565(vec3 rgbValue) { rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f); rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f); @@ -63,7 +63,7 @@ float rgb888to565(float3 rgbValue) { } // linear interpolation at 1/3 point between a and b, using desired rounding type -float3 lerp13(float3 a, float3 b) { +vec3 lerp13(vec3 a, vec3 b) { #ifdef STB_DXT_USE_ROUNDING_BIAS // with rounding bias return a + floor((b - a) * (1.0f / 3.0f) + 0.5f); @@ -74,7 +74,7 @@ float3 lerp13(float3 a, float3 b) { } /// Unpacks a block of 4 colors from two 16-bit endpoints -void EvalColors(out float3 colors[4], float c0, float c1) { +void EvalColors(out vec3 colors[4], float c0, float c1) { colors[0] = rgb565to888(c0); colors[1] = rgb565to888(c1); colors[2] = lerp13(colors[0], colors[1]); @@ -89,13 +89,13 @@ void EvalColors(out float3 colors[4], float c0, float c1) { */ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) { // determine color distribution - float3 avgColor; - float3 minColor; - float3 maxColor; + vec3 avgColor; + vec3 minColor; + vec3 maxColor; avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz; for (int i = 1; i < 16; ++i) { - const float3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz; + const vec3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz; avgColor += currColorUnorm; minColor = min(minColor, currColorUnorm); maxColor = max(maxColor, currColorUnorm); @@ -112,8 +112,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, } for (int i = 0; i < 16; ++i) { - const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; - float3 rgbDiff = currColor - avgColor; + const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; + vec3 rgbDiff = currColor - avgColor; cov[0] += rgbDiff.r * rgbDiff.r; cov[1] += rgbDiff.r * rgbDiff.g; @@ -128,7 +128,7 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, cov[i] /= 255.0f; } - float3 vF = maxColor - minColor; + vec3 vF = maxColor - minColor; const int nIterPower = 4; for (int iter = 0; iter < nIterPower; ++iter) { @@ -141,8 +141,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, vF.b = b; } - float magn = max3(abs(vF.r), abs(vF.g), abs(vF.b)); - float3 v; + float magn = max(abs(vF.r), max(abs(vF.g), abs(vF.b))); + vec3 v; if (magn < 4.0f) { // too small, default to luminance v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000. @@ -153,11 +153,11 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, } // Pick colors at extreme points - float3 minEndpoint, maxEndpoint; + vec3 minEndpoint, maxEndpoint; float minDot = FLT_MAX; float maxDot = -FLT_MAX; for (int i = 0; i < 16; ++i) { - const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; + const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const float dotValue = dot(currColor, v); if (dotValue < minDot) { @@ -176,9 +176,9 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, } // The color matching function -uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { +uint MatchColorsBlock(const uint srcPixelsBlock[16], vec3 color[4]) { uint mask = 0u; - float3 dir = color[0] - color[1]; + vec3 dir = color[0] - color[1]; float stops[4]; for (int i = 0; i < 4; ++i) { @@ -200,7 +200,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { #ifndef BC1_DITHER // the version without dithering is straightforward for (uint i = 16u; i-- > 0u;) { - const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; + const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const float dotValue = dot(currColor, dir); mask <<= 2u; @@ -213,8 +213,8 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { } #else // with floyd-steinberg dithering - float4 ep1 = float4(0, 0, 0, 0); - float4 ep2 = float4(0, 0, 0, 0); + vec4 ep1 = vec4(0, 0, 0, 0); + vec4 ep2 = vec4(0, 0, 0, 0); c0Point *= 16.0f; halfPoint *= 16.0f; @@ -224,7 +224,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { float ditherDot; uint lmask, step; - float3 currColor; + vec3 currColor; float dotValue; currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f; @@ -277,7 +277,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { mask |= lmask << (y * 8u); { - float4 tmp = ep1; + vec4 tmp = ep1; ep1 = ep2; ep2 = tmp; } // swap @@ -300,7 +300,7 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE { // yes, linear system would be singular; solve using optimal // single-color match on average color - float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f); + vec3 rgbVal = vec3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f); for (int i = 0; i < 16; ++i) { rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz; } @@ -322,10 +322,10 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE float akku = 0.0f; uint cm = mask; - float3 at1 = float3(0, 0, 0); - float3 at2 = float3(0, 0, 0); + vec3 at1 = vec3(0, 0, 0); + vec3 at2 = vec3(0, 0, 0); for (int i = 0; i < 16; ++i, cm >>= 2u) { - const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; + const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const uint step = cm & 3u; const float w1 = w1Tab[step]; @@ -341,17 +341,17 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE const float yy = floor(mod(akku, 65535.0f) / 256.0f); const float xy = mod(akku, 256.0f); - float2 f_rb_g; + vec2 f_rb_g; f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy); f_rb_g.y = f_rb_g.x * 63.0f / 31.0f; // solve. - const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f), - float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31)); + const vec3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f), + vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31)); newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z; - const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f), - float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31)); + const vec3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f), + vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31)); newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z; } @@ -364,48 +364,48 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE #ifdef BC1_DITHER /// Quantizes 'srcValue' which is originally in 888 (full range), /// converting it to 565 and then back to 888 (quantized) -float3 quant(float3 srcValue) { +vec3 quant(vec3 srcValue) { srcValue = clamp(srcValue, 0.0f, 255.0f); // Convert 888 -> 565 - srcValue = floor(srcValue * float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f); + srcValue = floor(srcValue * vec3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f); // Convert 565 -> 888 back - srcValue = floor(srcValue * float3(8.25f, 4.0625f, 8.25f)); + srcValue = floor(srcValue * vec3(8.25f, 4.0625f, 8.25f)); return srcValue; } void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) { - float3 ep1[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) }; - float3 ep2[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) }; + vec3 ep1[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) }; + vec3 ep2[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) }; for (uint y = 0u; y < 16u; y += 4u) { - float3 srcPixel, dithPixel; + vec3 srcPixel, dithPixel; srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f; dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f))); ep1[0] = srcPixel - dithPixel; - dthPixBlck[y + 0u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); + dthPixBlck[y + 0u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f)); srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f; dithPixel = quant( srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f))); ep1[1] = srcPixel - dithPixel; - dthPixBlck[y + 1u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); + dthPixBlck[y + 1u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f)); srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f; dithPixel = quant( srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f))); ep1[2] = srcPixel - dithPixel; - dthPixBlck[y + 2u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); + dthPixBlck[y + 2u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f)); srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f; dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f))); ep1[3] = srcPixel - dithPixel; - dthPixBlck[y + 3u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); + dthPixBlck[y + 3u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f)); // swap( ep1, ep2 ) for (uint i = 0u; i < 4u; ++i) { - float3 tmp = ep1[i]; + vec3 tmp = ep1[i]; ep1[i] = ep2[i]; ep2[i] = tmp; } @@ -419,11 +419,11 @@ void main() { bool bAllColorsEqual = true; // Load the whole 4x4 block - const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u; + const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u; for (uint i = 0u; i < 16u; ++i) { - const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u); - const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz; - srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f)); + const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i & 0x03u, i >> 2u); + const vec3 srcPixels0 = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyz; + srcPixelsBlock[i] = packUnorm4x8(vec4(srcPixels0, 1.0f)); bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i]; } @@ -431,7 +431,7 @@ void main() { uint mask = 0u; if (bAllColorsEqual) { - const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f); + const uvec3 rgbVal = uvec3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f); mask = 0xAAAAAAAAu; maxEndp16 = c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0]; @@ -449,7 +449,7 @@ void main() { // second step: pca+map along principal axis OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16); if (minEndp16 != maxEndp16) { - float3 colors[4]; + vec3 colors[4]; EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted mask = MatchColorsBlock(srcPixelsBlock, colors); } @@ -461,7 +461,7 @@ void main() { if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) { if (minEndp16 != maxEndp16) { - float3 colors[4]; + vec3 colors[4]; EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted mask = MatchColorsBlock(srcPixelsBlock, colors); } else { @@ -482,10 +482,10 @@ void main() { mask ^= 0x55555555u; } - uint2 outputBytes; + uvec2 outputBytes; outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u); outputBytes.y = mask; - uint2 dstUV = gl_GlobalInvocationID.xy; - imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); + uvec2 dstUV = gl_GlobalInvocationID.xy; + imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u)); } diff --git a/modules/betsy/bc4.glsl b/modules/betsy/bc4.glsl index 90151f84ebf..91eba22145b 100644 --- a/modules/betsy/bc4.glsl +++ b/modules/betsy/bc4.glsl @@ -6,12 +6,10 @@ signed = "#define SNORM"; #[compute] #version 450 -#include "CrossPlatformSettings_piece_all.glsl" - #VERSION_DEFINES -shared float2 g_minMaxValues[4u * 4u * 4u]; -shared uint2 g_mask[4u * 4u]; +shared vec2 g_minMaxValues[4u * 4u * 4u]; +shared uvec2 g_mask[4u * 4u]; layout(binding = 0) uniform sampler2D srcTex; layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; @@ -40,30 +38,30 @@ layout(local_size_x = 4, // /// - Long threads (e.g. 1 thread per block) misses parallelism opportunities void main() { float minVal, maxVal; - float4 srcPixel; + vec4 srcPixel; const uint blockThreadId = gl_LocalInvocationID.x; - const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u; + const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u; for (uint i = 0u; i < 4u; ++i) { - const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId); + const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i, blockThreadId); - const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw; + const vec4 value = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyzw; srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w); srcPixel[i] *= 255.0f; } - minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z); - maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z); + minVal = min(srcPixel.x, min(srcPixel.y, srcPixel.z)); + maxVal = max(srcPixel.x, max(srcPixel.y, srcPixel.z)); minVal = min(minVal, srcPixel.w); maxVal = max(maxVal, srcPixel.w); const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u); const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y; - g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal); - g_mask[maskIdxBase] = uint2(0u, 0u); + g_minMaxValues[minMaxIdxBase + blockThreadId] = vec2(minVal, maxVal); + g_mask[maskIdxBase] = uvec2(0u, 0u); memoryBarrierShared(); barrier(); @@ -133,21 +131,21 @@ void main() { if (blockThreadId == 0u) { // Save data - uint2 outputBytes; + uvec2 outputBytes; #ifdef SNORM outputBytes.x = - packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f, + packSnorm4x8(vec4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f, minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f)); #else outputBytes.x = packUnorm4x8( - float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f)); + vec4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f)); #endif outputBytes.x |= g_mask[maskIdxBase].x; outputBytes.y = g_mask[maskIdxBase].y; - uint2 dstUV = gl_GlobalInvocationID.yz; - imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); + uvec2 dstUV = gl_GlobalInvocationID.yz; + imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u)); } } diff --git a/modules/betsy/bc6h.glsl b/modules/betsy/bc6h.glsl index d00bbafbd22..2738a30ad38 100644 --- a/modules/betsy/bc6h.glsl +++ b/modules/betsy/bc6h.glsl @@ -6,24 +6,22 @@ unsigned = "#define QUALITY"; // The "Quality" preset causes artifacting on sign #[compute] #version 450 -#include "CrossPlatformSettings_piece_all.glsl" - #VERSION_DEFINES -float3 f32tof16(float3 value) { - return float3(packHalf2x16(float2(value.x, 0.0)), - packHalf2x16(float2(value.y, 0.0)), - packHalf2x16(float2(value.z, 0.0))); +vec3 f32tof16(vec3 value) { + return vec3(packHalf2x16(vec2(value.x, 0.0)), + packHalf2x16(vec2(value.y, 0.0)), + packHalf2x16(vec2(value.z, 0.0))); } -float3 f16tof32(uint3 value) { - return float3(unpackHalf2x16(value.x).x, +vec3 f16tof32(uvec3 value) { + return vec3(unpackHalf2x16(value.x).x, unpackHalf2x16(value.y).x, unpackHalf2x16(value.z).x); } float f32tof16(float value) { - return packHalf2x16(float2(value.x, 0.0)); + return packHalf2x16(vec2(value.x, 0.0)); } float f16tof32(uint value) { @@ -34,7 +32,7 @@ layout(binding = 0) uniform sampler2D srcTexture; layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture; layout(push_constant, std430) uniform Params { - float2 p_textureSizeRcp; + vec2 p_textureSizeRcp; uint padding0; uint padding1; } @@ -69,7 +67,7 @@ float CrossCalcMSLE(float a, float b) { return result; } -float CalcMSLE(float3 a, float3 b) { +float CalcMSLE(vec3 a, vec3 b) { float result = 0.0f; if (isNegative(a.x) != isNegative(b.x)) { result += CrossCalcMSLE(a.x, b.x); @@ -91,32 +89,32 @@ float CalcMSLE(float3 a, float3 b) { } // Adapt the log function to make sense when a < 0 -float3 customLog2(float3 a) { - return float3( +vec3 customLog2(vec3 a) { + return vec3( a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f), a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f), a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f)); } // Inverse of customLog2() -float3 customExp2(float3 a) { - return float3( +vec3 customExp2(vec3 a) { + return vec3( a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f), a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f), a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f)); } #else -float CalcMSLE(float3 a, float3 b) { - float3 err = log2((b + 1.0f) / (a + 1.0f)); +float CalcMSLE(vec3 a, vec3 b) { + vec3 err = log2((b + 1.0f) / (a + 1.0f)); err = err * err; return err.x + err.y + err.z; } -float3 customLog2(float3 a) { +vec3 customLog2(vec3 a) { return log2(a + 1.0f); } -float3 customExp2(float3 a) { +vec3 customExp2(vec3 a) { return exp2(a) - 1.0f; } #endif @@ -157,98 +155,98 @@ uint Pattern(uint p, uint i) { #ifndef SIGNED //UF -float3 Quantize7(float3 x) { +vec3 Quantize7(vec3 x) { return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f); } -float3 Quantize9(float3 x) { +vec3 Quantize9(vec3 x) { return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f); } -float3 Quantize10(float3 x) { +vec3 Quantize10(vec3 x) { return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f); } -float3 Unquantize7(float3 x) { +vec3 Unquantize7(vec3 x) { return (x * 65536.0f + 0x8000) / 128.0f; } -float3 Unquantize9(float3 x) { +vec3 Unquantize9(vec3 x) { return (x * 65536.0f + 0x8000) / 512.0f; } -float3 Unquantize10(float3 x) { +vec3 Unquantize10(vec3 x) { return (x * 65536.0f + 0x8000) / 1024.0f; } -float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { - float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); - return f16tof32(uint3(comp)); +vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) { + vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); + return f16tof32(uvec3(comp)); } #else //SF -float3 cmpSign(float3 value) { - float3 signVal; +vec3 cmpSign(vec3 value) { + vec3 signVal; signVal.x = value.x >= 0.0f ? 1.0f : -1.0f; signVal.y = value.y >= 0.0f ? 1.0f : -1.0f; signVal.z = value.z >= 0.0f ? 1.0f : -1.0f; return signVal; } -float3 Quantize7(float3 x) { - float3 signVal = cmpSign(x); +vec3 Quantize7(vec3 x) { + vec3 signVal = cmpSign(x); return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f); } -float3 Quantize9(float3 x) { - float3 signVal = cmpSign(x); +vec3 Quantize9(vec3 x) { + vec3 signVal = cmpSign(x); return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f); } -float3 Quantize10(float3 x) { - float3 signVal = cmpSign(x); +vec3 Quantize10(vec3 x) { + vec3 signVal = cmpSign(x); return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f); } -float3 Unquantize7(float3 x) { - float3 signVal = sign(x); +vec3 Unquantize7(vec3 x) { + vec3 signVal = sign(x); x = abs(x); - float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f; + vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f; finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x; finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y; finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z; return finalVal; } -float3 Unquantize9(float3 x) { - float3 signVal = sign(x); +vec3 Unquantize9(vec3 x) { + vec3 signVal = sign(x); x = abs(x); - float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f; + vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f; finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x; finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y; finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z; return finalVal; } -float3 Unquantize10(float3 x) { - float3 signVal = sign(x); +vec3 Unquantize10(vec3 x) { + vec3 signVal = sign(x); x = abs(x); - float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f; + vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f; finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x; finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y; finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z; return finalVal; } -float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { - float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f); - return f16tof32(uint3(comp)); +vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) { + vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f); + return f16tof32(uvec3(comp)); } #endif -void Swap(inout float3 a, inout float3 b) { - float3 tmp = a; +void Swap(inout vec3 a, inout vec3 b) { + vec3 tmp = a; a = b; b = tmp; } @@ -270,8 +268,8 @@ uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) { } // This adds a bitflag to quantized values that signifies whether they are negative. -void SignExtend(inout float3 v1, uint mask, uint signFlag) { - int3 v = int3(v1); +void SignExtend(inout vec3 v1, uint mask, uint signFlag) { + ivec3 v = ivec3(v1); v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0); v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0); v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0); @@ -279,38 +277,39 @@ void SignExtend(inout float3 v1, uint mask, uint signFlag) { } // Encodes a block with mode 11 (2x 10-bit endpoints). -void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { +void EncodeP1(inout uvec4 block, inout float blockMSLE, vec3 texels[16]) { // compute endpoints (min/max RGB bbox) - float3 blockMin = texels[0]; - float3 blockMax = texels[0]; + vec3 blockMin = texels[0]; + vec3 blockMax = texels[0]; for (uint i = 1u; i < 16u; ++i) { blockMin = min(blockMin, texels[i]); blockMax = max(blockMax, texels[i]); } // refine endpoints in log2 RGB space - float3 refinedBlockMin = blockMax; - float3 refinedBlockMax = blockMin; + vec3 refinedBlockMin = blockMax; + vec3 refinedBlockMax = blockMin; for (uint i = 0u; i < 16u; ++i) { refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]); refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]); } - float3 logBlockMax = customLog2(blockMax); - float3 logBlockMin = customLog2(blockMin); - float3 logRefinedBlockMax = customLog2(refinedBlockMax); - float3 logRefinedBlockMin = customLog2(refinedBlockMin); - float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + vec3 logBlockMax = customLog2(blockMax); + vec3 logBlockMin = customLog2(blockMin); + vec3 logRefinedBlockMax = customLog2(refinedBlockMax); + vec3 logRefinedBlockMin = customLog2(refinedBlockMin); + vec3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); blockMin = customExp2(logBlockMin); blockMax = customExp2(logBlockMax); - float3 blockDir = blockMax - blockMin; + vec3 blockDir = blockMax - blockMin; blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); - float3 endpoint0 = Quantize10(blockMin); - float3 endpoint1 = Quantize10(blockMax); + vec3 endpoint0 = Quantize10(blockMin); + vec3 endpoint1 = Quantize10(blockMax); float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); @@ -336,12 +335,12 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { } // compute compression error (MSLE) - float3 endpoint0Unq = Unquantize10(endpoint0); - float3 endpoint1Unq = Unquantize10(endpoint1); + vec3 endpoint0Unq = Unquantize10(endpoint0); + vec3 endpoint1Unq = Unquantize10(endpoint1); float msle = 0.0f; for (uint i = 0u; i < 16u; ++i) { float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f); - float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); + vec3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); msle += CalcMSLE(texels[i], texelUnc); } @@ -384,19 +383,19 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { block.w |= indices[15] << 28u; } -float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) { - float3 w = Point - PointOnLine; - float3 x = w - dot(w, LineDirection) * LineDirection; +float DistToLineSq(vec3 PointOnLine, vec3 LineDirection, vec3 Point) { + vec3 w = Point - PointOnLine; + vec3 x = w - dot(w, LineDirection) * LineDirection; return dot(x, x); } // Gets the deviation from the source data of a particular pattern (smaller is better). -float EvaluateP2Pattern(uint pattern, float3 texels[16]) { - float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); - float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); - float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); - float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); +float EvaluateP2Pattern(uint pattern, vec3 texels[16]) { + vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX); + vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN); + vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX); + vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN); for (uint i = 0; i < 16; ++i) { uint paletteID = Pattern(pattern, i); @@ -409,8 +408,8 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) { } } - float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); - float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); + vec3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); + vec3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); float sqDistanceFromLine = 0.0f; @@ -427,11 +426,11 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) { } // Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding. -void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) { - float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); - float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); - float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); - float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); +void EncodeP2Pattern(inout uvec4 block, inout float blockMSLE, uint pattern, vec3 texels[16]) { + vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX); + vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN); + vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX); + vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN); for (uint i = 0u; i < 16u; ++i) { uint paletteID = Pattern(pattern, i); @@ -444,8 +443,8 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo } } - float3 p0BlockDir = p0BlockMax - p0BlockMin; - float3 p1BlockDir = p1BlockMax - p1BlockMin; + vec3 p0BlockDir = p0BlockMax - p0BlockMin; + vec3 p1BlockDir = p1BlockMax - p1BlockMin; p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z); p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z); @@ -479,15 +478,15 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo indices[i] = paletteID == 0u ? p0Index : p1Index; } - float3 endpoint760 = floor(Quantize7(p0BlockMin)); - float3 endpoint761 = floor(Quantize7(p0BlockMax)); - float3 endpoint762 = floor(Quantize7(p1BlockMin)); - float3 endpoint763 = floor(Quantize7(p1BlockMax)); + vec3 endpoint760 = floor(Quantize7(p0BlockMin)); + vec3 endpoint761 = floor(Quantize7(p0BlockMax)); + vec3 endpoint762 = floor(Quantize7(p1BlockMin)); + vec3 endpoint763 = floor(Quantize7(p1BlockMax)); - float3 endpoint950 = floor(Quantize9(p0BlockMin)); - float3 endpoint951 = floor(Quantize9(p0BlockMax)); - float3 endpoint952 = floor(Quantize9(p1BlockMin)); - float3 endpoint953 = floor(Quantize9(p1BlockMax)); + vec3 endpoint950 = floor(Quantize9(p0BlockMin)); + vec3 endpoint951 = floor(Quantize9(p0BlockMax)); + vec3 endpoint952 = floor(Quantize9(p1BlockMin)); + vec3 endpoint953 = floor(Quantize9(p1BlockMax)); endpoint761 = endpoint761 - endpoint760; endpoint762 = endpoint762 - endpoint760; @@ -514,28 +513,28 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo endpoint950 = clamp(endpoint950, -maxVal9, maxVal9); #endif - float3 endpoint760Unq = Unquantize7(endpoint760); - float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761); - float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); - float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); - float3 endpoint950Unq = Unquantize9(endpoint950); - float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); - float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); - float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); + vec3 endpoint760Unq = Unquantize7(endpoint760); + vec3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761); + vec3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); + vec3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); + vec3 endpoint950Unq = Unquantize9(endpoint950); + vec3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); + vec3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); + vec3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); float msle76 = 0.0f; float msle95 = 0.0f; for (uint i = 0u; i < 16u; ++i) { uint paletteID = Pattern(pattern, i); - float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq; - float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq; - float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq; - float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq; + vec3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq; + vec3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq; + vec3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq; + vec3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq; float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f); - float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); - float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); + vec3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); + vec3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); msle76 += CalcMSLE(texels[i], texelUnc76); msle95 += CalcMSLE(texels[i], texelUnc95); @@ -558,7 +557,7 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo float p2MSLE = min(msle76, msle95); if (p2MSLE < blockMSLE) { blockMSLE = p2MSLE; - block = uint4(0u, 0u, 0u, 0u); + block = uvec4(0u, 0u, 0u, 0u); if (p2MSLE == msle76) { // 7.6 @@ -681,43 +680,43 @@ void main() { // 4 5 6 7 // 8 9 10 11 // 12 13 14 15 - float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp; - float2 block0UV = uv; - float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f); - float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y); - float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y); - float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV); - float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV); - float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV); - float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV); - float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV); - float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV); - float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV); - float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV); - float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV); - float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV); - float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV); - float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV); + vec2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp; + vec2 block0UV = uv; + vec2 block1UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 0.0f); + vec2 block2UV = uv + vec2(0.0f, 2.0f * params.p_textureSizeRcp.y); + vec2 block3UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y); + vec4 block0X = textureGather(srcTexture, block0UV, 0); + vec4 block1X = textureGather(srcTexture, block1UV, 0); + vec4 block2X = textureGather(srcTexture, block2UV, 0); + vec4 block3X = textureGather(srcTexture, block3UV, 0); + vec4 block0Y = textureGather(srcTexture, block0UV, 1); + vec4 block1Y = textureGather(srcTexture, block1UV, 1); + vec4 block2Y = textureGather(srcTexture, block2UV, 1); + vec4 block3Y = textureGather(srcTexture, block3UV, 1); + vec4 block0Z = textureGather(srcTexture, block0UV, 2); + vec4 block1Z = textureGather(srcTexture, block1UV, 2); + vec4 block2Z = textureGather(srcTexture, block2UV, 2); + vec4 block3Z = textureGather(srcTexture, block3UV, 2); - float3 texels[16]; - texels[0] = float3(block0X.w, block0Y.w, block0Z.w); - texels[1] = float3(block0X.z, block0Y.z, block0Z.z); - texels[2] = float3(block1X.w, block1Y.w, block1Z.w); - texels[3] = float3(block1X.z, block1Y.z, block1Z.z); - texels[4] = float3(block0X.x, block0Y.x, block0Z.x); - texels[5] = float3(block0X.y, block0Y.y, block0Z.y); - texels[6] = float3(block1X.x, block1Y.x, block1Z.x); - texels[7] = float3(block1X.y, block1Y.y, block1Z.y); - texels[8] = float3(block2X.w, block2Y.w, block2Z.w); - texels[9] = float3(block2X.z, block2Y.z, block2Z.z); - texels[10] = float3(block3X.w, block3Y.w, block3Z.w); - texels[11] = float3(block3X.z, block3Y.z, block3Z.z); - texels[12] = float3(block2X.x, block2Y.x, block2Z.x); - texels[13] = float3(block2X.y, block2Y.y, block2Z.y); - texels[14] = float3(block3X.x, block3Y.x, block3Z.x); - texels[15] = float3(block3X.y, block3Y.y, block3Z.y); + vec3 texels[16]; + texels[0] = vec3(block0X.w, block0Y.w, block0Z.w); + texels[1] = vec3(block0X.z, block0Y.z, block0Z.z); + texels[2] = vec3(block1X.w, block1Y.w, block1Z.w); + texels[3] = vec3(block1X.z, block1Y.z, block1Z.z); + texels[4] = vec3(block0X.x, block0Y.x, block0Z.x); + texels[5] = vec3(block0X.y, block0Y.y, block0Z.y); + texels[6] = vec3(block1X.x, block1Y.x, block1Z.x); + texels[7] = vec3(block1X.y, block1Y.y, block1Z.y); + texels[8] = vec3(block2X.w, block2Y.w, block2Z.w); + texels[9] = vec3(block2X.z, block2Y.z, block2Z.z); + texels[10] = vec3(block3X.w, block3Y.w, block3Z.w); + texels[11] = vec3(block3X.z, block3Y.z, block3Z.z); + texels[12] = vec3(block2X.x, block2Y.x, block2Z.x); + texels[13] = vec3(block2X.y, block2Y.y, block2Z.y); + texels[14] = vec3(block3X.x, block3Y.x, block3Z.x); + texels[15] = vec3(block3X.y, block3Y.y, block3Z.y); - uint4 block = uint4(0u, 0u, 0u, 0u); + uvec4 block = uvec4(0u, 0u, 0u, 0u); float blockMSLE = 0.0f; EncodeP1(block, blockMSLE, texels); @@ -738,5 +737,5 @@ void main() { EncodeP2Pattern(block, blockMSLE, bestPattern, texels); #endif - imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block); + imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), block); }