1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

Betsy: Remove OGRE aliases

This commit is contained in:
BlueCube3310
2025-02-10 19:03:19 +01:00
parent 9f03bbf908
commit affa27d188
5 changed files with 221 additions and 301 deletions

View File

@@ -1,75 +0,0 @@
#define min3(a, b, c) min(a, min(b, c))
#define max3(a, b, c) max(a, max(b, c))
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define float2x2 mat2
#define float3x3 mat3
#define float4x4 mat4
#define ogre_float4x3 mat3x4
#define ushort uint
#define ushort3 uint3
#define ushort4 uint4
//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal
#define rshort int
#define rshort2 int2
#define rint int
//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal
#define wshort2 int2
#define wshort3 int3
#define toFloat3x3(x) mat3(x)
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
#define mul(x, y) ((x) * (y))
#define saturate(x) clamp((x), 0.0, 1.0)
#define lerp mix
#define rsqrt inversesqrt
#define INLINE
#define NO_INTERPOLATION_PREFIX flat
#define NO_INTERPOLATION_SUFFIX
#define PARAMS_ARG_DECL
#define PARAMS_ARG
#define reversebits bitfieldReverse
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
#define OGRE_ddx(val) dFdx(val)
#define OGRE_ddy(val) dFdy(val)
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
#define OGRE_SAMPLER_ARG_DECL(samplerName)
#define OGRE_SAMPLER_ARG(samplerName)
#define OGRE_Texture3D_float4 sampler3D
#define OGRE_OUT_REF(declType, variableName) out declType variableName
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName

View File

@@ -1,12 +1,10 @@
// RGB and Alpha components of ETC2 RGBA are computed separately. // RGB and Alpha components of ETC2 RGBA/DXT5 are computed separately.
// This compute shader merely stitches them together to form the final result // This compute shader merely stitches them together to form the final result
// It's also used by RG11 driver to stitch two R11 into one RG11 // It's also used by RG11/BC4 driver to stitch two R11/BC4 into one RG11/BC5
#[compute] #[compute]
#version 450 #version 450
#include "CrossPlatformSettings_piece_all.glsl"
layout(local_size_x = 8, // layout(local_size_x = 8, //
local_size_y = 8, // local_size_y = 8, //
local_size_z = 1) in; local_size_z = 1) in;
@@ -16,8 +14,8 @@ layout(binding = 1) uniform usampler2D srcAlpha;
layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture; layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
void main() { void main() {
uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy; uvec2 rgbBlock = texelFetch(srcRGB, ivec2(gl_GlobalInvocationID.xy), 0).xy;
uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy; uvec2 alphaBlock = texelFetch(srcAlpha, ivec2(gl_GlobalInvocationID.xy), 0).xy;
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy)); imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), uvec4(rgbBlock.xy, alphaBlock.xy));
} }

View File

@@ -6,7 +6,7 @@ dithered = "#define BC1_DITHER";
#[compute] #[compute]
#version 450 #version 450
#include "CrossPlatformSettings_piece_all.glsl" #VERSION_DEFINES
#define FLT_MAX 340282346638528859811704183484516925440.0f #define FLT_MAX 340282346638528859811704183484516925440.0f
@@ -14,8 +14,8 @@ layout(binding = 0) uniform sampler2D srcTex;
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
layout(std430, binding = 2) readonly restrict buffer globalBuffer { layout(std430, binding = 2) readonly restrict buffer globalBuffer {
float2 c_oMatch5[256]; vec2 c_oMatch5[256];
float2 c_oMatch6[256]; vec2 c_oMatch6[256];
}; };
layout(push_constant, std430) uniform Params { layout(push_constant, std430) uniform Params {
@@ -28,14 +28,14 @@ layout(local_size_x = 8, //
local_size_y = 8, // local_size_y = 8, //
local_size_z = 1) in; local_size_z = 1) in;
float3 rgb565to888(float rgb565) { vec3 rgb565to888(float rgb565) {
float3 retVal; vec3 retVal;
retVal.x = floor(rgb565 / 2048.0f); retVal.x = floor(rgb565 / 2048.0f);
retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f); retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f);
retVal.z = floor(mod(rgb565, 32.0f)); retVal.z = floor(mod(rgb565, 32.0f));
// This is the correct 565 to 888 conversion: // This is the correct 565 to 888 conversion:
// rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f ) // rgb = floor( rgb * ( 255.0f / vec3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
// //
// However stb_dxt follows a different one: // However stb_dxt follows a different one:
// rb = floor( rb * ( 256 / 32 + 8 / 32 ) ); // rb = floor( rb * ( 256 / 32 + 8 / 32 ) );
@@ -52,10 +52,10 @@ float3 rgb565to888(float rgb565) {
// Perhaps when we make 888 -> 565 -> 888 it doesn't matter // Perhaps when we make 888 -> 565 -> 888 it doesn't matter
// because they end up mapping to the original number // because they end up mapping to the original number
return floor(retVal * float3(8.25f, 4.0625f, 8.25f)); return floor(retVal * vec3(8.25f, 4.0625f, 8.25f));
} }
float rgb888to565(float3 rgbValue) { float rgb888to565(vec3 rgbValue) {
rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f); rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f);
rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f); rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
@@ -63,7 +63,7 @@ float rgb888to565(float3 rgbValue) {
} }
// linear interpolation at 1/3 point between a and b, using desired rounding type // linear interpolation at 1/3 point between a and b, using desired rounding type
float3 lerp13(float3 a, float3 b) { vec3 lerp13(vec3 a, vec3 b) {
#ifdef STB_DXT_USE_ROUNDING_BIAS #ifdef STB_DXT_USE_ROUNDING_BIAS
// with rounding bias // with rounding bias
return a + floor((b - a) * (1.0f / 3.0f) + 0.5f); return a + floor((b - a) * (1.0f / 3.0f) + 0.5f);
@@ -74,7 +74,7 @@ float3 lerp13(float3 a, float3 b) {
} }
/// Unpacks a block of 4 colors from two 16-bit endpoints /// Unpacks a block of 4 colors from two 16-bit endpoints
void EvalColors(out float3 colors[4], float c0, float c1) { void EvalColors(out vec3 colors[4], float c0, float c1) {
colors[0] = rgb565to888(c0); colors[0] = rgb565to888(c0);
colors[1] = rgb565to888(c1); colors[1] = rgb565to888(c1);
colors[2] = lerp13(colors[0], colors[1]); colors[2] = lerp13(colors[0], colors[1]);
@@ -89,13 +89,13 @@ void EvalColors(out float3 colors[4], float c0, float c1) {
*/ */
void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) { void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
// determine color distribution // determine color distribution
float3 avgColor; vec3 avgColor;
float3 minColor; vec3 minColor;
float3 maxColor; vec3 maxColor;
avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz; avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
for (int i = 1; i < 16; ++i) { for (int i = 1; i < 16; ++i) {
const float3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz; const vec3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
avgColor += currColorUnorm; avgColor += currColorUnorm;
minColor = min(minColor, currColorUnorm); minColor = min(minColor, currColorUnorm);
maxColor = max(maxColor, currColorUnorm); maxColor = max(maxColor, currColorUnorm);
@@ -112,8 +112,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
} }
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
float3 rgbDiff = currColor - avgColor; vec3 rgbDiff = currColor - avgColor;
cov[0] += rgbDiff.r * rgbDiff.r; cov[0] += rgbDiff.r * rgbDiff.r;
cov[1] += rgbDiff.r * rgbDiff.g; cov[1] += rgbDiff.r * rgbDiff.g;
@@ -128,7 +128,7 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
cov[i] /= 255.0f; cov[i] /= 255.0f;
} }
float3 vF = maxColor - minColor; vec3 vF = maxColor - minColor;
const int nIterPower = 4; const int nIterPower = 4;
for (int iter = 0; iter < nIterPower; ++iter) { for (int iter = 0; iter < nIterPower; ++iter) {
@@ -141,8 +141,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
vF.b = b; vF.b = b;
} }
float magn = max3(abs(vF.r), abs(vF.g), abs(vF.b)); float magn = max(abs(vF.r), max(abs(vF.g), abs(vF.b)));
float3 v; vec3 v;
if (magn < 4.0f) { // too small, default to luminance if (magn < 4.0f) { // too small, default to luminance
v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000. v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000.
@@ -153,11 +153,11 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
} }
// Pick colors at extreme points // Pick colors at extreme points
float3 minEndpoint, maxEndpoint; vec3 minEndpoint, maxEndpoint;
float minDot = FLT_MAX; float minDot = FLT_MAX;
float maxDot = -FLT_MAX; float maxDot = -FLT_MAX;
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
const float dotValue = dot(currColor, v); const float dotValue = dot(currColor, v);
if (dotValue < minDot) { if (dotValue < minDot) {
@@ -176,9 +176,9 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
} }
// The color matching function // The color matching function
uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) { uint MatchColorsBlock(const uint srcPixelsBlock[16], vec3 color[4]) {
uint mask = 0u; uint mask = 0u;
float3 dir = color[0] - color[1]; vec3 dir = color[0] - color[1];
float stops[4]; float stops[4];
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
@@ -200,7 +200,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
#ifndef BC1_DITHER #ifndef BC1_DITHER
// the version without dithering is straightforward // the version without dithering is straightforward
for (uint i = 16u; i-- > 0u;) { for (uint i = 16u; i-- > 0u;) {
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
const float dotValue = dot(currColor, dir); const float dotValue = dot(currColor, dir);
mask <<= 2u; mask <<= 2u;
@@ -213,8 +213,8 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
} }
#else #else
// with floyd-steinberg dithering // with floyd-steinberg dithering
float4 ep1 = float4(0, 0, 0, 0); vec4 ep1 = vec4(0, 0, 0, 0);
float4 ep2 = float4(0, 0, 0, 0); vec4 ep2 = vec4(0, 0, 0, 0);
c0Point *= 16.0f; c0Point *= 16.0f;
halfPoint *= 16.0f; halfPoint *= 16.0f;
@@ -224,7 +224,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
float ditherDot; float ditherDot;
uint lmask, step; uint lmask, step;
float3 currColor; vec3 currColor;
float dotValue; float dotValue;
currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f; currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f;
@@ -277,7 +277,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
mask |= lmask << (y * 8u); mask |= lmask << (y * 8u);
{ {
float4 tmp = ep1; vec4 tmp = ep1;
ep1 = ep2; ep1 = ep2;
ep2 = tmp; ep2 = tmp;
} // swap } // swap
@@ -300,7 +300,7 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
{ {
// yes, linear system would be singular; solve using optimal // yes, linear system would be singular; solve using optimal
// single-color match on average color // single-color match on average color
float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f); vec3 rgbVal = vec3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz; rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;
} }
@@ -322,10 +322,10 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
float akku = 0.0f; float akku = 0.0f;
uint cm = mask; uint cm = mask;
float3 at1 = float3(0, 0, 0); vec3 at1 = vec3(0, 0, 0);
float3 at2 = float3(0, 0, 0); vec3 at2 = vec3(0, 0, 0);
for (int i = 0; i < 16; ++i, cm >>= 2u) { for (int i = 0; i < 16; ++i, cm >>= 2u) {
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f; const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
const uint step = cm & 3u; const uint step = cm & 3u;
const float w1 = w1Tab[step]; const float w1 = w1Tab[step];
@@ -341,17 +341,17 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
const float yy = floor(mod(akku, 65535.0f) / 256.0f); const float yy = floor(mod(akku, 65535.0f) / 256.0f);
const float xy = mod(akku, 256.0f); const float xy = mod(akku, 256.0f);
float2 f_rb_g; vec2 f_rb_g;
f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy); f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
f_rb_g.y = f_rb_g.x * 63.0f / 31.0f; f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;
// solve. // solve.
const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f), const vec3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31)); vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z; newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;
const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f), const vec3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31)); vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z; newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
} }
@@ -364,48 +364,48 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
#ifdef BC1_DITHER #ifdef BC1_DITHER
/// Quantizes 'srcValue' which is originally in 888 (full range), /// Quantizes 'srcValue' which is originally in 888 (full range),
/// converting it to 565 and then back to 888 (quantized) /// converting it to 565 and then back to 888 (quantized)
float3 quant(float3 srcValue) { vec3 quant(vec3 srcValue) {
srcValue = clamp(srcValue, 0.0f, 255.0f); srcValue = clamp(srcValue, 0.0f, 255.0f);
// Convert 888 -> 565 // Convert 888 -> 565
srcValue = floor(srcValue * float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f); srcValue = floor(srcValue * vec3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
// Convert 565 -> 888 back // Convert 565 -> 888 back
srcValue = floor(srcValue * float3(8.25f, 4.0625f, 8.25f)); srcValue = floor(srcValue * vec3(8.25f, 4.0625f, 8.25f));
return srcValue; return srcValue;
} }
void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) { void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
float3 ep1[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) }; vec3 ep1[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
float3 ep2[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) }; vec3 ep2[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
for (uint y = 0u; y < 16u; y += 4u) { for (uint y = 0u; y < 16u; y += 4u) {
float3 srcPixel, dithPixel; vec3 srcPixel, dithPixel;
srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f; srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f;
dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f))); dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f)));
ep1[0] = srcPixel - dithPixel; ep1[0] = srcPixel - dithPixel;
dthPixBlck[y + 0u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); dthPixBlck[y + 0u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f; srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f;
dithPixel = quant( dithPixel = quant(
srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f))); srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f)));
ep1[1] = srcPixel - dithPixel; ep1[1] = srcPixel - dithPixel;
dthPixBlck[y + 1u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); dthPixBlck[y + 1u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f; srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f;
dithPixel = quant( dithPixel = quant(
srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f))); srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f)));
ep1[2] = srcPixel - dithPixel; ep1[2] = srcPixel - dithPixel;
dthPixBlck[y + 2u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); dthPixBlck[y + 2u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f; srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f;
dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f))); dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f)));
ep1[3] = srcPixel - dithPixel; ep1[3] = srcPixel - dithPixel;
dthPixBlck[y + 3u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f)); dthPixBlck[y + 3u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
// swap( ep1, ep2 ) // swap( ep1, ep2 )
for (uint i = 0u; i < 4u; ++i) { for (uint i = 0u; i < 4u; ++i) {
float3 tmp = ep1[i]; vec3 tmp = ep1[i];
ep1[i] = ep2[i]; ep1[i] = ep2[i];
ep2[i] = tmp; ep2[i] = tmp;
} }
@@ -419,11 +419,11 @@ void main() {
bool bAllColorsEqual = true; bool bAllColorsEqual = true;
// Load the whole 4x4 block // Load the whole 4x4 block
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u; const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
for (uint i = 0u; i < 16u; ++i) { for (uint i = 0u; i < 16u; ++i) {
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u); const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i & 0x03u, i >> 2u);
const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz; const vec3 srcPixels0 = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyz;
srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f)); srcPixelsBlock[i] = packUnorm4x8(vec4(srcPixels0, 1.0f));
bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i]; bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i];
} }
@@ -431,7 +431,7 @@ void main() {
uint mask = 0u; uint mask = 0u;
if (bAllColorsEqual) { if (bAllColorsEqual) {
const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f); const uvec3 rgbVal = uvec3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
mask = 0xAAAAAAAAu; mask = 0xAAAAAAAAu;
maxEndp16 = maxEndp16 =
c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0]; c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
@@ -449,7 +449,7 @@ void main() {
// second step: pca+map along principal axis // second step: pca+map along principal axis
OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16); OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
if (minEndp16 != maxEndp16) { if (minEndp16 != maxEndp16) {
float3 colors[4]; vec3 colors[4];
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
mask = MatchColorsBlock(srcPixelsBlock, colors); mask = MatchColorsBlock(srcPixelsBlock, colors);
} }
@@ -461,7 +461,7 @@ void main() {
if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) { if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
if (minEndp16 != maxEndp16) { if (minEndp16 != maxEndp16) {
float3 colors[4]; vec3 colors[4];
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
mask = MatchColorsBlock(srcPixelsBlock, colors); mask = MatchColorsBlock(srcPixelsBlock, colors);
} else { } else {
@@ -482,10 +482,10 @@ void main() {
mask ^= 0x55555555u; mask ^= 0x55555555u;
} }
uint2 outputBytes; uvec2 outputBytes;
outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u); outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u);
outputBytes.y = mask; outputBytes.y = mask;
uint2 dstUV = gl_GlobalInvocationID.xy; uvec2 dstUV = gl_GlobalInvocationID.xy;
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
} }

View File

@@ -6,12 +6,10 @@ signed = "#define SNORM";
#[compute] #[compute]
#version 450 #version 450
#include "CrossPlatformSettings_piece_all.glsl"
#VERSION_DEFINES #VERSION_DEFINES
shared float2 g_minMaxValues[4u * 4u * 4u]; shared vec2 g_minMaxValues[4u * 4u * 4u];
shared uint2 g_mask[4u * 4u]; shared uvec2 g_mask[4u * 4u];
layout(binding = 0) uniform sampler2D srcTex; layout(binding = 0) uniform sampler2D srcTex;
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
@@ -40,30 +38,30 @@ layout(local_size_x = 4, //
/// - Long threads (e.g. 1 thread per block) misses parallelism opportunities /// - Long threads (e.g. 1 thread per block) misses parallelism opportunities
void main() { void main() {
float minVal, maxVal; float minVal, maxVal;
float4 srcPixel; vec4 srcPixel;
const uint blockThreadId = gl_LocalInvocationID.x; const uint blockThreadId = gl_LocalInvocationID.x;
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u; const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
for (uint i = 0u; i < 4u; ++i) { for (uint i = 0u; i < 4u; ++i) {
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId); const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i, blockThreadId);
const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw; const vec4 value = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyzw;
srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w); srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w);
srcPixel[i] *= 255.0f; srcPixel[i] *= 255.0f;
} }
minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z); minVal = min(srcPixel.x, min(srcPixel.y, srcPixel.z));
maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z); maxVal = max(srcPixel.x, max(srcPixel.y, srcPixel.z));
minVal = min(minVal, srcPixel.w); minVal = min(minVal, srcPixel.w);
maxVal = max(maxVal, srcPixel.w); maxVal = max(maxVal, srcPixel.w);
const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u); const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u);
const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y; const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y;
g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal); g_minMaxValues[minMaxIdxBase + blockThreadId] = vec2(minVal, maxVal);
g_mask[maskIdxBase] = uint2(0u, 0u); g_mask[maskIdxBase] = uvec2(0u, 0u);
memoryBarrierShared(); memoryBarrierShared();
barrier(); barrier();
@@ -133,21 +131,21 @@ void main() {
if (blockThreadId == 0u) { if (blockThreadId == 0u) {
// Save data // Save data
uint2 outputBytes; uvec2 outputBytes;
#ifdef SNORM #ifdef SNORM
outputBytes.x = outputBytes.x =
packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f, packSnorm4x8(vec4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f)); minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f));
#else #else
outputBytes.x = packUnorm4x8( outputBytes.x = packUnorm4x8(
float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f)); vec4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
#endif #endif
outputBytes.x |= g_mask[maskIdxBase].x; outputBytes.x |= g_mask[maskIdxBase].x;
outputBytes.y = g_mask[maskIdxBase].y; outputBytes.y = g_mask[maskIdxBase].y;
uint2 dstUV = gl_GlobalInvocationID.yz; uvec2 dstUV = gl_GlobalInvocationID.yz;
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
} }
} }

View File

@@ -6,24 +6,22 @@ unsigned = "#define QUALITY"; // The "Quality" preset causes artifacting on sign
#[compute] #[compute]
#version 450 #version 450
#include "CrossPlatformSettings_piece_all.glsl"
#VERSION_DEFINES #VERSION_DEFINES
float3 f32tof16(float3 value) { vec3 f32tof16(vec3 value) {
return float3(packHalf2x16(float2(value.x, 0.0)), return vec3(packHalf2x16(vec2(value.x, 0.0)),
packHalf2x16(float2(value.y, 0.0)), packHalf2x16(vec2(value.y, 0.0)),
packHalf2x16(float2(value.z, 0.0))); packHalf2x16(vec2(value.z, 0.0)));
} }
float3 f16tof32(uint3 value) { vec3 f16tof32(uvec3 value) {
return float3(unpackHalf2x16(value.x).x, return vec3(unpackHalf2x16(value.x).x,
unpackHalf2x16(value.y).x, unpackHalf2x16(value.y).x,
unpackHalf2x16(value.z).x); unpackHalf2x16(value.z).x);
} }
float f32tof16(float value) { float f32tof16(float value) {
return packHalf2x16(float2(value.x, 0.0)); return packHalf2x16(vec2(value.x, 0.0));
} }
float f16tof32(uint value) { float f16tof32(uint value) {
@@ -34,7 +32,7 @@ layout(binding = 0) uniform sampler2D srcTexture;
layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture; layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
layout(push_constant, std430) uniform Params { layout(push_constant, std430) uniform Params {
float2 p_textureSizeRcp; vec2 p_textureSizeRcp;
uint padding0; uint padding0;
uint padding1; uint padding1;
} }
@@ -69,7 +67,7 @@ float CrossCalcMSLE(float a, float b) {
return result; return result;
} }
float CalcMSLE(float3 a, float3 b) { float CalcMSLE(vec3 a, vec3 b) {
float result = 0.0f; float result = 0.0f;
if (isNegative(a.x) != isNegative(b.x)) { if (isNegative(a.x) != isNegative(b.x)) {
result += CrossCalcMSLE(a.x, b.x); result += CrossCalcMSLE(a.x, b.x);
@@ -91,32 +89,32 @@ float CalcMSLE(float3 a, float3 b) {
} }
// Adapt the log function to make sense when a < 0 // Adapt the log function to make sense when a < 0
float3 customLog2(float3 a) { vec3 customLog2(vec3 a) {
return float3( return vec3(
a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f), a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f),
a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f), a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f),
a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f)); a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f));
} }
// Inverse of customLog2() // Inverse of customLog2()
float3 customExp2(float3 a) { vec3 customExp2(vec3 a) {
return float3( return vec3(
a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f), a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f),
a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f), a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f),
a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f)); a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f));
} }
#else #else
float CalcMSLE(float3 a, float3 b) { float CalcMSLE(vec3 a, vec3 b) {
float3 err = log2((b + 1.0f) / (a + 1.0f)); vec3 err = log2((b + 1.0f) / (a + 1.0f));
err = err * err; err = err * err;
return err.x + err.y + err.z; return err.x + err.y + err.z;
} }
float3 customLog2(float3 a) { vec3 customLog2(vec3 a) {
return log2(a + 1.0f); return log2(a + 1.0f);
} }
float3 customExp2(float3 a) { vec3 customExp2(vec3 a) {
return exp2(a) - 1.0f; return exp2(a) - 1.0f;
} }
#endif #endif
@@ -157,98 +155,98 @@ uint Pattern(uint p, uint i) {
#ifndef SIGNED #ifndef SIGNED
//UF //UF
float3 Quantize7(float3 x) { vec3 Quantize7(vec3 x) {
return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f); return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
} }
float3 Quantize9(float3 x) { vec3 Quantize9(vec3 x) {
return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f); return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
} }
float3 Quantize10(float3 x) { vec3 Quantize10(vec3 x) {
return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f); return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
} }
float3 Unquantize7(float3 x) { vec3 Unquantize7(vec3 x) {
return (x * 65536.0f + 0x8000) / 128.0f; return (x * 65536.0f + 0x8000) / 128.0f;
} }
float3 Unquantize9(float3 x) { vec3 Unquantize9(vec3 x) {
return (x * 65536.0f + 0x8000) / 512.0f; return (x * 65536.0f + 0x8000) / 512.0f;
} }
float3 Unquantize10(float3 x) { vec3 Unquantize10(vec3 x) {
return (x * 65536.0f + 0x8000) / 1024.0f; return (x * 65536.0f + 0x8000) / 1024.0f;
} }
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
return f16tof32(uint3(comp)); return f16tof32(uvec3(comp));
} }
#else #else
//SF //SF
// Per-component sign that never yields zero: 1.0 where the component is >= 0,
// -1.0 otherwise. (The built-in sign() returns 0.0 for a zero input.)
float3 cmpSign(float3 value) { vec3 cmpSign(vec3 value) {
float3 signVal; vec3 signVal;
signVal.x = value.x >= 0.0f ? 1.0f : -1.0f; signVal.x = value.x >= 0.0f ? 1.0f : -1.0f;
signVal.y = value.y >= 0.0f ? 1.0f : -1.0f; signVal.y = value.y >= 0.0f ? 1.0f : -1.0f;
signVal.z = value.z >= 0.0f ? 1.0f : -1.0f; signVal.z = value.z >= 0.0f ? 1.0f : -1.0f;
return signVal; return signVal;
} }
float3 Quantize7(float3 x) { vec3 Quantize7(vec3 x) {
float3 signVal = cmpSign(x); vec3 signVal = cmpSign(x);
return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f); return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f);
} }
// Signed variant (compiled when SIGNED is defined).
// Quantizes the magnitude: f32tof16(abs(x)) scaled by 256 / (0x7bff + 1), then
// multiplies by the +1/-1 sign obtained from cmpSign(x).
// NOTE(review): f32tof16 is defined outside this view - confirm its return domain.
float3 Quantize9(float3 x) { vec3 Quantize9(vec3 x) {
float3 signVal = cmpSign(x); vec3 signVal = cmpSign(x);
return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f); return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f);
} }
// Signed variant (compiled when SIGNED is defined).
// Quantizes the magnitude: f32tof16(abs(x)) scaled by 512 / (0x7bff + 1), then
// multiplies by the +1/-1 sign obtained from cmpSign(x).
// NOTE(review): f32tof16 is defined outside this view - confirm its return domain.
float3 Quantize10(float3 x) { vec3 Quantize10(vec3 x) {
float3 signVal = cmpSign(x); vec3 signVal = cmpSign(x);
return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f); return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f);
} }
// Signed variant (compiled when SIGNED is defined).
// Computes sign(x) * (|x| * 32768 + 0x4000) / 64 per component, then replaces
// every component whose magnitude is >= 64 with 32767.0.
// NOTE(review): the saturated value is written as +32767.0 regardless of the
// input's sign - confirm this is intended for negative inputs.
float3 Unquantize7(float3 x) { vec3 Unquantize7(vec3 x) {
float3 signVal = sign(x); vec3 signVal = sign(x);
x = abs(x); x = abs(x);
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f; vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x; finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x;
finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y; finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y;
finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z; finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z;
return finalVal; return finalVal;
} }
// Signed variant (compiled when SIGNED is defined).
// Computes sign(x) * (|x| * 32768 + 0x4000) / 256 per component, then replaces
// every component whose magnitude is >= 256 with 32767.0.
// NOTE(review): the saturated value is written as +32767.0 regardless of the
// input's sign - confirm this is intended for negative inputs.
float3 Unquantize9(float3 x) { vec3 Unquantize9(vec3 x) {
float3 signVal = sign(x); vec3 signVal = sign(x);
x = abs(x); x = abs(x);
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f; vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x; finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x;
finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y; finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y;
finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z; finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z;
return finalVal; return finalVal;
} }
// Signed variant (compiled when SIGNED is defined).
// Computes sign(x) * (|x| * 32768 + 0x4000) / 512 per component, then replaces
// every component whose magnitude is >= 512 with 32767.0.
// NOTE(review): the saturated value is written as +32767.0 regardless of the
// input's sign - confirm this is intended for negative inputs.
float3 Unquantize10(float3 x) { vec3 Unquantize10(vec3 x) {
float3 signVal = sign(x); vec3 signVal = sign(x);
x = abs(x); x = abs(x);
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f; vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x; finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x;
finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y; finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y;
finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z; finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z;
return finalVal; return finalVal;
} }
// Signed variant (compiled when SIGNED is defined).
// Blends the two unquantized endpoints with factors (64 - weight) and weight,
// adds 32, scales by 31 / 2048, converts the result to unsigned integers and
// passes it through f16tof32.
// NOTE(review): weight is presumably on a 0..64 scale (the two factors sum to 64);
// f16tof32 is defined outside this view, and the behavior of the unsigned
// conversion for negative comp values should be confirmed.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f); vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
return f16tof32(uint3(comp)); return f16tof32(uvec3(comp));
} }
#endif #endif
// Exchanges the values of a and b in place, using a temporary copy of a.
void Swap(inout float3 a, inout float3 b) { void Swap(inout vec3 a, inout vec3 b) {
float3 tmp = a; vec3 tmp = a;
a = b; a = b;
b = tmp; b = tmp;
} }
@@ -270,8 +268,8 @@ uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
} }
// This adds a bitflag to quantized values that signifies whether they are negative. // This adds a bitflag to quantized values that signifies whether they are negative.
void SignExtend(inout float3 v1, uint mask, uint signFlag) { void SignExtend(inout vec3 v1, uint mask, uint signFlag) {
int3 v = int3(v1); ivec3 v = ivec3(v1);
v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0); v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0);
v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0); v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0);
v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0); v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0);
@@ -279,38 +277,39 @@ void SignExtend(inout float3 v1, uint mask, uint signFlag) {
} }
// Encodes a block with mode 11 (2x 10-bit endpoints). // Encodes a block with mode 11 (2x 10-bit endpoints).
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { void EncodeP1(inout uvec4 block, inout float blockMSLE, vec3 texels[16]) {
// compute endpoints (min/max RGB bbox) // compute endpoints (min/max RGB bbox)
float3 blockMin = texels[0]; vec3 blockMin = texels[0];
float3 blockMax = texels[0]; vec3 blockMax = texels[0];
for (uint i = 1u; i < 16u; ++i) { for (uint i = 1u; i < 16u; ++i) {
blockMin = min(blockMin, texels[i]); blockMin = min(blockMin, texels[i]);
blockMax = max(blockMax, texels[i]); blockMax = max(blockMax, texels[i]);
} }
// refine endpoints in log2 RGB space // refine endpoints in log2 RGB space
float3 refinedBlockMin = blockMax; vec3 refinedBlockMin = blockMax;
float3 refinedBlockMax = blockMin; vec3 refinedBlockMax = blockMin;
for (uint i = 0u; i < 16u; ++i) { for (uint i = 0u; i < 16u; ++i) {
refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]); refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]); refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
} }
float3 logBlockMax = customLog2(blockMax); vec3 logBlockMax = customLog2(blockMax);
float3 logBlockMin = customLog2(blockMin); vec3 logBlockMin = customLog2(blockMin);
float3 logRefinedBlockMax = customLog2(refinedBlockMax); vec3 logRefinedBlockMax = customLog2(refinedBlockMax);
float3 logRefinedBlockMin = customLog2(refinedBlockMin); vec3 logRefinedBlockMin = customLog2(refinedBlockMin);
float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); vec3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
blockMin = customExp2(logBlockMin); blockMin = customExp2(logBlockMin);
blockMax = customExp2(logBlockMax); blockMax = customExp2(logBlockMax);
float3 blockDir = blockMax - blockMin; vec3 blockDir = blockMax - blockMin;
blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
float3 endpoint0 = Quantize10(blockMin); vec3 endpoint0 = Quantize10(blockMin);
float3 endpoint1 = Quantize10(blockMax); vec3 endpoint1 = Quantize10(blockMax);
float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
@@ -336,12 +335,12 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
} }
// compute compression error (MSLE) // compute compression error (MSLE)
float3 endpoint0Unq = Unquantize10(endpoint0); vec3 endpoint0Unq = Unquantize10(endpoint0);
float3 endpoint1Unq = Unquantize10(endpoint1); vec3 endpoint1Unq = Unquantize10(endpoint1);
float msle = 0.0f; float msle = 0.0f;
for (uint i = 0u; i < 16u; ++i) { for (uint i = 0u; i < 16u; ++i) {
float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f); float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); vec3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
msle += CalcMSLE(texels[i], texelUnc); msle += CalcMSLE(texels[i], texelUnc);
} }
@@ -384,19 +383,19 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
block.w |= indices[15] << 28u; block.w |= indices[15] << 28u;
} }
// Squared distance from Point to the line through PointOnLine along LineDirection.
// The offset (Point - PointOnLine) has its component along LineDirection removed;
// the squared length of what remains is returned.
// NOTE(review): the projection is only a true distance when LineDirection is unit
// length - callers are assumed to pass a normalized direction; confirm.
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) { float DistToLineSq(vec3 PointOnLine, vec3 LineDirection, vec3 Point) {
float3 w = Point - PointOnLine; vec3 w = Point - PointOnLine;
float3 x = w - dot(w, LineDirection) * LineDirection; vec3 x = w - dot(w, LineDirection) * LineDirection;
return dot(x, x); return dot(x, x);
} }
// Gets the deviation from the source data of a particular pattern (smaller is better). // Gets the deviation from the source data of a particular pattern (smaller is better).
float EvaluateP2Pattern(uint pattern, float3 texels[16]) { float EvaluateP2Pattern(uint pattern, vec3 texels[16]) {
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
for (uint i = 0; i < 16; ++i) { for (uint i = 0; i < 16; ++i) {
uint paletteID = Pattern(pattern, i); uint paletteID = Pattern(pattern, i);
@@ -409,8 +408,8 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
} }
} }
float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); vec3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); vec3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
float sqDistanceFromLine = 0.0f; float sqDistanceFromLine = 0.0f;
@@ -427,11 +426,11 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
} }
// Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding. // Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding.
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) { void EncodeP2Pattern(inout uvec4 block, inout float blockMSLE, uint pattern, vec3 texels[16]) {
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN); vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
for (uint i = 0u; i < 16u; ++i) { for (uint i = 0u; i < 16u; ++i) {
uint paletteID = Pattern(pattern, i); uint paletteID = Pattern(pattern, i);
@@ -444,8 +443,8 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
} }
} }
float3 p0BlockDir = p0BlockMax - p0BlockMin; vec3 p0BlockDir = p0BlockMax - p0BlockMin;
float3 p1BlockDir = p1BlockMax - p1BlockMin; vec3 p1BlockDir = p1BlockMax - p1BlockMin;
p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z); p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z); p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
@@ -479,15 +478,15 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
indices[i] = paletteID == 0u ? p0Index : p1Index; indices[i] = paletteID == 0u ? p0Index : p1Index;
} }
float3 endpoint760 = floor(Quantize7(p0BlockMin)); vec3 endpoint760 = floor(Quantize7(p0BlockMin));
float3 endpoint761 = floor(Quantize7(p0BlockMax)); vec3 endpoint761 = floor(Quantize7(p0BlockMax));
float3 endpoint762 = floor(Quantize7(p1BlockMin)); vec3 endpoint762 = floor(Quantize7(p1BlockMin));
float3 endpoint763 = floor(Quantize7(p1BlockMax)); vec3 endpoint763 = floor(Quantize7(p1BlockMax));
float3 endpoint950 = floor(Quantize9(p0BlockMin)); vec3 endpoint950 = floor(Quantize9(p0BlockMin));
float3 endpoint951 = floor(Quantize9(p0BlockMax)); vec3 endpoint951 = floor(Quantize9(p0BlockMax));
float3 endpoint952 = floor(Quantize9(p1BlockMin)); vec3 endpoint952 = floor(Quantize9(p1BlockMin));
float3 endpoint953 = floor(Quantize9(p1BlockMax)); vec3 endpoint953 = floor(Quantize9(p1BlockMax));
endpoint761 = endpoint761 - endpoint760; endpoint761 = endpoint761 - endpoint760;
endpoint762 = endpoint762 - endpoint760; endpoint762 = endpoint762 - endpoint760;
@@ -514,28 +513,28 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
endpoint950 = clamp(endpoint950, -maxVal9, maxVal9); endpoint950 = clamp(endpoint950, -maxVal9, maxVal9);
#endif #endif
float3 endpoint760Unq = Unquantize7(endpoint760); vec3 endpoint760Unq = Unquantize7(endpoint760);
float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761); vec3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); vec3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); vec3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
float3 endpoint950Unq = Unquantize9(endpoint950); vec3 endpoint950Unq = Unquantize9(endpoint950);
float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); vec3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); vec3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); vec3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
float msle76 = 0.0f; float msle76 = 0.0f;
float msle95 = 0.0f; float msle95 = 0.0f;
for (uint i = 0u; i < 16u; ++i) { for (uint i = 0u; i < 16u; ++i) {
uint paletteID = Pattern(pattern, i); uint paletteID = Pattern(pattern, i);
float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq; vec3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq; vec3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq; vec3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq; vec3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f); float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); vec3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); vec3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
msle76 += CalcMSLE(texels[i], texelUnc76); msle76 += CalcMSLE(texels[i], texelUnc76);
msle95 += CalcMSLE(texels[i], texelUnc95); msle95 += CalcMSLE(texels[i], texelUnc95);
@@ -558,7 +557,7 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
float p2MSLE = min(msle76, msle95); float p2MSLE = min(msle76, msle95);
if (p2MSLE < blockMSLE) { if (p2MSLE < blockMSLE) {
blockMSLE = p2MSLE; blockMSLE = p2MSLE;
block = uint4(0u, 0u, 0u, 0u); block = uvec4(0u, 0u, 0u, 0u);
if (p2MSLE == msle76) { if (p2MSLE == msle76) {
// 7.6 // 7.6
@@ -681,43 +680,43 @@ void main() {
// 4 5 6 7 // 4 5 6 7
// 8 9 10 11 // 8 9 10 11
// 12 13 14 15 // 12 13 14 15
float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp; vec2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
float2 block0UV = uv; vec2 block0UV = uv;
float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f); vec2 block1UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 0.0f);
float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y); vec2 block2UV = uv + vec2(0.0f, 2.0f * params.p_textureSizeRcp.y);
float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y); vec2 block3UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV); vec4 block0X = textureGather(srcTexture, block0UV, 0);
float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV); vec4 block1X = textureGather(srcTexture, block1UV, 0);
float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV); vec4 block2X = textureGather(srcTexture, block2UV, 0);
float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV); vec4 block3X = textureGather(srcTexture, block3UV, 0);
float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV); vec4 block0Y = textureGather(srcTexture, block0UV, 1);
float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV); vec4 block1Y = textureGather(srcTexture, block1UV, 1);
float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV); vec4 block2Y = textureGather(srcTexture, block2UV, 1);
float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV); vec4 block3Y = textureGather(srcTexture, block3UV, 1);
float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV); vec4 block0Z = textureGather(srcTexture, block0UV, 2);
float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV); vec4 block1Z = textureGather(srcTexture, block1UV, 2);
float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV); vec4 block2Z = textureGather(srcTexture, block2UV, 2);
float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV); vec4 block3Z = textureGather(srcTexture, block3UV, 2);
float3 texels[16]; vec3 texels[16];
texels[0] = float3(block0X.w, block0Y.w, block0Z.w); texels[0] = vec3(block0X.w, block0Y.w, block0Z.w);
texels[1] = float3(block0X.z, block0Y.z, block0Z.z); texels[1] = vec3(block0X.z, block0Y.z, block0Z.z);
texels[2] = float3(block1X.w, block1Y.w, block1Z.w); texels[2] = vec3(block1X.w, block1Y.w, block1Z.w);
texels[3] = float3(block1X.z, block1Y.z, block1Z.z); texels[3] = vec3(block1X.z, block1Y.z, block1Z.z);
texels[4] = float3(block0X.x, block0Y.x, block0Z.x); texels[4] = vec3(block0X.x, block0Y.x, block0Z.x);
texels[5] = float3(block0X.y, block0Y.y, block0Z.y); texels[5] = vec3(block0X.y, block0Y.y, block0Z.y);
texels[6] = float3(block1X.x, block1Y.x, block1Z.x); texels[6] = vec3(block1X.x, block1Y.x, block1Z.x);
texels[7] = float3(block1X.y, block1Y.y, block1Z.y); texels[7] = vec3(block1X.y, block1Y.y, block1Z.y);
texels[8] = float3(block2X.w, block2Y.w, block2Z.w); texels[8] = vec3(block2X.w, block2Y.w, block2Z.w);
texels[9] = float3(block2X.z, block2Y.z, block2Z.z); texels[9] = vec3(block2X.z, block2Y.z, block2Z.z);
texels[10] = float3(block3X.w, block3Y.w, block3Z.w); texels[10] = vec3(block3X.w, block3Y.w, block3Z.w);
texels[11] = float3(block3X.z, block3Y.z, block3Z.z); texels[11] = vec3(block3X.z, block3Y.z, block3Z.z);
texels[12] = float3(block2X.x, block2Y.x, block2Z.x); texels[12] = vec3(block2X.x, block2Y.x, block2Z.x);
texels[13] = float3(block2X.y, block2Y.y, block2Z.y); texels[13] = vec3(block2X.y, block2Y.y, block2Z.y);
texels[14] = float3(block3X.x, block3Y.x, block3Z.x); texels[14] = vec3(block3X.x, block3Y.x, block3Z.x);
texels[15] = float3(block3X.y, block3Y.y, block3Z.y); texels[15] = vec3(block3X.y, block3Y.y, block3Z.y);
uint4 block = uint4(0u, 0u, 0u, 0u); uvec4 block = uvec4(0u, 0u, 0u, 0u);
float blockMSLE = 0.0f; float blockMSLE = 0.0f;
EncodeP1(block, blockMSLE, texels); EncodeP1(block, blockMSLE, texels);
@@ -738,5 +737,5 @@ void main() {
EncodeP2Pattern(block, blockMSLE, bestPattern, texels); EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
#endif #endif
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block); imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), block);
} }