You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-04 12:00:25 +00:00
Betsy: Remove OGRE aliases
This commit is contained in:
@@ -1,75 +0,0 @@
|
||||
#define min3(a, b, c) min(a, min(b, c))
|
||||
#define max3(a, b, c) max(a, max(b, c))
|
||||
|
||||
#define float2 vec2
|
||||
#define float3 vec3
|
||||
#define float4 vec4
|
||||
|
||||
#define int2 ivec2
|
||||
#define int3 ivec3
|
||||
#define int4 ivec4
|
||||
|
||||
#define uint2 uvec2
|
||||
#define uint3 uvec3
|
||||
#define uint4 uvec4
|
||||
|
||||
#define float2x2 mat2
|
||||
#define float3x3 mat3
|
||||
#define float4x4 mat4
|
||||
#define ogre_float4x3 mat3x4
|
||||
|
||||
#define ushort uint
|
||||
#define ushort3 uint3
|
||||
#define ushort4 uint4
|
||||
|
||||
//Short used for read operations. It's an int in GLSL & HLSL, and a ushort in Metal.
|
||||
#define rshort int
|
||||
#define rshort2 int2
|
||||
#define rint int
|
||||
//Short used for write operations. It's an int in GLSL, and a ushort in HLSL & Metal.
|
||||
#define wshort2 int2
|
||||
#define wshort3 int3
|
||||
|
||||
#define toFloat3x3(x) mat3(x)
|
||||
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
|
||||
|
||||
#define mul(x, y) ((x) * (y))
|
||||
#define saturate(x) clamp((x), 0.0, 1.0)
|
||||
#define lerp mix
|
||||
#define rsqrt inversesqrt
|
||||
#define INLINE
|
||||
#define NO_INTERPOLATION_PREFIX flat
|
||||
#define NO_INTERPOLATION_SUFFIX
|
||||
|
||||
#define PARAMS_ARG_DECL
|
||||
#define PARAMS_ARG
|
||||
|
||||
#define reversebits bitfieldReverse
|
||||
|
||||
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
|
||||
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
|
||||
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
|
||||
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
|
||||
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
|
||||
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
|
||||
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
|
||||
#define OGRE_ddx(val) dFdx(val)
|
||||
#define OGRE_ddy(val) dFdy(val)
|
||||
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
|
||||
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
|
||||
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
|
||||
|
||||
#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
|
||||
|
||||
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
|
||||
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
|
||||
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
|
||||
|
||||
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
|
||||
|
||||
#define OGRE_SAMPLER_ARG_DECL(samplerName)
|
||||
#define OGRE_SAMPLER_ARG(samplerName)
|
||||
|
||||
#define OGRE_Texture3D_float4 sampler3D
|
||||
#define OGRE_OUT_REF(declType, variableName) out declType variableName
|
||||
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
|
||||
@@ -1,12 +1,10 @@
|
||||
// RGB and Alpha components of ETC2 RGBA are computed separately.
|
||||
// RGB and Alpha components of ETC2 RGBA/DXT5 are computed separately.
|
||||
// This compute shader merely stitches them together to form the final result.
|
||||
// It's also used by RG11 driver to stitch two R11 into one RG11
|
||||
// It's also used by RG11/BC4 driver to stitch two R11/BC4 into one RG11/BC5
|
||||
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
|
||||
layout(local_size_x = 8, //
|
||||
local_size_y = 8, //
|
||||
local_size_z = 1) in;
|
||||
@@ -16,8 +14,8 @@ layout(binding = 1) uniform usampler2D srcAlpha;
|
||||
layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||
|
||||
void main() {
|
||||
uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy;
|
||||
uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy;
|
||||
uvec2 rgbBlock = texelFetch(srcRGB, ivec2(gl_GlobalInvocationID.xy), 0).xy;
|
||||
uvec2 alphaBlock = texelFetch(srcAlpha, ivec2(gl_GlobalInvocationID.xy), 0).xy;
|
||||
|
||||
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy));
|
||||
imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), uvec4(rgbBlock.xy, alphaBlock.xy));
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ dithered = "#define BC1_DITHER";
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
#VERSION_DEFINES
|
||||
|
||||
#define FLT_MAX 340282346638528859811704183484516925440.0f
|
||||
|
||||
@@ -14,8 +14,8 @@ layout(binding = 0) uniform sampler2D srcTex;
|
||||
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||
|
||||
layout(std430, binding = 2) readonly restrict buffer globalBuffer {
|
||||
float2 c_oMatch5[256];
|
||||
float2 c_oMatch6[256];
|
||||
vec2 c_oMatch5[256];
|
||||
vec2 c_oMatch6[256];
|
||||
};
|
||||
|
||||
layout(push_constant, std430) uniform Params {
|
||||
@@ -28,14 +28,14 @@ layout(local_size_x = 8, //
|
||||
local_size_y = 8, //
|
||||
local_size_z = 1) in;
|
||||
|
||||
float3 rgb565to888(float rgb565) {
|
||||
float3 retVal;
|
||||
vec3 rgb565to888(float rgb565) {
|
||||
vec3 retVal;
|
||||
retVal.x = floor(rgb565 / 2048.0f);
|
||||
retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f);
|
||||
retVal.z = floor(mod(rgb565, 32.0f));
|
||||
|
||||
// This is the correct 565 to 888 conversion:
|
||||
// rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
|
||||
// rgb = floor( rgb * ( 255.0f / vec3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
|
||||
//
|
||||
// However stb_dxt follows a different one:
|
||||
// rb = floor( rb * ( 256 / 32 + 8 / 32 ) );
|
||||
@@ -52,10 +52,10 @@ float3 rgb565to888(float rgb565) {
|
||||
// Perhaps when we make 888 -> 565 -> 888 it doesn't matter
|
||||
// because they end up mapping to the original number
|
||||
|
||||
return floor(retVal * float3(8.25f, 4.0625f, 8.25f));
|
||||
return floor(retVal * vec3(8.25f, 4.0625f, 8.25f));
|
||||
}
|
||||
|
||||
float rgb888to565(float3 rgbValue) {
|
||||
float rgb888to565(vec3 rgbValue) {
|
||||
rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f);
|
||||
rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
|
||||
|
||||
@@ -63,7 +63,7 @@ float rgb888to565(float3 rgbValue) {
|
||||
}
|
||||
|
||||
// linear interpolation at 1/3 point between a and b, using desired rounding type
|
||||
float3 lerp13(float3 a, float3 b) {
|
||||
vec3 lerp13(vec3 a, vec3 b) {
|
||||
#ifdef STB_DXT_USE_ROUNDING_BIAS
|
||||
// with rounding bias
|
||||
return a + floor((b - a) * (1.0f / 3.0f) + 0.5f);
|
||||
@@ -74,7 +74,7 @@ float3 lerp13(float3 a, float3 b) {
|
||||
}
|
||||
|
||||
/// Unpacks a block of 4 colors from two 16-bit endpoints
|
||||
void EvalColors(out float3 colors[4], float c0, float c1) {
|
||||
void EvalColors(out vec3 colors[4], float c0, float c1) {
|
||||
colors[0] = rgb565to888(c0);
|
||||
colors[1] = rgb565to888(c1);
|
||||
colors[2] = lerp13(colors[0], colors[1]);
|
||||
@@ -89,13 +89,13 @@ void EvalColors(out float3 colors[4], float c0, float c1) {
|
||||
*/
|
||||
void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
|
||||
// determine color distribution
|
||||
float3 avgColor;
|
||||
float3 minColor;
|
||||
float3 maxColor;
|
||||
vec3 avgColor;
|
||||
vec3 minColor;
|
||||
vec3 maxColor;
|
||||
|
||||
avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
|
||||
for (int i = 1; i < 16; ++i) {
|
||||
const float3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
||||
const vec3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
||||
avgColor += currColorUnorm;
|
||||
minColor = min(minColor, currColorUnorm);
|
||||
maxColor = max(maxColor, currColorUnorm);
|
||||
@@ -112,8 +112,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
||||
}
|
||||
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
float3 rgbDiff = currColor - avgColor;
|
||||
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
vec3 rgbDiff = currColor - avgColor;
|
||||
|
||||
cov[0] += rgbDiff.r * rgbDiff.r;
|
||||
cov[1] += rgbDiff.r * rgbDiff.g;
|
||||
@@ -128,7 +128,7 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
||||
cov[i] /= 255.0f;
|
||||
}
|
||||
|
||||
float3 vF = maxColor - minColor;
|
||||
vec3 vF = maxColor - minColor;
|
||||
|
||||
const int nIterPower = 4;
|
||||
for (int iter = 0; iter < nIterPower; ++iter) {
|
||||
@@ -141,8 +141,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
||||
vF.b = b;
|
||||
}
|
||||
|
||||
float magn = max3(abs(vF.r), abs(vF.g), abs(vF.b));
|
||||
float3 v;
|
||||
float magn = max(abs(vF.r), max(abs(vF.g), abs(vF.b)));
|
||||
vec3 v;
|
||||
|
||||
if (magn < 4.0f) { // too small, default to luminance
|
||||
v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000.
|
||||
@@ -153,11 +153,11 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
||||
}
|
||||
|
||||
// Pick colors at extreme points
|
||||
float3 minEndpoint, maxEndpoint;
|
||||
vec3 minEndpoint, maxEndpoint;
|
||||
float minDot = FLT_MAX;
|
||||
float maxDot = -FLT_MAX;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
const float dotValue = dot(currColor, v);
|
||||
|
||||
if (dotValue < minDot) {
|
||||
@@ -176,9 +176,9 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
||||
}
|
||||
|
||||
// The color matching function
|
||||
uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
||||
uint MatchColorsBlock(const uint srcPixelsBlock[16], vec3 color[4]) {
|
||||
uint mask = 0u;
|
||||
float3 dir = color[0] - color[1];
|
||||
vec3 dir = color[0] - color[1];
|
||||
float stops[4];
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
@@ -200,7 +200,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
||||
#ifndef BC1_DITHER
|
||||
// the version without dithering is straightforward
|
||||
for (uint i = 16u; i-- > 0u;) {
|
||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
|
||||
const float dotValue = dot(currColor, dir);
|
||||
mask <<= 2u;
|
||||
@@ -213,8 +213,8 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
||||
}
|
||||
#else
|
||||
// with floyd-steinberg dithering
|
||||
float4 ep1 = float4(0, 0, 0, 0);
|
||||
float4 ep2 = float4(0, 0, 0, 0);
|
||||
vec4 ep1 = vec4(0, 0, 0, 0);
|
||||
vec4 ep2 = vec4(0, 0, 0, 0);
|
||||
|
||||
c0Point *= 16.0f;
|
||||
halfPoint *= 16.0f;
|
||||
@@ -224,7 +224,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
||||
float ditherDot;
|
||||
uint lmask, step;
|
||||
|
||||
float3 currColor;
|
||||
vec3 currColor;
|
||||
float dotValue;
|
||||
|
||||
currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f;
|
||||
@@ -277,7 +277,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
||||
|
||||
mask |= lmask << (y * 8u);
|
||||
{
|
||||
float4 tmp = ep1;
|
||||
vec4 tmp = ep1;
|
||||
ep1 = ep2;
|
||||
ep2 = tmp;
|
||||
} // swap
|
||||
@@ -300,7 +300,7 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
||||
{
|
||||
// yes, linear system would be singular; solve using optimal
|
||||
// single-color match on average color
|
||||
float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
|
||||
vec3 rgbVal = vec3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
||||
}
|
||||
@@ -322,10 +322,10 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
||||
|
||||
float akku = 0.0f;
|
||||
uint cm = mask;
|
||||
float3 at1 = float3(0, 0, 0);
|
||||
float3 at2 = float3(0, 0, 0);
|
||||
vec3 at1 = vec3(0, 0, 0);
|
||||
vec3 at2 = vec3(0, 0, 0);
|
||||
for (int i = 0; i < 16; ++i, cm >>= 2u) {
|
||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||
|
||||
const uint step = cm & 3u;
|
||||
const float w1 = w1Tab[step];
|
||||
@@ -341,17 +341,17 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
||||
const float yy = floor(mod(akku, 65535.0f) / 256.0f);
|
||||
const float xy = mod(akku, 256.0f);
|
||||
|
||||
float2 f_rb_g;
|
||||
vec2 f_rb_g;
|
||||
f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
|
||||
f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;
|
||||
|
||||
// solve.
|
||||
const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
|
||||
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
|
||||
const vec3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
|
||||
vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
|
||||
newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;
|
||||
|
||||
const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
|
||||
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
|
||||
const vec3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
|
||||
vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
|
||||
newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
|
||||
}
|
||||
|
||||
@@ -364,48 +364,48 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
||||
#ifdef BC1_DITHER
|
||||
/// Quantizes 'srcValue' which is originally in 888 (full range),
|
||||
/// converting it to 565 and then back to 888 (quantized)
|
||||
float3 quant(float3 srcValue) {
|
||||
vec3 quant(vec3 srcValue) {
|
||||
srcValue = clamp(srcValue, 0.0f, 255.0f);
|
||||
// Convert 888 -> 565
|
||||
srcValue = floor(srcValue * float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
|
||||
srcValue = floor(srcValue * vec3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
|
||||
// Convert 565 -> 888 back
|
||||
srcValue = floor(srcValue * float3(8.25f, 4.0625f, 8.25f));
|
||||
srcValue = floor(srcValue * vec3(8.25f, 4.0625f, 8.25f));
|
||||
|
||||
return srcValue;
|
||||
}
|
||||
|
||||
void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
|
||||
float3 ep1[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
|
||||
float3 ep2[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
|
||||
vec3 ep1[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
|
||||
vec3 ep2[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
|
||||
|
||||
for (uint y = 0u; y < 16u; y += 4u) {
|
||||
float3 srcPixel, dithPixel;
|
||||
vec3 srcPixel, dithPixel;
|
||||
|
||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f;
|
||||
dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f)));
|
||||
ep1[0] = srcPixel - dithPixel;
|
||||
dthPixBlck[y + 0u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
dthPixBlck[y + 0u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
|
||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f;
|
||||
dithPixel = quant(
|
||||
srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f)));
|
||||
ep1[1] = srcPixel - dithPixel;
|
||||
dthPixBlck[y + 1u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
dthPixBlck[y + 1u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
|
||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f;
|
||||
dithPixel = quant(
|
||||
srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f)));
|
||||
ep1[2] = srcPixel - dithPixel;
|
||||
dthPixBlck[y + 2u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
dthPixBlck[y + 2u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
|
||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f;
|
||||
dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f)));
|
||||
ep1[3] = srcPixel - dithPixel;
|
||||
dthPixBlck[y + 3u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
dthPixBlck[y + 3u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||
|
||||
// swap( ep1, ep2 )
|
||||
for (uint i = 0u; i < 4u; ++i) {
|
||||
float3 tmp = ep1[i];
|
||||
vec3 tmp = ep1[i];
|
||||
ep1[i] = ep2[i];
|
||||
ep2[i] = tmp;
|
||||
}
|
||||
@@ -419,11 +419,11 @@ void main() {
|
||||
bool bAllColorsEqual = true;
|
||||
|
||||
// Load the whole 4x4 block
|
||||
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
|
||||
const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
|
||||
for (uint i = 0u; i < 16u; ++i) {
|
||||
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u);
|
||||
const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz;
|
||||
srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f));
|
||||
const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i & 0x03u, i >> 2u);
|
||||
const vec3 srcPixels0 = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyz;
|
||||
srcPixelsBlock[i] = packUnorm4x8(vec4(srcPixels0, 1.0f));
|
||||
bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i];
|
||||
}
|
||||
|
||||
@@ -431,7 +431,7 @@ void main() {
|
||||
uint mask = 0u;
|
||||
|
||||
if (bAllColorsEqual) {
|
||||
const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
|
||||
const uvec3 rgbVal = uvec3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
|
||||
mask = 0xAAAAAAAAu;
|
||||
maxEndp16 =
|
||||
c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
|
||||
@@ -449,7 +449,7 @@ void main() {
|
||||
// second step: pca+map along principal axis
|
||||
OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
|
||||
if (minEndp16 != maxEndp16) {
|
||||
float3 colors[4];
|
||||
vec3 colors[4];
|
||||
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
||||
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
||||
}
|
||||
@@ -461,7 +461,7 @@ void main() {
|
||||
|
||||
if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
|
||||
if (minEndp16 != maxEndp16) {
|
||||
float3 colors[4];
|
||||
vec3 colors[4];
|
||||
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
||||
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
||||
} else {
|
||||
@@ -482,10 +482,10 @@ void main() {
|
||||
mask ^= 0x55555555u;
|
||||
}
|
||||
|
||||
uint2 outputBytes;
|
||||
uvec2 outputBytes;
|
||||
outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u);
|
||||
outputBytes.y = mask;
|
||||
|
||||
uint2 dstUV = gl_GlobalInvocationID.xy;
|
||||
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
|
||||
uvec2 dstUV = gl_GlobalInvocationID.xy;
|
||||
imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
|
||||
}
|
||||
|
||||
@@ -6,12 +6,10 @@ signed = "#define SNORM";
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
|
||||
#VERSION_DEFINES
|
||||
|
||||
shared float2 g_minMaxValues[4u * 4u * 4u];
|
||||
shared uint2 g_mask[4u * 4u];
|
||||
shared vec2 g_minMaxValues[4u * 4u * 4u];
|
||||
shared uvec2 g_mask[4u * 4u];
|
||||
|
||||
layout(binding = 0) uniform sampler2D srcTex;
|
||||
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||
@@ -40,30 +38,30 @@ layout(local_size_x = 4, //
|
||||
/// - Long threads (e.g. 1 thread per block) miss parallelism opportunities
|
||||
void main() {
|
||||
float minVal, maxVal;
|
||||
float4 srcPixel;
|
||||
vec4 srcPixel;
|
||||
|
||||
const uint blockThreadId = gl_LocalInvocationID.x;
|
||||
|
||||
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
|
||||
const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
|
||||
|
||||
for (uint i = 0u; i < 4u; ++i) {
|
||||
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId);
|
||||
const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i, blockThreadId);
|
||||
|
||||
const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw;
|
||||
const vec4 value = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyzw;
|
||||
srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w);
|
||||
srcPixel[i] *= 255.0f;
|
||||
}
|
||||
|
||||
minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z);
|
||||
maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z);
|
||||
minVal = min(srcPixel.x, min(srcPixel.y, srcPixel.z));
|
||||
maxVal = max(srcPixel.x, max(srcPixel.y, srcPixel.z));
|
||||
minVal = min(minVal, srcPixel.w);
|
||||
maxVal = max(maxVal, srcPixel.w);
|
||||
|
||||
const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u);
|
||||
const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y;
|
||||
|
||||
g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal);
|
||||
g_mask[maskIdxBase] = uint2(0u, 0u);
|
||||
g_minMaxValues[minMaxIdxBase + blockThreadId] = vec2(minVal, maxVal);
|
||||
g_mask[maskIdxBase] = uvec2(0u, 0u);
|
||||
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
@@ -133,21 +131,21 @@ void main() {
|
||||
|
||||
if (blockThreadId == 0u) {
|
||||
// Save data
|
||||
uint2 outputBytes;
|
||||
uvec2 outputBytes;
|
||||
|
||||
#ifdef SNORM
|
||||
outputBytes.x =
|
||||
packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
|
||||
packSnorm4x8(vec4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
|
||||
minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f));
|
||||
#else
|
||||
outputBytes.x = packUnorm4x8(
|
||||
float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
|
||||
vec4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
|
||||
#endif
|
||||
|
||||
outputBytes.x |= g_mask[maskIdxBase].x;
|
||||
outputBytes.y = g_mask[maskIdxBase].y;
|
||||
|
||||
uint2 dstUV = gl_GlobalInvocationID.yz;
|
||||
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
|
||||
uvec2 dstUV = gl_GlobalInvocationID.yz;
|
||||
imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,24 +6,22 @@ unsigned = "#define QUALITY"; // The "Quality" preset causes artifacting on sign
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
|
||||
#VERSION_DEFINES
|
||||
|
||||
float3 f32tof16(float3 value) {
|
||||
return float3(packHalf2x16(float2(value.x, 0.0)),
|
||||
packHalf2x16(float2(value.y, 0.0)),
|
||||
packHalf2x16(float2(value.z, 0.0)));
|
||||
vec3 f32tof16(vec3 value) {
|
||||
return vec3(packHalf2x16(vec2(value.x, 0.0)),
|
||||
packHalf2x16(vec2(value.y, 0.0)),
|
||||
packHalf2x16(vec2(value.z, 0.0)));
|
||||
}
|
||||
|
||||
float3 f16tof32(uint3 value) {
|
||||
return float3(unpackHalf2x16(value.x).x,
|
||||
vec3 f16tof32(uvec3 value) {
|
||||
return vec3(unpackHalf2x16(value.x).x,
|
||||
unpackHalf2x16(value.y).x,
|
||||
unpackHalf2x16(value.z).x);
|
||||
}
|
||||
|
||||
float f32tof16(float value) {
|
||||
return packHalf2x16(float2(value.x, 0.0));
|
||||
return packHalf2x16(vec2(value.x, 0.0));
|
||||
}
|
||||
|
||||
float f16tof32(uint value) {
|
||||
@@ -34,7 +32,7 @@ layout(binding = 0) uniform sampler2D srcTexture;
|
||||
layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||
|
||||
layout(push_constant, std430) uniform Params {
|
||||
float2 p_textureSizeRcp;
|
||||
vec2 p_textureSizeRcp;
|
||||
uint padding0;
|
||||
uint padding1;
|
||||
}
|
||||
@@ -69,7 +67,7 @@ float CrossCalcMSLE(float a, float b) {
|
||||
return result;
|
||||
}
|
||||
|
||||
float CalcMSLE(float3 a, float3 b) {
|
||||
float CalcMSLE(vec3 a, vec3 b) {
|
||||
float result = 0.0f;
|
||||
if (isNegative(a.x) != isNegative(b.x)) {
|
||||
result += CrossCalcMSLE(a.x, b.x);
|
||||
@@ -91,32 +89,32 @@ float CalcMSLE(float3 a, float3 b) {
|
||||
}
|
||||
|
||||
// Adapt the log function to make sense when a < 0
|
||||
float3 customLog2(float3 a) {
|
||||
return float3(
|
||||
vec3 customLog2(vec3 a) {
|
||||
return vec3(
|
||||
a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f),
|
||||
a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f),
|
||||
a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f));
|
||||
}
|
||||
|
||||
// Inverse of customLog2()
|
||||
float3 customExp2(float3 a) {
|
||||
return float3(
|
||||
vec3 customExp2(vec3 a) {
|
||||
return vec3(
|
||||
a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f),
|
||||
a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f),
|
||||
a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f));
|
||||
}
|
||||
#else
|
||||
float CalcMSLE(float3 a, float3 b) {
|
||||
float3 err = log2((b + 1.0f) / (a + 1.0f));
|
||||
float CalcMSLE(vec3 a, vec3 b) {
|
||||
vec3 err = log2((b + 1.0f) / (a + 1.0f));
|
||||
err = err * err;
|
||||
return err.x + err.y + err.z;
|
||||
}
|
||||
|
||||
float3 customLog2(float3 a) {
|
||||
vec3 customLog2(vec3 a) {
|
||||
return log2(a + 1.0f);
|
||||
}
|
||||
|
||||
float3 customExp2(float3 a) {
|
||||
vec3 customExp2(vec3 a) {
|
||||
return exp2(a) - 1.0f;
|
||||
}
|
||||
#endif
|
||||
@@ -157,98 +155,98 @@ uint Pattern(uint p, uint i) {
|
||||
|
||||
#ifndef SIGNED
|
||||
//UF
|
||||
float3 Quantize7(float3 x) {
|
||||
vec3 Quantize7(vec3 x) {
|
||||
return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Quantize9(float3 x) {
|
||||
vec3 Quantize9(vec3 x) {
|
||||
return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Quantize10(float3 x) {
|
||||
vec3 Quantize10(vec3 x) {
|
||||
return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Unquantize7(float3 x) {
|
||||
vec3 Unquantize7(vec3 x) {
|
||||
return (x * 65536.0f + 0x8000) / 128.0f;
|
||||
}
|
||||
|
||||
float3 Unquantize9(float3 x) {
|
||||
vec3 Unquantize9(vec3 x) {
|
||||
return (x * 65536.0f + 0x8000) / 512.0f;
|
||||
}
|
||||
|
||||
float3 Unquantize10(float3 x) {
|
||||
vec3 Unquantize10(vec3 x) {
|
||||
return (x * 65536.0f + 0x8000) / 1024.0f;
|
||||
}
|
||||
|
||||
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
|
||||
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
|
||||
return f16tof32(uint3(comp));
|
||||
vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
|
||||
vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
|
||||
return f16tof32(uvec3(comp));
|
||||
}
|
||||
#else
|
||||
//SF
|
||||
|
||||
float3 cmpSign(float3 value) {
|
||||
float3 signVal;
|
||||
vec3 cmpSign(vec3 value) {
|
||||
vec3 signVal;
|
||||
signVal.x = value.x >= 0.0f ? 1.0f : -1.0f;
|
||||
signVal.y = value.y >= 0.0f ? 1.0f : -1.0f;
|
||||
signVal.z = value.z >= 0.0f ? 1.0f : -1.0f;
|
||||
return signVal;
|
||||
}
|
||||
|
||||
float3 Quantize7(float3 x) {
|
||||
float3 signVal = cmpSign(x);
|
||||
vec3 Quantize7(vec3 x) {
|
||||
vec3 signVal = cmpSign(x);
|
||||
return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Quantize9(float3 x) {
|
||||
float3 signVal = cmpSign(x);
|
||||
vec3 Quantize9(vec3 x) {
|
||||
vec3 signVal = cmpSign(x);
|
||||
return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Quantize10(float3 x) {
|
||||
float3 signVal = cmpSign(x);
|
||||
vec3 Quantize10(vec3 x) {
|
||||
vec3 signVal = cmpSign(x);
|
||||
return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f);
|
||||
}
|
||||
|
||||
float3 Unquantize7(float3 x) {
|
||||
float3 signVal = sign(x);
|
||||
vec3 Unquantize7(vec3 x) {
|
||||
vec3 signVal = sign(x);
|
||||
x = abs(x);
|
||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
|
||||
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
|
||||
finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x;
|
||||
finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y;
|
||||
finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z;
|
||||
return finalVal;
|
||||
}
|
||||
|
||||
float3 Unquantize9(float3 x) {
|
||||
float3 signVal = sign(x);
|
||||
vec3 Unquantize9(vec3 x) {
|
||||
vec3 signVal = sign(x);
|
||||
x = abs(x);
|
||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
|
||||
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
|
||||
finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x;
|
||||
finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y;
|
||||
finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z;
|
||||
return finalVal;
|
||||
}
|
||||
|
||||
float3 Unquantize10(float3 x) {
|
||||
float3 signVal = sign(x);
|
||||
vec3 Unquantize10(vec3 x) {
|
||||
vec3 signVal = sign(x);
|
||||
x = abs(x);
|
||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
|
||||
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
|
||||
finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x;
|
||||
finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y;
|
||||
finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z;
|
||||
return finalVal;
|
||||
}
|
||||
|
||||
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
|
||||
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
|
||||
return f16tof32(uint3(comp));
|
||||
vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
|
||||
vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
|
||||
return f16tof32(uvec3(comp));
|
||||
}
|
||||
#endif
|
||||
|
||||
void Swap(inout float3 a, inout float3 b) {
|
||||
float3 tmp = a;
|
||||
void Swap(inout vec3 a, inout vec3 b) {
|
||||
vec3 tmp = a;
|
||||
a = b;
|
||||
b = tmp;
|
||||
}
|
||||
@@ -270,8 +268,8 @@ uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
|
||||
}
|
||||
|
||||
// This adds a bitflag to quantized values that signifies whether they are negative.
|
||||
void SignExtend(inout float3 v1, uint mask, uint signFlag) {
|
||||
int3 v = int3(v1);
|
||||
void SignExtend(inout vec3 v1, uint mask, uint signFlag) {
|
||||
ivec3 v = ivec3(v1);
|
||||
v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0);
|
||||
v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0);
|
||||
v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0);
|
||||
@@ -279,38 +277,39 @@ void SignExtend(inout float3 v1, uint mask, uint signFlag) {
|
||||
}
|
||||
|
||||
// Encodes a block with mode 11 (2x 10-bit endpoints).
|
||||
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
||||
void EncodeP1(inout uvec4 block, inout float blockMSLE, vec3 texels[16]) {
|
||||
// compute endpoints (min/max RGB bbox)
|
||||
float3 blockMin = texels[0];
|
||||
float3 blockMax = texels[0];
|
||||
vec3 blockMin = texels[0];
|
||||
vec3 blockMax = texels[0];
|
||||
for (uint i = 1u; i < 16u; ++i) {
|
||||
blockMin = min(blockMin, texels[i]);
|
||||
blockMax = max(blockMax, texels[i]);
|
||||
}
|
||||
|
||||
// refine endpoints in log2 RGB space
|
||||
float3 refinedBlockMin = blockMax;
|
||||
float3 refinedBlockMax = blockMin;
|
||||
vec3 refinedBlockMin = blockMax;
|
||||
vec3 refinedBlockMax = blockMin;
|
||||
for (uint i = 0u; i < 16u; ++i) {
|
||||
refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
|
||||
refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
|
||||
}
|
||||
|
||||
float3 logBlockMax = customLog2(blockMax);
|
||||
float3 logBlockMin = customLog2(blockMin);
|
||||
float3 logRefinedBlockMax = customLog2(refinedBlockMax);
|
||||
float3 logRefinedBlockMin = customLog2(refinedBlockMin);
|
||||
float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
|
||||
vec3 logBlockMax = customLog2(blockMax);
|
||||
vec3 logBlockMin = customLog2(blockMin);
|
||||
vec3 logRefinedBlockMax = customLog2(refinedBlockMax);
|
||||
vec3 logRefinedBlockMin = customLog2(refinedBlockMin);
|
||||
vec3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
|
||||
|
||||
logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
|
||||
logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
|
||||
blockMin = customExp2(logBlockMin);
|
||||
blockMax = customExp2(logBlockMax);
|
||||
|
||||
float3 blockDir = blockMax - blockMin;
|
||||
vec3 blockDir = blockMax - blockMin;
|
||||
blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
|
||||
|
||||
float3 endpoint0 = Quantize10(blockMin);
|
||||
float3 endpoint1 = Quantize10(blockMax);
|
||||
vec3 endpoint0 = Quantize10(blockMin);
|
||||
vec3 endpoint1 = Quantize10(blockMax);
|
||||
float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
|
||||
float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
|
||||
|
||||
@@ -336,12 +335,12 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
||||
}
|
||||
|
||||
// compute compression error (MSLE)
|
||||
float3 endpoint0Unq = Unquantize10(endpoint0);
|
||||
float3 endpoint1Unq = Unquantize10(endpoint1);
|
||||
vec3 endpoint0Unq = Unquantize10(endpoint0);
|
||||
vec3 endpoint1Unq = Unquantize10(endpoint1);
|
||||
float msle = 0.0f;
|
||||
for (uint i = 0u; i < 16u; ++i) {
|
||||
float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
|
||||
float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
|
||||
vec3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
|
||||
|
||||
msle += CalcMSLE(texels[i], texelUnc);
|
||||
}
|
||||
@@ -384,19 +383,19 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
||||
block.w |= indices[15] << 28u;
|
||||
}
|
||||
|
||||
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) {
|
||||
float3 w = Point - PointOnLine;
|
||||
float3 x = w - dot(w, LineDirection) * LineDirection;
|
||||
float DistToLineSq(vec3 PointOnLine, vec3 LineDirection, vec3 Point) {
|
||||
vec3 w = Point - PointOnLine;
|
||||
vec3 x = w - dot(w, LineDirection) * LineDirection;
|
||||
|
||||
return dot(x, x);
|
||||
}
|
||||
|
||||
// Gets the deviation from the source data of a particular pattern (smaller is better).
|
||||
float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
||||
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
float EvaluateP2Pattern(uint pattern, vec3 texels[16]) {
|
||||
vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
|
||||
for (uint i = 0; i < 16; ++i) {
|
||||
uint paletteID = Pattern(pattern, i);
|
||||
@@ -409,8 +408,8 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
||||
}
|
||||
}
|
||||
|
||||
float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
|
||||
float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
|
||||
vec3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
|
||||
vec3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
|
||||
|
||||
float sqDistanceFromLine = 0.0f;
|
||||
|
||||
@@ -427,11 +426,11 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
||||
}
|
||||
|
||||
// Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding.
|
||||
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) {
|
||||
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
void EncodeP2Pattern(inout uvec4 block, inout float blockMSLE, uint pattern, vec3 texels[16]) {
|
||||
vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||
vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||
|
||||
for (uint i = 0u; i < 16u; ++i) {
|
||||
uint paletteID = Pattern(pattern, i);
|
||||
@@ -444,8 +443,8 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
||||
}
|
||||
}
|
||||
|
||||
float3 p0BlockDir = p0BlockMax - p0BlockMin;
|
||||
float3 p1BlockDir = p1BlockMax - p1BlockMin;
|
||||
vec3 p0BlockDir = p0BlockMax - p0BlockMin;
|
||||
vec3 p1BlockDir = p1BlockMax - p1BlockMin;
|
||||
p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
|
||||
p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
|
||||
|
||||
@@ -479,15 +478,15 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
||||
indices[i] = paletteID == 0u ? p0Index : p1Index;
|
||||
}
|
||||
|
||||
float3 endpoint760 = floor(Quantize7(p0BlockMin));
|
||||
float3 endpoint761 = floor(Quantize7(p0BlockMax));
|
||||
float3 endpoint762 = floor(Quantize7(p1BlockMin));
|
||||
float3 endpoint763 = floor(Quantize7(p1BlockMax));
|
||||
vec3 endpoint760 = floor(Quantize7(p0BlockMin));
|
||||
vec3 endpoint761 = floor(Quantize7(p0BlockMax));
|
||||
vec3 endpoint762 = floor(Quantize7(p1BlockMin));
|
||||
vec3 endpoint763 = floor(Quantize7(p1BlockMax));
|
||||
|
||||
float3 endpoint950 = floor(Quantize9(p0BlockMin));
|
||||
float3 endpoint951 = floor(Quantize9(p0BlockMax));
|
||||
float3 endpoint952 = floor(Quantize9(p1BlockMin));
|
||||
float3 endpoint953 = floor(Quantize9(p1BlockMax));
|
||||
vec3 endpoint950 = floor(Quantize9(p0BlockMin));
|
||||
vec3 endpoint951 = floor(Quantize9(p0BlockMax));
|
||||
vec3 endpoint952 = floor(Quantize9(p1BlockMin));
|
||||
vec3 endpoint953 = floor(Quantize9(p1BlockMax));
|
||||
|
||||
endpoint761 = endpoint761 - endpoint760;
|
||||
endpoint762 = endpoint762 - endpoint760;
|
||||
@@ -514,28 +513,28 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
||||
endpoint950 = clamp(endpoint950, -maxVal9, maxVal9);
|
||||
#endif
|
||||
|
||||
float3 endpoint760Unq = Unquantize7(endpoint760);
|
||||
float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
|
||||
float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
|
||||
float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
|
||||
float3 endpoint950Unq = Unquantize9(endpoint950);
|
||||
float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
|
||||
float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
|
||||
float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
|
||||
vec3 endpoint760Unq = Unquantize7(endpoint760);
|
||||
vec3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
|
||||
vec3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
|
||||
vec3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
|
||||
vec3 endpoint950Unq = Unquantize9(endpoint950);
|
||||
vec3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
|
||||
vec3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
|
||||
vec3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
|
||||
|
||||
float msle76 = 0.0f;
|
||||
float msle95 = 0.0f;
|
||||
for (uint i = 0u; i < 16u; ++i) {
|
||||
uint paletteID = Pattern(pattern, i);
|
||||
|
||||
float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
|
||||
float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
|
||||
float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
|
||||
float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
|
||||
vec3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
|
||||
vec3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
|
||||
vec3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
|
||||
vec3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
|
||||
|
||||
float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
|
||||
float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
|
||||
float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
|
||||
vec3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
|
||||
vec3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
|
||||
|
||||
msle76 += CalcMSLE(texels[i], texelUnc76);
|
||||
msle95 += CalcMSLE(texels[i], texelUnc95);
|
||||
@@ -558,7 +557,7 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
||||
float p2MSLE = min(msle76, msle95);
|
||||
if (p2MSLE < blockMSLE) {
|
||||
blockMSLE = p2MSLE;
|
||||
block = uint4(0u, 0u, 0u, 0u);
|
||||
block = uvec4(0u, 0u, 0u, 0u);
|
||||
|
||||
if (p2MSLE == msle76) {
|
||||
// 7.6
|
||||
@@ -681,43 +680,43 @@ void main() {
|
||||
// 4 5 6 7
|
||||
// 8 9 10 11
|
||||
// 12 13 14 15
|
||||
float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
|
||||
float2 block0UV = uv;
|
||||
float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f);
|
||||
float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y);
|
||||
float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
|
||||
float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV);
|
||||
float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV);
|
||||
float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV);
|
||||
float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV);
|
||||
float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV);
|
||||
float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV);
|
||||
float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV);
|
||||
float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV);
|
||||
float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV);
|
||||
float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV);
|
||||
float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV);
|
||||
float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV);
|
||||
vec2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
|
||||
vec2 block0UV = uv;
|
||||
vec2 block1UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 0.0f);
|
||||
vec2 block2UV = uv + vec2(0.0f, 2.0f * params.p_textureSizeRcp.y);
|
||||
vec2 block3UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
|
||||
vec4 block0X = textureGather(srcTexture, block0UV, 0);
|
||||
vec4 block1X = textureGather(srcTexture, block1UV, 0);
|
||||
vec4 block2X = textureGather(srcTexture, block2UV, 0);
|
||||
vec4 block3X = textureGather(srcTexture, block3UV, 0);
|
||||
vec4 block0Y = textureGather(srcTexture, block0UV, 1);
|
||||
vec4 block1Y = textureGather(srcTexture, block1UV, 1);
|
||||
vec4 block2Y = textureGather(srcTexture, block2UV, 1);
|
||||
vec4 block3Y = textureGather(srcTexture, block3UV, 1);
|
||||
vec4 block0Z = textureGather(srcTexture, block0UV, 2);
|
||||
vec4 block1Z = textureGather(srcTexture, block1UV, 2);
|
||||
vec4 block2Z = textureGather(srcTexture, block2UV, 2);
|
||||
vec4 block3Z = textureGather(srcTexture, block3UV, 2);
|
||||
|
||||
float3 texels[16];
|
||||
texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
|
||||
texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
|
||||
texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
|
||||
texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
|
||||
texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
|
||||
texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
|
||||
texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
|
||||
texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
|
||||
texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
|
||||
texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
|
||||
texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
|
||||
texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
|
||||
texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
|
||||
texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
|
||||
texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
|
||||
texels[15] = float3(block3X.y, block3Y.y, block3Z.y);
|
||||
vec3 texels[16];
|
||||
texels[0] = vec3(block0X.w, block0Y.w, block0Z.w);
|
||||
texels[1] = vec3(block0X.z, block0Y.z, block0Z.z);
|
||||
texels[2] = vec3(block1X.w, block1Y.w, block1Z.w);
|
||||
texels[3] = vec3(block1X.z, block1Y.z, block1Z.z);
|
||||
texels[4] = vec3(block0X.x, block0Y.x, block0Z.x);
|
||||
texels[5] = vec3(block0X.y, block0Y.y, block0Z.y);
|
||||
texels[6] = vec3(block1X.x, block1Y.x, block1Z.x);
|
||||
texels[7] = vec3(block1X.y, block1Y.y, block1Z.y);
|
||||
texels[8] = vec3(block2X.w, block2Y.w, block2Z.w);
|
||||
texels[9] = vec3(block2X.z, block2Y.z, block2Z.z);
|
||||
texels[10] = vec3(block3X.w, block3Y.w, block3Z.w);
|
||||
texels[11] = vec3(block3X.z, block3Y.z, block3Z.z);
|
||||
texels[12] = vec3(block2X.x, block2Y.x, block2Z.x);
|
||||
texels[13] = vec3(block2X.y, block2Y.y, block2Z.y);
|
||||
texels[14] = vec3(block3X.x, block3Y.x, block3Z.x);
|
||||
texels[15] = vec3(block3X.y, block3Y.y, block3Z.y);
|
||||
|
||||
uint4 block = uint4(0u, 0u, 0u, 0u);
|
||||
uvec4 block = uvec4(0u, 0u, 0u, 0u);
|
||||
float blockMSLE = 0.0f;
|
||||
|
||||
EncodeP1(block, blockMSLE, texels);
|
||||
@@ -738,5 +737,5 @@ void main() {
|
||||
EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
|
||||
#endif
|
||||
|
||||
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
|
||||
imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), block);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user