You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-04 12:00:25 +00:00
Betsy: Remove OGRE aliases
This commit is contained in:
@@ -1,75 +0,0 @@
|
|||||||
#define min3(a, b, c) min(a, min(b, c))
|
|
||||||
#define max3(a, b, c) max(a, max(b, c))
|
|
||||||
|
|
||||||
#define float2 vec2
|
|
||||||
#define float3 vec3
|
|
||||||
#define float4 vec4
|
|
||||||
|
|
||||||
#define int2 ivec2
|
|
||||||
#define int3 ivec3
|
|
||||||
#define int4 ivec4
|
|
||||||
|
|
||||||
#define uint2 uvec2
|
|
||||||
#define uint3 uvec3
|
|
||||||
#define uint4 uvec4
|
|
||||||
|
|
||||||
#define float2x2 mat2
|
|
||||||
#define float3x3 mat3
|
|
||||||
#define float4x4 mat4
|
|
||||||
#define ogre_float4x3 mat3x4
|
|
||||||
|
|
||||||
#define ushort uint
|
|
||||||
#define ushort3 uint3
|
|
||||||
#define ushort4 uint4
|
|
||||||
|
|
||||||
//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal
|
|
||||||
#define rshort int
|
|
||||||
#define rshort2 int2
|
|
||||||
#define rint int
|
|
||||||
//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal
|
|
||||||
#define wshort2 int2
|
|
||||||
#define wshort3 int3
|
|
||||||
|
|
||||||
#define toFloat3x3(x) mat3(x)
|
|
||||||
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
|
|
||||||
|
|
||||||
#define mul(x, y) ((x) * (y))
|
|
||||||
#define saturate(x) clamp((x), 0.0, 1.0)
|
|
||||||
#define lerp mix
|
|
||||||
#define rsqrt inversesqrt
|
|
||||||
#define INLINE
|
|
||||||
#define NO_INTERPOLATION_PREFIX flat
|
|
||||||
#define NO_INTERPOLATION_SUFFIX
|
|
||||||
|
|
||||||
#define PARAMS_ARG_DECL
|
|
||||||
#define PARAMS_ARG
|
|
||||||
|
|
||||||
#define reversebits bitfieldReverse
|
|
||||||
|
|
||||||
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
|
|
||||||
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
|
|
||||||
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
|
|
||||||
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
|
|
||||||
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
|
|
||||||
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
|
|
||||||
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
|
|
||||||
#define OGRE_ddx(val) dFdx(val)
|
|
||||||
#define OGRE_ddy(val) dFdy(val)
|
|
||||||
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
|
|
||||||
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
|
|
||||||
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
|
|
||||||
|
|
||||||
#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
|
|
||||||
|
|
||||||
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
|
|
||||||
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
|
|
||||||
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
|
|
||||||
|
|
||||||
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
|
|
||||||
|
|
||||||
#define OGRE_SAMPLER_ARG_DECL(samplerName)
|
|
||||||
#define OGRE_SAMPLER_ARG(samplerName)
|
|
||||||
|
|
||||||
#define OGRE_Texture3D_float4 sampler3D
|
|
||||||
#define OGRE_OUT_REF(declType, variableName) out declType variableName
|
|
||||||
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
|
|
||||||
@@ -1,12 +1,10 @@
|
|||||||
// RGB and Alpha components of ETC2 RGBA are computed separately.
|
// RGB and Alpha components of ETC2 RGBA/DXT5 are computed separately.
|
||||||
// This compute shader merely stitches them together to form the final result
|
// This compute shader merely stitches them together to form the final result
|
||||||
// It's also used by RG11 driver to stitch two R11 into one RG11
|
// It's also used by RG11/BC4 driver to stitch two R11/BC4 into one RG11/BC5
|
||||||
|
|
||||||
#[compute]
|
#[compute]
|
||||||
#version 450
|
#version 450
|
||||||
|
|
||||||
#include "CrossPlatformSettings_piece_all.glsl"
|
|
||||||
|
|
||||||
layout(local_size_x = 8, //
|
layout(local_size_x = 8, //
|
||||||
local_size_y = 8, //
|
local_size_y = 8, //
|
||||||
local_size_z = 1) in;
|
local_size_z = 1) in;
|
||||||
@@ -16,8 +14,8 @@ layout(binding = 1) uniform usampler2D srcAlpha;
|
|||||||
layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy;
|
uvec2 rgbBlock = texelFetch(srcRGB, ivec2(gl_GlobalInvocationID.xy), 0).xy;
|
||||||
uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy;
|
uvec2 alphaBlock = texelFetch(srcAlpha, ivec2(gl_GlobalInvocationID.xy), 0).xy;
|
||||||
|
|
||||||
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy));
|
imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), uvec4(rgbBlock.xy, alphaBlock.xy));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ dithered = "#define BC1_DITHER";
|
|||||||
#[compute]
|
#[compute]
|
||||||
#version 450
|
#version 450
|
||||||
|
|
||||||
#include "CrossPlatformSettings_piece_all.glsl"
|
#VERSION_DEFINES
|
||||||
|
|
||||||
#define FLT_MAX 340282346638528859811704183484516925440.0f
|
#define FLT_MAX 340282346638528859811704183484516925440.0f
|
||||||
|
|
||||||
@@ -14,8 +14,8 @@ layout(binding = 0) uniform sampler2D srcTex;
|
|||||||
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||||
|
|
||||||
layout(std430, binding = 2) readonly restrict buffer globalBuffer {
|
layout(std430, binding = 2) readonly restrict buffer globalBuffer {
|
||||||
float2 c_oMatch5[256];
|
vec2 c_oMatch5[256];
|
||||||
float2 c_oMatch6[256];
|
vec2 c_oMatch6[256];
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(push_constant, std430) uniform Params {
|
layout(push_constant, std430) uniform Params {
|
||||||
@@ -28,14 +28,14 @@ layout(local_size_x = 8, //
|
|||||||
local_size_y = 8, //
|
local_size_y = 8, //
|
||||||
local_size_z = 1) in;
|
local_size_z = 1) in;
|
||||||
|
|
||||||
float3 rgb565to888(float rgb565) {
|
vec3 rgb565to888(float rgb565) {
|
||||||
float3 retVal;
|
vec3 retVal;
|
||||||
retVal.x = floor(rgb565 / 2048.0f);
|
retVal.x = floor(rgb565 / 2048.0f);
|
||||||
retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f);
|
retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f);
|
||||||
retVal.z = floor(mod(rgb565, 32.0f));
|
retVal.z = floor(mod(rgb565, 32.0f));
|
||||||
|
|
||||||
// This is the correct 565 to 888 conversion:
|
// This is the correct 565 to 888 conversion:
|
||||||
// rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
|
// rgb = floor( rgb * ( 255.0f / vec3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
|
||||||
//
|
//
|
||||||
// However stb_dxt follows a different one:
|
// However stb_dxt follows a different one:
|
||||||
// rb = floor( rb * ( 256 / 32 + 8 / 32 ) );
|
// rb = floor( rb * ( 256 / 32 + 8 / 32 ) );
|
||||||
@@ -52,10 +52,10 @@ float3 rgb565to888(float rgb565) {
|
|||||||
// Perhaps when we make 888 -> 565 -> 888 it doesn't matter
|
// Perhaps when we make 888 -> 565 -> 888 it doesn't matter
|
||||||
// because they end up mapping to the original number
|
// because they end up mapping to the original number
|
||||||
|
|
||||||
return floor(retVal * float3(8.25f, 4.0625f, 8.25f));
|
return floor(retVal * vec3(8.25f, 4.0625f, 8.25f));
|
||||||
}
|
}
|
||||||
|
|
||||||
float rgb888to565(float3 rgbValue) {
|
float rgb888to565(vec3 rgbValue) {
|
||||||
rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f);
|
rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f);
|
||||||
rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
|
rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ float rgb888to565(float3 rgbValue) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// linear interpolation at 1/3 point between a and b, using desired rounding type
|
// linear interpolation at 1/3 point between a and b, using desired rounding type
|
||||||
float3 lerp13(float3 a, float3 b) {
|
vec3 lerp13(vec3 a, vec3 b) {
|
||||||
#ifdef STB_DXT_USE_ROUNDING_BIAS
|
#ifdef STB_DXT_USE_ROUNDING_BIAS
|
||||||
// with rounding bias
|
// with rounding bias
|
||||||
return a + floor((b - a) * (1.0f / 3.0f) + 0.5f);
|
return a + floor((b - a) * (1.0f / 3.0f) + 0.5f);
|
||||||
@@ -74,7 +74,7 @@ float3 lerp13(float3 a, float3 b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Unpacks a block of 4 colors from two 16-bit endpoints
|
/// Unpacks a block of 4 colors from two 16-bit endpoints
|
||||||
void EvalColors(out float3 colors[4], float c0, float c1) {
|
void EvalColors(out vec3 colors[4], float c0, float c1) {
|
||||||
colors[0] = rgb565to888(c0);
|
colors[0] = rgb565to888(c0);
|
||||||
colors[1] = rgb565to888(c1);
|
colors[1] = rgb565to888(c1);
|
||||||
colors[2] = lerp13(colors[0], colors[1]);
|
colors[2] = lerp13(colors[0], colors[1]);
|
||||||
@@ -89,13 +89,13 @@ void EvalColors(out float3 colors[4], float c0, float c1) {
|
|||||||
*/
|
*/
|
||||||
void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
|
void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
|
||||||
// determine color distribution
|
// determine color distribution
|
||||||
float3 avgColor;
|
vec3 avgColor;
|
||||||
float3 minColor;
|
vec3 minColor;
|
||||||
float3 maxColor;
|
vec3 maxColor;
|
||||||
|
|
||||||
avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
|
avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
|
||||||
for (int i = 1; i < 16; ++i) {
|
for (int i = 1; i < 16; ++i) {
|
||||||
const float3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
const vec3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
||||||
avgColor += currColorUnorm;
|
avgColor += currColorUnorm;
|
||||||
minColor = min(minColor, currColorUnorm);
|
minColor = min(minColor, currColorUnorm);
|
||||||
maxColor = max(maxColor, currColorUnorm);
|
maxColor = max(maxColor, currColorUnorm);
|
||||||
@@ -112,8 +112,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||||
float3 rgbDiff = currColor - avgColor;
|
vec3 rgbDiff = currColor - avgColor;
|
||||||
|
|
||||||
cov[0] += rgbDiff.r * rgbDiff.r;
|
cov[0] += rgbDiff.r * rgbDiff.r;
|
||||||
cov[1] += rgbDiff.r * rgbDiff.g;
|
cov[1] += rgbDiff.r * rgbDiff.g;
|
||||||
@@ -128,7 +128,7 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
|||||||
cov[i] /= 255.0f;
|
cov[i] /= 255.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 vF = maxColor - minColor;
|
vec3 vF = maxColor - minColor;
|
||||||
|
|
||||||
const int nIterPower = 4;
|
const int nIterPower = 4;
|
||||||
for (int iter = 0; iter < nIterPower; ++iter) {
|
for (int iter = 0; iter < nIterPower; ++iter) {
|
||||||
@@ -141,8 +141,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
|||||||
vF.b = b;
|
vF.b = b;
|
||||||
}
|
}
|
||||||
|
|
||||||
float magn = max3(abs(vF.r), abs(vF.g), abs(vF.b));
|
float magn = max(abs(vF.r), max(abs(vF.g), abs(vF.b)));
|
||||||
float3 v;
|
vec3 v;
|
||||||
|
|
||||||
if (magn < 4.0f) { // too small, default to luminance
|
if (magn < 4.0f) { // too small, default to luminance
|
||||||
v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000.
|
v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000.
|
||||||
@@ -153,11 +153,11 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pick colors at extreme points
|
// Pick colors at extreme points
|
||||||
float3 minEndpoint, maxEndpoint;
|
vec3 minEndpoint, maxEndpoint;
|
||||||
float minDot = FLT_MAX;
|
float minDot = FLT_MAX;
|
||||||
float maxDot = -FLT_MAX;
|
float maxDot = -FLT_MAX;
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||||
const float dotValue = dot(currColor, v);
|
const float dotValue = dot(currColor, v);
|
||||||
|
|
||||||
if (dotValue < minDot) {
|
if (dotValue < minDot) {
|
||||||
@@ -176,9 +176,9 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The color matching function
|
// The color matching function
|
||||||
uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
uint MatchColorsBlock(const uint srcPixelsBlock[16], vec3 color[4]) {
|
||||||
uint mask = 0u;
|
uint mask = 0u;
|
||||||
float3 dir = color[0] - color[1];
|
vec3 dir = color[0] - color[1];
|
||||||
float stops[4];
|
float stops[4];
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
@@ -200,7 +200,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
|||||||
#ifndef BC1_DITHER
|
#ifndef BC1_DITHER
|
||||||
// the version without dithering is straightforward
|
// the version without dithering is straightforward
|
||||||
for (uint i = 16u; i-- > 0u;) {
|
for (uint i = 16u; i-- > 0u;) {
|
||||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||||
|
|
||||||
const float dotValue = dot(currColor, dir);
|
const float dotValue = dot(currColor, dir);
|
||||||
mask <<= 2u;
|
mask <<= 2u;
|
||||||
@@ -213,8 +213,8 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// with floyd-steinberg dithering
|
// with floyd-steinberg dithering
|
||||||
float4 ep1 = float4(0, 0, 0, 0);
|
vec4 ep1 = vec4(0, 0, 0, 0);
|
||||||
float4 ep2 = float4(0, 0, 0, 0);
|
vec4 ep2 = vec4(0, 0, 0, 0);
|
||||||
|
|
||||||
c0Point *= 16.0f;
|
c0Point *= 16.0f;
|
||||||
halfPoint *= 16.0f;
|
halfPoint *= 16.0f;
|
||||||
@@ -224,7 +224,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
|||||||
float ditherDot;
|
float ditherDot;
|
||||||
uint lmask, step;
|
uint lmask, step;
|
||||||
|
|
||||||
float3 currColor;
|
vec3 currColor;
|
||||||
float dotValue;
|
float dotValue;
|
||||||
|
|
||||||
currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f;
|
currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f;
|
||||||
@@ -277,7 +277,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
|
|||||||
|
|
||||||
mask |= lmask << (y * 8u);
|
mask |= lmask << (y * 8u);
|
||||||
{
|
{
|
||||||
float4 tmp = ep1;
|
vec4 tmp = ep1;
|
||||||
ep1 = ep2;
|
ep1 = ep2;
|
||||||
ep2 = tmp;
|
ep2 = tmp;
|
||||||
} // swap
|
} // swap
|
||||||
@@ -300,7 +300,7 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
|||||||
{
|
{
|
||||||
// yes, linear system would be singular; solve using optimal
|
// yes, linear system would be singular; solve using optimal
|
||||||
// single-color match on average color
|
// single-color match on average color
|
||||||
float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
|
vec3 rgbVal = vec3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;
|
||||||
}
|
}
|
||||||
@@ -322,10 +322,10 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
|||||||
|
|
||||||
float akku = 0.0f;
|
float akku = 0.0f;
|
||||||
uint cm = mask;
|
uint cm = mask;
|
||||||
float3 at1 = float3(0, 0, 0);
|
vec3 at1 = vec3(0, 0, 0);
|
||||||
float3 at2 = float3(0, 0, 0);
|
vec3 at2 = vec3(0, 0, 0);
|
||||||
for (int i = 0; i < 16; ++i, cm >>= 2u) {
|
for (int i = 0; i < 16; ++i, cm >>= 2u) {
|
||||||
const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
|
||||||
|
|
||||||
const uint step = cm & 3u;
|
const uint step = cm & 3u;
|
||||||
const float w1 = w1Tab[step];
|
const float w1 = w1Tab[step];
|
||||||
@@ -341,17 +341,17 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
|||||||
const float yy = floor(mod(akku, 65535.0f) / 256.0f);
|
const float yy = floor(mod(akku, 65535.0f) / 256.0f);
|
||||||
const float xy = mod(akku, 256.0f);
|
const float xy = mod(akku, 256.0f);
|
||||||
|
|
||||||
float2 f_rb_g;
|
vec2 f_rb_g;
|
||||||
f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
|
f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
|
||||||
f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;
|
f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;
|
||||||
|
|
||||||
// solve.
|
// solve.
|
||||||
const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
|
const vec3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
|
||||||
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
|
vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
|
||||||
newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;
|
newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;
|
||||||
|
|
||||||
const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
|
const vec3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
|
||||||
float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
|
vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
|
||||||
newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
|
newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,48 +364,48 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
|
|||||||
#ifdef BC1_DITHER
|
#ifdef BC1_DITHER
|
||||||
/// Quantizes 'srcValue' which is originally in 888 (full range),
|
/// Quantizes 'srcValue' which is originally in 888 (full range),
|
||||||
/// converting it to 565 and then back to 888 (quantized)
|
/// converting it to 565 and then back to 888 (quantized)
|
||||||
float3 quant(float3 srcValue) {
|
vec3 quant(vec3 srcValue) {
|
||||||
srcValue = clamp(srcValue, 0.0f, 255.0f);
|
srcValue = clamp(srcValue, 0.0f, 255.0f);
|
||||||
// Convert 888 -> 565
|
// Convert 888 -> 565
|
||||||
srcValue = floor(srcValue * float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
|
srcValue = floor(srcValue * vec3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
|
||||||
// Convert 565 -> 888 back
|
// Convert 565 -> 888 back
|
||||||
srcValue = floor(srcValue * float3(8.25f, 4.0625f, 8.25f));
|
srcValue = floor(srcValue * vec3(8.25f, 4.0625f, 8.25f));
|
||||||
|
|
||||||
return srcValue;
|
return srcValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
|
void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
|
||||||
float3 ep1[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
|
vec3 ep1[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
|
||||||
float3 ep2[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
|
vec3 ep2[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
|
||||||
|
|
||||||
for (uint y = 0u; y < 16u; y += 4u) {
|
for (uint y = 0u; y < 16u; y += 4u) {
|
||||||
float3 srcPixel, dithPixel;
|
vec3 srcPixel, dithPixel;
|
||||||
|
|
||||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f;
|
srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f;
|
||||||
dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f)));
|
dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f)));
|
||||||
ep1[0] = srcPixel - dithPixel;
|
ep1[0] = srcPixel - dithPixel;
|
||||||
dthPixBlck[y + 0u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
dthPixBlck[y + 0u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||||
|
|
||||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f;
|
srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f;
|
||||||
dithPixel = quant(
|
dithPixel = quant(
|
||||||
srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f)));
|
srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f)));
|
||||||
ep1[1] = srcPixel - dithPixel;
|
ep1[1] = srcPixel - dithPixel;
|
||||||
dthPixBlck[y + 1u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
dthPixBlck[y + 1u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||||
|
|
||||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f;
|
srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f;
|
||||||
dithPixel = quant(
|
dithPixel = quant(
|
||||||
srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f)));
|
srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f)));
|
||||||
ep1[2] = srcPixel - dithPixel;
|
ep1[2] = srcPixel - dithPixel;
|
||||||
dthPixBlck[y + 2u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
dthPixBlck[y + 2u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||||
|
|
||||||
srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f;
|
srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f;
|
||||||
dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f)));
|
dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f)));
|
||||||
ep1[3] = srcPixel - dithPixel;
|
ep1[3] = srcPixel - dithPixel;
|
||||||
dthPixBlck[y + 3u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
|
dthPixBlck[y + 3u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
|
||||||
|
|
||||||
// swap( ep1, ep2 )
|
// swap( ep1, ep2 )
|
||||||
for (uint i = 0u; i < 4u; ++i) {
|
for (uint i = 0u; i < 4u; ++i) {
|
||||||
float3 tmp = ep1[i];
|
vec3 tmp = ep1[i];
|
||||||
ep1[i] = ep2[i];
|
ep1[i] = ep2[i];
|
||||||
ep2[i] = tmp;
|
ep2[i] = tmp;
|
||||||
}
|
}
|
||||||
@@ -419,11 +419,11 @@ void main() {
|
|||||||
bool bAllColorsEqual = true;
|
bool bAllColorsEqual = true;
|
||||||
|
|
||||||
// Load the whole 4x4 block
|
// Load the whole 4x4 block
|
||||||
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
|
const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
|
||||||
for (uint i = 0u; i < 16u; ++i) {
|
for (uint i = 0u; i < 16u; ++i) {
|
||||||
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u);
|
const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i & 0x03u, i >> 2u);
|
||||||
const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz;
|
const vec3 srcPixels0 = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyz;
|
||||||
srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f));
|
srcPixelsBlock[i] = packUnorm4x8(vec4(srcPixels0, 1.0f));
|
||||||
bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i];
|
bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -431,7 +431,7 @@ void main() {
|
|||||||
uint mask = 0u;
|
uint mask = 0u;
|
||||||
|
|
||||||
if (bAllColorsEqual) {
|
if (bAllColorsEqual) {
|
||||||
const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
|
const uvec3 rgbVal = uvec3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
|
||||||
mask = 0xAAAAAAAAu;
|
mask = 0xAAAAAAAAu;
|
||||||
maxEndp16 =
|
maxEndp16 =
|
||||||
c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
|
c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
|
||||||
@@ -449,7 +449,7 @@ void main() {
|
|||||||
// second step: pca+map along principal axis
|
// second step: pca+map along principal axis
|
||||||
OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
|
OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
|
||||||
if (minEndp16 != maxEndp16) {
|
if (minEndp16 != maxEndp16) {
|
||||||
float3 colors[4];
|
vec3 colors[4];
|
||||||
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
||||||
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
||||||
}
|
}
|
||||||
@@ -461,7 +461,7 @@ void main() {
|
|||||||
|
|
||||||
if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
|
if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
|
||||||
if (minEndp16 != maxEndp16) {
|
if (minEndp16 != maxEndp16) {
|
||||||
float3 colors[4];
|
vec3 colors[4];
|
||||||
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
|
||||||
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
mask = MatchColorsBlock(srcPixelsBlock, colors);
|
||||||
} else {
|
} else {
|
||||||
@@ -482,10 +482,10 @@ void main() {
|
|||||||
mask ^= 0x55555555u;
|
mask ^= 0x55555555u;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint2 outputBytes;
|
uvec2 outputBytes;
|
||||||
outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u);
|
outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u);
|
||||||
outputBytes.y = mask;
|
outputBytes.y = mask;
|
||||||
|
|
||||||
uint2 dstUV = gl_GlobalInvocationID.xy;
|
uvec2 dstUV = gl_GlobalInvocationID.xy;
|
||||||
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
|
imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,12 +6,10 @@ signed = "#define SNORM";
|
|||||||
#[compute]
|
#[compute]
|
||||||
#version 450
|
#version 450
|
||||||
|
|
||||||
#include "CrossPlatformSettings_piece_all.glsl"
|
|
||||||
|
|
||||||
#VERSION_DEFINES
|
#VERSION_DEFINES
|
||||||
|
|
||||||
shared float2 g_minMaxValues[4u * 4u * 4u];
|
shared vec2 g_minMaxValues[4u * 4u * 4u];
|
||||||
shared uint2 g_mask[4u * 4u];
|
shared uvec2 g_mask[4u * 4u];
|
||||||
|
|
||||||
layout(binding = 0) uniform sampler2D srcTex;
|
layout(binding = 0) uniform sampler2D srcTex;
|
||||||
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||||
@@ -40,30 +38,30 @@ layout(local_size_x = 4, //
|
|||||||
/// - Long threads (e.g. 1 thread per block) misses parallelism opportunities
|
/// - Long threads (e.g. 1 thread per block) misses parallelism opportunities
|
||||||
void main() {
|
void main() {
|
||||||
float minVal, maxVal;
|
float minVal, maxVal;
|
||||||
float4 srcPixel;
|
vec4 srcPixel;
|
||||||
|
|
||||||
const uint blockThreadId = gl_LocalInvocationID.x;
|
const uint blockThreadId = gl_LocalInvocationID.x;
|
||||||
|
|
||||||
const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
|
const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
|
||||||
|
|
||||||
for (uint i = 0u; i < 4u; ++i) {
|
for (uint i = 0u; i < 4u; ++i) {
|
||||||
const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId);
|
const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i, blockThreadId);
|
||||||
|
|
||||||
const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw;
|
const vec4 value = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyzw;
|
||||||
srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w);
|
srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w);
|
||||||
srcPixel[i] *= 255.0f;
|
srcPixel[i] *= 255.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z);
|
minVal = min(srcPixel.x, min(srcPixel.y, srcPixel.z));
|
||||||
maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z);
|
maxVal = max(srcPixel.x, max(srcPixel.y, srcPixel.z));
|
||||||
minVal = min(minVal, srcPixel.w);
|
minVal = min(minVal, srcPixel.w);
|
||||||
maxVal = max(maxVal, srcPixel.w);
|
maxVal = max(maxVal, srcPixel.w);
|
||||||
|
|
||||||
const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u);
|
const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u);
|
||||||
const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y;
|
const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y;
|
||||||
|
|
||||||
g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal);
|
g_minMaxValues[minMaxIdxBase + blockThreadId] = vec2(minVal, maxVal);
|
||||||
g_mask[maskIdxBase] = uint2(0u, 0u);
|
g_mask[maskIdxBase] = uvec2(0u, 0u);
|
||||||
|
|
||||||
memoryBarrierShared();
|
memoryBarrierShared();
|
||||||
barrier();
|
barrier();
|
||||||
@@ -133,21 +131,21 @@ void main() {
|
|||||||
|
|
||||||
if (blockThreadId == 0u) {
|
if (blockThreadId == 0u) {
|
||||||
// Save data
|
// Save data
|
||||||
uint2 outputBytes;
|
uvec2 outputBytes;
|
||||||
|
|
||||||
#ifdef SNORM
|
#ifdef SNORM
|
||||||
outputBytes.x =
|
outputBytes.x =
|
||||||
packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
|
packSnorm4x8(vec4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
|
||||||
minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f));
|
minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f));
|
||||||
#else
|
#else
|
||||||
outputBytes.x = packUnorm4x8(
|
outputBytes.x = packUnorm4x8(
|
||||||
float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
|
vec4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
outputBytes.x |= g_mask[maskIdxBase].x;
|
outputBytes.x |= g_mask[maskIdxBase].x;
|
||||||
outputBytes.y = g_mask[maskIdxBase].y;
|
outputBytes.y = g_mask[maskIdxBase].y;
|
||||||
|
|
||||||
uint2 dstUV = gl_GlobalInvocationID.yz;
|
uvec2 dstUV = gl_GlobalInvocationID.yz;
|
||||||
imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
|
imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,24 +6,22 @@ unsigned = "#define QUALITY"; // The "Quality" preset causes artifacting on sign
|
|||||||
#[compute]
|
#[compute]
|
||||||
#version 450
|
#version 450
|
||||||
|
|
||||||
#include "CrossPlatformSettings_piece_all.glsl"
|
|
||||||
|
|
||||||
#VERSION_DEFINES
|
#VERSION_DEFINES
|
||||||
|
|
||||||
float3 f32tof16(float3 value) {
|
vec3 f32tof16(vec3 value) {
|
||||||
return float3(packHalf2x16(float2(value.x, 0.0)),
|
return vec3(packHalf2x16(vec2(value.x, 0.0)),
|
||||||
packHalf2x16(float2(value.y, 0.0)),
|
packHalf2x16(vec2(value.y, 0.0)),
|
||||||
packHalf2x16(float2(value.z, 0.0)));
|
packHalf2x16(vec2(value.z, 0.0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 f16tof32(uint3 value) {
|
vec3 f16tof32(uvec3 value) {
|
||||||
return float3(unpackHalf2x16(value.x).x,
|
return vec3(unpackHalf2x16(value.x).x,
|
||||||
unpackHalf2x16(value.y).x,
|
unpackHalf2x16(value.y).x,
|
||||||
unpackHalf2x16(value.z).x);
|
unpackHalf2x16(value.z).x);
|
||||||
}
|
}
|
||||||
|
|
||||||
float f32tof16(float value) {
|
float f32tof16(float value) {
|
||||||
return packHalf2x16(float2(value.x, 0.0));
|
return packHalf2x16(vec2(value.x, 0.0));
|
||||||
}
|
}
|
||||||
|
|
||||||
float f16tof32(uint value) {
|
float f16tof32(uint value) {
|
||||||
@@ -34,7 +32,7 @@ layout(binding = 0) uniform sampler2D srcTexture;
|
|||||||
layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||||
|
|
||||||
layout(push_constant, std430) uniform Params {
|
layout(push_constant, std430) uniform Params {
|
||||||
float2 p_textureSizeRcp;
|
vec2 p_textureSizeRcp;
|
||||||
uint padding0;
|
uint padding0;
|
||||||
uint padding1;
|
uint padding1;
|
||||||
}
|
}
|
||||||
@@ -69,7 +67,7 @@ float CrossCalcMSLE(float a, float b) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
float CalcMSLE(float3 a, float3 b) {
|
float CalcMSLE(vec3 a, vec3 b) {
|
||||||
float result = 0.0f;
|
float result = 0.0f;
|
||||||
if (isNegative(a.x) != isNegative(b.x)) {
|
if (isNegative(a.x) != isNegative(b.x)) {
|
||||||
result += CrossCalcMSLE(a.x, b.x);
|
result += CrossCalcMSLE(a.x, b.x);
|
||||||
@@ -91,32 +89,32 @@ float CalcMSLE(float3 a, float3 b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Adapt the log function to make sense when a < 0
|
// Adapt the log function to make sense when a < 0
|
||||||
float3 customLog2(float3 a) {
|
vec3 customLog2(vec3 a) {
|
||||||
return float3(
|
return vec3(
|
||||||
a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f),
|
a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f),
|
||||||
a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f),
|
a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f),
|
||||||
a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f));
|
a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Inverse of customLog2()
|
// Inverse of customLog2()
|
||||||
float3 customExp2(float3 a) {
|
vec3 customExp2(vec3 a) {
|
||||||
return float3(
|
return vec3(
|
||||||
a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f),
|
a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f),
|
||||||
a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f),
|
a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f),
|
||||||
a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f));
|
a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
float CalcMSLE(float3 a, float3 b) {
|
float CalcMSLE(vec3 a, vec3 b) {
|
||||||
float3 err = log2((b + 1.0f) / (a + 1.0f));
|
vec3 err = log2((b + 1.0f) / (a + 1.0f));
|
||||||
err = err * err;
|
err = err * err;
|
||||||
return err.x + err.y + err.z;
|
return err.x + err.y + err.z;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 customLog2(float3 a) {
|
vec3 customLog2(vec3 a) {
|
||||||
return log2(a + 1.0f);
|
return log2(a + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 customExp2(float3 a) {
|
vec3 customExp2(vec3 a) {
|
||||||
return exp2(a) - 1.0f;
|
return exp2(a) - 1.0f;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -157,98 +155,98 @@ uint Pattern(uint p, uint i) {
|
|||||||
|
|
||||||
#ifndef SIGNED
|
#ifndef SIGNED
|
||||||
//UF
|
//UF
|
||||||
float3 Quantize7(float3 x) {
|
vec3 Quantize7(vec3 x) {
|
||||||
return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
|
return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Quantize9(float3 x) {
|
vec3 Quantize9(vec3 x) {
|
||||||
return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
|
return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Quantize10(float3 x) {
|
vec3 Quantize10(vec3 x) {
|
||||||
return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
|
return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize7(float3 x) {
|
vec3 Unquantize7(vec3 x) {
|
||||||
return (x * 65536.0f + 0x8000) / 128.0f;
|
return (x * 65536.0f + 0x8000) / 128.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize9(float3 x) {
|
vec3 Unquantize9(vec3 x) {
|
||||||
return (x * 65536.0f + 0x8000) / 512.0f;
|
return (x * 65536.0f + 0x8000) / 512.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize10(float3 x) {
|
vec3 Unquantize10(vec3 x) {
|
||||||
return (x * 65536.0f + 0x8000) / 1024.0f;
|
return (x * 65536.0f + 0x8000) / 1024.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
|
vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
|
||||||
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
|
vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
|
||||||
return f16tof32(uint3(comp));
|
return f16tof32(uvec3(comp));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
//SF
|
//SF
|
||||||
|
|
||||||
float3 cmpSign(float3 value) {
|
vec3 cmpSign(vec3 value) {
|
||||||
float3 signVal;
|
vec3 signVal;
|
||||||
signVal.x = value.x >= 0.0f ? 1.0f : -1.0f;
|
signVal.x = value.x >= 0.0f ? 1.0f : -1.0f;
|
||||||
signVal.y = value.y >= 0.0f ? 1.0f : -1.0f;
|
signVal.y = value.y >= 0.0f ? 1.0f : -1.0f;
|
||||||
signVal.z = value.z >= 0.0f ? 1.0f : -1.0f;
|
signVal.z = value.z >= 0.0f ? 1.0f : -1.0f;
|
||||||
return signVal;
|
return signVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Quantize7(float3 x) {
|
vec3 Quantize7(vec3 x) {
|
||||||
float3 signVal = cmpSign(x);
|
vec3 signVal = cmpSign(x);
|
||||||
return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f);
|
return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Quantize9(float3 x) {
|
vec3 Quantize9(vec3 x) {
|
||||||
float3 signVal = cmpSign(x);
|
vec3 signVal = cmpSign(x);
|
||||||
return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f);
|
return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Quantize10(float3 x) {
|
vec3 Quantize10(vec3 x) {
|
||||||
float3 signVal = cmpSign(x);
|
vec3 signVal = cmpSign(x);
|
||||||
return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f);
|
return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize7(float3 x) {
|
vec3 Unquantize7(vec3 x) {
|
||||||
float3 signVal = sign(x);
|
vec3 signVal = sign(x);
|
||||||
x = abs(x);
|
x = abs(x);
|
||||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
|
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
|
||||||
finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x;
|
finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x;
|
||||||
finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y;
|
finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y;
|
||||||
finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z;
|
finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z;
|
||||||
return finalVal;
|
return finalVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize9(float3 x) {
|
vec3 Unquantize9(vec3 x) {
|
||||||
float3 signVal = sign(x);
|
vec3 signVal = sign(x);
|
||||||
x = abs(x);
|
x = abs(x);
|
||||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
|
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
|
||||||
finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x;
|
finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x;
|
||||||
finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y;
|
finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y;
|
||||||
finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z;
|
finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z;
|
||||||
return finalVal;
|
return finalVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 Unquantize10(float3 x) {
|
vec3 Unquantize10(vec3 x) {
|
||||||
float3 signVal = sign(x);
|
vec3 signVal = sign(x);
|
||||||
x = abs(x);
|
x = abs(x);
|
||||||
float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
|
vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
|
||||||
finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x;
|
finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x;
|
||||||
finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y;
|
finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y;
|
||||||
finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z;
|
finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z;
|
||||||
return finalVal;
|
return finalVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
|
vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
|
||||||
float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
|
vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
|
||||||
return f16tof32(uint3(comp));
|
return f16tof32(uvec3(comp));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void Swap(inout float3 a, inout float3 b) {
|
void Swap(inout vec3 a, inout vec3 b) {
|
||||||
float3 tmp = a;
|
vec3 tmp = a;
|
||||||
a = b;
|
a = b;
|
||||||
b = tmp;
|
b = tmp;
|
||||||
}
|
}
|
||||||
@@ -270,8 +268,8 @@ uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This adds a bitflag to quantized values that signifies whether they are negative.
|
// This adds a bitflag to quantized values that signifies whether they are negative.
|
||||||
void SignExtend(inout float3 v1, uint mask, uint signFlag) {
|
void SignExtend(inout vec3 v1, uint mask, uint signFlag) {
|
||||||
int3 v = int3(v1);
|
ivec3 v = ivec3(v1);
|
||||||
v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0);
|
v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0);
|
||||||
v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0);
|
v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0);
|
||||||
v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0);
|
v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0);
|
||||||
@@ -279,38 +277,39 @@ void SignExtend(inout float3 v1, uint mask, uint signFlag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Encodes a block with mode 11 (2x 10-bit endpoints).
|
// Encodes a block with mode 11 (2x 10-bit endpoints).
|
||||||
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
void EncodeP1(inout uvec4 block, inout float blockMSLE, vec3 texels[16]) {
|
||||||
// compute endpoints (min/max RGB bbox)
|
// compute endpoints (min/max RGB bbox)
|
||||||
float3 blockMin = texels[0];
|
vec3 blockMin = texels[0];
|
||||||
float3 blockMax = texels[0];
|
vec3 blockMax = texels[0];
|
||||||
for (uint i = 1u; i < 16u; ++i) {
|
for (uint i = 1u; i < 16u; ++i) {
|
||||||
blockMin = min(blockMin, texels[i]);
|
blockMin = min(blockMin, texels[i]);
|
||||||
blockMax = max(blockMax, texels[i]);
|
blockMax = max(blockMax, texels[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// refine endpoints in log2 RGB space
|
// refine endpoints in log2 RGB space
|
||||||
float3 refinedBlockMin = blockMax;
|
vec3 refinedBlockMin = blockMax;
|
||||||
float3 refinedBlockMax = blockMin;
|
vec3 refinedBlockMax = blockMin;
|
||||||
for (uint i = 0u; i < 16u; ++i) {
|
for (uint i = 0u; i < 16u; ++i) {
|
||||||
refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
|
refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
|
||||||
refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
|
refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 logBlockMax = customLog2(blockMax);
|
vec3 logBlockMax = customLog2(blockMax);
|
||||||
float3 logBlockMin = customLog2(blockMin);
|
vec3 logBlockMin = customLog2(blockMin);
|
||||||
float3 logRefinedBlockMax = customLog2(refinedBlockMax);
|
vec3 logRefinedBlockMax = customLog2(refinedBlockMax);
|
||||||
float3 logRefinedBlockMin = customLog2(refinedBlockMin);
|
vec3 logRefinedBlockMin = customLog2(refinedBlockMin);
|
||||||
float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
|
vec3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
|
||||||
|
|
||||||
logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
|
logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
|
||||||
logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
|
logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
|
||||||
blockMin = customExp2(logBlockMin);
|
blockMin = customExp2(logBlockMin);
|
||||||
blockMax = customExp2(logBlockMax);
|
blockMax = customExp2(logBlockMax);
|
||||||
|
|
||||||
float3 blockDir = blockMax - blockMin;
|
vec3 blockDir = blockMax - blockMin;
|
||||||
blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
|
blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
|
||||||
|
|
||||||
float3 endpoint0 = Quantize10(blockMin);
|
vec3 endpoint0 = Quantize10(blockMin);
|
||||||
float3 endpoint1 = Quantize10(blockMax);
|
vec3 endpoint1 = Quantize10(blockMax);
|
||||||
float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
|
float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
|
||||||
float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
|
float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
|
||||||
|
|
||||||
@@ -336,12 +335,12 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// compute compression error (MSLE)
|
// compute compression error (MSLE)
|
||||||
float3 endpoint0Unq = Unquantize10(endpoint0);
|
vec3 endpoint0Unq = Unquantize10(endpoint0);
|
||||||
float3 endpoint1Unq = Unquantize10(endpoint1);
|
vec3 endpoint1Unq = Unquantize10(endpoint1);
|
||||||
float msle = 0.0f;
|
float msle = 0.0f;
|
||||||
for (uint i = 0u; i < 16u; ++i) {
|
for (uint i = 0u; i < 16u; ++i) {
|
||||||
float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
|
float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
|
||||||
float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
|
vec3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
|
||||||
|
|
||||||
msle += CalcMSLE(texels[i], texelUnc);
|
msle += CalcMSLE(texels[i], texelUnc);
|
||||||
}
|
}
|
||||||
@@ -384,19 +383,19 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
|
|||||||
block.w |= indices[15] << 28u;
|
block.w |= indices[15] << 28u;
|
||||||
}
|
}
|
||||||
|
|
||||||
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) {
|
float DistToLineSq(vec3 PointOnLine, vec3 LineDirection, vec3 Point) {
|
||||||
float3 w = Point - PointOnLine;
|
vec3 w = Point - PointOnLine;
|
||||||
float3 x = w - dot(w, LineDirection) * LineDirection;
|
vec3 x = w - dot(w, LineDirection) * LineDirection;
|
||||||
|
|
||||||
return dot(x, x);
|
return dot(x, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gets the deviation from the source data of a particular pattern (smaller is better).
|
// Gets the deviation from the source data of a particular pattern (smaller is better).
|
||||||
float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
float EvaluateP2Pattern(uint pattern, vec3 texels[16]) {
|
||||||
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||||
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||||
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||||
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||||
|
|
||||||
for (uint i = 0; i < 16; ++i) {
|
for (uint i = 0; i < 16; ++i) {
|
||||||
uint paletteID = Pattern(pattern, i);
|
uint paletteID = Pattern(pattern, i);
|
||||||
@@ -409,8 +408,8 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
|
vec3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
|
||||||
float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
|
vec3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
|
||||||
|
|
||||||
float sqDistanceFromLine = 0.0f;
|
float sqDistanceFromLine = 0.0f;
|
||||||
|
|
||||||
@@ -427,11 +426,11 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding.
|
// Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding.
|
||||||
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) {
|
void EncodeP2Pattern(inout uvec4 block, inout float blockMSLE, uint pattern, vec3 texels[16]) {
|
||||||
float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||||
float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||||
float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
|
vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
|
||||||
float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
|
vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
|
||||||
|
|
||||||
for (uint i = 0u; i < 16u; ++i) {
|
for (uint i = 0u; i < 16u; ++i) {
|
||||||
uint paletteID = Pattern(pattern, i);
|
uint paletteID = Pattern(pattern, i);
|
||||||
@@ -444,8 +443,8 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 p0BlockDir = p0BlockMax - p0BlockMin;
|
vec3 p0BlockDir = p0BlockMax - p0BlockMin;
|
||||||
float3 p1BlockDir = p1BlockMax - p1BlockMin;
|
vec3 p1BlockDir = p1BlockMax - p1BlockMin;
|
||||||
p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
|
p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
|
||||||
p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
|
p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
|
||||||
|
|
||||||
@@ -479,15 +478,15 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
|||||||
indices[i] = paletteID == 0u ? p0Index : p1Index;
|
indices[i] = paletteID == 0u ? p0Index : p1Index;
|
||||||
}
|
}
|
||||||
|
|
||||||
float3 endpoint760 = floor(Quantize7(p0BlockMin));
|
vec3 endpoint760 = floor(Quantize7(p0BlockMin));
|
||||||
float3 endpoint761 = floor(Quantize7(p0BlockMax));
|
vec3 endpoint761 = floor(Quantize7(p0BlockMax));
|
||||||
float3 endpoint762 = floor(Quantize7(p1BlockMin));
|
vec3 endpoint762 = floor(Quantize7(p1BlockMin));
|
||||||
float3 endpoint763 = floor(Quantize7(p1BlockMax));
|
vec3 endpoint763 = floor(Quantize7(p1BlockMax));
|
||||||
|
|
||||||
float3 endpoint950 = floor(Quantize9(p0BlockMin));
|
vec3 endpoint950 = floor(Quantize9(p0BlockMin));
|
||||||
float3 endpoint951 = floor(Quantize9(p0BlockMax));
|
vec3 endpoint951 = floor(Quantize9(p0BlockMax));
|
||||||
float3 endpoint952 = floor(Quantize9(p1BlockMin));
|
vec3 endpoint952 = floor(Quantize9(p1BlockMin));
|
||||||
float3 endpoint953 = floor(Quantize9(p1BlockMax));
|
vec3 endpoint953 = floor(Quantize9(p1BlockMax));
|
||||||
|
|
||||||
endpoint761 = endpoint761 - endpoint760;
|
endpoint761 = endpoint761 - endpoint760;
|
||||||
endpoint762 = endpoint762 - endpoint760;
|
endpoint762 = endpoint762 - endpoint760;
|
||||||
@@ -514,28 +513,28 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
|||||||
endpoint950 = clamp(endpoint950, -maxVal9, maxVal9);
|
endpoint950 = clamp(endpoint950, -maxVal9, maxVal9);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float3 endpoint760Unq = Unquantize7(endpoint760);
|
vec3 endpoint760Unq = Unquantize7(endpoint760);
|
||||||
float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
|
vec3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
|
||||||
float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
|
vec3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
|
||||||
float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
|
vec3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
|
||||||
float3 endpoint950Unq = Unquantize9(endpoint950);
|
vec3 endpoint950Unq = Unquantize9(endpoint950);
|
||||||
float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
|
vec3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
|
||||||
float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
|
vec3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
|
||||||
float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
|
vec3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
|
||||||
|
|
||||||
float msle76 = 0.0f;
|
float msle76 = 0.0f;
|
||||||
float msle95 = 0.0f;
|
float msle95 = 0.0f;
|
||||||
for (uint i = 0u; i < 16u; ++i) {
|
for (uint i = 0u; i < 16u; ++i) {
|
||||||
uint paletteID = Pattern(pattern, i);
|
uint paletteID = Pattern(pattern, i);
|
||||||
|
|
||||||
float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
|
vec3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
|
||||||
float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
|
vec3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
|
||||||
float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
|
vec3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
|
||||||
float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
|
vec3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
|
||||||
|
|
||||||
float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
|
float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
|
||||||
float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
|
vec3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
|
||||||
float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
|
vec3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
|
||||||
|
|
||||||
msle76 += CalcMSLE(texels[i], texelUnc76);
|
msle76 += CalcMSLE(texels[i], texelUnc76);
|
||||||
msle95 += CalcMSLE(texels[i], texelUnc95);
|
msle95 += CalcMSLE(texels[i], texelUnc95);
|
||||||
@@ -558,7 +557,7 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
|
|||||||
float p2MSLE = min(msle76, msle95);
|
float p2MSLE = min(msle76, msle95);
|
||||||
if (p2MSLE < blockMSLE) {
|
if (p2MSLE < blockMSLE) {
|
||||||
blockMSLE = p2MSLE;
|
blockMSLE = p2MSLE;
|
||||||
block = uint4(0u, 0u, 0u, 0u);
|
block = uvec4(0u, 0u, 0u, 0u);
|
||||||
|
|
||||||
if (p2MSLE == msle76) {
|
if (p2MSLE == msle76) {
|
||||||
// 7.6
|
// 7.6
|
||||||
@@ -681,43 +680,43 @@ void main() {
|
|||||||
// 4 5 6 7
|
// 4 5 6 7
|
||||||
// 8 9 10 11
|
// 8 9 10 11
|
||||||
// 12 13 14 15
|
// 12 13 14 15
|
||||||
float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
|
vec2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
|
||||||
float2 block0UV = uv;
|
vec2 block0UV = uv;
|
||||||
float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f);
|
vec2 block1UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 0.0f);
|
||||||
float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y);
|
vec2 block2UV = uv + vec2(0.0f, 2.0f * params.p_textureSizeRcp.y);
|
||||||
float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
|
vec2 block3UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
|
||||||
float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV);
|
vec4 block0X = textureGather(srcTexture, block0UV, 0);
|
||||||
float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV);
|
vec4 block1X = textureGather(srcTexture, block1UV, 0);
|
||||||
float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV);
|
vec4 block2X = textureGather(srcTexture, block2UV, 0);
|
||||||
float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV);
|
vec4 block3X = textureGather(srcTexture, block3UV, 0);
|
||||||
float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV);
|
vec4 block0Y = textureGather(srcTexture, block0UV, 1);
|
||||||
float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV);
|
vec4 block1Y = textureGather(srcTexture, block1UV, 1);
|
||||||
float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV);
|
vec4 block2Y = textureGather(srcTexture, block2UV, 1);
|
||||||
float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV);
|
vec4 block3Y = textureGather(srcTexture, block3UV, 1);
|
||||||
float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV);
|
vec4 block0Z = textureGather(srcTexture, block0UV, 2);
|
||||||
float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV);
|
vec4 block1Z = textureGather(srcTexture, block1UV, 2);
|
||||||
float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV);
|
vec4 block2Z = textureGather(srcTexture, block2UV, 2);
|
||||||
float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV);
|
vec4 block3Z = textureGather(srcTexture, block3UV, 2);
|
||||||
|
|
||||||
float3 texels[16];
|
vec3 texels[16];
|
||||||
texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
|
texels[0] = vec3(block0X.w, block0Y.w, block0Z.w);
|
||||||
texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
|
texels[1] = vec3(block0X.z, block0Y.z, block0Z.z);
|
||||||
texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
|
texels[2] = vec3(block1X.w, block1Y.w, block1Z.w);
|
||||||
texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
|
texels[3] = vec3(block1X.z, block1Y.z, block1Z.z);
|
||||||
texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
|
texels[4] = vec3(block0X.x, block0Y.x, block0Z.x);
|
||||||
texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
|
texels[5] = vec3(block0X.y, block0Y.y, block0Z.y);
|
||||||
texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
|
texels[6] = vec3(block1X.x, block1Y.x, block1Z.x);
|
||||||
texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
|
texels[7] = vec3(block1X.y, block1Y.y, block1Z.y);
|
||||||
texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
|
texels[8] = vec3(block2X.w, block2Y.w, block2Z.w);
|
||||||
texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
|
texels[9] = vec3(block2X.z, block2Y.z, block2Z.z);
|
||||||
texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
|
texels[10] = vec3(block3X.w, block3Y.w, block3Z.w);
|
||||||
texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
|
texels[11] = vec3(block3X.z, block3Y.z, block3Z.z);
|
||||||
texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
|
texels[12] = vec3(block2X.x, block2Y.x, block2Z.x);
|
||||||
texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
|
texels[13] = vec3(block2X.y, block2Y.y, block2Z.y);
|
||||||
texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
|
texels[14] = vec3(block3X.x, block3Y.x, block3Z.x);
|
||||||
texels[15] = float3(block3X.y, block3Y.y, block3Z.y);
|
texels[15] = vec3(block3X.y, block3Y.y, block3Z.y);
|
||||||
|
|
||||||
uint4 block = uint4(0u, 0u, 0u, 0u);
|
uvec4 block = uvec4(0u, 0u, 0u, 0u);
|
||||||
float blockMSLE = 0.0f;
|
float blockMSLE = 0.0f;
|
||||||
|
|
||||||
EncodeP1(block, blockMSLE, texels);
|
EncodeP1(block, blockMSLE, texels);
|
||||||
@@ -738,5 +737,5 @@ void main() {
|
|||||||
EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
|
EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
|
imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), block);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user