You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-04 12:00:25 +00:00
This is necessary because we will always deadlock if a thread takes on multiple pump tasks since pump tasks never return. This means when using separate threads for certain systems (like physics or rendering), we need to be sure that there are enough threads to have at least one per system (to ensure forward progress).
776 lines
25 KiB
C++
776 lines
25 KiB
C++
/**************************************************************************/
|
|
/* image_compress_betsy.cpp */
|
|
/**************************************************************************/
|
|
/* This file is part of: */
|
|
/* GODOT ENGINE */
|
|
/* https://godotengine.org */
|
|
/**************************************************************************/
|
|
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
|
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to any person obtaining */
|
|
/* a copy of this software and associated documentation files (the */
|
|
/* "Software"), to deal in the Software without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
|
/* permit persons to whom the Software is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* */
|
|
/* The above copyright notice and this permission notice shall be */
|
|
/* included in all copies or substantial portions of the Software. */
|
|
/* */
|
|
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
|
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
|
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
|
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
|
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
|
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
|
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
/**************************************************************************/
|
|
|
|
#include "image_compress_betsy.h"
|
|
|
|
#include "core/config/project_settings.h"
|
|
|
|
#include "betsy_bc1.h"
|
|
|
|
#include "alpha_stitch.glsl.gen.h"
|
|
#include "bc1.glsl.gen.h"
|
|
#include "bc4.glsl.gen.h"
|
|
#include "bc6h.glsl.gen.h"
|
|
#include "servers/display_server.h"
|
|
|
|
static Mutex betsy_mutex;
|
|
static BetsyCompressor *betsy = nullptr;
|
|
|
|
static const BetsyShaderType FORMAT_TO_TYPE[BETSY_FORMAT_MAX] = {
|
|
BETSY_SHADER_BC1_STANDARD,
|
|
BETSY_SHADER_BC1_DITHER,
|
|
BETSY_SHADER_BC1_STANDARD,
|
|
BETSY_SHADER_BC4_SIGNED,
|
|
BETSY_SHADER_BC4_UNSIGNED,
|
|
BETSY_SHADER_BC4_SIGNED,
|
|
BETSY_SHADER_BC4_UNSIGNED,
|
|
BETSY_SHADER_BC6_SIGNED,
|
|
BETSY_SHADER_BC6_UNSIGNED,
|
|
};
|
|
|
|
static const RD::DataFormat BETSY_TO_RD_FORMAT[BETSY_FORMAT_MAX] = {
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32_UINT,
|
|
RD::DATA_FORMAT_R32G32B32A32_UINT,
|
|
RD::DATA_FORMAT_R32G32B32A32_UINT,
|
|
};
|
|
|
|
static const Image::Format BETSY_TO_IMAGE_FORMAT[BETSY_FORMAT_MAX] = {
|
|
Image::FORMAT_DXT1,
|
|
Image::FORMAT_DXT1,
|
|
Image::FORMAT_DXT5,
|
|
Image::FORMAT_RGTC_R,
|
|
Image::FORMAT_RGTC_R,
|
|
Image::FORMAT_RGTC_RG,
|
|
Image::FORMAT_RGTC_RG,
|
|
Image::FORMAT_BPTC_RGBF,
|
|
Image::FORMAT_BPTC_RGBFU,
|
|
};
|
|
|
|
void BetsyCompressor::_init() {
|
|
if (!DisplayServer::can_create_rendering_device()) {
|
|
return;
|
|
}
|
|
|
|
// Create local RD.
|
|
RenderingContextDriver *rcd = nullptr;
|
|
RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
|
|
|
|
if (rd == nullptr) {
|
|
#if defined(RD_ENABLED)
|
|
#if defined(METAL_ENABLED)
|
|
rcd = memnew(RenderingContextDriverMetal);
|
|
rd = memnew(RenderingDevice);
|
|
#endif
|
|
#if defined(VULKAN_ENABLED)
|
|
if (rcd == nullptr) {
|
|
rcd = memnew(RenderingContextDriverVulkan);
|
|
rd = memnew(RenderingDevice);
|
|
}
|
|
#endif
|
|
#endif
|
|
if (rcd != nullptr && rd != nullptr) {
|
|
Error err = rcd->initialize();
|
|
if (err == OK) {
|
|
err = rd->initialize(rcd);
|
|
}
|
|
|
|
if (err != OK) {
|
|
memdelete(rd);
|
|
memdelete(rcd);
|
|
rd = nullptr;
|
|
rcd = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");
|
|
|
|
compress_rd = rd;
|
|
compress_rcd = rcd;
|
|
|
|
// Create the sampler state.
|
|
RD::SamplerState src_sampler_state;
|
|
{
|
|
src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
|
src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
|
src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
|
|
src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
|
|
src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
|
|
}
|
|
|
|
src_sampler = compress_rd->sampler_create(src_sampler_state);
|
|
|
|
// Initialize RDShaderFiles.
|
|
{
|
|
Ref<RDShaderFile> bc1_shader;
|
|
bc1_shader.instantiate();
|
|
Error err = bc1_shader->parse_versions_from_text(bc1_shader_glsl);
|
|
|
|
if (err != OK) {
|
|
bc1_shader->print_errors("Betsy BC1 compress shader");
|
|
}
|
|
|
|
// Standard BC1 compression.
|
|
cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("standard"));
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled.is_null());
|
|
|
|
cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled);
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline.is_null());
|
|
|
|
// Dither BC1 variant. Unused, so comment out for now.
|
|
//cached_shaders[BETSY_SHADER_BC1_DITHER].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("dithered"));
|
|
//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled.is_null());
|
|
|
|
//cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled);
|
|
//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline.is_null());
|
|
}
|
|
|
|
{
|
|
Ref<RDShaderFile> bc4_shader;
|
|
bc4_shader.instantiate();
|
|
Error err = bc4_shader->parse_versions_from_text(bc4_shader_glsl);
|
|
|
|
if (err != OK) {
|
|
bc4_shader->print_errors("Betsy BC4 compress shader");
|
|
}
|
|
|
|
// Signed BC4 compression. Unused, so comment out for now.
|
|
//cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("signed"));
|
|
//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled.is_null());
|
|
|
|
//cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled);
|
|
//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline.is_null());
|
|
|
|
// Unsigned BC4 compression.
|
|
cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("unsigned"));
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled.is_null());
|
|
|
|
cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled);
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline.is_null());
|
|
}
|
|
|
|
{
|
|
Ref<RDShaderFile> bc6h_shader;
|
|
bc6h_shader.instantiate();
|
|
Error err = bc6h_shader->parse_versions_from_text(bc6h_shader_glsl);
|
|
|
|
if (err != OK) {
|
|
bc6h_shader->print_errors("Betsy BC6 compress shader");
|
|
}
|
|
|
|
// Signed BC6 compression.
|
|
cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("signed"));
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled.is_null());
|
|
|
|
cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled);
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline.is_null());
|
|
|
|
// Unsigned BC6 compression.
|
|
cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("unsigned"));
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled.is_null());
|
|
|
|
cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled);
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline.is_null());
|
|
}
|
|
|
|
{
|
|
Ref<RDShaderFile> alpha_stitch_shader;
|
|
alpha_stitch_shader.instantiate();
|
|
Error err = alpha_stitch_shader->parse_versions_from_text(alpha_stitch_shader_glsl);
|
|
|
|
if (err != OK) {
|
|
alpha_stitch_shader->print_errors("Betsy alpha stitch shader");
|
|
}
|
|
cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled = compress_rd->shader_create_from_spirv(alpha_stitch_shader->get_spirv_stages());
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled.is_null());
|
|
|
|
cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
|
|
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
|
|
}
|
|
}
|
|
|
|
void BetsyCompressor::init() {
|
|
WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true, "Betsy pump task", true);
|
|
command_queue.set_pump_task_id(tid);
|
|
command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);
|
|
command_queue.push_and_sync(this, &BetsyCompressor::_init);
|
|
DEV_ASSERT(task_id == tid);
|
|
}
|
|
|
|
void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {
|
|
task_id = p_pump_task_id;
|
|
}
|
|
|
|
// Yield thread to WTP so other tasks can be done on it.
|
|
// Automatically regains control as soon a task is pushed to the command queue.
|
|
void BetsyCompressor::_thread_loop() {
|
|
while (!exit) {
|
|
WorkerThreadPool::get_singleton()->yield();
|
|
command_queue.flush_all();
|
|
}
|
|
}
|
|
|
|
void BetsyCompressor::_thread_exit() {
|
|
exit = true;
|
|
|
|
if (compress_rd != nullptr) {
|
|
if (dxt1_encoding_table_buffer.is_valid()) {
|
|
compress_rd->free(dxt1_encoding_table_buffer);
|
|
}
|
|
|
|
compress_rd->free(src_sampler);
|
|
|
|
// Clear the shader cache, pipelines will be unreferenced automatically.
|
|
for (int i = 0; i < BETSY_SHADER_MAX; i++) {
|
|
if (cached_shaders[i].compiled.is_valid()) {
|
|
compress_rd->free(cached_shaders[i].compiled);
|
|
}
|
|
}
|
|
|
|
// Free the RD (and RCD if necessary).
|
|
memdelete(compress_rd);
|
|
compress_rd = nullptr;
|
|
if (compress_rcd != nullptr) {
|
|
memdelete(compress_rcd);
|
|
compress_rcd = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
void BetsyCompressor::finish() {
|
|
command_queue.push(this, &BetsyCompressor::_thread_exit);
|
|
if (task_id != WorkerThreadPool::INVALID_TASK_ID) {
|
|
WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
|
|
task_id = WorkerThreadPool::INVALID_TASK_ID;
|
|
}
|
|
}
|
|
|
|
// Helper functions.
|
|
|
|
static int get_next_multiple(int n, int m) {
|
|
return n + (m - (n % m));
|
|
}
|
|
|
|
static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
|
|
switch (r_img->get_format()) {
|
|
case Image::FORMAT_L8:
|
|
r_img->convert(Image::FORMAT_RGBA8);
|
|
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_LA8:
|
|
r_img->convert(Image::FORMAT_RGBA8);
|
|
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_R8:
|
|
r_format = RD::DATA_FORMAT_R8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_RG8:
|
|
r_format = RD::DATA_FORMAT_R8G8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_RGB8:
|
|
r_img->convert(Image::FORMAT_RGBA8);
|
|
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBA8:
|
|
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
|
break;
|
|
|
|
case Image::FORMAT_RH:
|
|
r_format = RD::DATA_FORMAT_R16_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGH:
|
|
r_format = RD::DATA_FORMAT_R16G16_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBH:
|
|
r_img->convert(Image::FORMAT_RGBAH);
|
|
r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBAH:
|
|
r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RF:
|
|
r_format = RD::DATA_FORMAT_R32_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGF:
|
|
r_format = RD::DATA_FORMAT_R32G32_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBF:
|
|
r_img->convert(Image::FORMAT_RGBAF);
|
|
r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBAF:
|
|
r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
|
|
break;
|
|
|
|
case Image::FORMAT_RGBE9995:
|
|
r_format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
|
|
break;
|
|
|
|
default: {
|
|
return ERR_UNAVAILABLE;
|
|
}
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
|
|
uint64_t start_time = OS::get_singleton()->get_ticks_msec();
|
|
|
|
// Return an error so that the compression can fall back to cpu compression
|
|
if (compress_rd == nullptr) {
|
|
return ERR_CANT_CREATE;
|
|
}
|
|
|
|
if (r_img->is_compressed()) {
|
|
return ERR_INVALID_DATA;
|
|
}
|
|
|
|
int img_width = r_img->get_width();
|
|
int img_height = r_img->get_height();
|
|
if (img_width % 4 != 0 || img_height % 4 != 0) {
|
|
img_width = img_width <= 2 ? img_width : (img_width + 3) & ~3;
|
|
img_height = img_height <= 2 ? img_height : (img_height + 3) & ~3;
|
|
}
|
|
|
|
Error err = OK;
|
|
|
|
// Destination format.
|
|
Image::Format dest_format = BETSY_TO_IMAGE_FORMAT[p_format];
|
|
RD::DataFormat dst_rd_format = BETSY_TO_RD_FORMAT[p_format];
|
|
|
|
BetsyShaderType shader_type = FORMAT_TO_TYPE[p_format];
|
|
BetsyShader shader = cached_shaders[shader_type];
|
|
BetsyShader secondary_shader; // The secondary shader is used for alpha blocks. For BC it's BC4U and for ETC it's ETC2_RU (8-bit variant).
|
|
BetsyShader stitch_shader;
|
|
bool needs_alpha_block = false;
|
|
|
|
switch (p_format) {
|
|
case BETSY_FORMAT_BC3:
|
|
case BETSY_FORMAT_BC5_UNSIGNED:
|
|
needs_alpha_block = true;
|
|
secondary_shader = cached_shaders[BETSY_SHADER_BC4_UNSIGNED];
|
|
stitch_shader = cached_shaders[BETSY_SHADER_ALPHA_STITCH];
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// src_texture format information.
|
|
RD::TextureFormat src_texture_format;
|
|
{
|
|
src_texture_format.array_layers = 1;
|
|
src_texture_format.depth = 1;
|
|
src_texture_format.mipmaps = 1;
|
|
src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
|
|
src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
|
|
}
|
|
|
|
err = get_src_texture_format(r_img, src_texture_format.format);
|
|
|
|
if (err != OK) {
|
|
return err;
|
|
}
|
|
|
|
// For the destination format just copy the source format and change the usage bits.
|
|
RD::TextureFormat dst_texture_format = src_texture_format;
|
|
dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
|
|
dst_texture_format.format = dst_rd_format;
|
|
|
|
RD::TextureFormat dst_texture_format_alpha;
|
|
RD::TextureFormat dst_texture_format_combined;
|
|
|
|
if (needs_alpha_block) {
|
|
dst_texture_format_combined = dst_texture_format;
|
|
dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
|
|
|
|
dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT;
|
|
|
|
dst_texture_format_alpha = dst_texture_format;
|
|
dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT;
|
|
}
|
|
|
|
// Encoding table setup.
|
|
if ((dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) && dxt1_encoding_table_buffer.is_null()) {
|
|
dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(1024 * 4, Span(dxt1_encoding_table).reinterpret<uint8_t>());
|
|
}
|
|
|
|
const int mip_count = r_img->get_mipmap_count() + 1;
|
|
|
|
// Container for the compressed data.
|
|
Vector<uint8_t> dst_data;
|
|
dst_data.resize(Image::get_image_data_size(img_width, img_height, dest_format, r_img->has_mipmaps()));
|
|
uint8_t *dst_data_ptr = dst_data.ptrw();
|
|
|
|
Vector<Vector<uint8_t>> src_images;
|
|
src_images.push_back(Vector<uint8_t>());
|
|
Vector<uint8_t> *src_image_ptr = src_images.ptrw();
|
|
|
|
// Compress each mipmap.
|
|
for (int i = 0; i < mip_count; i++) {
|
|
int width, height;
|
|
Image::get_image_mipmap_offset_and_dimensions(img_width, img_height, dest_format, i, width, height);
|
|
|
|
int64_t src_mip_ofs, src_mip_size;
|
|
int src_mip_w, src_mip_h;
|
|
r_img->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);
|
|
|
|
// Set the source texture width and size.
|
|
src_texture_format.height = height;
|
|
src_texture_format.width = width;
|
|
|
|
// Set the destination texture width and size.
|
|
dst_texture_format.height = (height + 3) >> 2;
|
|
dst_texture_format.width = (width + 3) >> 2;
|
|
|
|
// Pad textures to nearest block by smearing.
|
|
if (width != src_mip_w || height != src_mip_h) {
|
|
const uint8_t *src_mip_read = r_img->ptr() + src_mip_ofs;
|
|
|
|
// Reserve the buffer for padded image data.
|
|
int px_size = Image::get_format_pixel_size(r_img->get_format());
|
|
src_image_ptr[0].resize(width * height * px_size);
|
|
uint8_t *ptrw = src_image_ptr[0].ptrw();
|
|
|
|
int x = 0, y = 0;
|
|
for (y = 0; y < src_mip_h; y++) {
|
|
for (x = 0; x < src_mip_w; x++) {
|
|
memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);
|
|
}
|
|
|
|
// First, smear in x.
|
|
for (; x < width; x++) {
|
|
memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);
|
|
}
|
|
}
|
|
|
|
// Then, smear in y.
|
|
for (; y < height; y++) {
|
|
for (x = 0; x < width; x++) {
|
|
memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);
|
|
}
|
|
}
|
|
} else {
|
|
// Create a buffer filled with the source mip layer data.
|
|
src_image_ptr[0].resize(src_mip_size);
|
|
memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + src_mip_ofs, src_mip_size);
|
|
}
|
|
|
|
// Create the textures on the GPU.
|
|
RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
|
|
RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
|
|
|
|
{
|
|
Vector<RD::Uniform> uniforms;
|
|
{
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
|
u.binding = 0;
|
|
u.append_id(src_sampler);
|
|
u.append_id(src_texture);
|
|
uniforms.push_back(u);
|
|
}
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
|
u.binding = 1;
|
|
u.append_id(dst_texture_primary);
|
|
uniforms.push_back(u);
|
|
}
|
|
|
|
if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) {
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
|
|
u.binding = 2;
|
|
u.append_id(dxt1_encoding_table_buffer);
|
|
uniforms.push_back(u);
|
|
}
|
|
}
|
|
|
|
RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);
|
|
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
|
|
|
|
compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);
|
|
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
|
|
|
switch (shader_type) {
|
|
case BETSY_SHADER_BC6_SIGNED:
|
|
case BETSY_SHADER_BC6_UNSIGNED: {
|
|
BC6PushConstant push_constant;
|
|
push_constant.sizeX = 1.0f / width;
|
|
push_constant.sizeY = 1.0f / height;
|
|
|
|
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
|
|
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
|
} break;
|
|
|
|
case BETSY_SHADER_BC1_STANDARD: {
|
|
BC1PushConstant push_constant;
|
|
push_constant.num_refines = 2;
|
|
|
|
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));
|
|
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
|
} break;
|
|
|
|
case BETSY_SHADER_BC4_UNSIGNED: {
|
|
BC4PushConstant push_constant;
|
|
push_constant.channel_idx = 0;
|
|
|
|
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
|
|
compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
|
|
} break;
|
|
|
|
default: {
|
|
} break;
|
|
}
|
|
|
|
compress_rd->compute_list_end();
|
|
|
|
if (!needs_alpha_block) {
|
|
compress_rd->submit();
|
|
compress_rd->sync();
|
|
}
|
|
}
|
|
|
|
RID dst_texture_rid = dst_texture_primary;
|
|
|
|
if (needs_alpha_block) {
|
|
// Set the destination texture width and size.
|
|
dst_texture_format_alpha.height = (height + 3) >> 2;
|
|
dst_texture_format_alpha.width = (width + 3) >> 2;
|
|
|
|
RID dst_texture_alpha = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView());
|
|
|
|
{
|
|
Vector<RD::Uniform> uniforms;
|
|
{
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
|
u.binding = 0;
|
|
u.append_id(src_sampler);
|
|
u.append_id(src_texture);
|
|
uniforms.push_back(u);
|
|
}
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
|
u.binding = 1;
|
|
u.append_id(dst_texture_alpha);
|
|
uniforms.push_back(u);
|
|
}
|
|
}
|
|
|
|
RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0);
|
|
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
|
|
|
|
compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline);
|
|
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
|
|
|
BC4PushConstant push_constant;
|
|
push_constant.channel_idx = dest_format == Image::FORMAT_DXT5 ? 3 : 1;
|
|
|
|
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
|
|
compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
|
|
|
|
compress_rd->compute_list_end();
|
|
}
|
|
|
|
// Stitching
|
|
|
|
// Set the destination texture width and size.
|
|
dst_texture_format_combined.height = (height + 3) >> 2;
|
|
dst_texture_format_combined.width = (width + 3) >> 2;
|
|
|
|
RID dst_texture_combined = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView());
|
|
|
|
{
|
|
Vector<RD::Uniform> uniforms;
|
|
{
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
|
u.binding = 0;
|
|
u.append_id(src_sampler);
|
|
u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_alpha : dst_texture_primary);
|
|
uniforms.push_back(u);
|
|
}
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
|
u.binding = 1;
|
|
u.append_id(src_sampler);
|
|
u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_primary : dst_texture_alpha);
|
|
uniforms.push_back(u);
|
|
}
|
|
{
|
|
RD::Uniform u;
|
|
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
|
u.binding = 2;
|
|
u.append_id(dst_texture_combined);
|
|
uniforms.push_back(u);
|
|
}
|
|
}
|
|
|
|
RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0);
|
|
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
|
|
|
|
compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline);
|
|
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
|
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
|
|
|
compress_rd->compute_list_end();
|
|
|
|
compress_rd->submit();
|
|
compress_rd->sync();
|
|
}
|
|
|
|
dst_texture_rid = dst_texture_combined;
|
|
|
|
compress_rd->free(dst_texture_primary);
|
|
compress_rd->free(dst_texture_alpha);
|
|
}
|
|
|
|
// Copy data from the GPU to the buffer.
|
|
const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture_rid, 0);
|
|
int64_t dst_ofs = Image::get_image_mipmap_offset(img_width, img_height, dest_format, i);
|
|
|
|
memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
|
|
|
|
// Free the source and dest texture.
|
|
compress_rd->free(src_texture);
|
|
compress_rd->free(dst_texture_rid);
|
|
}
|
|
|
|
src_images.clear();
|
|
|
|
// Set the compressed data to the image.
|
|
r_img->set_data(img_width, img_height, r_img->has_mipmaps(), dest_format, dst_data);
|
|
|
|
print_verbose(
|
|
vformat("Betsy: Encoding a %dx%d image with %d mipmaps as %s took %d ms.",
|
|
img_width,
|
|
img_height,
|
|
r_img->get_mipmap_count(),
|
|
Image::get_format_name(dest_format),
|
|
OS::get_singleton()->get_ticks_msec() - start_time));
|
|
|
|
return OK;
|
|
}
|
|
|
|
void ensure_betsy_exists() {
|
|
betsy_mutex.lock();
|
|
if (betsy == nullptr) {
|
|
betsy = memnew(BetsyCompressor);
|
|
betsy->init();
|
|
}
|
|
betsy_mutex.unlock();
|
|
}
|
|
|
|
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
|
|
ensure_betsy_exists();
|
|
Image::Format format = r_img->get_format();
|
|
Error result = ERR_UNAVAILABLE;
|
|
|
|
if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
|
|
if (r_img->detect_signed()) {
|
|
result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);
|
|
} else {
|
|
result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);
|
|
}
|
|
}
|
|
|
|
if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
|
|
free_device();
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {
|
|
ensure_betsy_exists();
|
|
Error result = ERR_UNAVAILABLE;
|
|
|
|
switch (p_channels) {
|
|
case Image::USED_CHANNELS_RGB:
|
|
case Image::USED_CHANNELS_L:
|
|
result = betsy->compress(BETSY_FORMAT_BC1, r_img);
|
|
break;
|
|
|
|
case Image::USED_CHANNELS_RGBA:
|
|
case Image::USED_CHANNELS_LA:
|
|
result = betsy->compress(BETSY_FORMAT_BC3, r_img);
|
|
break;
|
|
|
|
case Image::USED_CHANNELS_R:
|
|
result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img);
|
|
break;
|
|
|
|
case Image::USED_CHANNELS_RG:
|
|
result = betsy->compress(BETSY_FORMAT_BC5_UNSIGNED, r_img);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
|
|
free_device();
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
void free_device() {
|
|
if (betsy != nullptr) {
|
|
betsy->finish();
|
|
memdelete(betsy);
|
|
}
|
|
}
|