You've already forked godot
							
							
				mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-04 12:00:25 +00:00 
			
		
		
		
	Merge pull request #100365 from BlueCube3310/etc-decompress
Add support for decompressing ETC2
This commit is contained in:
		@@ -12,6 +12,7 @@ thirdparty_obj = []
 | 
			
		||||
 | 
			
		||||
thirdparty_dir = "#thirdparty/etcpak/"
 | 
			
		||||
thirdparty_sources = [
 | 
			
		||||
    "DecodeRGB.cpp",
 | 
			
		||||
    "Dither.cpp",
 | 
			
		||||
    "ProcessDxtc.cpp",
 | 
			
		||||
    "ProcessRGB.cpp",
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
def can_build(env, platform):
 | 
			
		||||
    return env.editor_build
 | 
			
		||||
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def configure(env):
 | 
			
		||||
 
 | 
			
		||||
@@ -30,6 +30,8 @@
 | 
			
		||||
 | 
			
		||||
#include "image_compress_etcpak.h"
 | 
			
		||||
 | 
			
		||||
#ifdef TOOLS_ENABLED
 | 
			
		||||
 | 
			
		||||
#include "core/os/os.h"
 | 
			
		||||
#include "core/string/print_string.h"
 | 
			
		||||
 | 
			
		||||
@@ -303,3 +305,4 @@ void _compress_etcpak(EtcpakType p_compress_type, Image *r_img) {
 | 
			
		||||
 | 
			
		||||
	print_verbose(vformat("etcpak: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
 | 
			
		||||
}
 | 
			
		||||
#endif // TOOLS_ENABLED
 | 
			
		||||
 
 | 
			
		||||
@@ -31,6 +31,8 @@
 | 
			
		||||
#ifndef IMAGE_COMPRESS_ETCPAK_H
 | 
			
		||||
#define IMAGE_COMPRESS_ETCPAK_H
 | 
			
		||||
 | 
			
		||||
#ifdef TOOLS_ENABLED
 | 
			
		||||
 | 
			
		||||
#include "core/io/image.h"
 | 
			
		||||
 | 
			
		||||
enum class EtcpakType {
 | 
			
		||||
@@ -53,4 +55,6 @@ void _compress_bc(Image *r_img, Image::UsedChannels p_channels);
 | 
			
		||||
 | 
			
		||||
void _compress_etcpak(EtcpakType p_compress_type, Image *r_img);
 | 
			
		||||
 | 
			
		||||
#endif // TOOLS_ENABLED
 | 
			
		||||
 | 
			
		||||
#endif // IMAGE_COMPRESS_ETCPAK_H
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										197
									
								
								modules/etcpak/image_decompress_etcpak.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								modules/etcpak/image_decompress_etcpak.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,197 @@
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/*  image_decompress_etcpak.cpp                                           */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/*                         This file is part of:                          */
 | 
			
		||||
/*                             GODOT ENGINE                               */
 | 
			
		||||
/*                        https://godotengine.org                         */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
 | 
			
		||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* Permission is hereby granted, free of charge, to any person obtaining  */
 | 
			
		||||
/* a copy of this software and associated documentation files (the        */
 | 
			
		||||
/* "Software"), to deal in the Software without restriction, including    */
 | 
			
		||||
/* without limitation the rights to use, copy, modify, merge, publish,    */
 | 
			
		||||
/* distribute, sublicense, and/or sell copies of the Software, and to     */
 | 
			
		||||
/* permit persons to whom the Software is furnished to do so, subject to  */
 | 
			
		||||
/* the following conditions:                                              */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* The above copyright notice and this permission notice shall be         */
 | 
			
		||||
/* included in all copies or substantial portions of the Software.        */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
 | 
			
		||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
 | 
			
		||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
 | 
			
		||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
 | 
			
		||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
 | 
			
		||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
 | 
			
		||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include "image_decompress_etcpak.h"
 | 
			
		||||
 | 
			
		||||
#include "core/os/os.h"
 | 
			
		||||
#include "core/string/print_string.h"
 | 
			
		||||
 | 
			
		||||
#include <DecodeRGB.hpp>
 | 
			
		||||
 | 
			
		||||
// Size in bytes of one compressed 4x4 block for each etcpak format.
// decompress_image() advances its source offset by this amount per decoded block.
#define ETCPAK_R_BLOCK_SIZE 8
 | 
			
		||||
#define ETCPAK_RG_BLOCK_SIZE 16
 | 
			
		||||
#define ETCPAK_RGB_BLOCK_SIZE 8
 | 
			
		||||
#define ETCPAK_RGBA_BLOCK_SIZE 16
 | 
			
		||||
 | 
			
		||||
// Decodes one mipmap level of ETC-compressed data into 4-byte-per-pixel output.
//
// `src` points at the level's compressed blocks, stored one per 4x4 pixel tile in
// row-major tile order. `dst` points at the level's output pixels, laid out as
// `width` x `height` pixels with no row padding. `width` and `height` are the
// level's dimensions in pixels and do not have to be multiples of 4.
static void decompress_image(EtcpakFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
	const uint8_t *src_blocks = reinterpret_cast<const uint8_t *>(src);
	uint8_t *dec_blocks = reinterpret_cast<uint8_t *>(dst);

// Fast path: both dimensions are multiples of 4, so every block can be decoded
// straight into the destination using the full image width as the row stride.
// `dst_pos` walks one block (4 pixels) to the right per iteration and skips the
// three remaining rows of the tile row once a row of blocks is finished.
#define DECOMPRESS_LOOP(m_func, m_block_size, m_color_bytesize) \
	for (uint64_t y = 0; y < height; y += 4) { \
		for (uint64_t x = 0; x < width; x += 4) { \
			m_func(&src_blocks[src_pos], &dec_blocks[dst_pos], width); \
			src_pos += m_block_size; \
			dst_pos += 4 * m_color_bytesize; \
		} \
		dst_pos += 3 * width * m_color_bytesize; \
	}

// Safe path: at least one dimension is not a multiple of 4. A block that is
// clipped on EITHER axis is decoded into the 4x4 scratch buffer `m_output`
// (row stride of 4 pixels) and only its visible part is copied out, so nothing is
// written outside the level's pixels. Blocks that fit entirely are still decoded
// in place.
#define DECOMPRESS_LOOP_SAFE(m_func, m_block_size, m_color_bytesize, m_output) \
	for (uint64_t y = 0; y < height; y += 4) { \
		for (uint64_t x = 0; x < width; x += 4) { \
			const uint32_t yblock = MIN(height - y, 4ul); \
			const uint32_t xblock = MIN(width - x, 4ul); \
 \
			const bool incomplete = yblock < 4 || xblock < 4; \
			uint8_t *dec_out = incomplete ? m_output : &dec_blocks[(y * width + x) * m_color_bytesize]; \
 \
			m_func(&src_blocks[src_pos], dec_out, incomplete ? 4 : width); \
			src_pos += m_block_size; \
 \
			if (incomplete) { \
				for (uint32_t cy = 0; cy < yblock; cy++) { \
					memcpy(&dec_blocks[((y + cy) * width + x) * m_color_bytesize], &m_output[cy * 4 * m_color_bytesize], xblock * m_color_bytesize); \
				} \
			} \
		} \
	}

	if (width % 4 != 0 || height % 4 != 0) {
		uint64_t src_pos = 0;

		// Scratch space for one decoded 4x4 block of 4-byte pixels.
		uint8_t rgba8_output[4 * 4 * 4];

		switch (format) {
			case Etcpak_R: {
				DECOMPRESS_LOOP_SAFE(DecodeRBlock, ETCPAK_R_BLOCK_SIZE, 4, rgba8_output)
			} break;
			case Etcpak_RG: {
				DECOMPRESS_LOOP_SAFE(DecodeRGBlock, ETCPAK_RG_BLOCK_SIZE, 4, rgba8_output)
			} break;
			case Etcpak_RGB: {
				DECOMPRESS_LOOP_SAFE(DecodeRGBBlock, ETCPAK_RGB_BLOCK_SIZE, 4, rgba8_output)
			} break;
			case Etcpak_RGBA: {
				DECOMPRESS_LOOP_SAFE(DecodeRGBABlock, ETCPAK_RGBA_BLOCK_SIZE, 4, rgba8_output)
			} break;
		}

	} else {
		uint64_t src_pos = 0, dst_pos = 0;

		switch (format) {
			case Etcpak_R: {
				DECOMPRESS_LOOP(DecodeRBlock, ETCPAK_R_BLOCK_SIZE, 4)
			} break;
			case Etcpak_RG: {
				DECOMPRESS_LOOP(DecodeRGBlock, ETCPAK_RG_BLOCK_SIZE, 4)
			} break;
			case Etcpak_RGB: {
				DECOMPRESS_LOOP(DecodeRGBBlock, ETCPAK_RGB_BLOCK_SIZE, 4)
			} break;
			case Etcpak_RGBA: {
				DECOMPRESS_LOOP(DecodeRGBABlock, ETCPAK_RGBA_BLOCK_SIZE, 4)
			} break;
		}
	}

#undef DECOMPRESS_LOOP
#undef DECOMPRESS_LOOP_SAFE
}
 | 
			
		||||
 | 
			
		||||
// Decompresses an ETC1/ETC2 image in place.
//
// Every mipmap level of `p_image` is decoded to FORMAT_RGBA8 and the image's data
// is replaced with the result. Images in FORMAT_ETC2_RA_AS_RG are additionally
// converted back with convert_ra_rgba8_to_rg(). Fails with an error (leaving the
// image untouched) when the source format is not one of the handled ETC formats.
void _decompress_etc(Image *p_image) {
	uint64_t start_time = OS::get_singleton()->get_ticks_msec();

	int width = p_image->get_width();
	int height = p_image->get_height();

	// Compressed images' dimensions should be padded to the upper multiple of 4.
	// If they aren't, they need to be realigned (the actual data is correctly padded though).
	if (width % 4 != 0 || height % 4 != 0) {
		// Round each dimension up independently: a dimension that is already a
		// multiple of 4 must stay unchanged, otherwise the mipmap offsets computed
		// below would not match the stored data.
		const int new_width = (width + 3) & ~3;
		const int new_height = (height + 3) & ~3;

		print_verbose(vformat("Compressed image (%s) has dimensions that are not multiples of 4 (%dx%d), aligning to (%dx%d)", p_image->get_path(), width, height, new_width, new_height));

		width = new_width;
		height = new_height;
	}

	Image::Format source_format = p_image->get_format();
	Image::Format target_format = Image::FORMAT_RGBA8;

	EtcpakFormat etcpak_format = Etcpak_R;

	// Map the image format onto the decoder family that understands its blocks.
	switch (source_format) {
		case Image::FORMAT_ETC:
		case Image::FORMAT_ETC2_RGB8:
			etcpak_format = Etcpak_RGB;
			break;

		case Image::FORMAT_ETC2_RGBA8:
		case Image::FORMAT_ETC2_RA_AS_RG:
			etcpak_format = Etcpak_RGBA;
			break;

		case Image::FORMAT_ETC2_R11:
			etcpak_format = Etcpak_R;
			break;

		case Image::FORMAT_ETC2_RG11:
			etcpak_format = Etcpak_RG;
			break;

		default:
			ERR_FAIL_MSG(vformat("etcpak: Can't decompress image %s with an unknown format: %s.", p_image->get_path(), Image::get_format_name(source_format)));
			break;
	}

	int mm_count = p_image->get_mipmap_count();
	int64_t target_size = Image::get_image_data_size(width, height, target_format, p_image->has_mipmaps());

	// Decompressed data.
	Vector<uint8_t> data;
	data.resize(target_size);
	uint8_t *wb = data.ptrw();

	// Source data.
	const uint8_t *rb = p_image->ptr();

	// Decompress mipmaps (level 0 is the base image, hence the inclusive bound).
	for (int i = 0; i <= mm_count; i++) {
		int mipmap_w = 0, mipmap_h = 0;
		int64_t src_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, source_format, i, mipmap_w, mipmap_h);
		int64_t dst_ofs = Image::get_image_mipmap_offset(width, height, target_format, i);
		decompress_image(etcpak_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h);
	}

	// NOTE(review): `data` was sized from the (possibly realigned) dimensions above,
	// while the image's own dimensions are passed here - confirm Image::set_data()
	// accepts this combination when realignment happened.
	p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);

	// Swap channels if the format is using a channel swizzle.
	if (source_format == Image::FORMAT_ETC2_RA_AS_RG) {
		p_image->convert_ra_rgba8_to_rg();
	}

	print_verbose(vformat("etcpak: Decompression of %dx%d %s image %s with %d mipmaps took %d ms.",
			p_image->get_width(), p_image->get_height(), Image::get_format_name(source_format), p_image->get_path(), p_image->get_mipmap_count(), OS::get_singleton()->get_ticks_msec() - start_time));
}
 | 
			
		||||
							
								
								
									
										45
									
								
								modules/etcpak/image_decompress_etcpak.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								modules/etcpak/image_decompress_etcpak.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/*  image_decompress_etcpak.h                                             */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/*                         This file is part of:                          */
 | 
			
		||||
/*                             GODOT ENGINE                               */
 | 
			
		||||
/*                        https://godotengine.org                         */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
 | 
			
		||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* Permission is hereby granted, free of charge, to any person obtaining  */
 | 
			
		||||
/* a copy of this software and associated documentation files (the        */
 | 
			
		||||
/* "Software"), to deal in the Software without restriction, including    */
 | 
			
		||||
/* without limitation the rights to use, copy, modify, merge, publish,    */
 | 
			
		||||
/* distribute, sublicense, and/or sell copies of the Software, and to     */
 | 
			
		||||
/* permit persons to whom the Software is furnished to do so, subject to  */
 | 
			
		||||
/* the following conditions:                                              */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* The above copyright notice and this permission notice shall be         */
 | 
			
		||||
/* included in all copies or substantial portions of the Software.        */
 | 
			
		||||
/*                                                                        */
 | 
			
		||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
 | 
			
		||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
 | 
			
		||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
 | 
			
		||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
 | 
			
		||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
 | 
			
		||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
 | 
			
		||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 | 
			
		||||
/**************************************************************************/
 | 
			
		||||
 | 
			
		||||
#ifndef IMAGE_DECOMPRESS_ETCPAK_H
 | 
			
		||||
#define IMAGE_DECOMPRESS_ETCPAK_H
 | 
			
		||||
 | 
			
		||||
#include "core/io/image.h"
 | 
			
		||||
 | 
			
		||||
// Block layouts the etcpak decoder can be asked to decode.
enum EtcpakFormat {
	Etcpak_R, // Single channel blocks (used for FORMAT_ETC2_R11).
	Etcpak_RG, // Two channel blocks (used for FORMAT_ETC2_RG11).
	Etcpak_RGB, // Color blocks without alpha (used for FORMAT_ETC and FORMAT_ETC2_RGB8).
	Etcpak_RGBA, // Color blocks with alpha (used for FORMAT_ETC2_RGBA8 and FORMAT_ETC2_RA_AS_RG).
};
 | 
			
		||||
 | 
			
		||||
// Decompresses an ETC1/ETC2 compressed image in place, replacing its data with RGBA8 pixels.
void _decompress_etc(Image *p_image);
 | 
			
		||||
 | 
			
		||||
#endif // IMAGE_DECOMPRESS_ETCPAK_H
 | 
			
		||||
@@ -31,15 +31,21 @@
 | 
			
		||||
#include "register_types.h"
 | 
			
		||||
 | 
			
		||||
#include "image_compress_etcpak.h"
 | 
			
		||||
#include "image_decompress_etcpak.h"
 | 
			
		||||
 | 
			
		||||
// Registers the etcpak codecs with Image when the scene initialization level is reached.
// Any other level is ignored.
void initialize_etcpak_module(ModuleInitializationLevel p_level) {
	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
#ifdef TOOLS_ENABLED
		// The compressors are only available when TOOLS_ENABLED is defined.
		Image::_image_compress_etc1_func = _compress_etc1;
		Image::_image_compress_etc2_func = _compress_etc2;
		Image::_image_compress_bc_func = _compress_bc;
#endif

		// ETC1 and ETC2 share the same decompression entry point.
		Image::_image_decompress_etc1 = _decompress_etc;
		Image::_image_decompress_etc2 = _decompress_etc;
	}
}
 | 
			
		||||
 | 
			
		||||
void uninitialize_etcpak_module(ModuleInitializationLevel p_level) {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								thirdparty/README.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								thirdparty/README.md
									
									
									
									
										vendored
									
									
								
							@@ -245,6 +245,7 @@ Files extracted from upstream source:
 | 
			
		||||
  Dither.{cpp,hpp} ForceInline.hpp Math.hpp ProcessCommon.hpp ProcessRGB.{cpp,hpp}
 | 
			
		||||
  ProcessDxtc.{cpp,hpp} Tables.{cpp,hpp} Vector.hpp
 | 
			
		||||
  ```
 | 
			
		||||
- The files `DecodeRGB.{cpp,hpp}` are based on the code from the original repository.
 | 
			
		||||
- `AUTHORS.txt` and `LICENSE.txt`
 | 
			
		||||
 | 
			
		||||
## fonts
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										797
									
								
								thirdparty/etcpak/DecodeRGB.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										797
									
								
								thirdparty/etcpak/DecodeRGB.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,797 @@
 | 
			
		||||
#include "DecodeRGB.hpp"
 | 
			
		||||
#include "Tables.hpp"
 | 
			
		||||
#include "Math.hpp"
 | 
			
		||||
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __ARM_NEON
 | 
			
		||||
#  include <arm_neon.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
 | 
			
		||||
#  ifdef _MSC_VER
 | 
			
		||||
#    include <intrin.h>
 | 
			
		||||
#    include <Windows.h>
 | 
			
		||||
#    define _bswap(x) _byteswap_ulong(x)
 | 
			
		||||
#    define _bswap64(x) _byteswap_uint64(x)
 | 
			
		||||
#  else
 | 
			
		||||
#    include <x86intrin.h>
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef _bswap
 | 
			
		||||
#  define _bswap(x) __builtin_bswap32(x)
 | 
			
		||||
#  define _bswap64(x) __builtin_bswap64(x)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Distance values added to / subtracted from the base colours by the T and H mode decoders below.
// Read-only lookup data, hence const.
static const uint8_t table59T58H[8] = { 3,6,11,16,23,32,41,64 };
 | 
			
		||||
 | 
			
		||||
namespace
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
// Widens `value` by shifting it left two bits and filling the freed low bits
// with `value >> 4` (bit replication for a 6-bit input).
static etcpak_force_inline int32_t expand6(uint32_t value)
{
    const uint32_t shifted_up = value << 2;
    const uint32_t replicated = value >> 4;
    return shifted_up | replicated;
}
 | 
			
		||||
 | 
			
		||||
// Widens `value` by shifting it left one bit and filling the freed low bit
// with `value >> 6` (bit replication for a 7-bit input).
static etcpak_force_inline int32_t expand7(uint32_t value)
{
    const uint32_t shifted_up = value << 1;
    const uint32_t replicated = value >> 6;
    return shifted_up | replicated;
}
 | 
			
		||||
 | 
			
		||||
// Decodes one opaque T-mode colour block into a 4x4 tile of 32-bit pixels.
//
// `block` is the 64-bit block word, `dst` the top-left output pixel and `w` the
// output row stride in pixels. Each pixel is packed as R | G << 8 | B << 16 with
// the top byte forced to 0xFF.
static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t w )
{
    // First base colour, 4 bits per channel. The red bits are not contiguous
    // (mask 0x1B), so they are gathered as a high and a low pair.
    const auto red0_raw = ( block >> 24 ) & 0x1B;
    const auto red0_hi = ( red0_raw >> 3 ) & 0x3;
    const auto red0_lo = red0_raw & 0x3;
    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;

    // Second base colour, 4 bits per channel.
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Widen both colours to 8 bits per channel by repeating the nibble.
    const auto r0_8 = ( ( red0_hi << 6 ) | ( red0_lo << 4 ) | ( red0_hi << 2 ) | red0_lo );
    const auto g0_8 = ( green0 << 4 ) | green0;
    const auto b0_8 = ( blue0 << 4 ) | blue0;

    const auto r1_8 = ( red1 << 4 ) | red1;
    const auto g1_8 = ( green1 << 4 ) | green1;
    const auto b1_8 = ( blue1 << 4 ) | blue1;

    // 3-bit distance selector assembled from bits 3..2 and bit 0 of the block.
    const auto dist_hi = ( block >> 2 ) & 0x3;
    const auto dist_lo = block & 0x1;
    const auto dist_sel = ( dist_hi << 1 ) | dist_lo;
    const auto dist = table59T58H[dist_sel];

    // Second colour pushed up and down by the selected distance.
    const auto plus_r = clampu8( r1_8 + dist );
    const auto plus_g = clampu8( g1_8 + dist );
    const auto plus_b = clampu8( b1_8 + dist );

    const auto minus_r = clampu8( r1_8 - dist );
    const auto minus_g = clampu8( g1_8 - dist );
    const auto minus_b = clampu8( b1_8 - dist );

    const uint32_t palette[4] = {
        uint32_t( r0_8 | ( g0_8 << 8 ) | ( b0_8 << 16 ) | 0xFF000000 ),
        uint32_t( plus_r | ( plus_g << 8 ) | ( plus_b << 16 ) | 0xFF000000 ),
        uint32_t( r1_8 | ( g1_8 << 8 ) | ( b1_8 << 16 ) | 0xFF000000 ),
        uint32_t( minus_r | ( minus_g << 8 ) | ( minus_b << 16 ) | 0xFF000000 )
    };

    const uint32_t selectors = ( block >> 32 ) & 0xFFFFFFFF;
    for( uint8_t row = 0; row < 4; row++ )
    {
        for( uint8_t col = 0; col < 4; col++ )
        {
            // The 2-bit palette index is split over two 16-bit lanes: the high bit
            // lives 16 bits above the low bit.
            const auto bit = row + col * 4;
            const uint8_t sel = ( ( ( selectors >> ( bit + 16 ) ) & 0x1 ) << 1 ) | ( ( selectors >> bit ) & 0x1 );
            dst[row * w + col] = palette[sel];
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one T-mode colour block together with its alpha block into a 4x4 tile
// of 32-bit pixels.
//
// `block` is the 64-bit colour word, `alpha` the 64-bit alpha word, `dst` the
// top-left output pixel and `w` the output row stride in pixels. Each pixel is
// packed as R | G << 8 | B << 16 | A << 24.
static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // First base colour, 4 bits per channel. The red bits are not contiguous
    // (mask 0x1B), so they are gathered as a high and a low pair.
    const auto red0_raw = ( block >> 24 ) & 0x1B;
    const auto red0_hi = ( red0_raw >> 3 ) & 0x3;
    const auto red0_lo = red0_raw & 0x3;
    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;

    // Second base colour, 4 bits per channel.
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Widen both colours to 8 bits per channel by repeating the nibble.
    const auto r0_8 = ( ( red0_hi << 6 ) | ( red0_lo << 4 ) | ( red0_hi << 2 ) | red0_lo );
    const auto g0_8 = ( green0 << 4 ) | green0;
    const auto b0_8 = ( blue0 << 4 ) | blue0;

    const auto r1_8 = ( red1 << 4 ) | red1;
    const auto g1_8 = ( green1 << 4 ) | green1;
    const auto b1_8 = ( blue1 << 4 ) | blue1;

    // 3-bit distance selector assembled from bits 3..2 and bit 0 of the block.
    const auto dist_hi = ( block >> 2 ) & 0x3;
    const auto dist_lo = block & 0x1;
    const auto dist_sel = ( dist_hi << 1 ) | dist_lo;

    // Alpha word header: base value (top 8 bits), multiplier (next 4 bits) and
    // modifier table index (next 4 bits).
    const int32_t alpha_base = alpha >> 56;
    const int32_t alpha_mul = ( alpha >> 52 ) & 0xF;
    const auto alpha_tbl = g_alpha[( alpha >> 48 ) & 0xF];

    const auto dist = table59T58H[dist_sel];

    // Second colour pushed up and down by the selected distance.
    const auto plus_r = clampu8( r1_8 + dist );
    const auto plus_g = clampu8( g1_8 + dist );
    const auto plus_b = clampu8( b1_8 + dist );

    const auto minus_r = clampu8( r1_8 - dist );
    const auto minus_g = clampu8( g1_8 - dist );
    const auto minus_b = clampu8( b1_8 - dist );

    // Colour-only palette; alpha is merged in per pixel below.
    const uint32_t palette[4] = {
        uint32_t( r0_8 | ( g0_8 << 8 ) | ( b0_8 << 16 ) ),
        uint32_t( plus_r | ( plus_g << 8 ) | ( plus_b << 16 ) ),
        uint32_t( r1_8 | ( g1_8 << 8 ) | ( b1_8 << 16 ) ),
        uint32_t( minus_r | ( minus_g << 8 ) | ( minus_b << 16 ) )
    };

    const uint32_t selectors = ( block >> 32 ) & 0xFFFFFFFF;
    for( uint8_t row = 0; row < 4; row++ )
    {
        for( uint8_t col = 0; col < 4; col++ )
        {
            // The 2-bit palette index is split over two 16-bit lanes: the high bit
            // lives 16 bits above the low bit.
            const auto bit = row + col * 4;
            const uint8_t sel = ( ( ( selectors >> ( bit + 16 ) ) & 0x1 ) << 1 ) | ( ( selectors >> bit ) & 0x1 );
            // 3-bit alpha modifier selector for this pixel.
            const auto modifier = alpha_tbl[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alpha_base + modifier * alpha_mul );
            dst[row * w + col] = palette[sel] | ( a << 24 );
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one opaque H-mode colour block into a 4x4 tile of 32-bit pixels.
//
// `block` is the 64-bit block word, `dst` the top-left output pixel and `w` the
// output row stride in pixels. Each pixel is packed as R | G << 8 | B << 16 with
// the top byte forced to 0xFF.
static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t w )
{
    const uint32_t selectors = ( block >> 32 ) & 0xFFFFFFFF;

    // First base colour, 4 bits per channel; green and blue are split over
    // non-adjacent bit groups.
    const auto red0_4 = ( block >> 27 ) & 0xF;
    const auto green0_4 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
    const auto blue0_4 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );

    // Second base colour, 4 bits per channel.
    const auto red1_4 = ( block >> 11 ) & 0xF;
    const auto green1_4 = ( block >> 7 ) & 0xF;
    const auto blue1_4 = ( block >> 3 ) & 0xF;

    // Widen both colours to 8 bits per channel by repeating the nibble.
    const auto red0 = ( red0_4 << 4 ) | red0_4;
    const auto green0 = ( green0_4 << 4 ) | green0_4;
    const auto blue0 = ( blue0_4 << 4 ) | blue0_4;

    const auto red1 = ( red1_4 << 4 ) | red1_4;
    const auto green1 = ( green1_4 << 4 ) | green1_4;
    const auto blue1 = ( blue1_4 << 4 ) | blue1_4;

    // Distance selector: two bits are stored in the block (bits 2 and 0), the
    // lowest bit is implied by the ordering of the two packed 12-bit colours.
    const auto dist_hi = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) );
    const auto packed0 = ( red0_4 << 8 ) | ( green0_4 << 4 ) | ( blue0_4 << 0 );
    const auto packed1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 );
    const auto dist_lo = ( packed0 >= packed1 ) ? 1 : 0;
    const auto dist_sel = dist_hi | dist_lo;
    const auto dist = table59T58H[dist_sel];

    // Each base colour pushed up and down by the selected distance.
    const uint32_t palette[] = {
        uint32_t( clampu8( red0 + dist ) | ( clampu8( green0 + dist ) << 8 ) | ( clampu8( blue0 + dist ) << 16 ) ),
        uint32_t( clampu8( red0 - dist ) | ( clampu8( green0 - dist ) << 8 ) | ( clampu8( blue0 - dist ) << 16 ) ),
        uint32_t( clampu8( red1 + dist ) | ( clampu8( green1 + dist ) << 8 ) | ( clampu8( blue1 + dist ) << 16 ) ),
        uint32_t( clampu8( red1 - dist ) | ( clampu8( green1 - dist ) << 8 ) | ( clampu8( blue1 - dist ) << 16 ) )
    };

    for( uint8_t row = 0; row < 4; row++ )
    {
        for( uint8_t col = 0; col < 4; col++ )
        {
            // The 2-bit palette index is split over two 16-bit lanes: the high bit
            // lives 16 bits above the low bit.
            const auto bit = row + col * 4;
            const uint8_t sel = ( ( ( selectors >> ( bit + 16 ) ) & 0x1 ) << 1 ) | ( ( selectors >> bit ) & 0x1 );
            dst[row * w + col] = palette[sel] | 0xFF000000;
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 4x4 block stored in H mode and merges in a separately coded
// alpha channel.
//
//   block - colour word (the caller in this file passes it after ConvertByteOrder)
//   alpha - alpha word: bits 56-63 base value, bits 52-55 multiplier,
//           bits 48-51 modifier-table row, bits 0-47 sixteen 3-bit selectors
//   dst   - top-left output pixel; pixels are packed R | G << 8 | B << 16 | A << 24
//   w     - distance between two output rows, in pixels
static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // Two selector bits per pixel: low bit plane in bits 0-15, high plane in bits 16-31.
    const uint32_t selectors = ( block >> 32 ) & 0xFFFFFFFF;

    // First base colour, 4 bits per channel. Green and blue are assembled from
    // two separate bit fields each.
    const uint64_t r0Nib = ( block >> 27 ) & 0xF;
    const uint64_t g0Nib = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
    const uint64_t b0Nib = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );

    // Second base colour, 4 bits per channel, stored contiguously.
    const uint64_t r1Nib = ( block >> 11 ) & 0xF;
    const uint64_t g1Nib = ( block >> 7 ) & 0xF;
    const uint64_t b1Nib = ( block >> 3 ) & 0xF;

    // Widen 4-bit channels to 8 bits by replicating the nibble.
    const uint64_t r0 = ( r0Nib << 4 ) | r0Nib;
    const uint64_t g0 = ( g0Nib << 4 ) | g0Nib;
    const uint64_t b0 = ( b0Nib << 4 ) | b0Nib;

    const uint64_t r1 = ( r1Nib << 4 ) | r1Nib;
    const uint64_t g1 = ( g1Nib << 4 ) | g1Nib;
    const uint64_t b1 = ( b1Nib << 4 ) | b1Nib;

    // Distance index: two bits are stored explicitly, the lowest bit is implied
    // by the ordering of the two packed 12-bit base colours.
    uint64_t distIdx = ( ( block & 0x1 ) << 1 ) | ( block & 0x4 );
    const uint64_t packed0 = ( r0Nib << 8 ) | ( g0Nib << 4 ) | ( b0Nib << 0 );
    const uint64_t packed1 = ( block >> 3 ) & 0xFFF;
    if( packed0 >= packed1 )
    {
        distIdx |= 1;
    }
    const auto dist = table59T58H[distIdx];

    // Alpha parameters shared by all sixteen pixels.
    const int32_t base = alpha >> 56;
    const int32_t mul = ( alpha >> 52 ) & 0xF;
    const auto tbl = g_alpha[( alpha >> 48 ) & 0xF];

    // Four paint colours: each base colour plus and minus the distance.
    const uint32_t palette[] = {
        uint32_t( clampu8( r0 + dist ) | ( clampu8( g0 + dist ) << 8 ) | ( clampu8( b0 + dist ) << 16 ) ),
        uint32_t( clampu8( r0 - dist ) | ( clampu8( g0 - dist ) << 8 ) | ( clampu8( b0 - dist ) << 16 ) ),
        uint32_t( clampu8( r1 + dist ) | ( clampu8( g1 + dist ) << 8 ) | ( clampu8( b1 + dist ) << 16 ) ),
        uint32_t( clampu8( r1 - dist ) | ( clampu8( g1 - dist ) << 8 ) | ( clampu8( b1 - dist ) << 16 ) )
    };

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Selectors are stored column-major: bit position = row + 4 * column.
            const int bit = row + col * 4;
            const uint32_t hiBit = ( selectors >> ( bit + 16 ) ) & 0x1;
            const uint32_t loBit = ( selectors >> bit ) & 0x1;
            const uint8_t index = ( hiBit << 1 ) | loBit;

            // Alpha selectors are 3 bits each, first pixel in the highest bits.
            const auto amod = tbl[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( base + amod * mul );

            dst[row * w + col] = palette[index] | ( a << 24 );
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 4x4 block stored in planar mode: three colours (origin,
// horizontal, vertical) are unpacked and every pixel is interpolated as
//   ( x * (H - O) + y * (V - O) + 4 * O + 2 ) >> 2
// per channel, then clamped to 8 bits. Alpha is always written as 0xFF.
//
//   block - colour word (the caller in this file passes it after ConvertByteOrder)
//   dst   - top-left output pixel; pixels are packed R | G << 8 | B << 16 | A << 24
//   w     - distance between two output rows, in pixels
static etcpak_force_inline void DecodePlanar( uint64_t block, uint32_t* dst, uint32_t w )
{
    // Vertical colour.
    const auto bv = expand6( ( block >> 32 ) & 0x3F );
    const auto gv = expand7( ( block >> 38 ) & 0x7F );
    const auto rv = expand6( ( block >> 45 ) & 0x3F );

    // Horizontal colour; red is split across two bit fields.
    const auto bh = expand6( ( block >> 51 ) & 0x3F );
    const auto gh = expand7( ( block >> 57 ) & 0x7F );
    const auto rhLow = block & 0x01;
    const auto rhHigh = ( ( block >> 2 ) & 0x1F ) << 1;
    const auto rh = expand6( rhLow | rhHigh );

    // Origin colour; blue is split across three fields, green across two.
    const auto boLow = ( block >> 7 ) & 0x07;
    const auto boMid = ( ( block >> 11 ) & 0x3 ) << 3;
    const auto boHigh = ( ( block >> 16 ) & 0x1 ) << 5;
    const auto bo = expand6( boLow | boMid | boHigh );
    const auto goLow = ( block >> 17 ) & 0x3F;
    const auto goHigh = ( ( block >> 24 ) & 0x01 ) << 6;
    const auto go = expand7( goLow | goHigh );
    const auto ro = expand6( ( block >> 25 ) & 0x3F );

    // Per-channel step for one pixel to the right (h*) and one pixel down (v*),
    // and the rounded, pre-scaled value of the top-left pixel (*00).
    const auto hr = rh - ro;
    const auto hg = gh - go;
    const auto hb = bh - bo;
    const auto vr = rv - ro;
    const auto vg = gv - go;
    const auto vb = bv - bo;
    const auto r00 = 4 * ro + 2;
    const auto g00 = 4 * go + 2;
    const auto b00 = 4 * bo + 2;

#ifdef __ARM_NEON
    // One pixel per iteration; lanes 0-3 hold R, G, B, A as 16-bit values.
    const uint64_t hLanes = uint64_t( uint16_t( hr ) ) | ( uint64_t( uint16_t( hg ) ) << 16 ) | ( uint64_t( uint16_t( hb ) ) << 32 );
    const int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( hLanes ) );
    // After four horizontal steps, rewind to column 0 and move one row down.
    const uint64_t vLanes = uint64_t( uint16_t( vr - 4 * hr ) ) | ( uint64_t( uint16_t( vg - 4 * hg ) ) << 16 ) | ( uint64_t( uint16_t( vb - 4 * hb ) ) << 32 );
    const int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( vLanes ) );
    // 0xFFF in the alpha lane saturates to 0xFF after the narrowing shift.
    const uint64_t startLanes = uint64_t( r00 ) | ( uint64_t( g00 ) << 16 ) | ( uint64_t( b00 ) << 32 ) | ( uint64_t( 0xFFF ) << 48 );
    int16x8_t acc = vreinterpretq_s16_u64( vdupq_n_u64( startLanes ) );

    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const uint8x8_t px = vqshrun_n_s16( acc, 2 );
            vst1_lane_u32( dst + y * w + x, vreinterpret_u32_u8( px ), 0 );
            acc = vaddq_s16( acc, chco );
        }
        acc = vaddq_s16( acc, cvco );
    }
#elif defined __AVX2__
    // A whole row of four pixels per iteration; only the vertical step is added.
    const __m256i rowStep = _mm256_setr_epi16( vr, vg, vb, 0, vr, vg, vb, 0, vr, vg, vb, 0, vr, vg, vb, 0 );
    __m256i acc = _mm256_setr_epi16( r00, g00, b00, 0xFFF, r00 + hr, g00 + hg, b00 + hb, 0xFFF, r00 + 2 * hr, g00 + 2 * hg, b00 + 2 * hb, 0xFFF, r00 + 3 * hr, g00 + 3 * hg, b00 + 3 * hb, 0xFFF );

    for( int y = 0; y < 4; y++ )
    {
        const __m256i scaled = _mm256_srai_epi16( acc, 2 );
        const __m128i packed = _mm_packus_epi16( _mm256_castsi256_si128( scaled ), _mm256_extracti128_si256( scaled, 1 ) );
        _mm_storeu_si128( (__m128i*)( dst + y * w ), packed );
        acc = _mm256_add_epi16( acc, rowStep );
    }
#elif defined __SSE4_1__
    // One pixel per iteration; lanes 0-3 hold R, G, B, A as 16-bit values.
    const __m128i chco = _mm_setr_epi16( hr, hg, hb, 0, 0, 0, 0, 0 );
    const __m128i cvco = _mm_setr_epi16( vr - 4 * hr, vg - 4 * hg, vb - 4 * hb, 0, 0, 0, 0, 0 );
    __m128i acc = _mm_setr_epi16( r00, g00, b00, 0xFFF, 0, 0, 0, 0 );

    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const __m128i scaled = _mm_srai_epi16( acc, 2 );
            const __m128i packed = _mm_packus_epi16( scaled, scaled );
            dst[y * w + x] = _mm_cvtsi128_si32( packed );
            acc = _mm_add_epi16( acc, chco );
        }
        acc = _mm_add_epi16( acc, cvco );
    }
#else
    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const uint32_t r = ( x * hr + y * vr + r00 ) >> 2;
            const uint32_t g = ( x * hg + y * vg + g00 ) >> 2;
            const uint32_t b = ( x * hb + y * vb + b00 ) >> 2;

            const bool needsClamp = ( ( r | g | b ) & ~0xFF ) != 0;
            if( needsClamp )
            {
                const auto rc = clampu8( r );
                const auto gc = clampu8( g );
                const auto bc = clampu8( b );
                dst[y * w + x] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
            }
            else
            {
                // All three channels already fit in 8 bits.
                dst[y * w + x] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
            }
        }
    }
#endif
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 4x4 block stored in planar mode and merges in a separately
// coded alpha channel. Colour is interpolated per channel as
//   ( x * (H - O) + y * (V - O) + 4 * O + 2 ) >> 2
// and clamped to 8 bits.
//
//   block - colour word (the caller in this file passes it after ConvertByteOrder)
//   alpha - alpha word: bits 56-63 base value, bits 52-55 multiplier,
//           bits 48-51 modifier-table row, bits 0-47 sixteen 3-bit selectors
//   dst   - top-left output pixel; pixels are packed R | G << 8 | B << 16 | A << 24
//   w     - distance between two output rows, in pixels
static etcpak_force_inline void DecodePlanarAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // Vertical colour.
    const auto bv = expand6( ( block >> 32 ) & 0x3F );
    const auto gv = expand7( ( block >> 38 ) & 0x7F );
    const auto rv = expand6( ( block >> 45 ) & 0x3F );

    // Horizontal colour; red is split across two bit fields.
    const auto bh = expand6( ( block >> 51 ) & 0x3F );
    const auto gh = expand7( ( block >> 57 ) & 0x7F );
    const auto rhLow = block & 0x01;
    const auto rhHigh = ( ( block >> 2 ) & 0x1F ) << 1;
    const auto rh = expand6( rhLow | rhHigh );

    // Origin colour; blue is split across three fields, green across two.
    const auto boLow = ( block >> 7 ) & 0x07;
    const auto boMid = ( ( block >> 11 ) & 0x3 ) << 3;
    const auto boHigh = ( ( block >> 16 ) & 0x1 ) << 5;
    const auto bo = expand6( boLow | boMid | boHigh );
    const auto goLow = ( block >> 17 ) & 0x3F;
    const auto goHigh = ( ( block >> 24 ) & 0x01 ) << 6;
    const auto go = expand7( goLow | goHigh );
    const auto ro = expand6( ( block >> 25 ) & 0x3F );

    // Alpha parameters shared by all sixteen pixels.
    const int32_t base = alpha >> 56;
    const int32_t mul = ( alpha >> 52 ) & 0xF;
    const auto tbl = g_alpha[( alpha >> 48 ) & 0xF];

    // Per-channel step for one pixel to the right (h*) and one pixel down (v*),
    // and the rounded, pre-scaled value of the top-left pixel (*00).
    const auto hr = rh - ro;
    const auto hg = gh - go;
    const auto hb = bh - bo;
    const auto vr = rv - ro;
    const auto vg = gv - go;
    const auto vb = bv - bo;
    const auto r00 = 4 * ro + 2;
    const auto g00 = 4 * go + 2;
    const auto b00 = 4 * bo + 2;

#ifdef __ARM_NEON
    // Lanes 0-2 hold R, G, B as 16-bit values; the alpha lane stays zero and the
    // decoded alpha is OR-ed in afterwards.
    const uint64_t hLanes = uint64_t( uint16_t( hr ) ) | ( uint64_t( uint16_t( hg ) ) << 16 ) | ( uint64_t( uint16_t( hb ) ) << 32 );
    const int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( hLanes ) );
    // After four horizontal steps, rewind to column 0 and move one row down.
    const uint64_t vLanes = uint64_t( uint16_t( vr - 4 * hr ) ) | ( uint64_t( uint16_t( vg - 4 * hg ) ) << 16 ) | ( uint64_t( uint16_t( vb - 4 * hb ) ) << 32 );
    const int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( vLanes ) );
    const uint64_t startLanes = uint64_t( r00 ) | ( uint64_t( g00 ) << 16 ) | ( uint64_t( b00 ) << 32 );
    int16x8_t acc = vreinterpretq_s16_u64( vdupq_n_u64( startLanes ) );

    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const auto amod = tbl[( alpha >> ( 45 - y * 3 - x * 12 ) ) & 0x7];
            const uint32_t a = clampu8( base + amod * mul );
            const uint8x8_t px = vqshrun_n_s16( acc, 2 );
            dst[y * w + x] = vget_lane_u32( vreinterpret_u32_u8( px ), 0 ) | ( a << 24 );
            acc = vaddq_s16( acc, chco );
        }
        acc = vaddq_s16( acc, cvco );
    }
#elif defined __SSE4_1__
    // Lanes 0-2 hold R, G, B as 16-bit values; the alpha lane stays zero and the
    // decoded alpha is OR-ed in afterwards.
    const __m128i chco = _mm_setr_epi16( hr, hg, hb, 0, 0, 0, 0, 0 );
    const __m128i cvco = _mm_setr_epi16( vr - 4 * hr, vg - 4 * hg, vb - 4 * hb, 0, 0, 0, 0, 0 );
    __m128i acc = _mm_setr_epi16( r00, g00, b00, 0, 0, 0, 0, 0 );

    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const auto amod = tbl[( alpha >> ( 45 - y * 3 - x * 12 ) ) & 0x7];
            const uint32_t a = clampu8( base + amod * mul );
            const __m128i scaled = _mm_srai_epi16( acc, 2 );
            const __m128i packed = _mm_packus_epi16( scaled, scaled );
            dst[y * w + x] = _mm_cvtsi128_si32( packed ) | ( a << 24 );
            acc = _mm_add_epi16( acc, chco );
        }
        acc = _mm_add_epi16( acc, cvco );
    }
#else
    for( int y = 0; y < 4; y++ )
    {
        for( int x = 0; x < 4; x++ )
        {
            const uint32_t r = ( x * hr + y * vr + r00 ) >> 2;
            const uint32_t g = ( x * hg + y * vg + g00 ) >> 2;
            const uint32_t b = ( x * hb + y * vb + b00 ) >> 2;
            const auto amod = tbl[( alpha >> ( 45 - y * 3 - x * 12 ) ) & 0x7];
            const uint32_t a = clampu8( base + amod * mul );

            const bool needsClamp = ( ( r | g | b ) & ~0xFF ) != 0;
            if( needsClamp )
            {
                const auto rc = clampu8( r );
                const auto gc = clampu8( g );
                const auto bc = clampu8( b );
                dst[y * w + x] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
            }
            else
            {
                // All three channels already fit in 8 bits.
                dst[y * w + x] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
            }
        }
    }
#endif
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reverses the byte order inside each 32-bit half of `d` independently; the
// two halves keep their positions in memory.
static etcpak_force_inline uint64_t ConvertByteOrder( uint64_t d )
{
    uint32_t halves[2];
    memcpy( halves, &d, sizeof( halves ) );

    for( auto& half : halves )
    {
        half = _bswap( half );
    }

    uint64_t swapped;
    memcpy( &swapped, halves, sizeof( swapped ) );
    return swapped;
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 64-bit colour block into a 4x4 tile of opaque pixels.
//
// The block is first byte-swapped per 32-bit half. Bit 1 selects between
// differential coding (5-bit base plus signed 3-bit delta) and individual
// coding (two 4-bit colours). In differential coding, a second colour that
// leaves the 0..31 range in red, green or blue redirects decoding to the
// T, H or planar decoder respectively.
//
//   d   - raw block as loaded from memory
//   dst - top-left output pixel; pixels are packed R | G << 8 | B << 16 | 0xFF << 24
//   w   - distance between two output rows, in pixels
static etcpak_force_inline void DecodeRGBPart( uint64_t d, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );

    // 8-bit base colours of the two sub-blocks.
    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        const uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        const uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        const uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Sign-extend the 3-bit deltas. The left shift is done on an unsigned
        // value: shifting a negative (or overflowing) signed value left is
        // undefined behaviour before C++20. The following right shift on the
        // signed result is expected to be arithmetic.
        const uint32_t low = static_cast<uint32_t>( d );
        const int32_t dr = static_cast<int32_t>( low << 5 ) >> 29;
        const int32_t dg = static_cast<int32_t>( low << 13 ) >> 29;
        const int32_t db = static_cast<int32_t>( low << 21 ) >> 29;

        const int32_t r1 = int32_t(r0) + dr;
        const int32_t g1 = int32_t(g0) + dg;
        const int32_t b1 = int32_t(b0) + db;

        // T mode
        if( (r1 < 0) || (r1 > 31) )
        {
            DecodeT( d, dst, w );
            return;
        }

        // H mode
        if( (g1 < 0) || (g1 > 31) )
        {
            DecodeH( d, dst, w );
            return;
        }

        // P mode
        if( (b1 < 0) || (b1 > 31) )
        {
            DecodePlanar( d, dst, w );
            return;
        }

        // Expand 5-bit channels to 8 bits by replicating the top bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Individual coding: expand 4-bit channels to 8 bits by replicating the nibble.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8  ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4  ) | ( ( d & 0x00000F00 ) >> 8  );
    }

    // Modifier-table row for each sub-block.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The two 16-bit selector planes; each is spread so that its bits land on
    // every second bit position, then both are interleaved into 2-bit indices.
    uint32_t lsbPlane = ( d >> 32 ) & 0xFFFF;
    uint32_t msbPlane = ( d >> 48 );

    lsbPlane = ( lsbPlane | ( lsbPlane << 8 ) ) & 0x00FF00FF;
    lsbPlane = ( lsbPlane | ( lsbPlane << 4 ) ) & 0x0F0F0F0F;
    lsbPlane = ( lsbPlane | ( lsbPlane << 2 ) ) & 0x33333333;
    lsbPlane = ( lsbPlane | ( lsbPlane << 1 ) ) & 0x55555555;

    msbPlane = ( msbPlane | ( msbPlane << 8 ) ) & 0x00FF00FF;
    msbPlane = ( msbPlane | ( msbPlane << 4 ) ) & 0x0F0F0F0F;
    msbPlane = ( msbPlane | ( msbPlane << 2 ) ) & 0x33333333;
    msbPlane = ( msbPlane | ( msbPlane << 1 ) ) & 0x55555555;

    uint32_t idx = lsbPlane | ( msbPlane << 1 );

    if( d & 0x1 )
    {
        // Flip bit set: the sub-block is chosen by the row (j / 2).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Flip bit clear: the sub-block is chosen by the column (i / 2).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 64-bit colour block plus one 64-bit alpha block into a 4x4
// tile of RGBA pixels.
//
// The colour word is byte-swapped per 32-bit half, the alpha word as a whole.
// Bit 1 of the colour word selects between differential coding (5-bit base
// plus signed 3-bit delta) and individual coding (two 4-bit colours). In
// differential coding, a second colour that leaves the 0..31 range in red,
// green or blue redirects decoding to the T, H or planar decoder respectively.
//
//   d     - raw colour block as loaded from memory
//   alpha - raw alpha block as loaded from memory; after the swap, bits 56-63
//           are the base value, 52-55 the multiplier, 48-51 the modifier-table
//           row and 0-47 sixteen 3-bit selectors
//   dst   - top-left output pixel; pixels are packed R | G << 8 | B << 16 | A << 24
//   w     - distance between two output rows, in pixels
static etcpak_force_inline void DecodeRGBAPart( uint64_t d, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );
    alpha = _bswap64( alpha );

    // 8-bit base colours of the two sub-blocks.
    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        const uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        const uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        const uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Sign-extend the 3-bit deltas. The left shift is done on an unsigned
        // value: shifting a negative (or overflowing) signed value left is
        // undefined behaviour before C++20. The following right shift on the
        // signed result is expected to be arithmetic.
        const uint32_t low = static_cast<uint32_t>( d );
        const int32_t dr = static_cast<int32_t>( low << 5 ) >> 29;
        const int32_t dg = static_cast<int32_t>( low << 13 ) >> 29;
        const int32_t db = static_cast<int32_t>( low << 21 ) >> 29;

        const int32_t r1 = int32_t(r0) + dr;
        const int32_t g1 = int32_t(g0) + dg;
        const int32_t b1 = int32_t(b0) + db;

        // T mode
        if( (r1 < 0) || (r1 > 31) )
        {
            DecodeTAlpha( d, alpha, dst, w );
            return;
        }

        // H mode
        if( (g1 < 0) || (g1 > 31) )
        {
            DecodeHAlpha( d, alpha, dst, w );
            return;
        }

        // P mode
        if( (b1 < 0) || (b1 > 31) )
        {
            DecodePlanarAlpha( d, alpha, dst, w );
            return;
        }

        // Expand 5-bit channels to 8 bits by replicating the top bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Individual coding: expand 4-bit channels to 8 bits by replicating the nibble.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8  ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4  ) | ( ( d & 0x00000F00 ) >> 8  );
    }

    // Modifier-table row for each sub-block.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The two 16-bit selector planes; each is spread so that its bits land on
    // every second bit position, then both are interleaved into 2-bit indices.
    uint32_t lsbPlane = ( d >> 32 ) & 0xFFFF;
    uint32_t msbPlane = ( d >> 48 );

    lsbPlane = ( lsbPlane | ( lsbPlane << 8 ) ) & 0x00FF00FF;
    lsbPlane = ( lsbPlane | ( lsbPlane << 4 ) ) & 0x0F0F0F0F;
    lsbPlane = ( lsbPlane | ( lsbPlane << 2 ) ) & 0x33333333;
    lsbPlane = ( lsbPlane | ( lsbPlane << 1 ) ) & 0x55555555;

    msbPlane = ( msbPlane | ( msbPlane << 8 ) ) & 0x00FF00FF;
    msbPlane = ( msbPlane | ( msbPlane << 4 ) ) & 0x0F0F0F0F;
    msbPlane = ( msbPlane | ( msbPlane << 2 ) ) & 0x33333333;
    msbPlane = ( msbPlane | ( msbPlane << 1 ) ) & 0x55555555;

    uint32_t idx = lsbPlane | ( msbPlane << 1 );

    // Alpha parameters shared by all sixteen pixels.
    const int32_t base = alpha >> 56;
    const int32_t mul = ( alpha >> 52 ) & 0xF;
    const auto atbl = g_alpha[( alpha >> 48 ) & 0xF];

    if( d & 0x1 )
    {
        // Flip bit set: the sub-block is chosen by the row (j / 2).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Flip bit clear: the sub-block is chosen by the column (i / 2).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes one 64-bit single-channel block into a 4x4 tile. The decoded value
// goes to the lowest byte of each pixel, the top byte is 0xFF and the two
// middle bytes are zero.
//
//   r   - raw block as loaded from memory (byte-swapped here)
//   dst - top-left output pixel
//   w   - distance between two output rows, in pixels
static etcpak_force_inline void DecodeRPart( uint64_t r, uint32_t* dst, uint32_t w )
{
    const uint64_t bits = _bswap64( r );

    // The value is computed at 8x scale (with +4 added to the base) and
    // divided by 8 before clamping.
    const int32_t base = ( bits >> 56 )*8+4;
    const int32_t mul = ( bits >> 52 ) & 0xF;
    const auto atbl = g_alpha[( bits >> 48 ) & 0xF];
    const auto scale = g_alpha11Mul[mul];

    // Sixteen 3-bit selectors, column by column, first pixel in the highest bits.
    int shift = 45;
    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            const auto amod = atbl[( bits >> shift ) & 0x7];
            const uint32_t rc = clampu8( ( base + amod * scale )/8 );
            dst[row*w+col] = rc | 0xFF000000;
            shift -= 3;
        }
    }
}
 | 
			
		||||
 | 
			
		||||
// Decodes two 64-bit single-channel blocks into a 4x4 tile. The first
// channel goes to the lowest byte of each pixel, the second to the next
// byte; the third byte is zero and the top byte is 0xFF.
//
//   r, g - raw blocks as loaded from memory (byte-swapped here)
//   dst  - top-left output pixel
//   w    - distance between two output rows, in pixels
static etcpak_force_inline void DecodeRGPart( uint64_t r, uint64_t g, uint32_t* dst, uint32_t w )
{
    const uint64_t rBits = _bswap64( r );
    const uint64_t gBits = _bswap64( g );

    // Values are computed at 8x scale (with +4 added to the base) and divided
    // by 8 before clamping.
    const int32_t rbase = ( rBits >> 56 )*8+4;
    const int32_t rmul = ( rBits >> 52 ) & 0xF;
    const auto rtbl = g_alpha[( rBits >> 48 ) & 0xF];
    const auto rscale = g_alpha11Mul[rmul];

    const int32_t gbase = ( gBits >> 56 )*8+4;
    const int32_t gmul = ( gBits >> 52 ) & 0xF;
    const auto gtbl = g_alpha[( gBits >> 48 ) & 0xF];
    const auto gscale = g_alpha11Mul[gmul];

    // Sixteen 3-bit selectors per channel, column by column, first pixel in
    // the highest bits.
    int shift = 45;
    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            const auto rmod = rtbl[( rBits >> shift ) & 0x7];
            const uint32_t rc = clampu8( ( rbase + rmod * rscale )/8 );

            const auto gmod = gtbl[( gBits >> shift ) & 0x7];
            const uint32_t gc = clampu8( ( gbase + gmod * gscale )/8 );

            dst[row*w+col] = rc | (gc << 8) | 0xFF000000;
            shift -= 3;
        }
    }
}
 | 
			
		||||
 | 
			
		||||
void DecodeRBlock( const void* src, void* dst, size_t width )
 | 
			
		||||
{
 | 
			
		||||
	uint64_t* srcPtr = (uint64_t*)src;
 | 
			
		||||
    uint64_t r = *srcPtr++;
 | 
			
		||||
    DecodeRPart( r, (uint32_t*)dst, width );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void DecodeRGBlock( const void* src, void* dst, size_t width )
 | 
			
		||||
{
 | 
			
		||||
	uint64_t* srcPtr = (uint64_t*)src;
 | 
			
		||||
    uint64_t r = *srcPtr++;
 | 
			
		||||
    uint64_t g = *srcPtr++;
 | 
			
		||||
    DecodeRGPart( r, g, (uint32_t*)dst, width );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void DecodeRGBBlock( const void* src, void* dst, size_t width )
 | 
			
		||||
{
 | 
			
		||||
	uint64_t* srcPtr = (uint64_t*)src;
 | 
			
		||||
    uint64_t d = *srcPtr++;
 | 
			
		||||
    DecodeRGBPart( d, (uint32_t*)dst, width );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void DecodeRGBABlock( const void* src, void* dst, size_t width )
 | 
			
		||||
{
 | 
			
		||||
	uint64_t* srcPtr = (uint64_t*)src;
 | 
			
		||||
    uint64_t a = *srcPtr++;
 | 
			
		||||
    uint64_t d = *srcPtr++;
 | 
			
		||||
    DecodeRGBAPart( d, a, (uint32_t*)dst, width );
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										12
									
								
								thirdparty/etcpak/DecodeRGB.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								thirdparty/etcpak/DecodeRGB.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,12 @@
 | 
			
		||||
// Block decoders: each call decodes a single compressed block into a 4x4
// tile of 32-bit pixels.
//
// Common parameters:
//   src   - compressed block to read
//   dst   - top-left pixel of the output tile
//   width - distance between two output rows, in pixels
//
// Guard name avoids the double underscore, which is reserved for the
// implementation.
#ifndef DECODERGB_HPP
#define DECODERGB_HPP

#include <stddef.h>
#include <stdint.h>

// Single-channel block; reads 8 bytes from src.
void DecodeRBlock( const void* src, void* dst, size_t width );
// Two-channel block; reads 16 bytes from src.
void DecodeRGBlock( const void* src, void* dst, size_t width );
// Opaque colour block; reads 8 bytes from src.
void DecodeRGBBlock( const void* src, void* dst, size_t width );
// Colour block with alpha; reads 16 bytes from src (alpha first, then colour).
void DecodeRGBABlock( const void* src, void* dst, size_t width );

#endif // DECODERGB_HPP
 | 
			
		||||
		Reference in New Issue
	
	Block a user