1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-08 12:40:44 +00:00

embree: Update to 4.3.1

This commit is contained in:
Jakub Mateusz Marcowski
2024-02-24 12:40:55 +01:00
committed by Jakub Marcowski
parent d2f9245ddc
commit c43eab55a4
219 changed files with 12630 additions and 6085 deletions

View File

@@ -2,6 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
#include "device.h"
#include "../../common/tasking/taskscheduler.h"
#include "../hash.h"
#include "scene_triangle_mesh.h"
#include "scene_user_geometry.h"
@@ -19,9 +22,12 @@
#include "../bvh/bvh4_factory.h"
#include "../bvh/bvh8_factory.h"
#include "../../common/tasking/taskscheduler.h"
#include "../../common/sys/alloc.h"
#if defined(EMBREE_SYCL_SUPPORT)
# include "../level_zero/ze_wrapper.h"
#endif
namespace embree
{
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
@@ -30,13 +36,18 @@ namespace embree
ssize_t Device::debug_int2 = 0;
ssize_t Device::debug_int3 = 0;
DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
static MutexSys g_mutex;
static std::map<Device*,size_t> g_cache_size_map;
static std::map<Device*,size_t> g_num_threads_map;
struct TaskArena
{
#if USE_TASK_ARENA
std::unique_ptr<tbb::task_arena> arena;
#endif
};
Device::Device (const char* cfg)
Device::Device (const char* cfg) : arena(new TaskArena())
{
/* check that CPU supports lowest ISA */
if (!hasISA(ISA)) {
@@ -48,12 +59,12 @@ namespace embree
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
@@ -66,11 +77,7 @@ namespace embree
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
#if defined(__APPLE__)
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4
#else
case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break;
#endif
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
}
/* initialize global state */
@@ -126,13 +133,6 @@ namespace embree
/* setup tasking system */
initTaskingSystem(numThreads);
/* ray stream SOA to AOS conversion */
#if defined(EMBREE_RAY_PACKETS)
RayStreamFilterFuncsType rayStreamFilterFuncs;
SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(enabled_cpu_features,rayStreamFilterFuncs);
rayStreamFilters = rayStreamFilterFuncs();
#endif
}
Device::~Device ()
@@ -174,6 +174,9 @@ namespace embree
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
v += "backfacecullingcurves ";
#endif
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
v += "backfacecullingspheres ";
#endif
#if defined(EMBREE_FILTER_FUNCTION)
v += "intersection_filter ";
#endif
@@ -367,7 +370,7 @@ namespace embree
#if USE_TASK_ARENA
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
#endif
}
@@ -386,10 +389,23 @@ namespace embree
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
}
#if USE_TASK_ARENA
arena.reset();
arena->arena.reset();
#endif
}
void Device::execute(bool join, const std::function<void()>& func)
{
#if USE_TASK_ARENA
if (join) {
arena->arena->execute(func);
}
else
#endif
{
func();
}
}
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
{
/* hidden internal properties */
@@ -450,12 +466,6 @@ namespace embree
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
#endif
#if defined(EMBREE_RAY_PACKETS)
case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0;
#endif
#if defined(EMBREE_RAY_MASK)
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
#else
@@ -474,6 +484,12 @@ namespace embree
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
#endif
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
#endif
#if defined(EMBREE_COMPACT_POLYS)
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
#else
@@ -557,4 +573,158 @@ namespace embree
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
};
}
void* Device::malloc(size_t size, size_t align) {
return alignedMalloc(size,align);
}
void Device::free(void* ptr) {
alignedFree(ptr);
}
#if defined(EMBREE_SYCL_SUPPORT)
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
: Device(cfg), gpu_context(sycl_context)
{
/* initialize ZeWrapper */
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
/* take first device as default device */
auto devices = gpu_context.get_devices();
if (devices.size() == 0)
throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
gpu_device = devices[0];
/* check if RTAS build extension is available */
sycl::platform platform = gpu_device.get_platform();
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
uint32_t count = 0;
std::vector<ze_driver_extension_properties_t> extensions;
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
extensions.resize(count);
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
bool ze_rtas_builder = false;
for (uint32_t i=0; i<extensions.size(); i++)
{
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
ze_rtas_builder = true;
}
if (!ze_rtas_builder)
throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
#else
ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
#endif
if (State::verbosity(1))
{
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
std::cout << " Internal RTAS Builder" << std::endl;
else
std::cout << " Level Zero RTAS Builder" << std::endl;
}
/* check if extension library can get loaded */
ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
if (result == ZE_RESULT_SUCCESS)
ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
if (State::verbosity(1))
{
sycl::platform platform = gpu_context.get_platform();
std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
std::cout << std::endl;
}
dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
}
DeviceGPU::~DeviceGPU()
{
rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
}
void DeviceGPU::enter() {
enableUSMAllocEmbree(&gpu_context,&gpu_device);
}
void DeviceGPU::leave() {
disableUSMAllocEmbree();
}
void* DeviceGPU::malloc(size_t size, size_t align) {
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
}
void DeviceGPU::free(void* ptr) {
alignedSYCLFree(&gpu_context,ptr);
}
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
gpu_device = sycl_device_in;
}
#endif
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
: device((Device*)hdevice)
{
assert(device);
device->refInc();
device->enter();
}
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
: device(((Scene*)hscene)->device)
{
assert(device);
device->refInc();
device->enter();
}
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
: device(((Geometry*)hgeometry)->device)
{
assert(device);
device->refInc();
device->enter();
}
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
: device(((Buffer*)hbuffer)->device)
{
assert(device);
device->refInc();
device->enter();
}
DeviceEnterLeave::~DeviceEnterLeave() {
device->leave();
device->refDec();
}
}