You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-08 12:40:44 +00:00
embree: Update to 4.3.1
This commit is contained in:
committed by
Jakub Marcowski
parent
d2f9245ddc
commit
c43eab55a4
228
thirdparty/embree/kernels/common/device.cpp
vendored
228
thirdparty/embree/kernels/common/device.cpp
vendored
@@ -2,6 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "device.h"
|
||||
|
||||
#include "../../common/tasking/taskscheduler.h"
|
||||
|
||||
#include "../hash.h"
|
||||
#include "scene_triangle_mesh.h"
|
||||
#include "scene_user_geometry.h"
|
||||
@@ -19,9 +22,12 @@
|
||||
#include "../bvh/bvh4_factory.h"
|
||||
#include "../bvh/bvh8_factory.h"
|
||||
|
||||
#include "../../common/tasking/taskscheduler.h"
|
||||
#include "../../common/sys/alloc.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
# include "../level_zero/ze_wrapper.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
|
||||
@@ -30,13 +36,18 @@ namespace embree
|
||||
ssize_t Device::debug_int2 = 0;
|
||||
ssize_t Device::debug_int3 = 0;
|
||||
|
||||
DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
|
||||
|
||||
static MutexSys g_mutex;
|
||||
static std::map<Device*,size_t> g_cache_size_map;
|
||||
static std::map<Device*,size_t> g_num_threads_map;
|
||||
|
||||
struct TaskArena
|
||||
{
|
||||
#if USE_TASK_ARENA
|
||||
std::unique_ptr<tbb::task_arena> arena;
|
||||
#endif
|
||||
};
|
||||
|
||||
Device::Device (const char* cfg)
|
||||
Device::Device (const char* cfg) : arena(new TaskArena())
|
||||
{
|
||||
/* check that CPU supports lowest ISA */
|
||||
if (!hasISA(ISA)) {
|
||||
@@ -48,12 +59,12 @@ namespace embree
|
||||
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
@@ -66,11 +77,7 @@ namespace embree
|
||||
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
|
||||
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
|
||||
#if defined(__APPLE__)
|
||||
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4
|
||||
#else
|
||||
case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break;
|
||||
#endif
|
||||
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
|
||||
}
|
||||
|
||||
/* initialize global state */
|
||||
@@ -126,13 +133,6 @@ namespace embree
|
||||
|
||||
/* setup tasking system */
|
||||
initTaskingSystem(numThreads);
|
||||
|
||||
/* ray stream SOA to AOS conversion */
|
||||
#if defined(EMBREE_RAY_PACKETS)
|
||||
RayStreamFilterFuncsType rayStreamFilterFuncs;
|
||||
SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(enabled_cpu_features,rayStreamFilterFuncs);
|
||||
rayStreamFilters = rayStreamFilterFuncs();
|
||||
#endif
|
||||
}
|
||||
|
||||
Device::~Device ()
|
||||
@@ -174,6 +174,9 @@ namespace embree
|
||||
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
|
||||
v += "backfacecullingcurves ";
|
||||
#endif
|
||||
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
|
||||
v += "backfacecullingspheres ";
|
||||
#endif
|
||||
#if defined(EMBREE_FILTER_FUNCTION)
|
||||
v += "intersection_filter ";
|
||||
#endif
|
||||
@@ -367,7 +370,7 @@ namespace embree
|
||||
#if USE_TASK_ARENA
|
||||
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
|
||||
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
|
||||
arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
|
||||
arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -386,10 +389,23 @@ namespace embree
|
||||
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
|
||||
}
|
||||
#if USE_TASK_ARENA
|
||||
arena.reset();
|
||||
arena->arena.reset();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Device::execute(bool join, const std::function<void()>& func)
|
||||
{
|
||||
#if USE_TASK_ARENA
|
||||
if (join) {
|
||||
arena->arena->execute(func);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
func();
|
||||
}
|
||||
}
|
||||
|
||||
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
|
||||
{
|
||||
/* hidden internal properties */
|
||||
@@ -450,12 +466,6 @@ namespace embree
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_RAY_PACKETS)
|
||||
case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
|
||||
#else
|
||||
@@ -474,6 +484,12 @@ namespace embree
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_COMPACT_POLYS)
|
||||
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
|
||||
#else
|
||||
@@ -557,4 +573,158 @@ namespace embree
|
||||
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
|
||||
};
|
||||
}
|
||||
|
||||
void* Device::malloc(size_t size, size_t align) {
|
||||
return alignedMalloc(size,align);
|
||||
}
|
||||
|
||||
void Device::free(void* ptr) {
|
||||
alignedFree(ptr);
|
||||
}
|
||||
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
|
||||
: Device(cfg), gpu_context(sycl_context)
|
||||
{
|
||||
/* initialize ZeWrapper */
|
||||
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
|
||||
|
||||
/* take first device as default device */
|
||||
auto devices = gpu_context.get_devices();
|
||||
if (devices.size() == 0)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
|
||||
gpu_device = devices[0];
|
||||
|
||||
/* check if RTAS build extension is available */
|
||||
sycl::platform platform = gpu_device.get_platform();
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
|
||||
|
||||
uint32_t count = 0;
|
||||
std::vector<ze_driver_extension_properties_t> extensions;
|
||||
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
|
||||
|
||||
extensions.resize(count);
|
||||
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
|
||||
|
||||
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
|
||||
bool ze_rtas_builder = false;
|
||||
for (uint32_t i=0; i<extensions.size(); i++)
|
||||
{
|
||||
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
|
||||
ze_rtas_builder = true;
|
||||
}
|
||||
if (!ze_rtas_builder)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
|
||||
|
||||
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
|
||||
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
|
||||
#else
|
||||
ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
|
||||
#endif
|
||||
|
||||
if (State::verbosity(1))
|
||||
{
|
||||
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
|
||||
std::cout << " Internal RTAS Builder" << std::endl;
|
||||
else
|
||||
std::cout << " Level Zero RTAS Builder" << std::endl;
|
||||
}
|
||||
|
||||
/* check if extension library can get loaded */
|
||||
ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
|
||||
result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
|
||||
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
|
||||
if (result == ZE_RESULT_SUCCESS)
|
||||
ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
|
||||
|
||||
gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
|
||||
gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
|
||||
|
||||
if (State::verbosity(1))
|
||||
{
|
||||
sycl::platform platform = gpu_context.get_platform();
|
||||
std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
|
||||
std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
|
||||
std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
|
||||
std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
|
||||
}
|
||||
|
||||
DeviceGPU::~DeviceGPU()
|
||||
{
|
||||
rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
|
||||
}
|
||||
|
||||
void DeviceGPU::enter() {
|
||||
enableUSMAllocEmbree(&gpu_context,&gpu_device);
|
||||
}
|
||||
|
||||
void DeviceGPU::leave() {
|
||||
disableUSMAllocEmbree();
|
||||
}
|
||||
|
||||
void* DeviceGPU::malloc(size_t size, size_t align) {
|
||||
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
|
||||
}
|
||||
|
||||
void DeviceGPU::free(void* ptr) {
|
||||
alignedSYCLFree(&gpu_context,ptr);
|
||||
}
|
||||
|
||||
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
|
||||
gpu_device = sycl_device_in;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
|
||||
: device((Device*)hdevice)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
|
||||
: device(((Scene*)hscene)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
|
||||
: device(((Geometry*)hgeometry)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
|
||||
: device(((Buffer*)hbuffer)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::~DeviceEnterLeave() {
|
||||
device->leave();
|
||||
device->refDec();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user