1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-16 14:00:40 +00:00

Upgrade Embree to the latest official release.

Since Embree v3.13.0 supports AARCH64, switch back to the
official repo instead of using Embree-aarch64.

`thirdparty/embree/patches/godot-changes.patch` should now contain
an accurate diff of the changes made to the library.

(cherry picked from commit 767e374dce)
This commit is contained in:
jfons
2021-05-20 12:49:33 +02:00
committed by Rémi Verschelde
parent 2660fafcc0
commit a69cc9f13d
343 changed files with 12085 additions and 10390 deletions

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,56 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_filter.h"
#include "../sys/regression.h"
#include <map>
namespace embree
{
  /* Regression test for parallel_filter: filters a random tail range of random
     arrays of growing size and cross-checks the surviving elements against a
     sequentially built histogram of the accepted values. */
  struct parallel_filter_regression_test : public RegressionTest
  {
    parallel_filter_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when every filtered range passed all checks. */
    bool run ()
    {
      bool ok = true;

      /* accept only values whose two lowest bits are clear */
      auto keep = [&]( uint32_t value ) { return (value & 0x3) == 0; };

      for (size_t count=10; count<1000000; count=size_t(2.1*count))
      {
        /* random start of the range that gets filtered */
        const size_t first = rand() % count;

        /* fill the input with random numbers */
        std::vector<uint32_t> data(count);
        std::map<uint32_t,int> histogram;
        for (auto& value : data) value = rand();

        /* count how often each accepted value occurs in [first,count) */
        for (size_t idx=first; idx<count; idx++) {
          if (!keep(data[idx])) continue;
          ++histogram[data[idx]];
        }

        /* filter the range in parallel (sequential_filter(data.data(),first,count,keep) is the reference variant) */
        const size_t last = parallel_filter(data.data(),first,count,size_t(1024),keep);

        /* every surviving element has to satisfy the predicate ... */
        for (size_t idx=first; idx<last; idx++) {
          ok &= keep(data[idx]);
          --histogram[data[idx]];
        }

        /* ... and has to occur exactly as often as in the input range */
        for (size_t idx=first; idx<last; idx++)
          ok &= (histogram[data[idx]] == 0);
      }
      return ok;
    }
  };

  parallel_filter_regression_test parallel_filter_regression("parallel_filter_regression");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,48 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for the ranged parallel_for: computes the sum of squares of
     [0,N) in parallel for growing N and compares it with a sequential reference. */
  struct parallel_for_regression_test : public RegressionTest
  {
    parallel_for_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true only if every repetition for every problem size matched the
       sequential reference sum. */
    bool run ()
    {
      bool passed = true;

      /* number of times the parallel computation is repeated per problem size */
      const size_t M = 10;

      for (size_t N=10; N<10000000; N=size_t(2.1*N))
      {
        /* sequentially calculate sum of squares */
        size_t sum0 = 0;
        for (size_t i=0; i<N; i++) {
          sum0 += i*i;
        }

        /* parallel calculation of sum of squares */
        for (size_t m=0; m<M; m++)
        {
          std::atomic<size_t> sum1(0);
          parallel_for( size_t(0), size_t(N), size_t(1024), [&](const range<size_t>& r)
          {
            /* accumulate locally, then publish with a single atomic add */
            size_t s = 0;
            for (size_t i=r.begin(); i<r.end(); i++)
              s += i*i;
            sum1 += s;
          });

          /* accumulate the verdict: plain assignment here would let a later
             successful repetition hide an earlier failure */
          passed &= (sum0 == sum1);
        }
      }
      return passed;
    }
  };

  parallel_for_regression_test parallel_for_regression("parallel_for_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -8,12 +8,6 @@
#include "../math/math.h"
#include "../math/range.h"
#if defined(TASKING_GCD) && defined(BUILD_IOS)
#include <dispatch/dispatch.h>
#include <algorithm>
#include <type_traits>
#endif
namespace embree
{
/* parallel_for without range */
@@ -29,28 +23,10 @@ namespace embree
if (!TaskScheduler::wait())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
}
#elif defined(TASKING_GCD) && defined(BUILD_IOS)
const size_t baselineNumBlocks = (TaskScheduler::threadCount() > 1)? TaskScheduler::threadCount() : 1;
const size_t length = N;
const size_t blockSize = (length + baselineNumBlocks-1) / baselineNumBlocks;
const size_t numBlocks = (length + blockSize-1) / blockSize;
dispatch_apply(numBlocks, DISPATCH_APPLY_AUTO, ^(size_t currentBlock) {
const size_t start = (currentBlock * blockSize);
const size_t blockLength = std::min(length - start, blockSize);
const size_t end = start + blockLength;
for(size_t i=start; i < end; i++)
{
func(i);
}
});
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
@@ -60,7 +36,7 @@ namespace embree
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
@@ -69,7 +45,7 @@ namespace embree
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#endif
@@ -92,28 +68,9 @@ namespace embree
if (!TaskScheduler::wait())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#elif defined(TASKING_GCD) && defined(BUILD_IOS)
const size_t baselineNumBlocks = (TaskScheduler::threadCount() > 1)? 4*TaskScheduler::threadCount() : 1;
const size_t length = last - first;
const size_t blockSizeByThreads = (length + baselineNumBlocks-1) / baselineNumBlocks;
size_t blockSize = std::max<size_t>(minStepSize,blockSizeByThreads);
blockSize += blockSize % 4;
const size_t numBlocks = (length + blockSize-1) / blockSize;
dispatch_apply(numBlocks, DISPATCH_APPLY_AUTO, ^(size_t currentBlock) {
const size_t start = first + (currentBlock * blockSize);
const size_t end = std::min<size_t>(last, start + blockSize);
func( embree::range<Index>(start,end) );
});
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
@@ -123,7 +80,7 @@ namespace embree
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#else
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
@@ -132,7 +89,7 @@ namespace embree
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#endif
@@ -167,7 +124,7 @@ namespace embree
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
@@ -176,7 +133,7 @@ namespace embree
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#endif
}
@@ -192,10 +149,10 @@ namespace embree
func(i);
},ap,context);
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
@@ -203,7 +160,7 @@ namespace embree
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
abort();
// -- GODOT end --
#endif
}

View File

@@ -1,63 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for_for.h"
#include "../sys/regression.h"
namespace embree
{
struct parallel_for_for_regression_test : public RegressionTest
{
parallel_for_for_regression_test(const char* name) : RegressionTest(name) {
registerRegressionTest(this);
}
bool run ()
{
bool passed = true;
/* create vector with random numbers */
size_t sum0 = 0;
size_t K = 0;
const size_t M = 1000;
std::vector<std::vector<size_t>* > array2(M);
for (size_t i=0; i<M; i++) {
const size_t N = rand() % 1024;
K+=N;
array2[i] = new std::vector<size_t>(N);
for (size_t j=0; j<N; j++)
sum0 += (*array2[i])[j] = rand();
}
/* array to test global index */
std::vector<atomic<size_t>> verify_k(K);
for (size_t i=0; i<K; i++) verify_k[i].store(0);
/* add all numbers using parallel_for_for */
std::atomic<size_t> sum1(0);
parallel_for_for( array2, size_t(1), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k) -> size_t
{
size_t s = 0;
for (size_t i=r.begin(); i<r.end(); i++) {
s += (*v)[i];
verify_k[k++]++;
}
sum1 += s;
return sum1;
});
passed &= (sum0 == sum1);
/* check global index */
for (size_t i=0; i<K; i++)
passed &= (verify_k[i] == 1);
/* delete vectors again */
for (size_t i=0; i<array2.size(); i++)
delete array2[i];
return passed;
}
};
parallel_for_for_regression_test parallel_for_for_regression("parallel_for_for_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,85 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for_for_prefix_sum.h"
#include "../sys/regression.h"
namespace embree
{
struct parallel_for_for_prefix_sum_regression_test : public RegressionTest
{
parallel_for_for_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
registerRegressionTest(this);
}
bool run ()
{
bool passed = true;
/* create vector with random numbers */
const size_t M = 10;
std::vector<atomic<size_t>> flattened;
typedef std::vector<std::vector<size_t>* > ArrayArray;
ArrayArray array2(M);
size_t K = 0;
for (size_t i=0; i<M; i++) {
const size_t N = rand() % 10;
K += N;
array2[i] = new std::vector<size_t>(N);
for (size_t j=0; j<N; j++)
(*array2[i])[j] = rand() % 10;
}
/* array to test global index */
std::vector<atomic<size_t>> verify_k(K);
for (size_t i=0; i<K; i++) verify_k[i].store(0);
ParallelForForPrefixSumState<size_t> state(array2,size_t(1));
/* dry run only counts */
size_t S = parallel_for_for_prefix_sum0( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i) -> size_t
{
size_t s = 0;
for (size_t i=r.begin(); i<r.end(); i++) {
s += (*v)[i];
verify_k[k++]++;
}
return s;
}, [](size_t v0, size_t v1) { return v0+v1; });
/* create properly sized output array */
flattened.resize(S);
for (auto& a : flattened) a.store(0);
/* now we actually fill the flattened array */
parallel_for_for_prefix_sum1( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i, const size_t base) -> size_t
{
size_t s = 0;
for (size_t i=r.begin(); i<r.end(); i++) {
for (size_t j=0; j<(*v)[i]; j++) {
flattened[base+s+j]++;
}
s += (*v)[i];
verify_k[k++]++;
}
return s;
}, [](size_t v0, size_t v1) { return v0+v1; });
/* check global index */
for (size_t i=0; i<K; i++)
passed &= (verify_k[i] == 2);
/* check if each element was assigned exactly once */
for (size_t i=0; i<flattened.size(); i++)
passed &= (flattened[i] == 1);
/* delete arrays again */
for (size_t i=0; i<array2.size(); i++)
delete array2[i];
return passed;
}
};
parallel_for_for_prefix_sum_regression_test parallel_for_for_prefix_sum_regression("parallel_for_for_prefix_sum_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,47 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_map.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for parallel_map: builds a map from shuffled, unique, even
     keys to random values and checks that every key resolves to its value while
     the odd neighbour of every key is absent. */
  struct parallel_map_regression_test : public RegressionTest
  {
    parallel_map_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when all present-key and absent-key lookups behave as expected. */
    bool run ()
    {
      bool passed = true;

      /* create key/value vectors with random numbers */
      const size_t N = 10000;
      std::vector<uint32_t> keys(N);
      std::vector<uint32_t> vals(N);

      /* keys are even (factor 2), so key+1 can never be a key itself */
      for (size_t i=0; i<N; i++) keys[i] = 2*unsigned(i)*647382649;
      for (size_t i=0; i<N; i++) std::swap(keys[i],keys[rand()%N]);

      /* multiply in unsigned arithmetic: 2*rand() on int overflows (undefined
         behaviour) whenever rand() returns more than INT_MAX/2 */
      for (size_t i=0; i<N; i++) vals[i] = 2u*uint32_t(rand());

      /* create map */
      parallel_map<uint32_t,uint32_t> map;
      map.init(keys,vals);

      /* check that all keys are properly mapped */
      for (size_t i=0; i<N; i++) {
        const uint32_t* val = map.lookup(keys[i]);
        passed &= val && (*val == vals[i]);
      }

      /* check that these keys are not in the map */
      for (size_t i=0; i<N; i++) {
        passed &= !map.lookup(keys[i]+1);
      }

      return passed;
    }
  };

  parallel_map_regression_test parallel_map_regression("parallel_map_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,53 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_partition.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for parallel_partitioning: partitions a random permutation
     of 0..N-1 around a random split value and verifies the returned split
     position, both reduction sums and the placement of every element. */
  struct parallel_partition_regression_test : public RegressionTest
  {
    parallel_partition_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when all 100 random partitioning rounds verified correctly. */
    bool run ()
    {
      bool ok = true;
      for (size_t iter=0; iter<100; iter++)
      {
        /* create random permutation */
        const size_t count = std::rand() % 1000000;
        std::vector<unsigned> perm(count);
        for (unsigned k=0; k<count; k++) perm[k] = k;
        for (auto& elem : perm) std::swap(elem,perm[std::rand()%perm.size()]);
        const size_t pivot = std::rand() % (count+1);

        /* perform parallel partitioning: values below the pivot go left */
        size_t sumLeft = 0, sumRight = 0;
        const size_t mid = parallel_partitioning(perm.data(),0,perm.size(),0,sumLeft,sumRight,
                                                 [&] ( size_t v ) { return v < pivot; },
                                                 [] ( size_t& acc, unsigned v) { acc += v; },
                                                 [] ( size_t& acc, size_t v) { acc += v; },
                                                 128);

        /* verify split position and both sums (left holds 0..pivot-1) */
        ok &= (mid == pivot);
        ok &= (sumLeft == pivot*(pivot-1)/2);
        ok &= (sumRight == count*(count-1)/2-sumLeft);

        /* verify that every element ended up on the correct side */
        for (size_t idx=0; idx<pivot; idx++)
          ok &= (perm[idx] < pivot);
        for (size_t idx=pivot; idx<count; idx++)
          ok &= (perm[idx] >= pivot);
      }
      return ok;
    }
  };

  parallel_partition_regression_test parallel_partition_regression("parallel_partition_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,48 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_prefix_sum.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for parallel_prefix_sum: runs the prefix sum over random
     arrays of growing size and checks both the returned total and every
     exclusive prefix against a sequential computation. */
  struct parallel_prefix_sum_regression_test : public RegressionTest
  {
    parallel_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when totals and all prefixes matched for every array size. */
    bool run ()
    {
      bool ok = true;
      const size_t repeats = 10;
      for (size_t count=10; count<10000000; count=size_t(2.1*count))
      {
        /* fill the input with random numbers and track the 32-bit reference total */
        uint32_t expected = 0;
        std::vector<uint32_t> input(count);
        for (auto& value : input) {
          value = rand();
          expected += value;
        }

        /* calculate the parallel prefix sum several times */
        std::vector<uint32_t> output(count);
        for (auto& value : output) value = 0;
        for (size_t rep=0; rep<repeats; rep++) {
          const uint32_t total = parallel_prefix_sum(input,output,count,0,std::plus<uint32_t>());
          ok &= (expected == total);
        }

        /* output[idx] has to equal the sum of all inputs before idx */
        size_t running = 0;
        for (size_t idx=0; idx<count; idx++) {
          ok &= ((uint32_t)running == output[idx]);
          running += input[idx];
        }
      }
      return ok;
    }
  };

  parallel_prefix_sum_regression_test parallel_prefix_sum_regression("parallel_prefix_sum_regression");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,49 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_reduce.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for parallel_reduce: computes the sum of squares of [0,N)
     with a parallel reduction for growing N and compares it with a sequential
     reference. */
  struct parallel_reduce_regression_test : public RegressionTest
  {
    parallel_reduce_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true only if every repetition for every problem size matched the
       sequential reference sum. */
    bool run ()
    {
      bool passed = true;

      /* number of times the reduction is repeated per problem size */
      const size_t M = 10;

      for (size_t N=10; N<10000000; N=size_t(2.1*N))
      {
        /* sequentially calculate sum of squares */
        size_t sum0 = 0;
        for (size_t i=0; i<N; i++) {
          sum0 += i*i;
        }

        /* parallel calculation of sum of squares */
        for (size_t m=0; m<M; m++)
        {
          size_t sum1 = parallel_reduce( size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r) -> size_t
          {
            size_t s = 0;
            for (size_t i=r.begin(); i<r.end(); i++)
              s += i*i;
            return s;
          },
          [](const size_t v0, const size_t v1) {
            return v0+v1;
          });

          /* accumulate the verdict: plain assignment here would let a later
             successful repetition hide an earlier failure */
          passed &= (sum0 == sum1);
        }
      }
      return passed;
    }
  };

  parallel_reduce_regression_test parallel_reduce_regression("parallel_reduce_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -43,7 +43,7 @@ namespace embree
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
#if defined(TASKING_INTERNAL) || (defined(TASKING_GCD) && defined(BUILD_IOS))
#if defined(TASKING_INTERNAL)
/* fast path for small number of iterations */
Index taskCount = (last-first+minStepSize-1)/minStepSize;

View File

@@ -1,43 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_set.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for parallel_set: builds a set from random even numbers and
     checks that every inserted number is found while the odd neighbour of every
     inserted number is not. */
  struct parallel_set_regression_test : public RegressionTest
  {
    parallel_set_regression_test(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when all membership and non-membership lookups behave as expected. */
    bool run ()
    {
      bool passed = true;

      /* create vector with random even numbers; multiply in unsigned arithmetic,
         since 2*rand() on int overflows (undefined behaviour) whenever rand()
         returns more than INT_MAX/2 */
      const size_t N = 10000;
      std::vector<uint32_t> unsorted(N);
      for (size_t i=0; i<N; i++) unsorted[i] = 2u*uint32_t(rand());

      /* create set from numbers */
      parallel_set<uint32_t> sorted;
      sorted.init(unsorted);

      /* check that all elements are in the set */
      for (size_t i=0; i<N; i++) {
        passed &= sorted.lookup(unsorted[i]);
      }

      /* check that these elements are not in the set (all members are even) */
      for (size_t i=0; i<N; i++) {
        passed &= !sorted.lookup(unsorted[i]+1);
      }

      return passed;
    }
  };

  parallel_set_regression_test parallel_set_regression("parallel_set_regression_test");
}

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

View File

@@ -1,50 +0,0 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_sort.h"
#include "../sys/regression.h"
namespace embree
{
  /* Regression test for radix_sort<Key>: sorts random arrays of growing size
     and verifies that the key checksum is preserved and the result is ordered. */
  template<typename Key>
  struct RadixSortRegressionTest : public RegressionTest
  {
    RadixSortRegressionTest(const char* name) : RegressionTest(name) {
      registerRegressionTest(this);
    }

    /* Returns true when every sorted array kept its checksum and is non-decreasing. */
    bool run ()
    {
      bool ok = true;
      const size_t repeats = 10;
      for (size_t count=10; count<1000000; count=size_t(2.1*count))
      {
        /* zeroed key array and scratch buffer, then random keys */
        std::vector<Key> keys(count);
        std::vector<Key> scratch(count);
        memset(keys.data(),0,count*sizeof(Key));
        memset(scratch.data(),0,count*sizeof(Key));
        for (auto& key : keys) key = uint64_t(rand())*uint64_t(rand());

        /* checksum before sorting */
        Key before = 0;
        for (size_t idx=0; idx<count; idx++) before += keys[idx];

        /* sort the numbers repeatedly */
        for (size_t rep=0; rep<repeats; rep++)
          radix_sort<Key>(keys.data(),scratch.data(),count);

        /* checksum after sorting has to be unchanged */
        Key after = 0;
        for (size_t idx=0; idx<count; idx++) after += keys[idx];
        ok &= (before == after);

        /* neighbouring keys have to be in non-decreasing order */
        for (size_t idx=1; idx<count; idx++)
          ok &= (keys[idx-1] <= keys[idx]);
      }
      return ok;
    }
  };

  RadixSortRegressionTest<uint32_t> test_u32("RadixSortRegressionTestU32");
  RadixSortRegressionTest<uint64_t> test_u64("RadixSortRegressionTestU64");
}

View File

@@ -1,13 +1,10 @@
// Copyright 2009-2020 Intel Corporation
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../simd/simd.h"
#include "parallel_for.h"
#if defined(TASKING_GCD) && defined(BUILD_IOS)
#include "../sys/alloc.h"
#endif
#include <algorithm>
namespace embree
@@ -323,7 +320,7 @@ namespace embree
#pragma nounroll
#endif
for (size_t i=startID; i<endID; i++) {
#if defined(__X86_64__) || defined(__aarch64__)
#if defined(__64BIT__)
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
const Key index = ((Key)src[i] >> shift) & mask;
@@ -385,7 +382,7 @@ namespace embree
#endif
for (size_t i=startID; i<endID; i++) {
const Ty elt = src[i];
#if defined(__X86_64__) || defined(__aarch64__)
#if defined(__64BIT__)
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
const size_t index = ((Key)src[i] >> shift) & mask;