Changeset View
Changeset View
Standalone View
Standalone View
intern/mikktspace/mikktspace.hh
- This file was added.
| /* SPDX-License-Identifier: Apache-2.0 | |||||
| * | |||||
| * Original C code: | |||||
| * Copyright 2011 by Morten S. Mikkelsen. | |||||
| * | |||||
| * C++ rewrite: | |||||
| * Copyright 2022 Blender Foundation. All rights reserved. | |||||
| */ | |||||
| /** \file | |||||
| * \ingroup mikktspace | |||||
| */ | |||||
| #include <algorithm> | |||||
| #include <cassert> | |||||
| #ifdef WITH_TBB | |||||
| # include <tbb/parallel_for.h> | |||||
brecht: We try to keep Blender building even when TBB is not available, so this should do the same. See… | |||||
| #endif | |||||
| #include "mikk_atomic_hash_set.hh" | |||||
| #include "mikk_float3.hh" | |||||
| #include "mikk_util.hh" | |||||
| namespace mikk { | |||||
| static constexpr uint UNSET_ENTRY = 0xffffffffu; | |||||
| template<typename Mesh> class Mikktspace { | |||||
| struct Triangle { | |||||
| /* Stores neighboring triangle for group assignment. */ | |||||
| std::array<uint, 3> neighbor; | |||||
| /* Stores assigned group of each vertex. */ | |||||
| std::array<uint, 3> group; | |||||
| /* Stores vertex indices that make up the triangle. */ | |||||
| std::array<uint, 3> vertices; | |||||
| /* Computed face tangent, will be accumulated into group. */ | |||||
| float3 tangent; | |||||
| /* Index of the face that this triangle belongs to. */ | |||||
| uint faceIdx; | |||||
| /* Index of the first of this triangle's vertices' TSpaces. */ | |||||
| uint tSpaceIdx; | |||||
| /* Stores mapping from this triangle's vertices to the original | |||||
| * face's vertices (relevant for quads). */ | |||||
| std::array<uint8_t, 3> faceVertex; | |||||
| // flags | |||||
| bool markDegenerate : 1; | |||||
| bool quadOneDegenTri : 1; | |||||
| bool groupWithAny : 1; | |||||
| bool orientPreserving : 1; | |||||
| Triangle(uint faceIdx_, uint tSpaceIdx_) | |||||
| : tangent{0.0f}, | |||||
| faceIdx{faceIdx_}, | |||||
| tSpaceIdx{tSpaceIdx_}, | |||||
| markDegenerate{false}, | |||||
| quadOneDegenTri{false}, | |||||
| groupWithAny{true}, | |||||
| orientPreserving{false} | |||||
| { | |||||
| neighbor.fill(UNSET_ENTRY); | |||||
| group.fill(UNSET_ENTRY); | |||||
| } | |||||
| void setVertices(uint8_t i0, uint8_t i1, uint8_t i2) | |||||
| { | |||||
| faceVertex[0] = i0; | |||||
| faceVertex[1] = i1; | |||||
| faceVertex[2] = i2; | |||||
| vertices[0] = pack_index(faceIdx, i0); | |||||
| vertices[1] = pack_index(faceIdx, i1); | |||||
| vertices[2] = pack_index(faceIdx, i2); | |||||
| } | |||||
| }; | |||||
| struct Group { | |||||
| float3 tangent; | |||||
| uint vertexRepresentative; | |||||
| bool orientPreserving; | |||||
| Group(uint vertexRepresentative_, bool orientPreserving_) | |||||
| : tangent{0.0f}, | |||||
| vertexRepresentative{vertexRepresentative_}, | |||||
| orientPreserving{orientPreserving_} | |||||
| { | |||||
| } | |||||
| void normalizeTSpace() | |||||
| { | |||||
| tangent = tangent.normalize(); | |||||
| } | |||||
| void accumulateTSpaceAtomic(float3 v_tangent) | |||||
| { | |||||
| float_add_atomic(&tangent.x, v_tangent.x); | |||||
| float_add_atomic(&tangent.y, v_tangent.y); | |||||
| float_add_atomic(&tangent.z, v_tangent.z); | |||||
| } | |||||
| void accumulateTSpace(float3 v_tangent) | |||||
| { | |||||
| tangent += v_tangent; | |||||
| } | |||||
| }; | |||||
| struct TSpace { | |||||
| float3 tangent = float3(1.0f, 0.0f, 0.0f); | |||||
| uint counter = 0; | |||||
| bool orientPreserving = false; | |||||
| void accumulateGroup(const Group &group) | |||||
| { | |||||
| assert(counter < 2); | |||||
| if (counter == 0) { | |||||
| tangent = group.tangent; | |||||
| } | |||||
| else if (tangent == group.tangent) { | |||||
| // this if is important. Due to floating point precision | |||||
| // averaging when ts0==ts1 will cause a slight difference | |||||
| // which results in tangent space splits later on, so do nothing | |||||
| } | |||||
| else { | |||||
| tangent = (tangent + group.tangent).normalize(); | |||||
| } | |||||
| counter++; | |||||
| orientPreserving = group.orientPreserving; | |||||
| } | |||||
| }; | |||||
| Mesh &mesh; | |||||
| std::vector<Triangle> triangles; | |||||
| std::vector<TSpace> tSpaces; | |||||
| std::vector<Group> groups; | |||||
| uint nrTSpaces, nrFaces, nrTriangles, totalTriangles; | |||||
| int nrThreads; | |||||
| bool isParallel; | |||||
| public: | |||||
| Mikktspace(Mesh &mesh_) : mesh(mesh_) | |||||
| { | |||||
| } | |||||
| void genTangSpace() | |||||
| { | |||||
| nrFaces = (uint)mesh.GetNumFaces(); | |||||
| #ifdef WITH_TBB | |||||
| nrThreads = tbb::this_task_arena::max_concurrency(); | |||||
| isParallel = (nrThreads > 1) && (nrFaces > 10000); | |||||
| #else | |||||
| nrThreads = 1; | |||||
| isParallel = false; | |||||
| #endif | |||||
| // make an initial triangle --> face index list | |||||
| generateInitialVerticesIndexList(); | |||||
| if (nrTriangles == 0) { | |||||
| return; | |||||
| } | |||||
| // make a welded index list of identical positions and attributes (pos, norm, texc) | |||||
| generateSharedVerticesIndexList(); | |||||
| // mark all triangle pairs that belong to a quad with only one | |||||
| // good triangle. These need special treatment in degenEpilogue(). | |||||
| // Additionally, move all good triangles to the start of | |||||
| // triangles[] without changing order and | |||||
| // put the degenerate triangles last. | |||||
| degenPrologue(); | |||||
| // evaluate triangle level attributes and neighbor list | |||||
| initTriangle(); | |||||
| // match up edge pairs | |||||
| buildNeighbors(); | |||||
| // based on the 4 rules, identify groups based on connectivity | |||||
| build4RuleGroups(); | |||||
| // make tspaces, each group is split up into subgroups. | |||||
| // Finally a tangent space is made for every resulting subgroup | |||||
| generateTSpaces(); | |||||
| // degenerate quads with one good triangle will be fixed by copying a space from | |||||
| // the good triangle to the coinciding vertex. | |||||
| // all other degenerate triangles will just copy a space from any good triangle | |||||
| // with the same welded index in vertices[]. | |||||
| degenEpilogue(); | |||||
| uint index = 0; | |||||
| for (uint f = 0; f < nrFaces; f++) { | |||||
| const uint verts = mesh.GetNumVerticesOfFace(f); | |||||
| if (verts != 3 && verts != 4) { | |||||
| continue; | |||||
| } | |||||
| // set data | |||||
| for (uint i = 0; i < verts; i++) { | |||||
| const TSpace &tSpace = tSpaces[index++]; | |||||
| mesh.SetTangentSpace(f, i, tSpace.tangent, tSpace.orientPreserving); | |||||
| } | |||||
| } | |||||
| } | |||||
| protected: | |||||
| template<typename F> void runParallel(uint start, uint end, F func) | |||||
| { | |||||
| #ifdef WITH_TBB | |||||
| if (isParallel) { | |||||
| tbb::parallel_for(start, end, func); | |||||
| } | |||||
Not Done Inline ActionsShould this be a duplicate of threading::parallel_for from BLI_task.hh? It seems that this has already been mentioned, but it should be added that this is not an example, but a unified interface. Whenever possible, you should use threading::parallel_for in C++ instead of recreating it. Also, not sure about tbb::this_task_arena::max_concurrency();, does it seem like it's already set somewhere? Moder: Should this be a duplicate of threading::parallel_for from `BLI_task.hh`? It seems that this… | |||||
Not Done Inline ActionsOh, I didn't notice, this doesn't use BLI, sorry for the extra comment Moder: Oh, I didn't notice, this doesn't use BLI, sorry for the extra comment | |||||
| else | |||||
| #endif | |||||
| { | |||||
| for (uint i = start; i < end; i++) { | |||||
| func(i); | |||||
| } | |||||
| } | |||||
| } | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| float3 getPosition(uint vertexID) | |||||
| { | |||||
| uint f, v; | |||||
| unpack_index(f, v, vertexID); | |||||
| return mesh.GetPosition(f, v); | |||||
| } | |||||
| float3 getNormal(uint vertexID) | |||||
| { | |||||
| uint f, v; | |||||
| unpack_index(f, v, vertexID); | |||||
| return mesh.GetNormal(f, v); | |||||
| } | |||||
| float3 getTexCoord(uint vertexID) | |||||
| { | |||||
| uint f, v; | |||||
| unpack_index(f, v, vertexID); | |||||
| return mesh.GetTexCoord(f, v); | |||||
| } | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| void generateInitialVerticesIndexList() | |||||
| { | |||||
| nrTriangles = 0; | |||||
| for (uint f = 0; f < nrFaces; f++) { | |||||
| const uint verts = mesh.GetNumVerticesOfFace(f); | |||||
| if (verts == 3) { | |||||
| nrTriangles += 1; | |||||
| } | |||||
| else if (verts == 4) { | |||||
| nrTriangles += 2; | |||||
| } | |||||
| } | |||||
| triangles.reserve(nrTriangles); | |||||
| nrTSpaces = 0; | |||||
| for (uint f = 0; f < nrFaces; f++) { | |||||
| const uint verts = mesh.GetNumVerticesOfFace(f); | |||||
| if (verts != 3 && verts != 4) | |||||
| continue; | |||||
| uint tA = (uint)triangles.size(); | |||||
| triangles.emplace_back(f, nrTSpaces); | |||||
| Triangle &triA = triangles[tA]; | |||||
| if (verts == 3) { | |||||
| triA.setVertices(0, 1, 2); | |||||
| } | |||||
| else { | |||||
| uint tB = (uint)triangles.size(); | |||||
| triangles.emplace_back(f, nrTSpaces); | |||||
| Triangle &triB = triangles[tB]; | |||||
| // need an order independent way to evaluate | |||||
| // tspace on quads. This is done by splitting | |||||
| // along the shortest diagonal. | |||||
| float distSQ_02 = (mesh.GetTexCoord(f, 2) - mesh.GetTexCoord(f, 0)).length_squared(); | |||||
| float distSQ_13 = (mesh.GetTexCoord(f, 3) - mesh.GetTexCoord(f, 1)).length_squared(); | |||||
| bool quadDiagIs_02; | |||||
| if (distSQ_02 != distSQ_13) | |||||
| quadDiagIs_02 = (distSQ_02 < distSQ_13); | |||||
| else { | |||||
| distSQ_02 = (mesh.GetPosition(f, 2) - mesh.GetPosition(f, 0)).length_squared(); | |||||
| distSQ_13 = (mesh.GetPosition(f, 3) - mesh.GetPosition(f, 1)).length_squared(); | |||||
| quadDiagIs_02 = !(distSQ_13 < distSQ_02); | |||||
| } | |||||
| if (quadDiagIs_02) { | |||||
| triA.setVertices(0, 1, 2); | |||||
| triB.setVertices(0, 2, 3); | |||||
| } | |||||
| else { | |||||
| triA.setVertices(0, 1, 3); | |||||
| triB.setVertices(1, 2, 3); | |||||
| } | |||||
| } | |||||
| nrTSpaces += verts; | |||||
| } | |||||
| } | |||||
| struct VertexHash { | |||||
| Mikktspace<Mesh> *mikk; | |||||
| inline uint operator()(const uint &k) const | |||||
| { | |||||
| return hash_float3x3(mikk->getPosition(k), mikk->getNormal(k), mikk->getTexCoord(k)); | |||||
| } | |||||
| }; | |||||
| struct VertexEqual { | |||||
| Mikktspace<Mesh> *mikk; | |||||
| inline bool operator()(const uint &kA, const uint &kB) const | |||||
| { | |||||
| return mikk->getTexCoord(kA) == mikk->getTexCoord(kB) && | |||||
| mikk->getNormal(kA) == mikk->getNormal(kB) && | |||||
| mikk->getPosition(kA) == mikk->getPosition(kB); | |||||
| } | |||||
| }; | |||||
| /* Merge identical vertices. | |||||
| * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 | |||||
| * values. Then, by sorting based on that hash, identical elements (having identical hashes) will | |||||
| * be moved next to each other. Since there might be hash collisions, the elements of each block | |||||
| * are then compared with each other and duplicates are merged. | |||||
| */ | |||||
| template<bool isAtomic> void generateSharedVerticesIndexList_impl() | |||||
| { | |||||
| uint numVertices = nrTriangles * 3; | |||||
| AtomicHashSet<uint, isAtomic, VertexHash, VertexEqual> set(numVertices, {this}, {this}); | |||||
| runParallel(0u, nrTriangles, [&](uint t) { | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| auto res = set.emplace(triangles[t].vertices[i]); | |||||
| if (!res.second) { | |||||
| triangles[t].vertices[i] = res.first; | |||||
| } | |||||
| } | |||||
| }); | |||||
| } | |||||
| void generateSharedVerticesIndexList() | |||||
| { | |||||
| if (isParallel) { | |||||
| generateSharedVerticesIndexList_impl<true>(); | |||||
| } | |||||
| else { | |||||
| generateSharedVerticesIndexList_impl<false>(); | |||||
| } | |||||
| } | |||||
| ///////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////// Degenerate triangles //////////////////////////////////// | |||||
| void degenPrologue() | |||||
| { | |||||
| // Mark all degenerate triangles | |||||
| totalTriangles = nrTriangles; | |||||
| std::atomic<uint> degenTriangles(0); | |||||
| runParallel(0u, totalTriangles, [&](uint t) { | |||||
| const float3 p0 = getPosition(triangles[t].vertices[0]); | |||||
| const float3 p1 = getPosition(triangles[t].vertices[1]); | |||||
| const float3 p2 = getPosition(triangles[t].vertices[2]); | |||||
| if (p0 == p1 || p0 == p2 || p1 == p2) // degenerate | |||||
| { | |||||
| triangles[t].markDegenerate = true; | |||||
| degenTriangles.fetch_add(1); | |||||
| } | |||||
| }); | |||||
| nrTriangles -= degenTriangles.load(); | |||||
| if (totalTriangles == nrTriangles) { | |||||
| return; | |||||
| } | |||||
| // locate quads with only one good triangle | |||||
| runParallel(0u, totalTriangles - 1, [&](uint t) { | |||||
| Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1]; | |||||
| if (triangleA.faceIdx != triangleB.faceIdx) { | |||||
| /* Individual triangle, skip. */ | |||||
| return; | |||||
| } | |||||
| if (triangleA.markDegenerate != triangleB.markDegenerate) { | |||||
| triangleA.quadOneDegenTri = true; | |||||
| triangleB.quadOneDegenTri = true; | |||||
| } | |||||
| }); | |||||
| std::stable_partition(triangles.begin(), triangles.end(), [](const Triangle &tri) { | |||||
| return tri.markDegenerate; | |||||
| }); | |||||
| } | |||||
| void degenEpilogue() | |||||
| { | |||||
| if (nrTriangles == totalTriangles) { | |||||
| return; | |||||
| } | |||||
| std::unordered_map<uint, uint> goodTriangleMap; | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| goodTriangleMap.emplace(triangles[t].vertices[i], pack_index(t, i)); | |||||
| } | |||||
| } | |||||
| // deal with degenerate triangles | |||||
| // punishment for degenerate triangles is O(nrTriangles) extra memory. | |||||
| for (uint t = nrTriangles; t < totalTriangles; t++) { | |||||
| // degenerate triangles on a quad with one good triangle are skipped | |||||
| // here but processed in the next loop | |||||
| if (triangles[t].quadOneDegenTri) { | |||||
| continue; | |||||
| } | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| const auto entry = goodTriangleMap.find(triangles[t].vertices[i]); | |||||
| if (entry == goodTriangleMap.end()) { | |||||
| // Matching vertex from good triangle is not found. | |||||
| continue; | |||||
| } | |||||
| uint tSrc, iSrc; | |||||
| unpack_index(tSrc, iSrc, entry->second); | |||||
| const uint iSrcVert = triangles[tSrc].faceVertex[iSrc]; | |||||
| const uint iSrcOffs = triangles[tSrc].tSpaceIdx; | |||||
| const uint iDstVert = triangles[t].faceVertex[i]; | |||||
| const uint iDstOffs = triangles[t].tSpaceIdx; | |||||
| // copy tspace | |||||
| tSpaces[iDstOffs + iDstVert] = tSpaces[iSrcOffs + iSrcVert]; | |||||
| } | |||||
| } | |||||
| // deal with degenerate quads with one good triangle | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| // this triangle belongs to a quad where the | |||||
| // other triangle is degenerate | |||||
| if (!triangles[t].quadOneDegenTri) { | |||||
| continue; | |||||
| } | |||||
| uint vertFlag = (1u << triangles[t].faceVertex[0]) | (1u << triangles[t].faceVertex[1]) | | |||||
| (1u << triangles[t].faceVertex[2]); | |||||
| uint missingFaceVertex = 0; | |||||
| if ((vertFlag & 2) == 0) | |||||
| missingFaceVertex = 1; | |||||
| else if ((vertFlag & 4) == 0) | |||||
| missingFaceVertex = 2; | |||||
| else if ((vertFlag & 8) == 0) | |||||
| missingFaceVertex = 3; | |||||
| uint faceIdx = triangles[t].faceIdx; | |||||
| float3 dstP = mesh.GetPosition(faceIdx, missingFaceVertex); | |||||
| bool found = false; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| const uint faceVertex = triangles[t].faceVertex[i]; | |||||
| const float3 srcP = mesh.GetPosition(faceIdx, faceVertex); | |||||
| if (srcP == dstP) { | |||||
| const uint offset = triangles[t].tSpaceIdx; | |||||
| tSpaces[offset + missingFaceVertex] = tSpaces[offset + faceVertex]; | |||||
| found = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| assert(found); | |||||
| (void)found; | |||||
| } | |||||
| } | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| // returns the texture area times 2 | |||||
| float calcTexArea(uint tri) | |||||
| { | |||||
| const float3 t1 = getTexCoord(triangles[tri].vertices[0]); | |||||
| const float3 t2 = getTexCoord(triangles[tri].vertices[1]); | |||||
| const float3 t3 = getTexCoord(triangles[tri].vertices[2]); | |||||
| const float t21x = t2.x - t1.x; | |||||
| const float t21y = t2.y - t1.y; | |||||
| const float t31x = t3.x - t1.x; | |||||
| const float t31y = t3.y - t1.y; | |||||
| const float signedAreaSTx2 = t21x * t31y - t21y * t31x; | |||||
| return fabsf(signedAreaSTx2); | |||||
| } | |||||
| void initTriangle() | |||||
| { | |||||
| // triangles[f].iFlag is cleared in generateInitialVerticesIndexList() | |||||
| // which is called before this function. | |||||
| // evaluate first order derivatives | |||||
| runParallel(0u, nrTriangles, [&](uint t) { | |||||
| Triangle &triangle = triangles[t]; | |||||
| // initial values | |||||
| const float3 v1 = getPosition(triangle.vertices[0]); | |||||
| const float3 v2 = getPosition(triangle.vertices[1]); | |||||
| const float3 v3 = getPosition(triangle.vertices[2]); | |||||
| const float3 t1 = getTexCoord(triangle.vertices[0]); | |||||
| const float3 t2 = getTexCoord(triangle.vertices[1]); | |||||
| const float3 t3 = getTexCoord(triangle.vertices[2]); | |||||
| const float t21x = t2.x - t1.x; | |||||
| const float t21y = t2.y - t1.y; | |||||
| const float t31x = t3.x - t1.x; | |||||
| const float t31y = t3.y - t1.y; | |||||
| const float3 d1 = v2 - v1, d2 = v3 - v1; | |||||
| const float signedAreaSTx2 = t21x * t31y - t21y * t31x; | |||||
| const float3 vOs = (t31y * d1) - (t21y * d2); // eq 18 | |||||
| const float3 vOt = (-t31x * d1) + (t21x * d2); // eq 19 | |||||
| triangle.orientPreserving = (signedAreaSTx2 > 0); | |||||
| if (not_zero(signedAreaSTx2)) { | |||||
| const float lenOs2 = vOs.length_squared(); | |||||
| const float lenOt2 = vOt.length_squared(); | |||||
| const float fS = triangle.orientPreserving ? 1.0f : (-1.0f); | |||||
| if (not_zero(lenOs2)) | |||||
| triangle.tangent = vOs * (fS / sqrtf(lenOs2)); | |||||
| // if this is a good triangle | |||||
| if (not_zero(lenOs2) && not_zero(lenOt2)) | |||||
| triangle.groupWithAny = false; | |||||
| } | |||||
| }); | |||||
| // force otherwise healthy quads to a fixed orientation | |||||
| runParallel(0u, nrTriangles - 1, [&](uint t) { | |||||
| Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1]; | |||||
| if (triangleA.faceIdx != triangleB.faceIdx) { | |||||
| // this is not a quad | |||||
| return; | |||||
| } | |||||
| // bad triangles should already have been removed by | |||||
| // degenPrologue(), but just in case check that neither are degenerate | |||||
| if (!(triangleA.markDegenerate || triangleB.markDegenerate)) { | |||||
| // if this happens the quad has extremely bad mapping!! | |||||
| if (triangleA.orientPreserving != triangleB.orientPreserving) { | |||||
| bool chooseOrientFirstTri = false; | |||||
| if (triangleB.groupWithAny) | |||||
| chooseOrientFirstTri = true; | |||||
| else if (calcTexArea(t) >= calcTexArea(t + 1)) | |||||
| chooseOrientFirstTri = true; | |||||
| // force match | |||||
| const uint t0 = chooseOrientFirstTri ? t : (t + 1); | |||||
| const uint t1 = chooseOrientFirstTri ? (t + 1) : t; | |||||
| triangles[t1].orientPreserving = triangles[t0].orientPreserving; | |||||
| } | |||||
| } | |||||
| }); | |||||
| } | |||||
| ///////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////// Edges /////////////////////////////////////////// | |||||
| struct NeighborShard { | |||||
| struct Entry { | |||||
| Entry(uint32_t key_, uint data_) : key(key_), data(data_) | |||||
| { | |||||
| } | |||||
| uint key, data; | |||||
| }; | |||||
| std::vector<Entry> entries; | |||||
| NeighborShard(size_t capacity) | |||||
| { | |||||
| entries.reserve(capacity); | |||||
| } | |||||
| void buildNeighbors(Mikktspace<Mesh> *mikk) | |||||
| { | |||||
| /* Entries are added by iterating over t, so by using a stable sort, | |||||
| * we don't have to compare based on t as well. */ | |||||
| { | |||||
| std::vector<Entry> tempEntries(entries.size(), {0, 0}); | |||||
| radixsort(entries, tempEntries, [](const Entry &e) { return e.key; }); | |||||
| } | |||||
| for (uint i = 0; i < entries.size(); i++) { | |||||
| const Entry &a = entries[i]; | |||||
| uint tA, iA; | |||||
| unpack_index(tA, iA, a.data); | |||||
| Mikktspace<Mesh>::Triangle &triA = mikk->triangles[tA]; | |||||
| if (triA.neighbor[iA] != UNSET_ENTRY) { | |||||
| continue; | |||||
| } | |||||
| uint i0A = triA.vertices[iA], i1A = triA.vertices[(iA != 2) ? (iA + 1) : 0]; | |||||
| for (uint j = i + 1; j < entries.size(); j++) { | |||||
| const Entry &b = entries[j]; | |||||
| uint tB, iB; | |||||
| unpack_index(tB, iB, b.data); | |||||
| Mikktspace<Mesh>::Triangle &triB = mikk->triangles[tB]; | |||||
| if (b.key != a.key) | |||||
| break; | |||||
| if (triB.neighbor[iB] != UNSET_ENTRY) { | |||||
| continue; | |||||
| } | |||||
| uint i1B = triB.vertices[iB], i0B = triB.vertices[(iB != 2) ? (iB + 1) : 0]; | |||||
| if (i0A == i0B && i1A == i1B) { | |||||
| triA.neighbor[iA] = tB; | |||||
| triB.neighbor[iB] = tA; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| }; | |||||
| void buildNeighbors() | |||||
| { | |||||
| /* In order to parallelize the processing, we divide the vertices into shards. | |||||
| * Since only vertex pairs with the same key will be checked, we can process | |||||
| * shards independently as long as we ensure that all vertices with the same | |||||
| * key go into the same shard. | |||||
| * This is done by hashing the key to get the shard index of each vertex. | |||||
| */ | |||||
| // TODO: Two-step filling that first counts and then fills? Could be parallel then. | |||||
| uint targetNrShards = isParallel ? uint(4 * nrThreads) : 1; | |||||
| uint nrShards = 1, hashShift = 32; | |||||
| while (nrShards < targetNrShards) { | |||||
| nrShards *= 2; | |||||
| hashShift -= 1; | |||||
| } | |||||
| /* Reserve 25% extra to account for variation due to hashing. */ | |||||
| size_t reserveSize = size_t(double(3 * nrTriangles) * 1.25 / nrShards); | |||||
| std::vector<NeighborShard> shards(nrShards, {reserveSize}); | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| Triangle &triangle = triangles[t]; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| const uint i0 = triangle.vertices[i]; | |||||
| const uint i1 = triangle.vertices[(i != 2) ? (i + 1) : 0]; | |||||
| const uint high = std::max(i0, i1), low = std::min(i0, i1); | |||||
| const uint hash = hash_uint3(high, low, 0); | |||||
| /* TODO: Reusing the hash here means less hash space inside each shard. | |||||
| * Computing a second hash with a different seed it probably not worth it? */ | |||||
| const uint shard = isParallel ? (hash >> hashShift) : 0; | |||||
| shards[shard].entries.emplace_back(hash, pack_index(t, i)); | |||||
| } | |||||
| } | |||||
| runParallel(0u, nrShards, [&](uint s) { shards[s].buildNeighbors(this); }); | |||||
| } | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| void assignRecur(const uint t, uint groupId) | |||||
| { | |||||
| if (t == UNSET_ENTRY) { | |||||
| return; | |||||
| } | |||||
| Triangle &triangle = triangles[t]; | |||||
| Group &group = groups[groupId]; | |||||
| // track down vertex | |||||
| const uint vertRep = group.vertexRepresentative; | |||||
| uint i = 3; | |||||
| if (triangle.vertices[0] == vertRep) | |||||
| i = 0; | |||||
| else if (triangle.vertices[1] == vertRep) | |||||
| i = 1; | |||||
| else if (triangle.vertices[2] == vertRep) | |||||
| i = 2; | |||||
| assert(i < 3); | |||||
| // early out | |||||
| if (triangle.group[i] != UNSET_ENTRY) | |||||
| return; | |||||
| if (triangle.groupWithAny) { | |||||
| // first to group with a group-with-anything triangle | |||||
| // determines its orientation. | |||||
| // This is the only existing order dependency in the code!! | |||||
| if (triangle.group[0] == UNSET_ENTRY && triangle.group[1] == UNSET_ENTRY && | |||||
| triangle.group[2] == UNSET_ENTRY) { | |||||
| triangle.orientPreserving = group.orientPreserving; | |||||
| } | |||||
| } | |||||
| if (triangle.orientPreserving != group.orientPreserving) | |||||
| return; | |||||
| triangle.group[i] = groupId; | |||||
| const uint t_L = triangle.neighbor[i]; | |||||
| const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2]; | |||||
| assignRecur(t_L, groupId); | |||||
| assignRecur(t_R, groupId); | |||||
| } | |||||
| void build4RuleGroups() | |||||
| { | |||||
| /* Note: This could be parallelized by grouping all [t, i] pairs into | |||||
| * shards by hash(triangles[t].vertices[i]). This way, each shard can be processed | |||||
| * independently and in parallel. | |||||
| * However, the groupWithAny logic needs special handling (e.g. lock a mutex when | |||||
| * encountering a groupWithAny triangle, then sort it out, then unlock and proceed). | |||||
| */ | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| Triangle &triangle = triangles[t]; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| // if not assigned to a group | |||||
| if (triangle.groupWithAny || triangle.group[i] != UNSET_ENTRY) { | |||||
| continue; | |||||
| } | |||||
| const uint newGroupId = (uint)groups.size(); | |||||
| triangle.group[i] = newGroupId; | |||||
| groups.emplace_back(triangle.vertices[i], bool(triangle.orientPreserving)); | |||||
| const uint t_L = triangle.neighbor[i]; | |||||
| const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2]; | |||||
| assignRecur(t_L, newGroupId); | |||||
| assignRecur(t_R, newGroupId); | |||||
| } | |||||
| } | |||||
| } | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | |||||
| template<bool atomic> void accumulateTSpaces(uint t) | |||||
| { | |||||
| const Triangle &triangle = triangles[t]; | |||||
| // only valid triangles get to add their contribution | |||||
| if (triangle.groupWithAny) { | |||||
| return; | |||||
| } | |||||
| /* Todo: Vectorize? | |||||
| * Also: Could add special case for flat shading, when all normals are equal half of the fCos | |||||
| * projections and two of the three tangent projections are unnecessary. */ | |||||
| std::array<float3, 3> n, p; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| n[i] = getNormal(triangle.vertices[i]); | |||||
| p[i] = getPosition(triangle.vertices[i]); | |||||
| } | |||||
| std::array<float, 3> fCos = {dot(project(n[0], p[1] - p[0]), project(n[0], p[2] - p[0])), | |||||
| dot(project(n[1], p[2] - p[1]), project(n[1], p[0] - p[1])), | |||||
| dot(project(n[2], p[0] - p[2]), project(n[2], p[1] - p[2]))}; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| uint groupId = triangle.group[i]; | |||||
| if (groupId != UNSET_ENTRY) { | |||||
| float3 tangent = project(n[i], triangle.tangent) * | |||||
| fast_acosf(std::clamp(fCos[i], -1.0f, 1.0f)); | |||||
| if constexpr (atomic) { | |||||
| groups[groupId].accumulateTSpaceAtomic(tangent); | |||||
| } | |||||
| else { | |||||
| groups[groupId].accumulateTSpace(tangent); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| void generateTSpaces() | |||||
| { | |||||
| if (isParallel) { | |||||
| runParallel(0u, nrTriangles, [&](uint t) { accumulateTSpaces<true>(t); }); | |||||
| } | |||||
| else { | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| accumulateTSpaces<false>(t); | |||||
| } | |||||
| } | |||||
| /* TODO: Worth parallelizing? Probably not. */ | |||||
| for (Group &group : groups) { | |||||
| group.normalizeTSpace(); | |||||
| } | |||||
| tSpaces.resize(nrTSpaces); | |||||
| for (uint t = 0; t < nrTriangles; t++) { | |||||
| Triangle &triangle = triangles[t]; | |||||
| for (uint i = 0; i < 3; i++) { | |||||
| uint groupId = triangle.group[i]; | |||||
| if (groupId == UNSET_ENTRY) { | |||||
| continue; | |||||
| } | |||||
| const Group group = groups[groupId]; | |||||
| assert(triangle.orientPreserving == group.orientPreserving); | |||||
| // output tspace | |||||
| const uint offset = triangle.tSpaceIdx; | |||||
| const uint faceVertex = triangle.faceVertex[i]; | |||||
| tSpaces[offset + faceVertex].accumulateGroup(group); | |||||
| } | |||||
| } | |||||
| } | |||||
| }; | |||||
| } // namespace mikk | |||||
We try to keep Blender building even when TBB is not available, so this should do the same. See BLI_task.hh for how we do this in Blender.