Changeset View
Changeset View
Standalone View
Standalone View
intern/mantaflow/intern/manta_develop/preprocessed/tbb/multigrid.cpp
- This file was added.
| // DO NOT EDIT ! | |||||
| // This file is generated using the MantaFlow preprocessor (prep generate). | |||||
| /****************************************************************************** | |||||
| * | |||||
| * MantaFlow fluid solver framework | |||||
| * Multigrid solver | |||||
| * | |||||
| * This program is free software, distributed under the terms of the | |||||
| * Apache License, Version 2.0 | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Copyright 2016, by Florian Ferstl (florian.ferstl.ff@gmail.com) | |||||
| * | |||||
| * This is an implementation of the solver developed by Dick et al. [1] | |||||
| * without topology awareness (= vertex duplication on coarser levels). This | |||||
| * simplification allows us to use regular grids for all levels of the multigrid | |||||
| * hierarchy and works well for moderately complex domains. | |||||
| * | |||||
| * [1] Solving the Fluid Pressure Poisson Equation Using Multigrid-Evaluation | |||||
| * and Improvements, C. Dick, M. Rogowsky, R. Westermann, IEEE TVCG 2015 | |||||
| * | |||||
| ******************************************************************************/ | |||||
| #include "multigrid.h" | |||||
| #define FOR_LVL(IDX,LVL) \ | |||||
| for(int IDX=0; IDX<mb[LVL].size(); IDX++) | |||||
| #define FOR_VEC_MINMAX(VEC,MIN,MAX) Vec3i VEC; \ | |||||
| const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ | |||||
| for(VEC.z=VEC##__min.z; VEC.z<=VEC##__max.z; VEC.z++) \ | |||||
| for(VEC.y=VEC##__min.y; VEC.y<=VEC##__max.y; VEC.y++) \ | |||||
| for(VEC.x=VEC##__min.x; VEC.x<=VEC##__max.x; VEC.x++) | |||||
| #define FOR_VECLIN_MINMAX(VEC,LIN,MIN,MAX) Vec3i VEC; int LIN = 0; \ | |||||
| const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ | |||||
| for(VEC.z=VEC##__min.z; VEC.z<=VEC##__max.z; VEC.z++) \ | |||||
| for(VEC.y=VEC##__min.y; VEC.y<=VEC##__max.y; VEC.y++) \ | |||||
| for(VEC.x=VEC##__min.x; VEC.x<=VEC##__max.x; VEC.x++, LIN++) | |||||
| #define MG_TIMINGS(X) | |||||
| //#define MG_TIMINGS(X) X | |||||
| using namespace std; | |||||
| namespace Manta | |||||
| { | |||||
| // Helper class for calling mantaflow kernels with a specific number of threads | |||||
| class ThreadSize { | |||||
| IndexInt s; | |||||
| public: | |||||
| ThreadSize(IndexInt _s) {s=_s;} | |||||
| IndexInt size() { return s; } | |||||
| }; | |||||
| // ---------------------------------------------------------------------------- | |||||
| // Efficient min heap for <ID, key> pairs with 0<=ID<N and 0<=key<K | |||||
| // (elements are stored in K buckets, where each bucket is a doubly linked list). | |||||
| // - if K<<N, all ops are O(1) on avg (worst case O(K)). | |||||
| // - memory usage O(K+N): (K+N) * 3 * sizeof(int). | |||||
| class NKMinHeap | |||||
| { | |||||
| private: | |||||
| struct Entry { | |||||
| int key, prev, next; | |||||
| Entry() : key(-1), prev(-1), next(-1) {} | |||||
| }; | |||||
| int mN, mK, mSize, mMinKey; | |||||
| // Double linked lists of IDs, one for each bucket/key. | |||||
| // The first K entries are the buckets' head pointers, | |||||
| // and the last N entries correspond to the IDs. | |||||
| std::vector<Entry> mEntries; | |||||
| public: | |||||
| NKMinHeap(int N, int K) : mN(N), mK(K), mSize(0), mMinKey(-1), mEntries(N+K) {} | |||||
| int size() { return mSize; } | |||||
| int getKey(int ID) { return mEntries[mK+ID].key; } | |||||
| // Insert, decrease or increase key (or delete by setting key to -1) | |||||
| void setKey(int ID, int key); | |||||
| // peek min key (returns ID/key pair) | |||||
| std::pair<int,int> peekMin(); | |||||
| // pop min key (returns ID/key pair) | |||||
| std::pair<int,int> popMin(); | |||||
| void print(); // for debugging | |||||
| }; | |||||
| void NKMinHeap::setKey(int ID, int key) | |||||
| { | |||||
| assertMsg(0 <=ID && ID <mN, "NKMinHeap::setKey: ID out of range"); | |||||
| assertMsg(-1<=key && key<mK, "NKMinHeap::setKey: key out of range"); | |||||
| const int kid = mK + ID; | |||||
| if (mEntries[kid].key == key) return; // nothing changes | |||||
| // remove from old key-list if ID existed previously | |||||
| if (mEntries[kid].key != -1) | |||||
| { | |||||
| int pred = mEntries[kid].prev; | |||||
| int succ = mEntries[kid].next; // can be -1 | |||||
| mEntries[pred].next = succ; | |||||
| if (succ != -1) mEntries[succ].prev = pred; | |||||
| // if removed key was minimum key, mMinKey may need to be updated | |||||
| int removedKey = mEntries[kid].key; | |||||
| if (removedKey==mMinKey) | |||||
| { | |||||
| if (mSize==1) { mMinKey = -1; } | |||||
| else { | |||||
| for (; mMinKey<mK; mMinKey++) { | |||||
| if (mEntries[mMinKey].next != -1) break; | |||||
| } | |||||
| } | |||||
| } | |||||
| mSize--; | |||||
| } | |||||
| // set new key of ID | |||||
| mEntries[kid].key = key; | |||||
| if (key==-1) { | |||||
| // finished if key was set to -1 | |||||
| mEntries[kid].next = mEntries[kid].prev = -1; | |||||
| return; | |||||
| } | |||||
| // add key | |||||
| mSize++; | |||||
| if (mMinKey == -1) mMinKey = key; | |||||
| else mMinKey = std::min(mMinKey, key); | |||||
| // insert into new key-list (headed by mEntries[key]) | |||||
| int tmp = mEntries[key].next; | |||||
| mEntries[key].next = kid; | |||||
| mEntries[kid].prev = key; | |||||
| mEntries[kid].next = tmp; | |||||
| if (tmp != -1) mEntries[tmp].prev = kid; | |||||
| } | |||||
| std::pair<int,int> NKMinHeap::peekMin() | |||||
| { | |||||
| if (mSize==0) return std::pair<int,int>(-1,-1); // error | |||||
| const int ID = mEntries[mMinKey].next - mK; | |||||
| return std::pair<int,int>(ID, mMinKey); | |||||
| } | |||||
| std::pair<int,int> NKMinHeap::popMin() | |||||
| { | |||||
| if (mSize==0) return std::pair<int,int>(-1,-1); // error | |||||
| const int kid = mEntries[mMinKey].next; | |||||
| const int ID = kid - mK; | |||||
| const int key = mMinKey; | |||||
| // remove from key-list | |||||
| int pred = mEntries[kid].prev; | |||||
| int succ = mEntries[kid].next; // can be -1 | |||||
| mEntries[pred].next = succ; | |||||
| if (succ != -1) mEntries[succ].prev = pred; | |||||
| // remove entry | |||||
| mEntries[kid] = Entry(); | |||||
| mSize--; | |||||
| // update mMinKey | |||||
| if (mSize==0) { mMinKey = -1; } | |||||
| else { | |||||
| for (; mMinKey<mK; mMinKey++) { | |||||
| if (mEntries[mMinKey].next != -1) break; | |||||
| } | |||||
| } | |||||
| // return result | |||||
| return std::pair<int,int>(ID, key); | |||||
| } | |||||
| void NKMinHeap::print() | |||||
| { | |||||
| std::cout << "Size: "<<mSize<<", MinKey: "<<mMinKey<< std::endl; | |||||
| for (int key=0; key<mK; key++) { | |||||
| if (mEntries[key].next != -1) { | |||||
| std::cout << "Key "<<key<<": "; | |||||
| int kid = mEntries[key].next; | |||||
| while (kid != -1) { | |||||
| std::cout << kid-mK<<" "; | |||||
| kid = mEntries[kid].next; | |||||
| } | |||||
| std::cout << std::endl; | |||||
| } | |||||
| } | |||||
| std::cout << std::endl; | |||||
| } | |||||
| // ---------------------------------------------------------------------------- | |||||
| // GridMg methods | |||||
| // | |||||
| // Illustration of 27-point stencil indices | |||||
| // y | z = -1 z = 0 z = 1 | |||||
| // ^ | 6 7 8, 15 16 17, 24 25 26 | |||||
| // | | 3 4 5, 12 13 14, 21 22 23 | |||||
| // o-> x | 0 1 2, 9 10 11, 18 19 20 | |||||
| // | |||||
| // Symmetric storage with only 14 entries per vertex | |||||
| // y | z = -1 z = 0 z = 1 | |||||
| // ^ | - - -, 2 3 4, 11 12 13 | |||||
| // | | - - -, - 0 1, 8 9 10 | |||||
| // o-> x | - - -, - - -, 5 6 7 | |||||
| GridMg::GridMg(const Vec3i& gridSize) | |||||
| : mNumPreSmooth(1), | |||||
| mNumPostSmooth(1), | |||||
| mCoarsestLevelAccuracy(Real(1E-8)), | |||||
| mTrivialEquationScale(Real(1E-6)), | |||||
| mIsASet(false), | |||||
| mIsRhsSet(false) | |||||
| { | |||||
| MG_TIMINGS(MuTime time;) | |||||
| // 2D or 3D mode | |||||
| mIs3D = (gridSize.z > 1); | |||||
| mDim = mIs3D ? 3 : 2; | |||||
| mStencilSize = mIs3D ? 14 : 5; // A has a full 27-point stencil on levels > 0 | |||||
| mStencilSize0 = mIs3D ? 4 : 3; // A has a 7-point stencil on level 0 | |||||
| mStencilMin = Vec3i(-1,-1, mIs3D ? -1:0); | |||||
| mStencilMax = Vec3i( 1, 1, mIs3D ? 1:0); | |||||
| // Create level 0 (=original grid) | |||||
| mSize.push_back(gridSize); | |||||
| mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x*mSize.back().y)); | |||||
| int n = mSize.back().x * mSize.back().y * mSize.back().z; | |||||
| mA.push_back(std::vector<Real>(n * mStencilSize0)); | |||||
| mx.push_back(std::vector<Real>(n)); | |||||
| mb.push_back(std::vector<Real>(n)); | |||||
| mr.push_back(std::vector<Real>(n)); | |||||
| mType.push_back(std::vector<VertexType>(n)); | |||||
| mCGtmp1.push_back(std::vector<double>()); | |||||
| mCGtmp2.push_back(std::vector<double>()); | |||||
| mCGtmp3.push_back(std::vector<double>()); | |||||
| mCGtmp4.push_back(std::vector<double>()); | |||||
| debMsg("GridMg::GridMg level 0: "<<mSize[0].x<<" x " << mSize[0].y << " x " << mSize[0].z << " x ", 2); | |||||
| // Create coarse levels >0 | |||||
| for (int l=1; l<=100; l++) | |||||
| { | |||||
| if (mSize[l-1].x <= 5 && mSize[l-1].y <= 5 && mSize[l-1].z <= 5) break; | |||||
| if (n <= 1000) break; | |||||
| mSize.push_back((mSize[l-1] + 2) / 2); | |||||
| mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x*mSize.back().y)); | |||||
| n = mSize.back().x * mSize.back().y * mSize.back().z; | |||||
| mA.push_back(std::vector<Real>(n * mStencilSize)); | |||||
| mx.push_back(std::vector<Real>(n)); | |||||
| mb.push_back(std::vector<Real>(n)); | |||||
| mr.push_back(std::vector<Real>(n)); | |||||
| mType.push_back(std::vector<VertexType>(n)); | |||||
| mCGtmp1.push_back(std::vector<double>()); | |||||
| mCGtmp2.push_back(std::vector<double>()); | |||||
| mCGtmp3.push_back(std::vector<double>()); | |||||
| mCGtmp4.push_back(std::vector<double>()); | |||||
| debMsg("GridMg::GridMg level "<<l<<": " << mSize[l].x << " x " << mSize[l].y << " x " << mSize[l].z << " x ", 2); | |||||
| } | |||||
| // Additional memory for CG on coarsest level | |||||
| mCGtmp1.back() = std::vector<double>(n); | |||||
| mCGtmp2.back() = std::vector<double>(n); | |||||
| mCGtmp3.back() = std::vector<double>(n); | |||||
| mCGtmp4.back() = std::vector<double>(n); | |||||
| MG_TIMINGS(debMsg("GridMg: Allocation done in "<<time.update(), 1);) | |||||
| // Precalculate coarsening paths: | |||||
| // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) | |||||
| Vec3i p7stencil[7] = { Vec3i(0,0,0), Vec3i(-1, 0, 0), Vec3i(1,0,0), | |||||
| Vec3i( 0,-1, 0), Vec3i(0,1,0), | |||||
| Vec3i( 0, 0,-1), Vec3i(0,0,1) }; | |||||
| Vec3i V (1,1,1); // reference coarse grid vertex at (1,1,1) | |||||
| FOR_VEC_MINMAX(U, V*2+mStencilMin, V*2+mStencilMax) { | |||||
| for (int i=0; i<1+2*mDim; i++) { | |||||
| Vec3i W = U + p7stencil[i]; | |||||
| FOR_VEC_MINMAX(N, W/2, (W+1)/2) { | |||||
| int s = dot(N,Vec3i(1,3,9)); | |||||
| if (s>=13) { | |||||
| CoarseningPath path; | |||||
| path.N = N-1; // offset of N on coarse grid | |||||
| path.U = U-V*2; // offset of U on fine grid | |||||
| path.W = W-V*2; // offset of W on fine grid | |||||
| path.sc = s-13; // stencil index corresponding to V<-N on coarse grid | |||||
| path.sf = (i+1)/2; // stencil index corresponding to U<-W on coarse grid | |||||
| path.inUStencil = (i%2==0); // fine grid stencil entry stored at U or W? | |||||
| path.rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); // restriction weight V<-U | |||||
| path.iw = Real(1) / Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight W<-N | |||||
| mCoarseningPaths0.push_back(path); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| auto pathLess = [](const GridMg::CoarseningPath& p1, const GridMg::CoarseningPath& p2) { | |||||
| if (p1.sc == p2.sc) return dot(p1.U+1,Vec3i(1,3,9)) < dot(p2.U+1,Vec3i(1,3,9)); | |||||
| return p1.sc < p2.sc; | |||||
| }; | |||||
| std::sort(mCoarseningPaths0.begin(), mCoarseningPaths0.end(), pathLess); | |||||
| } | |||||
| void GridMg::analyzeStencil(int v, bool is3D, bool& isStencilSumNonZero, bool& isEquationTrivial) const { | |||||
| Vec3i V = vecIdx(v,0); | |||||
| // collect stencil entries | |||||
| Real A[7]; | |||||
| A[0] = mA[0][v*mStencilSize0 + 0]; | |||||
| A[1] = mA[0][v*mStencilSize0 + 1]; | |||||
| A[2] = mA[0][v*mStencilSize0 + 2]; | |||||
| A[3] = is3D ? mA[0][v*mStencilSize0 + 3] : Real(0); | |||||
| A[4] = V.x!=0 ? mA[0][(v-mPitch[0].x)*mStencilSize0 + 1] : Real(0); | |||||
| A[5] = V.y!=0 ? mA[0][(v-mPitch[0].y)*mStencilSize0 + 2] : Real(0); | |||||
| A[6] = V.z!=0 && is3D ? mA[0][(v-mPitch[0].z)*mStencilSize0 + 3] : Real(0); | |||||
| // compute sum of stencil entries | |||||
| Real stencilMax = Real(0), stencilSum = Real(0); | |||||
| for (int i=0; i<7; i++) { | |||||
| stencilSum += A[i]; | |||||
| stencilMax = max(stencilMax, std::abs(A[i])); | |||||
| } | |||||
| // check if sum is numerically zero | |||||
| isStencilSumNonZero = std::abs(stencilSum / stencilMax) > Real(1E-6); | |||||
| // check for trivial equation (exact comparisons) | |||||
| isEquationTrivial = A[0]==Real(1) | |||||
| && A[1]==Real(0) && A[2]==Real(0) && A[3]==Real(0) | |||||
| && A[4]==Real(0) && A[5]==Real(0) && A[6]==Real(0); | |||||
| } | |||||
| struct knCopyA : public KernelBase { | |||||
| knCopyA(std::vector<Real>& sizeRef, std::vector<Real>& A0, int stencilSize0, bool is3D, const Grid<Real>* pA0, const Grid<Real>* pAi, const Grid<Real>* pAj, const Grid<Real>* pAk) : KernelBase(sizeRef.size()) ,sizeRef(sizeRef),A0(A0),stencilSize0(stencilSize0),is3D(is3D),pA0(pA0),pAi(pAi),pAj(pAj),pAk(pAk) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& sizeRef, std::vector<Real>& A0, int stencilSize0, bool is3D, const Grid<Real>* pA0, const Grid<Real>* pAi, const Grid<Real>* pAj, const Grid<Real>* pAk ) const { | |||||
| A0[idx*stencilSize0 + 0] = (*pA0)[idx]; | |||||
| A0[idx*stencilSize0 + 1] = (*pAi)[idx]; | |||||
| A0[idx*stencilSize0 + 2] = (*pAj)[idx]; | |||||
| if (is3D) A0[idx*stencilSize0 + 3] = (*pAk)[idx]; | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return sizeRef; } | |||||
| typedef std::vector<Real> type0;inline std::vector<Real>& getArg1() { | |||||
| return A0; } | |||||
| typedef std::vector<Real> type1;inline int& getArg2() { | |||||
| return stencilSize0; } | |||||
| typedef int type2;inline bool& getArg3() { | |||||
| return is3D; } | |||||
| typedef bool type3;inline const Grid<Real>* getArg4() { | |||||
| return pA0; } | |||||
| typedef Grid<Real> type4;inline const Grid<Real>* getArg5() { | |||||
| return pAi; } | |||||
| typedef Grid<Real> type5;inline const Grid<Real>* getArg6() { | |||||
| return pAj; } | |||||
| typedef Grid<Real> type6;inline const Grid<Real>* getArg7() { | |||||
| return pAk; } | |||||
| typedef Grid<Real> type7; void runMessage() { debMsg("Executing kernel knCopyA ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, sizeRef,A0,stencilSize0,is3D,pA0,pAi,pAj,pAk); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& sizeRef; std::vector<Real>& A0; int stencilSize0; bool is3D; const Grid<Real>* pA0; const Grid<Real>* pAi; const Grid<Real>* pAj; const Grid<Real>* pAk; } | |||||
| ; | |||||
| struct knActivateVertices : public KernelBase { | |||||
| knActivateVertices(std::vector<GridMg::VertexType>& type_0, std::vector<Real>& A0, bool& nonZeroStencilSumFound, bool& trivialEquationsFound, const GridMg& mg) : KernelBase(type_0.size()) ,type_0(type_0),A0(A0),nonZeroStencilSumFound(nonZeroStencilSumFound),trivialEquationsFound(trivialEquationsFound),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<GridMg::VertexType>& type_0, std::vector<Real>& A0, bool& nonZeroStencilSumFound, bool& trivialEquationsFound, const GridMg& mg ) const { | |||||
| // active vertices on level 0 are vertices with non-zero diagonal entry in A | |||||
| type_0[idx] = GridMg::vtInactive; | |||||
| if (mg.mA[0][idx*mg.mStencilSize0 + 0] != Real(0)) { | |||||
| type_0[idx] = GridMg::vtActive; | |||||
| bool isStencilSumNonZero = false, isEquationTrivial = false; | |||||
| mg.analyzeStencil(int(idx), mg.mIs3D, isStencilSumNonZero, isEquationTrivial); | |||||
| // Note: nonZeroStencilSumFound and trivialEquationsFound are only | |||||
| // changed from false to true, and hence there are no race conditions. | |||||
| if (isStencilSumNonZero) nonZeroStencilSumFound = true; | |||||
| // scale down trivial equations | |||||
| if (isEquationTrivial) { | |||||
| type_0[idx] = GridMg::vtActiveTrivial; | |||||
| A0[idx*mg.mStencilSize0 + 0] *= mg.mTrivialEquationScale; | |||||
| trivialEquationsFound = true; | |||||
| }; | |||||
| } | |||||
| } inline std::vector<GridMg::VertexType>& getArg0() { | |||||
| return type_0; } | |||||
| typedef std::vector<GridMg::VertexType> type0;inline std::vector<Real>& getArg1() { | |||||
| return A0; } | |||||
| typedef std::vector<Real> type1;inline bool& getArg2() { | |||||
| return nonZeroStencilSumFound; } | |||||
| typedef bool type2;inline bool& getArg3() { | |||||
| return trivialEquationsFound; } | |||||
| typedef bool type3;inline const GridMg& getArg4() { | |||||
| return mg; } | |||||
| typedef GridMg type4; void runMessage() { debMsg("Executing kernel knActivateVertices ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, type_0,A0,nonZeroStencilSumFound,trivialEquationsFound,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<GridMg::VertexType>& type_0; std::vector<Real>& A0; bool& nonZeroStencilSumFound; bool& trivialEquationsFound; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::setA(const Grid<Real>* pA0, const Grid<Real>* pAi, const Grid<Real>* pAj, const Grid<Real>* pAk) | |||||
| { | |||||
| MG_TIMINGS(MuTime time;) | |||||
| // Copy level 0 | |||||
| knCopyA(mx[0], mA[0], mStencilSize0, mIs3D, pA0, pAi, pAj, pAk); | |||||
| // Determine active vertices and scale trivial equations | |||||
| bool nonZeroStencilSumFound = false; | |||||
| bool trivialEquationsFound = false; | |||||
| knActivateVertices(mType[0], mA[0], nonZeroStencilSumFound, trivialEquationsFound, *this); | |||||
| if (trivialEquationsFound) debMsg("GridMg::setA: Found at least one trivial equation", 2); | |||||
| // Sanity check: if all rows of A sum up to 0 --> A doesn't have full rank (opposite direction isn't necessarily true) | |||||
| if (!nonZeroStencilSumFound) debMsg("GridMg::setA: Found constant mode: A*1=0! A does not have full rank and multigrid may not converge. (forgot to fix a pressure value?)", 1); | |||||
| // Create coarse grids and operators on levels >0 | |||||
| for (int l=1; l<mA.size(); l++) { | |||||
| MG_TIMINGS(time.get();) | |||||
| genCoarseGrid(l); | |||||
| MG_TIMINGS(debMsg("GridMg: Generated level "<<l<<" in "<<time.update(), 1);) | |||||
| genCoraseGridOperator(l); | |||||
| MG_TIMINGS(debMsg("GridMg: Generated operator "<<l<<" in "<<time.update(), 1);) | |||||
| } | |||||
| mIsASet = true; | |||||
| mIsRhsSet = false; // invalidate rhs | |||||
| } | |||||
| struct knSetRhs : public KernelBase { | |||||
| knSetRhs(std::vector<Real>& b, const Grid<Real>& rhs, const GridMg& mg) : KernelBase(b.size()) ,b(b),rhs(rhs),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& b, const Grid<Real>& rhs, const GridMg& mg ) const { | |||||
| b[idx] = rhs[idx]; | |||||
| // scale down trivial equations | |||||
| if (mg.mType[0][idx] == GridMg::vtActiveTrivial) { b[idx] *= mg.mTrivialEquationScale; }; | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return b; } | |||||
| typedef std::vector<Real> type0;inline const Grid<Real>& getArg1() { | |||||
| return rhs; } | |||||
| typedef Grid<Real> type1;inline const GridMg& getArg2() { | |||||
| return mg; } | |||||
| typedef GridMg type2; void runMessage() { debMsg("Executing kernel knSetRhs ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, b,rhs,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& b; const Grid<Real>& rhs; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::setRhs(const Grid<Real>& rhs) | |||||
| { | |||||
| assertMsg(mIsASet, "GridMg::setRhs Error: A has not been set."); | |||||
| knSetRhs(mb[0], rhs, *this); | |||||
| mIsRhsSet = true; | |||||
| } | |||||
| template <class T> struct knSet : public KernelBase { | |||||
| knSet(std::vector<T>& data, T value) : KernelBase(data.size()) ,data(data),value(value) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<T>& data, T value ) const { data[idx] = value; } inline std::vector<T>& getArg0() { | |||||
| return data; } | |||||
| typedef std::vector<T> type0;inline T& getArg1() { | |||||
| return value; } | |||||
| typedef T type1; void runMessage() { debMsg("Executing kernel knSet ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, data,value); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<T>& data; T value; } | |||||
| ; | |||||
| template <class T> struct knCopyToVector : public KernelBase { | |||||
| knCopyToVector(std::vector<T>& dst, const Grid<T>& src) : KernelBase(dst.size()) ,dst(dst),src(src) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<T>& dst, const Grid<T>& src ) const { dst[idx] = src[idx]; } inline std::vector<T>& getArg0() { | |||||
| return dst; } | |||||
| typedef std::vector<T> type0;inline const Grid<T>& getArg1() { | |||||
| return src; } | |||||
| typedef Grid<T> type1; void runMessage() { debMsg("Executing kernel knCopyToVector ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, dst,src); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<T>& dst; const Grid<T>& src; } | |||||
| ; | |||||
| template <class T> struct knCopyToGrid : public KernelBase { | |||||
| knCopyToGrid(const std::vector<T>& src, Grid<T>& dst) : KernelBase(src.size()) ,src(src),dst(dst) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, const std::vector<T>& src, Grid<T>& dst ) const { dst[idx] = src[idx]; } inline const std::vector<T>& getArg0() { | |||||
| return src; } | |||||
| typedef std::vector<T> type0;inline Grid<T>& getArg1() { | |||||
| return dst; } | |||||
| typedef Grid<T> type1; void runMessage() { debMsg("Executing kernel knCopyToGrid ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, src,dst); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| const std::vector<T>& src; Grid<T>& dst; } | |||||
| ; | |||||
| template <class T> struct knAddAssign : public KernelBase { | |||||
| knAddAssign(std::vector<T>& dst, const std::vector<T>& src) : KernelBase(dst.size()) ,dst(dst),src(src) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<T>& dst, const std::vector<T>& src ) const { dst[idx] += src[idx]; } inline std::vector<T>& getArg0() { | |||||
| return dst; } | |||||
| typedef std::vector<T> type0;inline const std::vector<T>& getArg1() { | |||||
| return src; } | |||||
| typedef std::vector<T> type1; void runMessage() { debMsg("Executing kernel knAddAssign ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, dst,src); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<T>& dst; const std::vector<T>& src; } | |||||
| ; | |||||
| Real GridMg::doVCycle(Grid<Real>& dst, const Grid<Real>* src) | |||||
| { | |||||
| MG_TIMINGS(MuTime timeSmooth; MuTime timeCG; MuTime timeI; MuTime timeR; MuTime timeTotal; MuTime time;) | |||||
| MG_TIMINGS(timeSmooth.clear(); timeCG.clear(); timeI.clear(); timeR.clear();) | |||||
| assertMsg(mIsASet && mIsRhsSet, "GridMg::doVCycle Error: A and/or rhs have not been set."); | |||||
| const int maxLevel = int(mA.size()) - 1; | |||||
| if (src) { knCopyToVector<Real>(mx[0], *src); } | |||||
| else { knSet<Real>(mx[0], Real(0)); } | |||||
| for (int l=0; l<maxLevel; l++) | |||||
| { | |||||
| MG_TIMINGS(time.update();) | |||||
| for (int i=0; i<mNumPreSmooth; i++) { | |||||
| smoothGS(l, false); | |||||
| } | |||||
| MG_TIMINGS(timeSmooth += time.update();) | |||||
| calcResidual(l); | |||||
| restrict(l+1, mr[l], mb[l+1]); | |||||
| knSet<Real>(mx[l+1], Real(0)); | |||||
| MG_TIMINGS(timeR += time.update();) | |||||
| } | |||||
| MG_TIMINGS(time.update();) | |||||
| solveCG(maxLevel); | |||||
| MG_TIMINGS(timeCG += time.update();) | |||||
| for (int l=maxLevel-1; l>=0; l--) | |||||
| { | |||||
| MG_TIMINGS(time.update();) | |||||
| interpolate(l, mx[l+1], mr[l]); | |||||
| knAddAssign<Real>(mx[l], mr[l]); | |||||
| MG_TIMINGS(timeI += time.update();) | |||||
| for (int i=0; i<mNumPostSmooth; i++) { | |||||
| smoothGS(l, true); | |||||
| } | |||||
| MG_TIMINGS(timeSmooth += time.update();) | |||||
| } | |||||
| calcResidual(0); | |||||
| Real res = calcResidualNorm(0); | |||||
| knCopyToGrid<Real>(mx[0], dst); | |||||
| MG_TIMINGS(debMsg("GridMg: Finished VCycle in "<<timeTotal.update()<<" (smoothing: "<<timeSmooth<<", CG: "<<timeCG<<", R: "<<timeR<<", I: "<<timeI<<")", 1);) | |||||
| return res; | |||||
| } | |||||
| struct knActivateCoarseVertices : public KernelBase { | |||||
| knActivateCoarseVertices(std::vector<GridMg::VertexType>& type, int unused) : KernelBase(type.size()) ,type(type),unused(unused) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<GridMg::VertexType>& type, int unused ) const { | |||||
| // set all remaining 'free' vertices to 'removed', | |||||
| if (type[idx] == GridMg::vtFree) type[idx] = GridMg::vtRemoved; | |||||
| // then convert 'zero' vertices to 'active' and 'removed' vertices to 'inactive' | |||||
| if (type[idx] == GridMg::vtZero ) type[idx] = GridMg::vtActive; | |||||
| if (type[idx] == GridMg::vtRemoved) type[idx] = GridMg::vtInactive; | |||||
| } inline std::vector<GridMg::VertexType>& getArg0() { | |||||
| return type; } | |||||
| typedef std::vector<GridMg::VertexType> type0;inline int& getArg1() { | |||||
| return unused; } | |||||
| typedef int type1; void runMessage() { debMsg("Executing kernel knActivateCoarseVertices ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, type,unused); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<GridMg::VertexType>& type; int unused; } | |||||
| ; | |||||
| // Determine active cells on coarse level l from active cells on fine level l-1 | |||||
| // while ensuring a full-rank interpolation operator (see Section 3.3 in [1]). | |||||
| void GridMg::genCoarseGrid(int l) | |||||
| { | |||||
| // AF_Free: unused/untouched vertices | |||||
| // AF_Zero: vertices selected for coarser level | |||||
| // AF_Removed: vertices removed from coarser level | |||||
| enum activeFlags : char {AF_Removed = 0, AF_Zero = 1, AF_Free = 2}; | |||||
| // initialize all coarse vertices with 'free' | |||||
| knSet<VertexType>(mType[l], vtFree); | |||||
| // initialize min heap of (ID: fine grid vertex, key: #free interpolation vertices) pairs | |||||
| NKMinHeap heap(int(mb[l-1].size()), mIs3D ? 9 : 5); // max 8 (or 4 in 2D) free interpolation vertices | |||||
| FOR_LVL(v,l-1) { | |||||
| if (mType[l-1][v] != vtInactive) { | |||||
| Vec3i V = vecIdx(v,l-1); | |||||
| int fiv = 1 << ((V.x % 2) + (V.y % 2) + (V.z % 2)); | |||||
| heap.setKey(v, fiv); | |||||
| } | |||||
| } | |||||
| // process fine vertices in heap consecutively, always choosing the vertex with | |||||
| // the currently smallest number of free interpolation vertices | |||||
| while (heap.size() > 0) | |||||
| { | |||||
| int v = heap.popMin().first; | |||||
| Vec3i V = vecIdx(v,l-1); | |||||
| // loop over associated interpolation vertices of V on coarse level l: | |||||
| // the first encountered 'free' vertex is set to 'zero', | |||||
| // all remaining 'free' vertices are set to 'removed'. | |||||
| bool vdone = false; | |||||
| FOR_VEC_MINMAX(I, V/2, (V+1)/2) { | |||||
| int i = linIdx(I,l); | |||||
| if (mType[l][i] == vtFree) { | |||||
| if (vdone) { | |||||
| mType[l][i] = vtRemoved; | |||||
| } else { | |||||
| mType[l][i] = vtZero; | |||||
| vdone = true; | |||||
| } | |||||
| // update #free interpolation vertices in heap: | |||||
| // loop over all associated restriction vertices of I on fine level l-1 | |||||
| FOR_VEC_MINMAX(R, vmax(0, I*2-1), vmin(mSize[l-1]-1, I*2+1)) { | |||||
| int r = linIdx(R,l-1); | |||||
| int key = heap.getKey(r); | |||||
| if (key > 1) { heap.setKey(r, key-1); } // decrease key of r | |||||
| else if (key >-1) { heap.setKey(r, -1); } // removes r from heap | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| knActivateCoarseVertices(mType[l],0); | |||||
| } | |||||
| struct knGenCoarseGridOperator : public KernelBase { | |||||
| knGenCoarseGridOperator(std::vector<Real>& sizeRef, std::vector<Real>& A, int l, const GridMg& mg) : KernelBase(sizeRef.size()) ,sizeRef(sizeRef),A(A),l(l),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& sizeRef, std::vector<Real>& A, int l, const GridMg& mg ) const { | |||||
| if (mg.mType[l][idx] == GridMg::vtInactive) return; | |||||
| for (int i=0; i<mg.mStencilSize; i++) { A[idx*mg.mStencilSize+i] = Real(0); } // clear stencil | |||||
| Vec3i V = mg.vecIdx(int(idx),l); | |||||
| // Calculate the stencil of A_l at V by considering all vertex paths of the form: | |||||
| // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) | |||||
| // V and N are vertices on the coarse grid level l, | |||||
| // U and W are vertices on the fine grid level l-1. | |||||
| if (l==1) { | |||||
| // loop over precomputed paths | |||||
| for (auto it = mg.mCoarseningPaths0.begin(); it != mg.mCoarseningPaths0.end(); it++) { | |||||
| Vec3i N = V + it->N; | |||||
| int n = mg.linIdx(N,l); | |||||
| if (!mg.inGrid(N,l) || mg.mType[l][n]==GridMg::vtInactive) continue; | |||||
| Vec3i U = V*2 + it->U; | |||||
| int u = mg.linIdx(U,l-1); | |||||
| if (!mg.inGrid(U,l-1) || mg.mType[l-1][u]==GridMg::vtInactive) continue; | |||||
| Vec3i W = V*2 + it->W; | |||||
| int w = mg.linIdx(W,l-1); | |||||
| if (!mg.inGrid(W,l-1) || mg.mType[l-1][w]==GridMg::vtInactive) continue; | |||||
| if (it->inUStencil) { | |||||
| A[idx*mg.mStencilSize + it->sc] += it->rw * mg.mA[l-1][u*mg.mStencilSize0 + it->sf] *it->iw; | |||||
| } else { | |||||
| A[idx*mg.mStencilSize + it->sc] += it->rw * mg.mA[l-1][w*mg.mStencilSize0 + it->sf] *it->iw; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| // l > 1: | |||||
| // loop over restriction vertices U on level l-1 associated with V | |||||
| FOR_VEC_MINMAX(U, vmax(0, V*2-1), vmin(mg.mSize[l-1]-1, V*2+1)) { | |||||
| int u = mg.linIdx(U,l-1); | |||||
| if (mg.mType[l-1][u] == GridMg::vtInactive) continue; | |||||
| // restriction weight | |||||
| Real rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); | |||||
| // loop over all stencil neighbors N of V on level l that can be reached via restriction to U | |||||
| FOR_VEC_MINMAX(N, (U-1)/2, vmin(mg.mSize[l]-1, (U+2)/2)) { | |||||
| int n = mg.linIdx(N,l); | |||||
| if (mg.mType[l][n] == GridMg::vtInactive) continue; | |||||
| // stencil entry at V associated to N (coarse grid level l) | |||||
| Vec3i SC = N - V + mg.mStencilMax; | |||||
| int sc = SC.x + 3*SC.y + 9*SC.z; | |||||
| if (sc < mg.mStencilSize-1) continue; | |||||
| // loop over all vertices W which are in the stencil of A_{l-1} at U | |||||
| // and which interpolate from N | |||||
| FOR_VEC_MINMAX(W, vmax( 0, vmax(U-1,N*2-1)), | |||||
| vmin(mg.mSize[l-1]-1, vmin(U+1,N*2+1))) { | |||||
| int w = mg.linIdx(W,l-1); | |||||
| if (mg.mType[l-1][w] == GridMg::vtInactive) continue; | |||||
| // stencil entry at U associated to W (fine grid level l-1) | |||||
| Vec3i SF = W - U + mg.mStencilMax; | |||||
| int sf = SF.x + 3*SF.y + 9*SF.z; | |||||
| Real iw = Real(1) / Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight | |||||
| if (sf < mg.mStencilSize) { | |||||
| A[idx*mg.mStencilSize + sc-mg.mStencilSize+1] += rw * mg.mA[l-1][w*mg.mStencilSize + mg.mStencilSize-1-sf] *iw; | |||||
| } else { | |||||
| A[idx*mg.mStencilSize + sc-mg.mStencilSize+1] += rw * mg.mA[l-1][u*mg.mStencilSize + sf-mg.mStencilSize+1] *iw; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return sizeRef; } | |||||
| typedef std::vector<Real> type0;inline std::vector<Real>& getArg1() { | |||||
| return A; } | |||||
| typedef std::vector<Real> type1;inline int& getArg2() { | |||||
| return l; } | |||||
| typedef int type2;inline const GridMg& getArg3() { | |||||
| return mg; } | |||||
| typedef GridMg type3; void runMessage() { debMsg("Executing kernel knGenCoarseGridOperator ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, sizeRef,A,l,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& sizeRef; std::vector<Real>& A; int l; const GridMg& mg; } | |||||
| ; | |||||
| // Calculate A_l on coarse level l from A_{l-1} on fine level l-1 using | |||||
| // Galerkin-based coarsening, i.e., compute A_l = R * A_{l-1} * I. | |||||
| void GridMg::genCoraseGridOperator(int l) | |||||
| { | |||||
| // for each coarse grid vertex V | |||||
| knGenCoarseGridOperator(mx[l], mA[l], l, *this); | |||||
| } | |||||
| struct knSmoothColor : public KernelBase { | |||||
| knSmoothColor(ThreadSize& numBlocks, std::vector<Real>& x, const Vec3i& blockSize, const std::vector<Vec3i>& colorOffs, int l, const GridMg& mg) : KernelBase(numBlocks.size()) ,numBlocks(numBlocks),x(x),blockSize(blockSize),colorOffs(colorOffs),l(l),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, ThreadSize& numBlocks, std::vector<Real>& x, const Vec3i& blockSize, const std::vector<Vec3i>& colorOffs, int l, const GridMg& mg ) const { | |||||
| Vec3i blockOff (int(idx)%blockSize.x, (int(idx)%(blockSize.x*blockSize.y))/blockSize.x, int(idx)/(blockSize.x*blockSize.y)); | |||||
| for (int off = 0; off < colorOffs.size(); off++) { | |||||
| Vec3i V = blockOff*2 + colorOffs[off]; | |||||
| if (!mg.inGrid(V,l)) continue; | |||||
| const int v = mg.linIdx(V,l); | |||||
| if (mg.mType[l][v] == GridMg::vtInactive) continue; | |||||
| Real sum = mg.mb[l][v]; | |||||
| if (l==0) { | |||||
| int n; | |||||
| for (int d=0; d<mg.mDim; d++) { | |||||
| if (V[d]>0) { n = v-mg.mPitch[0][d]; sum -= mg.mA[0][n*mg.mStencilSize0 + d+1] * mg.mx[0][n]; } | |||||
| if (V[d]<mg.mSize[0][d]-1) { n = v+mg.mPitch[0][d]; sum -= mg.mA[0][v*mg.mStencilSize0 + d+1] * mg.mx[0][n]; } | |||||
| } | |||||
| x[v] = sum / mg.mA[0][v*mg.mStencilSize0 + 0]; | |||||
| } else { | |||||
| FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) { | |||||
| if (s == mg.mStencilSize-1) continue; | |||||
| Vec3i N = V + S; | |||||
| int n = mg.linIdx(N,l); | |||||
| if (mg.inGrid(N,l) && mg.mType[l][n]!=GridMg::vtInactive) { | |||||
| if (s < mg.mStencilSize) { | |||||
| sum -= mg.mA[l][n*mg.mStencilSize + mg.mStencilSize-1-s] * mg.mx[l][n]; | |||||
| } else { | |||||
| sum -= mg.mA[l][v*mg.mStencilSize + s-mg.mStencilSize+1] * mg.mx[l][n]; | |||||
| } | |||||
| } | |||||
| } | |||||
| x[v] = sum / mg.mA[l][v*mg.mStencilSize + 0]; | |||||
| } | |||||
| } | |||||
| } inline ThreadSize& getArg0() { | |||||
| return numBlocks; } | |||||
| typedef ThreadSize type0;inline std::vector<Real>& getArg1() { | |||||
| return x; } | |||||
| typedef std::vector<Real> type1;inline const Vec3i& getArg2() { | |||||
| return blockSize; } | |||||
| typedef Vec3i type2;inline const std::vector<Vec3i>& getArg3() { | |||||
| return colorOffs; } | |||||
| typedef std::vector<Vec3i> type3;inline int& getArg4() { | |||||
| return l; } | |||||
| typedef int type4;inline const GridMg& getArg5() { | |||||
| return mg; } | |||||
| typedef GridMg type5; void runMessage() { debMsg("Executing kernel knSmoothColor ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, numBlocks,x,blockSize,colorOffs,l,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| ThreadSize& numBlocks; std::vector<Real>& x; const Vec3i& blockSize; const std::vector<Vec3i>& colorOffs; int l; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::smoothGS(int l, bool reversedOrder) | |||||
| { | |||||
| // Multicolor Gauss-Seidel with two colors for the 5/7-point stencil on level 0 | |||||
| // and with four/eight colors for the 9/27-point stencil on levels > 0 | |||||
| std::vector<std::vector<Vec3i>> colorOffs; | |||||
| const Vec3i a[8] = {Vec3i(0,0,0), Vec3i(1,0,0), Vec3i(0,1,0), Vec3i(1,1,0), | |||||
| Vec3i(0,0,1), Vec3i(1,0,1), Vec3i(0,1,1), Vec3i(1,1,1)}; | |||||
| if (mIs3D) { | |||||
| if (l==0) colorOffs = {{a[0],a[3],a[5],a[6]}, {a[1],a[2],a[4],a[7]}}; | |||||
| else colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}, {a[4]}, {a[5]}, {a[6]}, {a[7]}}; | |||||
| } else { | |||||
| if (l==0) colorOffs = {{a[0],a[3]}, {a[1],a[2]}}; | |||||
| else colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}}; | |||||
| } | |||||
| // Divide grid into 2x2 blocks for parallelization | |||||
| Vec3i blockSize = (mSize[l]+1)/2; | |||||
| ThreadSize numBlocks(blockSize.x * blockSize.y * blockSize.z); | |||||
| for (int c = 0; c < colorOffs.size(); c++) { | |||||
| int color = reversedOrder ? int(colorOffs.size())-1-c : c; | |||||
| knSmoothColor(numBlocks, mx[l], blockSize, colorOffs[color], l, *this); | |||||
| } | |||||
| } | |||||
| struct knCalcResidual : public KernelBase { | |||||
| knCalcResidual(std::vector<Real>& r, int l, const GridMg& mg) : KernelBase(r.size()) ,r(r),l(l),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& r, int l, const GridMg& mg ) const { | |||||
| if (mg.mType[l][idx] == GridMg::vtInactive) return; | |||||
| Vec3i V = mg.vecIdx(int(idx),l); | |||||
| Real sum = mg.mb[l][idx]; | |||||
| if (l==0) { | |||||
| int n; | |||||
| for (int d=0; d<mg.mDim; d++) { | |||||
| if (V[d]>0) { n = int(idx)-mg.mPitch[0][d]; sum -= mg.mA[0][n *mg.mStencilSize0 + d+1] * mg.mx[0][n]; } | |||||
| if (V[d]<mg.mSize[0][d]-1) { n = int(idx)+mg.mPitch[0][d]; sum -= mg.mA[0][idx*mg.mStencilSize0 + d+1] * mg.mx[0][n]; } | |||||
| } | |||||
| sum -= mg.mA[0][idx*mg.mStencilSize0 + 0] * mg.mx[0][idx]; | |||||
| } else { | |||||
| FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) { | |||||
| Vec3i N = V + S; | |||||
| int n = mg.linIdx(N,l); | |||||
| if (mg.inGrid(N,l) && mg.mType[l][n]!=GridMg::vtInactive) { | |||||
| if (s < mg.mStencilSize) { | |||||
| sum -= mg.mA[l][n *mg.mStencilSize + mg.mStencilSize-1-s] * mg.mx[l][n]; | |||||
| } else { | |||||
| sum -= mg.mA[l][idx*mg.mStencilSize + s-mg.mStencilSize+1] * mg.mx[l][n]; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| r[idx] = sum; | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return r; } | |||||
| typedef std::vector<Real> type0;inline int& getArg1() { | |||||
| return l; } | |||||
| typedef int type1;inline const GridMg& getArg2() { | |||||
| return mg; } | |||||
| typedef GridMg type2; void runMessage() { debMsg("Executing kernel knCalcResidual ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, r,l,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& r; int l; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::calcResidual(int l) | |||||
| { | |||||
| knCalcResidual(mr[l], l, *this); | |||||
| } | |||||
| struct knResidualNormSumSqr : public KernelBase { | |||||
| knResidualNormSumSqr(const vector<Real>& r, int l, const GridMg& mg) : KernelBase(r.size()) ,r(r),l(l),mg(mg) ,result(Real(0)) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, const vector<Real>& r, int l, const GridMg& mg ,Real& result) { | |||||
| if (mg.mType[l][idx] == GridMg::vtInactive) return; | |||||
| result += r[idx] * r[idx]; | |||||
| } inline operator Real () { | |||||
| return result; } | |||||
| inline Real & getRet() { | |||||
| return result; } | |||||
| inline const vector<Real>& getArg0() { | |||||
| return r; } | |||||
| typedef vector<Real> type0;inline int& getArg1() { | |||||
| return l; } | |||||
| typedef int type1;inline const GridMg& getArg2() { | |||||
| return mg; } | |||||
| typedef GridMg type2; void runMessage() { debMsg("Executing kernel knResidualNormSumSqr ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, r,l,mg,result); } | |||||
| void run() { | |||||
| tbb::parallel_reduce (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| knResidualNormSumSqr (knResidualNormSumSqr& o, tbb::split) : KernelBase(o) ,r(o.r),l(o.l),mg(o.mg) ,result(Real(0)) { | |||||
| } | |||||
| void join(const knResidualNormSumSqr & o) { | |||||
| result += o.result; } | |||||
| const vector<Real>& r; int l; const GridMg& mg; Real result; } | |||||
| ;; | |||||
| Real GridMg::calcResidualNorm(int l) | |||||
| { | |||||
| Real res = knResidualNormSumSqr(mr[l], l, *this); | |||||
| return std::sqrt(res); | |||||
| } | |||||
| // Standard conjugate gradients with Jacobi preconditioner | |||||
| // Notes: Always run at double precision. Not parallelized since | |||||
| // coarsest level is assumed to be small. | |||||
| void GridMg::solveCG(int l) | |||||
| { | |||||
| auto applyAStencil = [this](int v, int l, const std::vector<double>& vec) -> double { | |||||
| Vec3i V = vecIdx(v,l); | |||||
| double sum = 0; | |||||
| if (l==0) { | |||||
| int n; | |||||
| for (int d=0; d<mDim; d++) { | |||||
| if (V[d]>0) { n = v-mPitch[0][d]; sum += mA[0][n*mStencilSize0 + d+1] * vec[n]; } | |||||
| if (V[d]<mSize[0][d]-1) { n = v+mPitch[0][d]; sum += mA[0][v*mStencilSize0 + d+1] * vec[n]; } | |||||
| } | |||||
| sum += mA[0][v*mStencilSize0 + 0] * vec[v]; | |||||
| } else { | |||||
| FOR_VECLIN_MINMAX(S, s, mStencilMin, mStencilMax) { | |||||
| Vec3i N = V + S; | |||||
| int n = linIdx(N,l); | |||||
| if (inGrid(N,l) && mType[l][n]!=vtInactive) { | |||||
| if (s < mStencilSize) { | |||||
| sum += mA[l][n*mStencilSize + mStencilSize-1-s] * vec[n]; | |||||
| } else { | |||||
| sum += mA[l][v*mStencilSize + s-mStencilSize+1] * vec[n]; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return sum; | |||||
| }; | |||||
| std::vector<double>& z = mCGtmp1[l]; | |||||
| std::vector<double>& p = mCGtmp2[l]; | |||||
| std::vector<double>& x = mCGtmp3[l]; | |||||
| std::vector<double>& r = mCGtmp4[l]; | |||||
| // Initialization: | |||||
| double alphaTop = 0; | |||||
| double initialResidual = 0; | |||||
| FOR_LVL(v,l) { x[v] = mx[l][v]; } | |||||
| FOR_LVL(v,l) { | |||||
| if (mType[l][v] == vtInactive) continue; | |||||
| r[v] = mb[l][v] - applyAStencil(v,l,x); | |||||
| if (l==0) { z[v] = r[v] / mA[0][v*mStencilSize0 + 0]; } | |||||
| else { z[v] = r[v] / mA[l][v*mStencilSize + 0]; } | |||||
| initialResidual += r[v] * r[v]; | |||||
| p[v] = z[v]; | |||||
| alphaTop += r[v] * z[v]; | |||||
| } | |||||
| initialResidual = std::sqrt(initialResidual); | |||||
| int iter = 0; | |||||
| const int maxIter = 10000; | |||||
| double residual = -1; | |||||
| // CG iterations | |||||
| for (; iter<maxIter && initialResidual>1E-12; iter++) | |||||
| { | |||||
| double alphaBot = 0; | |||||
| FOR_LVL(v,l) { | |||||
| if (mType[l][v] == vtInactive) continue; | |||||
| z[v] = applyAStencil(v,l,p); | |||||
| alphaBot += p[v] * z[v]; | |||||
| } | |||||
| double alpha = alphaTop / alphaBot; | |||||
| double alphaTopNew = 0; | |||||
| residual = 0; | |||||
| FOR_LVL(v,l) { | |||||
| if (mType[l][v] == vtInactive) continue; | |||||
| x[v] += alpha * p[v]; | |||||
| r[v] -= alpha * z[v]; | |||||
| residual += r[v] * r[v]; | |||||
| if (l==0) z[v] = r[v] / mA[0][v*mStencilSize0 + 0]; | |||||
| else z[v] = r[v] / mA[l][v*mStencilSize + 0]; | |||||
| alphaTopNew += r[v] * z[v]; | |||||
| } | |||||
| residual = std::sqrt(residual); | |||||
| if (residual / initialResidual < mCoarsestLevelAccuracy) break; | |||||
| double beta = alphaTopNew / alphaTop; | |||||
| alphaTop = alphaTopNew; | |||||
| FOR_LVL(v,l) { | |||||
| p[v] = z[v] + beta * p[v]; | |||||
| } | |||||
| debMsg("GridMg::solveCG i="<<iter<<" rel-residual="<< (residual / initialResidual) , 5); | |||||
| } | |||||
| FOR_LVL(v,l) { mx[l][v] = Real(x[v]); } | |||||
| if (iter == maxIter) { debMsg("GridMg::solveCG Warning: Reached maximum number of CG iterations", 1); } | |||||
| else { debMsg("GridMg::solveCG Info: Reached residual "<<residual<<" in "<<iter<<" iterations", 2); } | |||||
| } | |||||
| struct knRestrict : public KernelBase { | |||||
| knRestrict(std::vector<Real>& dst, const std::vector<Real>& src, int l_dst, const GridMg& mg) : KernelBase(dst.size()) ,dst(dst),src(src),l_dst(l_dst),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& dst, const std::vector<Real>& src, int l_dst, const GridMg& mg ) const { | |||||
| if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; | |||||
| const int l_src = l_dst - 1; | |||||
| // Coarse grid vertex | |||||
| Vec3i V = mg.vecIdx(int(idx),l_dst); | |||||
| Real sum = Real(0); | |||||
| FOR_VEC_MINMAX(R, vmax(0, V*2-1), vmin(mg.mSize[l_src]-1, V*2+1)) { | |||||
| int r = mg.linIdx(R,l_src); | |||||
| if (mg.mType[l_src][r] == GridMg::vtInactive) continue; | |||||
| // restriction weight | |||||
| Real rw = Real(1) / Real(1 << ((R.x % 2) + (R.y % 2) + (R.z % 2))); | |||||
| sum += rw * src[r]; | |||||
| } | |||||
| dst[idx] = sum; | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return dst; } | |||||
| typedef std::vector<Real> type0;inline const std::vector<Real>& getArg1() { | |||||
| return src; } | |||||
| typedef std::vector<Real> type1;inline int& getArg2() { | |||||
| return l_dst; } | |||||
| typedef int type2;inline const GridMg& getArg3() { | |||||
| return mg; } | |||||
| typedef GridMg type3; void runMessage() { debMsg("Executing kernel knRestrict ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, dst,src,l_dst,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& dst; const std::vector<Real>& src; int l_dst; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::restrict(int l_dst, const std::vector<Real>& src, std::vector<Real>& dst) const | |||||
| { | |||||
| knRestrict(dst, src, l_dst, *this); | |||||
| } | |||||
| struct knInterpolate : public KernelBase { | |||||
| knInterpolate(std::vector<Real>& dst, const std::vector<Real>& src, int l_dst, const GridMg& mg) : KernelBase(dst.size()) ,dst(dst),src(src),l_dst(l_dst),mg(mg) { | |||||
| runMessage(); run(); } | |||||
| inline void op(IndexInt idx, std::vector<Real>& dst, const std::vector<Real>& src, int l_dst, const GridMg& mg ) const { | |||||
| if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; | |||||
| const int l_src = l_dst + 1; | |||||
| Vec3i V = mg.vecIdx(int(idx),l_dst); | |||||
| Real sum = Real(0); | |||||
| FOR_VEC_MINMAX(I, V/2, (V+1)/2) { | |||||
| int i = mg.linIdx(I,l_src); | |||||
| if (mg.mType[l_src][i] != GridMg::vtInactive) sum += src[i]; | |||||
| } | |||||
| // interpolation weight | |||||
| Real iw = Real(1) / Real(1 << ((V.x % 2) + (V.y % 2) + (V.z % 2))); | |||||
| dst[idx] = iw * sum; | |||||
| } inline std::vector<Real>& getArg0() { | |||||
| return dst; } | |||||
| typedef std::vector<Real> type0;inline const std::vector<Real>& getArg1() { | |||||
| return src; } | |||||
| typedef std::vector<Real> type1;inline int& getArg2() { | |||||
| return l_dst; } | |||||
| typedef int type2;inline const GridMg& getArg3() { | |||||
| return mg; } | |||||
| typedef GridMg type3; void runMessage() { debMsg("Executing kernel knInterpolate ", 3); debMsg("Kernel range" << " size "<< size << " " , 4); }; void operator() (const tbb::blocked_range<IndexInt>& __r) const { | |||||
| for (IndexInt idx=__r.begin(); idx!=(IndexInt)__r.end(); idx++) op(idx, dst,src,l_dst,mg); } | |||||
| void run() { | |||||
| tbb::parallel_for (tbb::blocked_range<IndexInt>(0, size), *this); } | |||||
| std::vector<Real>& dst; const std::vector<Real>& src; int l_dst; const GridMg& mg; } | |||||
| ; | |||||
| void GridMg::interpolate(int l_dst, const std::vector<Real>& src, std::vector<Real>& dst) const | |||||
| { | |||||
| knInterpolate(dst, src, l_dst, *this); | |||||
| } | |||||
| }; // DDF | |||||