-
Notifications
You must be signed in to change notification settings - Fork 2
/
PdeFiniteDifference.cuh
162 lines (139 loc) · 5.54 KB
/
PdeFiniteDifference.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include <Common.cuh>
#include <Flags.cuh>
#include <Types.h>
#include "FiniteDifferenceTypes.h"
#include <CuBlasWrappers.cuh>
#include <BufferInitializer.cuh>
#include <MemoryManager.cuh>
#include <array>
namespace detail
{
/**
* Evolve the solution using the time discretizer.
* N.B.: solution is a memory tile, as some solver might require the solution history
* N.B.2: if provided, workBuffer is a previously allocated buffer used for matrix-vector multiplication
*/
int _Advance(MemoryTile& solution, const MemoryCube& timeDiscretizer, MemoryTile& workBuffer, const bool overwriteBuffer);
int _SparseAdvance(MemoryBuffer& solution, SparseMemoryTile& timeDiscretizer, MemoryBuffer& workBuffer, const bool overwriteBuffer);
// N is the size of the Butcher tableau table
// aMatrix is the lower triangular tableau matrix. If the diagonal is populated the method is an implicit RK
// bvector is the vector used for composing the "k"'s
// WARNING: This doesn't support dense aMatrix, but only lower triangular
template<unsigned N>
int _MakeRungeKuttaDiscretizer(const std::array<double, N * (N + 1) / 2>& aMatrix,
const std::array<double, N>& bVector,
const double dt,
const MemoryTile& spaceDiscretizer,
MemoryTile& timeDiscretizer)
{
auto getLowerTriangularIndex = [](const unsigned i, const unsigned j)
{
return j + i * (i + 1) / 2;
};
MemoryCube kVector(0, timeDiscretizer.nRows, timeDiscretizer.nCols, N, timeDiscretizer.memorySpace, timeDiscretizer.mathDomain);
_Alloc(kVector);
MemoryTile kRhs(timeDiscretizer); // kRhs is a working buffer that stores k_i r.h.s.
_Alloc(kRhs);
// loop for calculating k_i
for (unsigned i = 0; i < N; ++i)
{
_Eye(kRhs);
// aMatrix * k multiplication
for (unsigned j = 0; j < i; ++j)
{
MemoryTile k_j;
ExtractMatrixBufferFromCube(k_j, kVector, j);
if (aMatrix[getLowerTriangularIndex(i, j)] != 0.0)
_AddEqualMatrix(kRhs, k_j, MatrixOperation::None, MatrixOperation::None, 1.0, aMatrix[getLowerTriangularIndex(i, j)] * dt);
}
MemoryTile k_i;
ExtractMatrixBufferFromCube(k_i, kVector, i);
_Multiply(k_i, spaceDiscretizer, kRhs, MatrixOperation::None, MatrixOperation::None);
if (aMatrix[getLowerTriangularIndex(i, i)] != 0.0)
{
// re-set kRhs instead of allocating kLhs
_Eye(kRhs);
_AddEqual(kRhs, spaceDiscretizer, -aMatrix[getLowerTriangularIndex(i, i)] * dt);
_Solve(kRhs, k_i);
}
}
//now that all kVector items are set, fo the b * k multiplication
_Eye(timeDiscretizer); // initialise time discretizer with the identity
for (unsigned j = 0; j < N; ++j)
{
MemoryTile k_j;
ExtractMatrixBufferFromCube(k_j, kVector, j);
_AddEqualMatrix(timeDiscretizer, k_j, MatrixOperation::None, MatrixOperation::None, 1.0, bVector[j] * dt);
}
_Free(kVector);
_Free(kRhs);
return cudaGetLastError();
}
int _MakeRungeKuttaGaussLegendre(const double dt, const MemoryTile& spaceDiscretizer, const MemoryTile& timeDiscretizer);
}
EXTERN_C
{
/**
* Calculates the time discretization for the ODE (that comes from the space discretization of the advection-diffusion PDE)
u' = L * u
*/
EXPORT int _MakeTimeDiscretizerAdvectionDiffusion(MemoryCube& timeDiscretizer, const MemoryTile& spaceDiscretizer, const SolverType solverType, const double dt);
/**
* Calculates the time discretization for the ODE (that comes from the space discretization of the advection-diffusion PDE)
u'' = L * u
*/
EXPORT int _MakeTimeDiscretizerWaveEquation(MemoryCube& timeDiscretizer, const MemoryTile& spaceDiscretizer, const SolverType solverType, const double dt);
}
template <typename T>
__forceinline__ DEVICE void __MakeSpaceDiscretizerWorker__(T& up, T& mid, T& down,
SpaceDiscretizerType discretizerType,
const T velocity, const T diffusion,
const T dPlus, const T dMinus, const T dMid,
const T multiplierMinus, const T multiplierPlus,
const T dt)
{
// discretize advection with the given space discretizer
if (discretizerType == SpaceDiscretizerType::Centered)
{
down -= dPlus * velocity * multiplierMinus;
up += dMinus * velocity * multiplierPlus;
mid += down + up;
}
else if (discretizerType == SpaceDiscretizerType::Upwind)
{
if (velocity > 0)
{
const T discretizerValue = velocity / dPlus;
up += discretizerValue;
mid -= discretizerValue;
}
else
{
const T discretizerValue = velocity / dMinus;
mid += discretizerValue;
down -= discretizerValue;
}
}
else if (discretizerType == SpaceDiscretizerType::LaxWendroff)
{
// discretize with a centered scheme
down -= dPlus * velocity * multiplierMinus;
up += dMinus * velocity * multiplierPlus;
// central point will be corrected along with diffusion!
// add artifical diffusion: .5 * velocity^2 * dt
const T diffusionMultiplier = velocity * velocity * dt;
const T diffusionContributionMinus = diffusionMultiplier * multiplierMinus;
const T diffusionContributionPlus = diffusionMultiplier * multiplierPlus;
down += diffusionContributionMinus;
up += diffusionContributionPlus;
// correct for both advection and artifical diffusion
mid -= down + up;
}
// discretize diffusion with 3 points
const T diffusionMultiplier = static_cast<T>(2.0) * diffusion;
const T diffusionContributionMinus = diffusionMultiplier * multiplierMinus;
const T diffusionContributionPlus = diffusionMultiplier * multiplierPlus;
down += diffusionContributionMinus;
up += diffusionContributionPlus;
mid -= diffusionContributionMinus + diffusionContributionPlus;
}