OPALX (Object Oriented Parallel Accelerator Library for Exascale) MINIorX
OPALX
MultiVariateGaussian.cpp
Go to the documentation of this file.
2#include <Kokkos_Core.hpp>
3#include <mpi.h>
4
6
13MultiVariateGaussian::MultiVariateGaussian(std::shared_ptr<ParticleContainer_t> &pc,
14 std::shared_ptr<FieldContainer_t> &fc,
15 std::shared_ptr<Distribution_t> &opalDist)
16 : SamplingBase(pc, fc, opalDist) {
17
18 // Initialize covariance matrix from the distribution.
19 for (unsigned int i = 0; i < 6; i++) {
20 for (unsigned int j = 0; j < 6; j++) {
21 cov_m[i][j] = opalDist_m->correlationMatrix_m[i][j];
22 }
23 }
24
25 setSigmaR(opalDist_m->getSigmaR());
26 setSigmaP(opalDist_m->getSigmaP());
27 setCutoffR(opalDist_m->getCutoffR());
28 setCutoffP(opalDist_m->getCutoffP());
29
30 meanR_m = 0.0;
31 meanP_m = 0.0;
32 meanP_m[2] = opalDist_m->getAvrgpz();
33
34 samplerTimer_m = IpplTimings::getTimer("SamplingTimer");
36 }
37
38MultiVariateGaussian::MultiVariateGaussian(std::shared_ptr<ParticleContainer_t> pc,
39 const Vector_t<double, 3>& meanR,
40 const Vector_t<double, 3>& meanP,
41 const Vector_t<double, 3>& sigmaR,
42 const Vector_t<double, 3>& sigmaP,
43 const Vector_t<double, 3>& cutoffR,
44 const Vector_t<double, 3>& cutoffP,
45 bool fixMeanR,
46 bool fixMeanP)
47 : SamplingBase(pc) {
48 // Initialize covariance matrix from the distribution.
49 for (unsigned int i = 0; i < 6; i++) {
50 for (unsigned int j = 0; j < 6; j++) {
51 cov_m[i][j] = 0.0;
52 if(i==j && i%2==0){
53 cov_m[i][j] = sigmaR[i/2]*sigmaR[i/2];
54 }
55 if(i==j && i%2==1){
56 cov_m[i][j] = sigmaP[i/2]*sigmaP[i/2];
57 }
58 }
59 }
60 setMeanR(meanR);
61 setMeanP(meanP);
62 setSigmaR(sigmaR);
63 setSigmaP(sigmaP);
64 setCutoffR(cutoffR);
65 setCutoffP(cutoffP);
66 setFixMeanR(fixMeanR);
67 setFixMeanP(fixMeanP);
68
69 samplerTimer_m = IpplTimings::getTimer("SamplingTimer");
71}
72
73
74MultiVariateGaussian::MultiVariateGaussian(std::shared_ptr<ParticleContainer_t> pc,
75 const Vector_t<double, 3>& meanR,
76 const Vector_t<double, 3>& meanP,
77 const Matrix_t &cov,
78 const Vector_t<double, 3>& cutoffR,
79 const Vector_t<double, 3>& cutoffP,
80 bool fixMeanR,
81 bool fixMeanP)
82 : SamplingBase(pc) {
83
84 cov_m = cov;
85
86 setMeanR(meanR);
87 setMeanP(meanP);
88 setSigmaR(ippl::Vector<double,3>(Kokkos::sqrt(cov_m[0][0]),
89 Kokkos::sqrt(cov_m[2][2]),
90 Kokkos::sqrt(cov_m[4][4])));
91
92 setSigmaP(ippl::Vector<double,3>(Kokkos::sqrt(cov_m[1][1]),
93 Kokkos::sqrt(cov_m[3][3]),
94 Kokkos::sqrt(cov_m[5][5])));
95 setCutoffR(cutoffR);
96 setCutoffP(cutoffP);
97 setFixMeanR(fixMeanR);
98 setFixMeanP(fixMeanP);
99
100 samplerTimer_m = IpplTimings::getTimer("SamplingTimer");
102}
103
108 extern Inform* gmsg;
109 size_t randInit;
110
111 if (Options::seed == -1) {
112 randInit = 1234567;
113 *gmsg << "* Seed = " << randInit << " on all ranks" << endl;
114 } else {
115 randInit = static_cast<size_t>(Options::seed + 100 * ippl::Comm->rank());
116 }
117
118 GeneratorPool rand_pool64(randInit);
119 randPool_m = rand_pool64;
120 return;
121}
122
127 for (unsigned int i = 0; i < 6; i++) {
128 for (unsigned int j = 0; j < 6; j++) {
129 L_m[i][j] = 0.0;
130 }
131 }
132 double sum = 0.0;
133 for (unsigned int i = 0; i < 6; i++) {
134 for (unsigned int j = 0; j <= i; j++) {
135 sum = 0.0;
136 for (unsigned int k = 0; k < j; k++) {
137 sum += L_m[i][k] * L_m[j][k];
138 }
139 if (j == i) {
140 L_m[j][j] = Kokkos::sqrt(cov_m[j][j] - sum);
141 } else {
142 L_m[i][j] = (cov_m[i][j] - sum) / L_m[j][j];
143 }
144 }
145 }
146}
147
152 rmin_m = -cutoffR_m;
154 pmin_m = -cutoffP_m;
156
157 for (int i = 0; i < 3; i++) {
158 rmin_m(i) *= sigmaR_m(i);
159 rmax_m(i) *= sigmaR_m(i);
160 pmin_m(i) *= sigmaP_m(i);
161 pmax_m(i) *= sigmaP_m(i);
162
163 min_m(i * 2) = rmin_m(i);
164 max_m(i * 2) = rmax_m(i);
165 min_m(i * 2 + 1) = pmin_m(i);
166 max_m(i * 2 + 1) = pmax_m(i);
167 }
168
169 normMin_m = 0.0;
170 normMax_m = 0.0;
171 double sumMin, sumMax;
172 for(int i=0; i<6; i++){
173 sumMin = 0.0;
174 sumMax = 0.0;
175 for(int j=0; j<i; j++){
176 sumMin += -L_m[i][j]*normMin_m(j);
177 sumMax += -L_m[i][j]*normMax_m(j);
178 }
179 normMin_m(i) = (min_m(i)-sumMin)/L_m[i][i];
180 normMax_m(i) = (max_m(i)-sumMax)/L_m[i][i];
181 }
182
183 for(int i=0; i<3; i++){
184 normRmin_m(i) = min_m(2*i)/sigmaR_m(i);
185 normRmax_m(i) = max_m(2*i)/sigmaR_m(i);
186 normPmin_m(i) = min_m(2*i+1)/sigmaP_m(i);
187 normPmax_m(i) = max_m(2*i+1)/sigmaP_m(i);
188
189 rmin_m(i) /= sigmaR_m(i);
190 rmax_m(i) /= sigmaR_m(i);
191 pmin_m(i) /= sigmaP_m(i);
192 pmax_m(i) /= sigmaP_m(i);
193 }
194}
195
199void MultiVariateGaussian::generateParticles(size_t &numberOfParticles, Vector_t<double, 3> /*nr*/) {
201
202 auto rand_pool64 = randPool_m;
203 // compute L using Cholesky factorization cov=L*LT
205
206 // compute boundaries of normal random numbers
208
209 view_type &Rview = pc_m->R.getView();
210 view_type &Pview = pc_m->P.getView();
211
212 const double par[6] = {0.0, 1.0, 0.0, 1.0, 0.0, 1.0};
215 Dist_t dist(par);
216
217 MPI_Comm comm = MPI_COMM_WORLD;
218 int nranks, rank;
219 MPI_Comm_size(comm, &nranks);
220 MPI_Comm_rank(comm, &rank);
221
222 // if nlocal*nranks > numberOfParticles, put the remaining in rank 0
223 size_t nlocal = floor(numberOfParticles / nranks);
224 size_t remaining = numberOfParticles - nlocal * nranks;
225 if (remaining > 0 && rank == 0) {
226 nlocal += remaining;
227 }
228
229 sampling_t sampling(dist, normRmax_m, normRmin_m, normRmax_m, normRmin_m, nlocal);
230 pc_m->create(nlocal);
231 sampling.generate(Rview, rand_pool64);
232
233 sampling.updateBounds(normPmax_m, normPmin_m, normPmax_m, normPmin_m);
234 sampling.generate(Pview, rand_pool64);
235
236 Matrix_t L;
237 for (unsigned int i = 0; i < 6; i++) {
238 for (unsigned int j = 0; j < 6; j++) {
239 L[i][j] = L_m[i][j];
240 }
241 }
242
243 // Apply Cholesky transformation
244 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int k) {
245 double vec_old[6], vec[6] = {0.0};
246 for (unsigned i = 0; i < 3; ++i) {
247 vec_old[2 * i] = Rview(k)[i];
248 vec_old[2 * i + 1] = Pview(k)[i];
249 }
250 for (unsigned i = 0; i < 6; ++i) {
251 for (unsigned j = 0; j < i + 1; ++j) {
252 vec[i] += L[i][j] * vec_old[j];
253 }
254 }
255 for (unsigned i = 0; i < 3; ++i) {
256 Rview(k)[i] = vec[2 * i];
257 Pview(k)[i] = vec[2 * i + 1];
258 }
259 });
260
262
263 // zero mean of R
264 double meanR[3], loc_meanR[3];
265
266 if (fixMeanR_m) {
267 for(int i=0; i<3; i++){
268 meanR[i] = 0.0;
269 loc_meanR[i] = 0.0;
270 }
271
272 Kokkos::parallel_reduce("calc moments of particle distr.", nlocal,
273 KOKKOS_LAMBDA(const int k, double& cent0, double& cent1, double& cent2) {
274 cent0 += Rview(k)[0];
275 cent1 += Rview(k)[1];
276 cent2 += Rview(k)[2];
277 },
278 Kokkos::Sum<double>(loc_meanR[0]), Kokkos::Sum<double>(loc_meanR[1]), Kokkos::Sum<double>(loc_meanR[2]));
280
281 MPI_Allreduce(loc_meanR, meanR, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
282 ippl::Comm->barrier();
283
284 for(int i=0; i<3; i++){
285 meanR[i] = meanR[i]/(1.*numberOfParticles);
286 }
287
288 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int k) {
289 Rview(k)[0] -= meanR[0];
290 Rview(k)[1] -= meanR[1];
291 Rview(k)[2] -= meanR[2];
292 });
294 }
295
296 // zero mean of P
297 double meanP[3], loc_meanP[3];
298 if(fixMeanP_m){
299
300 for(int i=0; i<3; i++){
301 meanP[i] = 0.0;
302 loc_meanP[i] = 0.0;
303 }
304 Kokkos::parallel_reduce("calc moments of particle distr.", nlocal,
305 KOKKOS_LAMBDA(const int k, double& cent0, double& cent1, double& cent2) {
306 cent0 += Pview(k)[0];
307 cent1 += Pview(k)[1];
308 cent2 += Pview(k)[2];
309 },
310 Kokkos::Sum<double>(loc_meanP[0]), Kokkos::Sum<double>(loc_meanP[1]), Kokkos::Sum<double>(loc_meanP[2]));
312
313 MPI_Allreduce(loc_meanP, meanP, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
314 ippl::Comm->barrier();
315
316 for(int i=0; i<3; i++){
317 meanP[i] = meanP[i]/(1.*numberOfParticles);
318 }
319
320 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int k) {
321 Pview(k)[0] -= meanP[0];
322 Pview(k)[1] -= meanP[1];
323 Pview(k)[2] -= meanP[2];
324 });
326 }
327
328 // correct the means of R and P from input
329 for(int i=0; i<3; i++){
330 meanR[i] = meanR_m[i];
331 meanP[i] = meanP_m[i];
332 }
333
334 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int k) {
335 for(int i=0; i<3; i++){
336 Rview(k)[i] += meanR[i];
337 Pview(k)[i] += meanP[i];
338 }
339 });
341
343}
Inform * gmsg
Definition: changes.cpp:7
typename Kokkos::Random_XorShift64_Pool<> GeneratorPool
typename ippl::detail::ViewType< ippl::Vector< double, Dim >, 1 >::view_type view_type
Inform & endl(Inform &inf)
Definition: Inform.cpp:42
int seed
The current random seed.
Definition: Options.cpp:37
void fence()
Definition: Ippl.cpp:103
void parallel_for(const std::string &name, const ExecPolicy &policy, const FunctorType &functor)
void parallel_reduce(const std::string &name, const ExecPolicy &policy, const FunctorType &functor, ReducerArgument &&... reducer)
std::unique_ptr< mpi::Communicator > Comm
Definition: Ippl.h:22
Vector_t< double, 6 > normMax_m
Vector_t< double, 3 > sigmaP_m
IpplTimings::TimerRef samplerTimer_m
Timer for performance profiling.
bool fixMeanR_m
Flag to exactly fix the mean R and P of particles after sampling.
Vector_t< double, 3 > meanP_m
void setCutoffR(const Vector_t< double, 3 > &cutoffR)
void setMeanP(const Vector_t< double, 3 > &meanP)
Vector_t< double, 6 > max_m
Vector_t< double, 3 > normPmin_m
void setCutoffP(const Vector_t< double, 3 > &cutoffP)
Vector_t< double, 3 > pmax_m
void ComputeCholeskyFactorization()
Computes the Cholesky factorization of the covariance matrix.
Vector_t< double, 3 > rmin_m
Vector_t< double, 3 > rmax_m
Vector_t< double, 3 > pmin_m
Vector_t< double, 3 > cutoffR_m
Cutoff multipliers for position and momentum distributions.
Vector_t< double, 6 > min_m
Min and Max bounds for all 6 dimensions (R0,P0,R1,P1,R2,P2).
Vector_t< double, 3 > normPmax_m
void setFixMeanR(bool fixMeanR)
void setMeanR(const Vector_t< double, 3 > &meanR)
Vector_t< double, 6 > normMin_m
void initRandomPool()
Initializes the random number generator pool.
Vector_t< double, 3 > normRmax_m
void generateParticles(size_t &numberOfParticles, Vector_t< double, 3 > nr) override
Generates particles based on the defined Gaussian distribution.
Vector_t< double, 3 > meanR_m
Vector_t< double, 3 > sigmaR_m
Standard deviations for position and momentum distributions.
MultiVariateGaussian(std::shared_ptr< ParticleContainer_t > &pc, std::shared_ptr< FieldContainer_t > &fc, std::shared_ptr< Distribution_t > &opalDist)
Constructor for MultiVariateGaussian.
void setSigmaP(const Vector_t< double, 3 > &sigmaP)
GeneratorPool randPool_m
Pool of random number generators for parallel sampling.
void setSigmaR(const Vector_t< double, 3 > &sigmaR)
void ComputeCenteredBounds()
Computes centered bounds for the particle distribution.
Vector_t< double, 3 > cutoffP_m
Vector_t< double, 3 > normRmin_m
void setFixMeanP(bool fixMeanP)
std::shared_ptr< Distribution_t > opalDist_m
std::shared_ptr< ParticleContainer_t > pc_m
A class for inverse transform sampling.
Definition: Inform.h:40
static TimerRef getTimer(const char *nm)
Definition: IpplTimings.h:150
static void stopTimer(TimerRef t)
Definition: IpplTimings.h:156
static void startTimer(TimerRef t)
Definition: IpplTimings.h:153