0% found this document useful (0 votes)
26 views

Include

The document describes an MPI program for simulating an N-body system. It defines particle data structures and functions for simulation steps, output, and diagnostics. The main function initializes MPI, reads inputs, performs the simulation using MPI routines, and finalizes MPI before ending.

Uploaded by

Mickey Cruise
Copyright
© Attribution Non-Commercial (BY-NC)
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
26 views

Include

The document describes an MPI program for simulating an N-body system. It defines particle data structures and functions for simulation steps, output, and diagnostics. The main function initializes MPI, reads inputs, performs the simulation using MPI routines, and finalizes MPI before ending.

Uploaded by

Mickey Cruise
Copyright
© Attribution Non-Commercial (BY-NC)
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 11

#include <iostream>

#include <cmath> // to include sqrt(), etc.


#include <cstdlib> // for atoi() and atof()
#include <unistd.h> // for getopt()
#include <mpi.h> // include the MPI library
#include "pipe.h" // the MPI pipe routines

using namespace std;


typedef double real;                // "real" as a general name for the
                                    // standard floating-point data type

// Small stderr helpers, practical for debugging.
//   PRI(x) : writes x single blanks (indentation)
//   PR(x)  : writes "<expression text> = <value> "
//   PRC(x) : same, followed by a comma
//   PRL(x) : same, followed by a newline (and flush)
// The stream names are fully qualified so the macros do not depend on a
// using-directive being active at the point of expansion.

#define PRI(x) {for (int __pri__ = 0; __pri__ < (x); __pri__++) std::cerr << " ";}
#define PR(x)  std::cerr << #x << " = " << x << " "
#define PRC(x) std::cerr << #x << " = " << x << ", "
#define PRL(x) std::cerr << #x << " = " << x << std::endl

const int NDIM = 3;                 // number of spatial dimensions

const real VERY_LARGE_NUMBER = 1e300;
const int root = 0;                 // identity of the root processor

// One body of the N-body system.
//
// The members are deliberately laid out as a single int followed by a run of
// reals (mass, pos, vel, acc, jerk): get_snapshot() in this file describes the
// struct to MPI in exactly that shape, so keep the member order and types in
// step with the MPI datatype built there.
struct Particle {
    int  id;            // index given to the particle when the snapshot is read;
                        // the pairwise force routine compares ids to skip self-interaction
    real mass;          // particle mass
    real pos[NDIM];     // position
    real vel[NDIM];     // velocity
    real acc[NDIM];     // acceleration
    real jerk[NDIM];    // jerk
};

// ---------------------------------------------------------------------------
// Forward declarations, grouped by task.
// ---------------------------------------------------------------------------

// -- snapshot input / output ------------------------------------------------
Particle * get_snapshot(int &n, real &t, MPI_Datatype &particletype);
void put_snapshot(Particle p[], int n, real t, MPI_Datatype particletype);

// -- command line -----------------------------------------------------------
bool read_options(int argc, char *argv[], real & dt_param, real & dt_dia,
                  real & dt_out, real & dt_tot, bool & i_flag, bool & x_flag);

// -- integration driver and the pieces of one step --------------------------
void evolve(Particle p[],
            int n, real & t, real dt_param, real dt_dia, real dt_out,
            real dt_tot, bool init_out, bool x_flag, void *pipe,
            MPI_Datatype particletype);
void evolve_step(Particle p[], int n, real & t,
                 real dt, real & epot, real & coll_time,
                 void *pipe);
void predict_step(Particle p[], int n, real dt);
void correct_step(Particle p[], Particle po[], int n, real dt);

// -- forces: pairwise kernel, and the driver that feeds it through the pipe --
void get_acc_jerk_pot_coll(Particle pl[], int nl,
                           Particle po[], int no,
                           real & epot, real & coll_time);
void get_acc_jerk_pot_coll(Particle p[], int n, real &epot, real &coll_time,
                           void *pipe);

// -- diagnostics ------------------------------------------------------------
void write_diagnostics(Particle p[], int n, real t, real epot,
                       int nsteps, real & einit, bool init_flag,
                       bool x_flag, real &tcpu);

// Counting loop over [0, last); idx must already be declared by the user.
#define loop(idx,last) for (idx = 0; idx < last ; idx++)

// Declared here only; no definition of either overload is visible in this
// part of the file.
void uniform(real a, real *x);
void uniform(Particle p[], int n);

int main(int argc, char *argv[])


{

// initialize MPI
int rank, size;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_size( MPI_COMM_WORLD, &size );

real dt_param = 0.03; // control parameter to determine time step


size
real dt_dia = 1; // time interval between diagnostics output
real dt_out = 1; // time interval between output of
snapshots
real dt_tot = 10; // duration of the integration
bool init_out = false; // if true: snapshot output with start at t
= 0
// with an echo of the input
snapshot
bool x_flag = false; // if true: extra debugging diagnostics
output

if (! read_options(argc, argv, dt_param, dt_dia, dt_out, dt_tot,


init_out,
x_flag))
return 1; // halt criterion detected by
read_options()

int n;
real t;
MPI_Datatype particletype;
Particle *p = get_snapshot(n, t, particletype);

real noutp = 1;
real dt;
put_snapshot(p, n, t, particletype);

void * pipe; // Create a MPI pipe for a 1-dimensional ring topology


MPE_Pipe_create( MPI_COMM_WORLD, particletype, n, &pipe );

evolve(p, n, t, dt_param, dt_dia, dt_out, dt_tot, init_out,


x_flag, pipe, particletype);
delete []p;

MPE_Pipe_free( &pipe ); // Clean up MPI


MPI_Type_free( &particletype );
MPI_Finalize();

// Writes the usage summary to stderr; shared by the help (-h) path and the
// unrecognised-option path so both print exactly the same text.
static void print_usage(const char *program)
{
    std::cerr << "usage: " << program
              << " [-h (for help)]"
              << " [-d step_size_control_parameter]\n"
              << " [-e diagnostics_interval]"
              << " [-o output_interval]\n"
              << " [-t total_duration]"
              << " [-i (start output at t = 0)]\n"
              << " [-x (extra debugging diagnostics)]"
              << std::endl;
}

// Parses the command line with getopt() and stores the results in the
// reference arguments; an argument is left untouched when its option is absent.
//
//   -d <value>  -> dt_param      -e <value>  -> dt_dia
//   -o <value>  -> dt_out        -t <value>  -> dt_tot
//   -i          -> i_flag = true -x          -> x_flag = true
//   -h          -> print usage and stop
//
// Numeric values are converted with atof(), which performs no error
// reporting, so malformed numbers are not diagnosed here.
//
// Returns true when the program should continue, false after printing the
// usage text (help requested, or getopt() reported an unknown option).
bool read_options(int argc, char *argv[], real & dt_param, real & dt_dia,
                  real & dt_out, real & dt_tot, bool & i_flag, bool & x_flag)
{
    int c;
    while ((c = getopt(argc, argv, "hd:e:o:t:ix")) != -1) {
        switch (c) {
            case 'h':
                print_usage(argv[0]);
                return false;            // execution should stop after help
            case 'd':
                dt_param = atof(optarg);
                break;
            case 'e':
                dt_dia = atof(optarg);
                break;
            case 'i':
                i_flag = true;
                break;
            case 'o':
                dt_out = atof(optarg);
                break;
            case 't':
                dt_tot = atof(optarg);
                break;
            case 'x':
                x_flag = true;
                break;
            case '?':
                print_usage(argv[0]);
                return false;            // execution should stop after error
        }
    }
    return true;                         // ready to continue program execution
}

// Reads a snapshot from standard input on the root rank and hands every rank
// an equal share of the particles.
//
// Input format read on root: particle count, time, then per particle
// mass, NDIM position components and NDIM velocity components. Particle ids
// are assigned as the index in input order.
//
// Outputs:
//   n            - on return, the number of particles held by THIS rank
//                  (total count divided by the number of ranks, rounded down)
//   t            - snapshot time, broadcast from root to all ranks
//   particletype - newly created and committed MPI datatype describing one
//                  Particle; the caller is responsible for MPI_Type_free()
// Returns a new[]-allocated array of n particles; the caller must delete[] it.
//
// If the total count is not a multiple of the number of ranks, the surplus
// particles at the end of the input are dropped and root prints a warning.
Particle *get_snapshot(int &n, real &t, MPI_Datatype &particletype)
{
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Only root owns a staging buffer. It stays NULL elsewhere so that the
    // unconditional delete[] at the end is well defined on every rank.
    Particle *p_tmp = NULL;
    if (rank == root) {
        std::cerr << "Reading snapshot" << std::endl;
        std::cin >> n;
        std::cin >> t;

        p_tmp = new Particle[n];

        for (int i = 0; i < n; i++) {
            p_tmp[i].id = i;
            std::cin >> p_tmp[i].mass;              // mass of particle i
            for (int k = 0; k < NDIM; k++)
                std::cin >> p_tmp[i].pos[k];        // position of particle i
            for (int k = 0; k < NDIM; k++)
                std::cin >> p_tmp[i].vel[k];        // velocity of particle i
        }
    }

    MPI_Bcast(&n, 1, MPI_INT, root, MPI_COMM_WORLD);   // broadcasts particle number

    const int n_local = n / size;                      // equal share per rank
    if (n != n_local*size && rank == root) {
        std::cerr << "WARNING: Paticle number in input is not a mulitple of the number of processors."
                  << std::endl;
        std::cerr << " Action: Reduce particle number to n = "
                  << n_local*size << "." << std::endl;
    }
    Particle *p = new Particle[n_local];

    // defining the particletype: one int (id) followed by the reals
    // mass + pos + vel + acc + jerk, i.e. 1 + 4*NDIM doubles.
    int blockcounts[2] = {1, 1 + 4*NDIM};
    MPI_Datatype blocktypes[2] = {MPI_INT, MPI_DOUBLE};
    MPI_Aint displs[2];
    Particle probe;       // used for its member addresses only, so the layout
                          // can be measured even when n_local is 0
    MPI_Get_address(&probe.id, &displs[0]);
    MPI_Get_address(&probe.mass, &displs[1]);
    displs[1] -= displs[0];                            // make them relative
    displs[0] = 0;
    MPI_Type_create_struct(2, blockcounts, displs, blocktypes, &particletype);
    MPI_Type_commit(&particletype);

    // Distribute the particles over the processors
    MPI_Scatter(p_tmp, n_local, particletype,
                p, n_local, particletype, root, MPI_COMM_WORLD);
    n = n_local;
    MPI_Bcast(&t, 1, MPI_DOUBLE, root, MPI_COMM_WORLD);

    delete[] p_tmp;
    return p;
}

// Collects the particles of all ranks on root and writes one snapshot to
// standard output.
//
//   p, n         - this rank's particles and their count
//   t            - time written into the snapshot
//   particletype - MPI datatype describing one Particle
//
// Output written by root: total particle count, time, then one line per
// particle with mass, NDIM position components and NDIM velocity components.
// The gather passes this rank's n as the per-rank receive count as well.
// Side effect: sets the precision of std::cout to 16 on every rank.
void put_snapshot(Particle p[], int n, real t, MPI_Datatype particletype)
{
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int ntot;
    MPI_Allreduce(&n, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    // Receive buffer exists on root only; initialised everywhere so no rank
    // passes an indeterminate pointer value to MPI_Gather.
    Particle *p_all = NULL;
    if (rank == root) {
        p_all = new Particle[ntot];
    }
    MPI_Gather(p, n, particletype, p_all, n, particletype, root, MPI_COMM_WORLD);

    std::cout.precision(16);                        // full double precision

    if (rank == root) {
        std::cout << ntot << std::endl;             // N, total particle number
        std::cout << t << std::endl;                // current time
        for (int i = 0; i < ntot; i++) {
            std::cout << p_all[i].mass;             // mass of particle i
            for (int k = 0; k < NDIM; k++)
                std::cout << ' ' << p_all[i].pos[k];    // position of particle i
            for (int k = 0; k < NDIM; k++)
                std::cout << ' ' << p_all[i].vel[k];    // velocity of particle i
            std::cout << std::endl;
        }
        delete[] p_all;
    }
}

// Reports energies and timing on stderr.
//
//   p, n        - this rank's particles and their count
//   t           - current simulation time (printed only)
//   epot_local  - this rank's potential-energy sum; every pair is counted
//                 twice across the ranks, hence the factor 0.5 below
//   nsteps      - number of steps completed so far (printed only)
//   einit       - reference energy; written when init_flag is true, read otherwise
//   init_flag   - true on the first call, to record the initial energy
//   x_flag      - if true, every rank additionally dumps its own particles
//   tcpu        - MPI_Wtime() stamp from which elapsed time is measured.
//                 It is read but NOT modified: overwriting it with the elapsed
//                 time (as an earlier revision did) destroyed the start stamp
//                 and made every report after the first one meaningless.
//
// The energy summary is printed by the root rank only.
void write_diagnostics(Particle p[], int n, real t, real epot_local,
                       int nsteps, real & einit, bool init_flag,
                       bool x_flag, real &tcpu)
{
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    real ekin_local = 0;                 // kinetic energy of the local particles
    for (int i = 0; i < n; i++)
        for (int k = 0; k < NDIM; k++)
            ekin_local += 0.5 * p[i].mass * p[i].vel[k] * p[i].vel[k];

    real ekin;                           // kinetic energy of the n-body system
    MPI_Allreduce(&ekin_local, &ekin, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    real epot;
    MPI_Allreduce(&epot_local, &epot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    epot *= 0.5;                         // against double counting

    real etot = ekin + epot;             // total energy of the n-body system

    if (init_flag)                       // at first pass, pass the initial
        einit = etot;                    // energy back to the calling function

    // time elapsed since the stamp handed in by the caller
    const real elapsed = MPI_Wtime() - tcpu;

    if (rank == root) {
        std::cerr << "at time t = " << t << ", after " << nsteps
                  << " steps (CPU = " << elapsed << "): \n E_kin = " << ekin
                  << " , E_pot = " << epot
                  << " , E_tot = " << etot << std::endl;
        std::cerr << " "
                  << "absolute energy error: E_tot - E_init = "
                  << etot - einit << std::endl;
        std::cerr << " "
                  << "relative energy error: (E_tot - E_init) / E_init = "
                  << (etot - einit) / einit << std::endl;
    }

    if (x_flag) {
        std::cerr << " for debugging purposes, here is the internal data "
                  << "representation:\n";
        for (int i = 0; i < n; i++) {
            std::cerr << " internal data for particle " << i+1 << " : " << std::endl;
            std::cerr << " ";
            std::cerr << p[i].id << " " << p[i].mass;
            for (int k = 0; k < NDIM; k++)
                std::cerr << ' ' << p[i].pos[k];
            for (int k = 0; k < NDIM; k++)
                std::cerr << ' ' << p[i].vel[k];
            for (int k = 0; k < NDIM; k++)
                std::cerr << ' ' << p[i].acc[k];
            for (int k = 0; k < NDIM; k++)
                std::cerr << ' ' << p[i].jerk[k];
            std::cerr << std::endl;
        }
    }
}

// Integrates the system from time t for a duration dt_tot, emitting
// diagnostics every dt_dia and snapshots every dt_out time units.
//
//   p, n          - this rank's particles and their count
//   t             - current time; advanced in place by every step
//   dt_param      - scale factor applied to the collision time to obtain
//                   the step size
//   dt_dia/dt_out - intervals between diagnostics / snapshot output
//   dt_tot        - duration of the integration
//   init_out      - if true, also write a snapshot before the first step
//   x_flag        - forwarded to write_diagnostics()
//   pipe          - MPE pipe handle forwarded to the force computation
//   particletype  - MPI datatype of one Particle, used for snapshot output
//
// Every step uses the same dt on all ranks: each rank proposes
// dt_param * coll_time and the global minimum is taken.
void evolve(Particle p[],
            int n, real & t, real dt_param, real dt_dia, real dt_out,
            real dt_tot, bool init_out, bool x_flag,
            void *pipe, MPI_Datatype particletype)
{
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == root)
        std::cerr << "Starting a Hermite integration for a " << n*size
                  << "-body system,\n from time t = " << t
                  << " with time step control parameter dt_param = " << dt_param
                  << " until time " << t + dt_tot
                  << " ,\n with diagnostics output interval dt_dia = "
                  << dt_dia << ",\n and snapshot output interval dt_out = "
                  << dt_out << "." << std::endl;

    real tcpu = MPI_Wtime();               // check CPU usage

    real epot = 0;                         // potential energy of the n-body system
    real coll_time = VERY_LARGE_NUMBER;    // collision (close encounter) time scale
    get_acc_jerk_pot_coll(p, n, epot, coll_time, pipe);

    int nsteps = 0;                        // number of integration time steps completed
    real einit = 0;                        // initial total energy of the system;
                                           // filled in by the first diagnostics call

    write_diagnostics(p, n, t, epot, nsteps, einit,
                      true, x_flag, tcpu);
    if (init_out)                          // flag for initial output
        put_snapshot(p, n, t, particletype);

    real t_dia = t + dt_dia;               // next time for diagnostics output
    real t_out = t + dt_out;               // next time for snapshot output
    real t_end = t + dt_tot;               // final time, to finish the integration

    while (true) {
        // step until the next diagnostics, snapshot or end time is reached
        while (t < t_dia && t < t_out && t < t_end) {
            real dt_local = dt_param * coll_time;
            real dt;
            MPI_Allreduce(&dt_local, &dt, 1, MPI_DOUBLE, MPI_MIN,
                          MPI_COMM_WORLD);   // synchronize time step
            evolve_step(p, n, t, dt, epot, coll_time, pipe);
            nsteps++;
        }
        if (t >= t_dia) {
            write_diagnostics(p, n, t, epot, nsteps,
                              einit, false, x_flag, tcpu);
            t_dia += dt_dia;
        }
        if (t >= t_out) {
            put_snapshot(p, n, t, particletype);
            t_out += dt_out;
        }
        if (t >= t_end)
            break;
    }
}

// Advances the local particles by one step of size dt.
//
// The state at the start of the step (pos, vel, acc, jerk) is saved, the
// particles are predicted forward, forces are re-evaluated at the predicted
// state, and the corrector then combines saved and new values. t is advanced
// by dt; epot and coll_time are refreshed by the force evaluation.
void evolve_step(Particle p[], int n, real & t,
                 real dt, real & epot, real & coll_time,
                 void *pipe)
{
    Particle *const before = new Particle[n];

    // Save only the fields the corrector reads; id and mass are not copied.
    int idx = 0;
    while (idx < n) {
        const Particle &src = p[idx];
        Particle &dst = before[idx];
        for (int axis = 0; axis < NDIM; ++axis) {
            dst.pos[axis]  = src.pos[axis];
            dst.vel[axis]  = src.vel[axis];
            dst.acc[axis]  = src.acc[axis];
            dst.jerk[axis] = src.jerk[axis];
        }
        ++idx;
    }

    predict_step(p, n, dt);
    get_acc_jerk_pot_coll(p, n, epot, coll_time, pipe);
    correct_step(p, before, n, dt);
    t += dt;

    delete[] before;
}

// Predictor: extrapolates every particle over dt with a Taylor series,
// position to third order (up to jerk) and velocity to second order.
// The position is updated before the velocity, so it uses the velocity
// from the start of the step.
void predict_step(Particle p[], int n, real dt)
{
    int idx = 0;
    while (idx < n) {
        Particle &body = p[idx];
        for (int axis = 0; axis < NDIM; ++axis) {
            body.pos[axis] += body.vel[axis]*dt + body.acc[axis]*dt*dt/2
                                                + body.jerk[axis]*dt*dt*dt/6;
            body.vel[axis] += body.acc[axis]*dt + body.jerk[axis]*dt*dt/2;
        }
        ++idx;
    }
}

// Corrector: rebuilds velocity and position of every particle from the
// values saved at the start of the step (po) and the acceleration and jerk
// currently stored in p.
// The velocity is corrected first because the position formula uses the
// corrected velocity.
void correct_step(Particle p[], Particle po[], int n, real dt)
{
    int idx = 0;
    while (idx < n) {
        const Particle &old = po[idx];
        Particle &cur = p[idx];
        for (int axis = 0; axis < NDIM; ++axis) {
            cur.vel[axis] = old.vel[axis] + (old.acc[axis] + cur.acc[axis])*dt/2
                                          + (old.jerk[axis] - cur.jerk[axis])*dt*dt/12;
            cur.pos[axis] = old.pos[axis] + (old.vel[axis] + cur.vel[axis])*dt/2
                                          + (old.acc[axis] - cur.acc[axis])*dt*dt/12;
        }
        ++idx;
    }
}

// Pairwise kernel: adds to the acceleration and jerk of every particle in
// pl[] the contribution of every particle in po[], skipping pairs with equal
// id (a particle does not act on itself).
//
//   pl, nl     - particles being updated ("local") and their count
//   po, no     - particles acting as sources and their count; not modified
//   epot       - decremented by m_i * m_j / r for every (i, j) visited, so a
//                pair is counted once per direction in which it is visited
//   coll_time  - lowered to the smallest collision time estimate found here,
//                if that is smaller than the value passed in
//
// acc and jerk are accumulated, not reset; the caller zeroes them first.
// NOTE(review): two particles with different ids at the same position give
// r2 == 0 and therefore divisions by zero; this is not guarded against.
void get_acc_jerk_pot_coll(Particle pl[], int nl,
                           Particle po[], int no,
                           real & epot, real & coll_time)
{
    real coll_time_q = VERY_LARGE_NUMBER;      // collision time to 4th power
    real coll_est_q;                           // collision time scale estimate
                                               // to 4th power (quartic)
    for (int i = 0; i < nl; i++) {
        for (int j = 0; j < no; j++) {
            if (pl[i].id == po[j].id)
                continue;                      // no self-interaction

            real rji[NDIM];                    // rji[] is the vector from
                                               // particle i to particle j
            real vji[NDIM];                    // vji[] = d rji[] / d t
            for (int k = 0; k < NDIM; k++) {
                rji[k] = po[j].pos[k] - pl[i].pos[k];
                vji[k] = po[j].vel[k] - pl[i].vel[k];
            }
            real r2 = 0;                       // | rji |^2
            real v2 = 0;                       // | vji |^2
            real rv_r2 = 0;                    // ( rij . vij ) / | rji |^2
            for (int k = 0; k < NDIM; k++) {
                r2 += rji[k] * rji[k];
                v2 += vji[k] * vji[k];
                rv_r2 += rji[k] * vji[k];
            }
            rv_r2 /= r2;
            real r = std::sqrt(r2);            // | rji |
            real r3 = r * r2;                  // | rji |^3

            // add the {i,j} contribution to the total potential energy for
            // the system:
            epot -= pl[i].mass * po[j].mass / r;

            // add the j contribution to the i values of acceleration and jerk:
            real da[NDIM];                     // main terms in pairwise
            real dj[NDIM];                     // acceleration and jerk
            for (int k = 0; k < NDIM; k++) {
                da[k] = rji[k] / r3;
                dj[k] = (vji[k] - 3 * rv_r2 * rji[k]) / r3;
            }
            // Only particle i is updated. The equal-and-opposite update of
            // po[j] (as in the serial version) is deliberately left out:
            // in this parallel version it is unpractical.
            for (int k = 0; k < NDIM; k++) {
                pl[i].acc[k]  += po[j].mass * da[k];
                pl[i].jerk[k] += po[j].mass * dj[k];
            }

            // first collision time estimate, based on unaccelerated linear
            // motion:
            coll_est_q = (r2*r2) / (v2*v2);
            if (coll_time_q > coll_est_q)
                coll_time_q = coll_est_q;

            // second collision time estimate, based on free fall:
            real da2 = 0;                      // da2 becomes the square of the
            for (int k = 0; k < NDIM; k++)     // pair-wise acceleration
                da2 += da[k] * da[k];          // between particles i and j
            real mij = pl[i].mass + po[j].mass;
            da2 *= mij * mij;

            coll_est_q = r2/da2;
            if (coll_time_q > coll_est_q)
                coll_time_q = coll_est_q;
        }
    }
    // from q for quartic back to linear collision time and taking the minimum
    const real coll_est = std::sqrt(std::sqrt(coll_time_q));
    if (coll_est < coll_time)
        coll_time = coll_est;
}

// Force driver: recomputes acc, jerk, epot and coll_time for the local
// particles against all particles on all ranks.
//
//   p, n       - this rank's particles and their count; acc and jerk are
//                reset to zero and then accumulated
//   epot       - reset to 0, then accumulated by the pairwise kernel
//   coll_time  - reset to VERY_LARGE_NUMBER, then lowered by the pairwise kernel
//   pipe       - MPE pipe handle used to receive the other ranks' particles
//
// The local-local interactions are computed first; then one block of remote
// particles is fetched from the pipe per remaining rank (size - 1 pushes) and
// each block is applied to the local particles.
// NOTE(review): the exact buffer-ownership rules of MPE_Pipe_start/push come
// from "pipe.h" and are not visible here - confirm before changing the order
// of these calls.
void get_acc_jerk_pot_coll(Particle p[], int n, real &epot, real &coll_time,
                           void *pipe)
{
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (int i = 0; i < n; i++)
        for (int k = 0; k < NDIM; k++) {
            p[i].acc[k] = p[i].jerk[k] = 0;
        }

    MPE_Pipe_start(pipe, p, n, 1);       // load the initial sendbuffer

    epot = 0;                            // initialize epot and coll_time
    coll_time = VERY_LARGE_NUMBER;
    get_acc_jerk_pot_coll(p, n, p, n, epot, coll_time);

    for (int step = 1; step < size; step++) {   // compute forces for other particles
        // Filled in by MPE_Pipe_push; given defined values so that nothing
        // indeterminate is ever handed to the pairwise kernel.
        Particle *recvbuf = NULL;
        int rlen = 0;
        MPE_Pipe_push(pipe, reinterpret_cast<void **>(&recvbuf), &rlen);   // get new data

        // Compute forces
        get_acc_jerk_pot_coll(p, n, recvbuf, rlen, epot, coll_time);
    }
}

You might also like