#include #include #include #include #include #include "nbgenerator.hh" #include "nbio.hh" #include "nbtypes.hh" #include "nbstopwatch.hh" /* * Global constants */ /* physical parameters */ const double mass_sun = 1.988435e30; const double pc_in_m = 3.08567758129e16; const double gamma_si = 6.67428e-11; /* set gravity constant */ const double G = gamma_si/(pc_in_m*pc_in_m*pc_in_m)*mass_sun*(365.25*86400)*(365.25*86400); /* The epsilon^2 for the softening */ const double epsilon2 = 1E-3; /* Block size for tiling */ const int B = 90; typedef struct acceleration_data { int n,rank,numthreads; double *m; vector3 *a, *r; } acceleration_data; pthread_mutex_t *mutex_a; /** * Compute acceleration (with tiling) * * \param[in] n Number of bodies * \param[in] r Positions of the bodies * \param[in] m Masses of the bodies * \param[out] a The computed accelerations of the bodies * * The inner loop executes 26 floating point operations and is executed * ((n*n)-n)/2 times, resulting in 13*n*(n-1) floating point operations total * per call to acceleration(). */ void acceleration (acceleration_data *data) { int n, rank, numthreads; vector3 *r, *a; double *m; int I,J,I_,J_,i,j,iend,jstart,jend; double d0,d1,d2,d,d_sq,factori,factorj; double invfact; vector3 Ri[B], Rj[B], Ai[B], Aj[B]; double Mi[B], Mj[B]; /* extract data */ n = data->n; rank = data->rank; numthreads = data->numthreads; r = data->r; a = data->a; m = data->m; /* compute acceleration exploiting symmetry */ for (I_=0; I_<=n/B; I_++) { for (J_=I_; J_<=n/B; J_++) { if ((I_+J_) % numthreads == rank) { /*printf("i am thread %d of %d and i compute block (%d,%d)\n",rank,numthreads,I_,J_);*/ I=I_*B; J=J_*B; /* store local copy of the blocks to avoid aliasing */ iend = std::min(I+B,n); jend = std::min(J+B,n); for (i=I; i
\n", argv[0]); return 1; } sscanf(argv[1],"%d",&N); sscanf(argv[2],"%lf",&Tend); sscanf(argv[3],"%d",&mod); sscanf(argv[4],"%lf",&dt); sscanf(argv[5],"%d",&numthreads); /* arrays for position, velocity, mass, and acceleration of the bodies */ vector3* r = new vector3[N]; vector3* v = new vector3[N]; double* m = new double[N]; vector3* a = new vector3[N]; /* loop counter */ int i; /* count the time steps done */ int k = 0; /* for measuring the elapsed time and calculating the MFLOP rate */ double start,stop,elapsed,flop; /* get initial values from one of the generator functions */ int seed = 42; collision(N,seed,r,v,m); mutex_a = new pthread_mutex_t[N]; /* initialize mutexes */ for (i=0; i