As we'd mentioned earlier, if you are beginning a parallel project from scratch, writing the serial code is typically the first step.
In C, our serialDiffusion program is shown below:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
const double pi = 3.1415926535897932385;
int main( int argc, char *argv[] ) {
/* Arguments required for executing argv[0]: */
/* 0 -> serialDiffusion */
/* 1 -> nni */
/* 2 -> nnj */
/* 3 -> numTimeSteps */
double **uo; /* solution at previous time step */
double **u; /* solution at current time step */
double mu, nu, c; /* parameters of the FDE */
double ell1,ell2; /* x,y grid dimensions, 0 <= x <= ell1, etc. */
double dx,dy; /* x,y grid spacing, or spacial step size */
int nni,nnj; /* x,y number of interior grid points; excl. bdy. pts.*/
int numTimeSteps; /* when to end simulation */
double dt; /* time step size */
double dcoeff; /* diffusion coefficient D */
double u_0; /* uniform initial conditions */
double actual; /* the true solution */
int i,j,m,n,timeStep;
/* Get the number of interior grid points and */
/* number of time steps from the command line. */
nni = atoi( argv[1] );
nnj = atoi( argv[2] );
numTimeSteps = atoi( argv[3] );
/* Allocate memory for the solution arrays; include boundaries. */
/* As declared above, u is a pointer to a pointer to a double. */
/* Allocate memory so u points to the first element of an */
/* (nni+2)-long array of pointers to doubles. Likewise for uo. */
u = (double **) malloc( (size_t) ( (nni+2) * sizeof(double*) ) );
uo = (double **) malloc( (size_t) ( (nni+2) * sizeof(double*) ) );
/* u[0] is a pointer to a double. */
/* Allocate memory so u[0] points to the first element of an */
/* (nni+2)*(nnj+2)-long array of doubles. Likewise for uo[0]. */
u[0] = (double *) malloc( (size_t) ( ((nni+2)*(nnj+2)) * sizeof(double) ) );
uo[0] = (double *) malloc( (size_t) ( ((nni+2)*(nnj+2)) * sizeof(double) ) );
/* Now set pointer u[1] to point to *(u[0] + (nnj+2)). */
/* And set pointer u[2] to point to *(u[0] + 2*(nnj+2)) */
/* or *(u[1] + (nnj+2)). */
/* And so forth. Likewise for uo[1], uo[2], etc. */
for ( i = 1; i < nni+2; i++ ) {
u[i] = u[i-1] + (nnj+2);
uo[i] = uo[i-1] + (nnj+2);
}
/* We have now dynamically allocated memory for the arrays */
/* u[nni+2][nnj+2] and uo[nni+2][nnj+2] which are large */
/* enough to include the boundary points. */
/* Set the diffusion coefficient, grid dimensions, */
/* and the uniform initial conditions. */
dcoeff = 1.0;
ell1 = 7.0;
ell2 = 1.0;
u_0 = 1.0;
/* Compute dx and dy, and set dt so that the Courant condition */
/* is satisfied. */
dx = ell1 / ( nni + 1 );
dy = ell2 / ( nnj + 1 );
dt = 0.49 / ( dcoeff*( 1.0/(dx*dx) + 1.0/(dy*dy) ) );
/* Set uniform initial conditions. */
for ( i = 1; i <= nni; i++ ) {
for ( j = 1; j <= nnj; j++ ) {
u[i][j] = u_0;
}
}
/* Set boundary conditions. */
/* Assume Dirichlet boundary conditions equal to 0. */
/* First the y = 0 boundary. */
for ( i = 0; i <= nni+1; i++ ) {
u[i][0] = 0.0;
}
/* Second the y = ell2 boundary. */
for ( i = 0; i <= nni+1; i++ ) {
u[i][nnj+1] = 0.0;
}
/* Third the x = 0 boundary. */
for ( j = 1; j <= nnj; j++ ) {
u[0][j] = 0.0;
}
/* Finally the x = ell1 boundary. */
for ( j = 1; j <= nnj; j++ ) {
u[nni+1][j] = 0.0;
}
/* Solve the 2-D diffusion equation by the explicit FTCS method. */
/* Pre-compute the FDE coefficients. */
mu = dcoeff*dt / (dx*dx);
nu = dcoeff*dt / (dy*dy);
c = 1.0 - 2.0*( mu + nu );
for ( timeStep = 1; timeStep <= numTimeSteps; timeStep++ ) {
/* Update previous time step data. */
for ( i = 0; i <= nni+1; i++ ) {
for ( j = 0; j <= nnj+1; j++ ) {
uo[i][j] = u[i][j];
}
}
/* Now compute the stencil for each i,j pair. */
for ( i = 1; i <= nni; i++ ) {
for ( j = 1; j <= nnj; j++ ) {
u[i][j] = mu*(uo[i+1][j]+uo[i-1][j]) + c*uo[i][j]
+ nu*(uo[i][j+1]+uo[i][j-1]);
}
}
/* Now proceed to the next time step. */
}
/* Okay, the numerical computation of u is completed. */
/* Let's look now at a specific grid point. */
i = (int) nni/8;
j = 10;
/* Compute the actual temperature at this point. */
actual = 0.0;
for ( m = 1; m < 1000; m += 2 ) {
for ( n = 1; n < 1000; n += 2 ) {
actual += 1.0/(m*n) * exp( - dcoeff * pi*pi *
( m*m/(ell1*ell1) + n*n/(ell2*ell2) ) *
numTimeSteps*dt )
* sin( m*pi*i*dx/ell1 ) * sin( n*pi*j*dy/ell2 );
}
}
actual *= 16.0*u_0 / (pi*pi);
/* Compare the actual with the numerical result at this point. */
printf( "After %d time steps some results are:\n", numTimeSteps );
printf( "actual u[%d][%d] = %f\n", i, j, actual );
printf( "computed u[%d][%d] = %f\n", i, j, u[i][j] );
/* And let's look at another specific grid point. */
i = (int) nni/4;
/* Compute the actual temperature at this point. */
actual = 0.0;
for ( m = 1; m < 1000; m += 2 ) {
for ( n = 1; n < 1000; n += 2 ) {
actual += 1.0/(m*n) * exp( - dcoeff * pi*pi *
( m*m/(ell1*ell1) + n*n/(ell2*ell2) ) *
numTimeSteps*dt )
* sin( m*pi*i*dx/ell1 ) * sin( n*pi*j*dy/ell2 );
}
}
actual *= 16.0*u_0 / (pi*pi);
/* Compare the actual with the numerical result at this point. */
printf( "After %d time steps some results are:\n", numTimeSteps );
printf( "actual u[%d][%d] = %f\n", i, j, actual );
printf( "computed u[%d][%d] = %f\n", i, j, u[i][j] );
return 0;
}
Note: If you didn't copy source files from HPC's tutorial directory, then copy the above source code into a file called serialDiffusion.c inside your ~/MPI_Tutorial/SerialDiffusion/ directory.
Exercise: Do this!
To compile the code, change to the ~/MPI_Tutorial/SerialDiffusion/ directory on the head node (bhX) and then type [since this is not a MPI program, we are not going to use the Makefile we've been using all along]:
[agopu@bh2 agopu]$ cd ~/MPI_Tutorial/SerialDiffusion/ [agopu@bh2 SerialDiffusion]$ icc serialDiffusion.c -o serialDiffusion -lmNote: Our serialDiffusion program uses functions like sin () and exp (). That's whay we've used the -lm flag in the above call to icc ...to link with the Math library.
To run this program on a interactive node you got through qsub -I (bcXX), do:
[agopu@bc81 agopu]$ cd ~/MPI_Tutorial/SerialDiffusion/ [agopu@bc81 SerialDiffusion]$ ./serialDiffusion 8000 1000 100
The output of serialDiffusion 8000 1000 100 is:
After 100 time steps some results are: actual u[1000][10] = 0.874806 computed u[1000][10] = 0.873978 After 100 time steps some results are: actual u[2000][10] = 0.874775 computed u[2000][10] = 0.873978
The computed and actual values differ by one part in a thousand, which is not too bad considering we used 8000 and 1000 as parameters (not huge values).
| Previous: The 2-D Diffusion... | Up: Table of Contents | Next: Parallelizing the Numerical Solution |
|---|