PROGRAM ShareNeighbors
IMPLICIT REAL (a-h,o-z)
INCLUDE "mpif.h"
PARAMETER (m = 500, n = 500)
DIMENSION a(m,n), b(m,n)
DIMENSION h(m,n)
INTEGER istatus(MPI_STATUS_SIZE)
INTEGER iprocs, jprocs
PARAMETER (ROOT = 0)
integer dims(2),coords(2)
logical periods(2)
data periods/2*.false./
integer status(MPI_STATUS_SIZE)
integer comm2d,req,source
CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)
! Get a new communicator for a decomposition of the domain.
! Let MPI find a "good" decomposition
dims(1) = 0
dims(2) = 0
CALL MPI_DIMS_CREATE(nprocs,2,dims,ierr)
if (myrank.EQ.Root) then
print *,nprocs,'processors have been arranged in a',dims(1),'x',dims(2),'grid'
endif
CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true., &
comm2d,ierr)
! Get my position in this communicator
CALL MPI_COMM_RANK(comm2d,myrank,ierr)
! Get the decomposition
CALL fnd2ddecomp(comm2d,m,n,ista,iend,jsta,jend)
! print *,ista,jsta,iend,jend
ilen = iend - ista + 1
jlen = jend - jsta + 1
CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
iprocs = dims(1)
jprocs = dims(2)
myranki = coords(1)
myrankj = coords(2)
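! Fill this rank's block with myrank+1 so each rank's data is identifiable at Root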
DO j = jsta, jend
DO i = ista, iend
a(i,j) = myrank+1
ENDDO
ENDDO
! Send data from each processor to Root
call MPI_ISEND(ista,1,MPI_INTEGER,Root,1, &
MPI_COMM_WORLD,req,ierr)
call MPI_ISEND(iend,1,MPI_INTEGER,Root,1, &
MPI_COMM_WORLD,req,ierr)
call MPI_ISEND(jsta,1,MPI_INTEGER,Root,1, &
MPI_COMM_WORLD,req,ierr)
call MPI_ISEND(jend,1,MPI_INTEGER,Root,1, &
MPI_COMM_WORLD,req,ierr)
call MPI_ISEND(a(ista:iend,jsta:jend),(ilen)*(jlen),MPI_REAL, &
Root,1,MPI_COMM_WORLD,req,ierr )
! Receive the results from the other processors
if (myrank.EQ.Root) then
do source = 0,nprocs-1
call MPI_RECV(ista,1,MPI_INTEGER,source, &
1,MPI_COMM_WORLD,status,ierr )
call MPI_RECV(iend,1,MPI_INTEGER,source, &
1,MPI_COMM_WORLD,status,ierr )
call MPI_RECV(jsta,1,MPI_INTEGER,source, &
1,MPI_COMM_WORLD,status,ierr )
call MPI_RECV(jend,1,MPI_INTEGER,source, &
1,MPI_COMM_WORLD,status,ierr )
ilen = iend - ista + 1
jlen = jend - jsta + 1
call MPI_RECV(a(ista:iend,jsta:jend),(ilen)*(jlen),MPI_REAL, &
source,1,MPI_COMM_WORLD,status,ierr)
! print the results
call ZMINMAX(m,n,ista,iend,jsta,jend,a(:,:),amin,amax)
print *, 'myid=',source,amin,amax
call MPI_Wait(req, status, ierr)
enddo
endif
CALL MPI_FINALIZE(ierr)
END
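! Given the 2-D Cartesian communicator, return this rank's block
! (ista:iend, jsta:jend) of the global m x n array.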
subroutine fnd2ddecomp(comm2d,m,n,ista,iend,jsta,jend)
integer comm2d
integer m,n,ista,jsta,iend,jend
integer dims(2),coords(2),ierr
logical periods(2)
! Get (i,j) position of a processor from Cartesian topology.
CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
! Decomposition in the first (i.e. X) direction
CALL MPE_DECOMP1D(m,dims(1),coords(1),ista,iend)
! Decomposition in the second (i.e. Y) direction
CALL MPE_DECOMP1D(n,dims(2),coords(2),jsta,jend)
return
end
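! Block-distribute n items over numprocs ranks: rank myid gets items s..e,
! with the first mod(n,numprocs) ranks receiving one extra item.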
SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
integer n,numprocs,myid,s,e,nlocal,deficit
nlocal = n / numprocs
s = myid * nlocal + 1
deficit = mod(n,numprocs)
s = s + min(myid,deficit)
! Give one extra slice to the first mod(n,numprocs) ranks
if (myid .lt. deficit) then
nlocal = nlocal + 1
endif
e = s + nlocal - 1
if (e .gt. n .or. myid .eq. numprocs-1) e = n
return
end
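! Find the minimum and maximum of ZX over the block (SX:EX, SY:EY).
! Note: the initial bounds assume all values lie within (-1000., 1000.).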
SUBROUTINE ZMINMAX(IX,JX,SX,EX,SY,EY,ZX,ZXMIN,ZXMAX)
INTEGER :: IX,JX,SX,EX,SY,EY
REAL :: ZX(IX,JX)
REAL :: ZXMIN,ZXMAX
ZXMIN=1000.
ZXMAX=-1000.
DO II=SX,EX
DO JJ=SY,EY
IF(ZX(II,JJ).LT.ZXMIN)ZXMIN=ZX(II,JJ)
IF(ZX(II,JJ).GT.ZXMAX)ZXMAX=ZX(II,JJ)
ENDDO
ENDDO
RETURN
END
When I run the above code with 4 processors, Root receives garbage values, whereas with 15 processors the data transfer works correctly. How can I tackle this? I guess it is related to buffering, a point which is not clear to me. How should I handle the buffering wisely?
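As far as I understand, the buffer passed to MPI_ISEND must remain valid and unmodified until MPI_WAIT or MPI_WAITALL completes the request; a minimal sketch of that pattern as I understand it (sendbuf, ireqs, nreq, and istats are illustrative names, not in my code):

      REAL sendbuf(m*n)
      INTEGER ireqs(5), nreq, k
      INTEGER istats(MPI_STATUS_SIZE,5)
      ! Pack the block into a contiguous, persistent buffer, so no
      ! compiler-generated temporary is handed to the non-blocking send.
      k = 0
      DO j = jsta, jend
        DO i = ista, iend
          k = k + 1
          sendbuf(k) = a(i,j)
        ENDDO
      ENDDO
      nreq = 1
      CALL MPI_ISEND(sendbuf,k,MPI_REAL,Root,1,MPI_COMM_WORLD,ireqs(nreq),ierr)
      ! ... post the four integer sends into ireqs(2:5) likewise, so nreq = 5 ...
      CALL MPI_WAITALL(nreq,ireqs,istats,ierr)   ! buffers must stay intact until here

Is that the kind of buffer handling that is needed here?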
Use "USE mpi" instead of INCLUDE "mpif.h". That will enable a large number of checks that the compiler can do for you.
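A minimal sketch of that change (only the affected declarations are shown; with the explicit interfaces supplied by the module, the compiler can flag mismatched argument types, e.g. your ROOT parameter, which under IMPLICIT REAL (a-h,o-z) is a REAL being passed where MPI expects an INTEGER rank):

      PROGRAM ShareNeighbors
      USE mpi                        ! replaces: INCLUDE "mpif.h"
      IMPLICIT NONE                  ! practical now that the module declares the MPI names
      INTEGER, PARAMETER :: ROOT = 0 ! explicitly INTEGER (under IMPLICIT REAL it was a REAL)
      INTEGER :: ierr, nprocs, myrank
      CALL MPI_INIT(ierr)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)
      ! ... rest of the program as before ...
      CALL MPI_FINALIZE(ierr)
      END PROGRAM ShareNeighbors

With the module in place, mismatches such as that REAL ROOT can be reported at compile time instead of producing wrong values at run time.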