
2004

0. Contents

0. Contents
1. Introduction
2. Parallel computer architectures
   2.1 Classification by granularity
   2.2 Memory organization
   2.3 Flynn's taxonomy
3. Parallel programming models
   3.1 Communication between the processes
   3.2 Types of parallelism
   3.3 Finite element analysis with the master-slave model
4. Parallel programming with MPI
   4.1 Hello world! with MPI
   4.2 Initializing and finalizing MPI
   4.3 Process rank and number of processes in MPI
   4.4 Sending and receiving messages in MPI
   4.5 Collective communication in MPI
5. The conjugate gradient method
   5.1 Parallel implementation of the conjugate gradient method
   5.2 Parallel conjugate gradient code in FORTRAN with MPI

1. Introduction

Realistic finite element models of complete structures nowadays involve of the order of 250,000 unknowns (degrees of freedom, d.o.f.) or more, and the solution of the resulting systems dominates the cost of the analysis.

Two examples (1.1) illustrate the computational effort involved. A model with 264,000 d.o.f., analysed for 50 load cases with MSC/NASTRAN on a CRAY Y-MP/8, required 3,289 CPU seconds and 68,203 seconds of elapsed (largely I/O) time.

The second example comes from the investigation of the Challenger accident. The MSC/NASTRAN analysis of the solid rocket booster field joint, with 281,584 d.o.f., required 6.5 CPU hours and 20 gigabytes of disk storage on a CRAY X-MP. A corresponding ANSYS analysis with 18,678 d.o.f. required 18.42 CPU hours on the same CRAY X-MP.

Requirements of this size make the CPU time of a single processor, however fast, the limiting factor of large-scale analyses. Moreover, the models keep growing: finer meshes and nonlinear or dynamic analyses multiply the CPU cost further, so the single-processor (sequential) approach reaches its practical limits.

The natural way forward is to distribute the computation over many processors that work simultaneously, i.e. in parallel.

Ideally, if a computation requires time t on a single processor and the work can be divided evenly among n processors, the execution time with n processors would be

    t_n = t / n

In practice this ideal is never reached, because the processors must exchange data and synchronize with each other; how closely it can be approached depends on the algorithm and on how the problem is mapped onto the processors.
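For illustration only (the numbers here are chosen for the example and do not come from the analyses above): a solution that needs t = 10 hours on one processor would ideally need

    t_8 = 10 / 8 = 1.25 hours

on eight processors working in parallel.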

2. Parallel computer architectures

Parallel computers can be classified in several ways, depending on which of their characteristics one considers. The classifications most relevant here are the granularity of the system, the organization of its memory, and the number of instruction and data streams it processes simultaneously.

2.1 Classification by granularity

Depending on the number and the relative power of their processors, parallel systems are usually divided into:

- fine-grained (massively parallel) systems, consisting of a very large number of comparatively simple processors;
- medium-grained systems;
- coarse-grained systems, consisting of a small number of powerful processors.

A property closely related to this classification is scalability: the ability of the system (and of the software written for it) to keep delivering proportional performance as processors are added.

A special case of coarse-grained systems that has become particularly attractive is the workstation network, in which ordinary workstations connected by a local network cooperate as a single parallel computer.

2.2 Memory organization

A second classification concerns the way the processors access memory. Two basic organizations exist:

- shared memory systems, and
- distributed memory systems.

In a shared memory system all processors address a common memory. Such machines usually have a small number of processors (typically 4 to 16, e.g. the CRAY and SGI multiprocessors), because the common memory and the bus connecting the processors to it have a finite bandwidth; as more processors are added, access to memory becomes the bottleneck and the performance gain diminishes.

In a distributed memory system each processor has its own local memory, which it alone can address directly, and the processors are connected by a communication network through which they exchange data. Since every processor works mainly on its local data, such systems can grow to very large numbers of processors without a central bottleneck.

The price is paid by the programmer. In a shared memory system data exchange is implicit: all processors see the same variables, i.e. the matrices and vectors of the problem reside in the common memory. In a distributed memory system the programmer must decide explicitly:

- how the data are distributed among the local memories, and
- when and which data are exchanged between the processors.

This makes programming more laborious, but it also gives full control over the communication, which is essential for good performance on large processor counts.

The communication itself is not programmed at the hardware level. It is provided through an application programming interface (API), i.e. a library of routines with which the processes send and receive data, so that the same program can run on different distributed memory systems.

2.3 Flynn's taxonomy

A third classification, due to Flynn, considers two characteristics of a computer: how many instruction streams (instructions) and how many data streams (data) it processes at the same time. Combining the two gives four classes:

- SISD (Single Instruction-Single Data): one instruction stream operating on one data stream; this is the conventional sequential computer.
- SIMD (Single Instruction-Multiple Data): the same instruction is applied simultaneously to many data elements. This corresponds to data or loop-level parallelism (fine-grain parallelism).
- MISD (Multiple Instruction-Single Data): many instruction streams on the same data; of mainly theoretical interest, like SIMD machines it is rarely encountered today.
- MIMD (Multiple Instruction-Multiple Data): every processor executes its own instruction stream on its own data.

A MIMD system is the most general of the four: it can obviously operate as a SISD machine, and by having all processors execute the same program it can also emulate a SIMD machine. Practically all modern parallel systems, from multiprocessors to workstation networks, belong to the MIMD class.

Distributed memory MIMD systems are programmed, as mentioned above, through message-passing APIs. The two APIs that have prevailed are PVM (Parallel Virtual Machine) and MPI (Message Passing Interface); the latter is used in the rest of this text.

3. Parallel programming models

Writing a parallel program means dividing the computation into parts that can be executed concurrently by different processes and organizing the exchange of data between them. The way this division is done determines the programming model and, to a large extent, the performance that can be achieved.

Two general approaches exist for extracting parallelism from a sequential algorithm:

- parallelism at the level of individual operations, where the computationally intensive kernels of the code (typically loops) are executed in parallel, and
- parallelism at the level of the algorithm, where the problem and its data are restructured into large, largely independent tasks that are distributed to the processes.

The first approach requires few changes to the program but offers limited gains on distributed memory systems; the second requires rethinking the algorithm but scales much better.

3.1 Communication between the processes

The processes of a parallel program are not independent: they must exchange intermediate results. This communication costs time, and when many processes communicate at once, collisions on the network may occur and slow it down further. In addition, the processes must meet at certain synchronization points, where the faster ones wait for the slower ones before the computation can continue.

Two modes of communication are distinguished:

- asynchronous communication, in which the sending process continues its work without waiting for the receiver to accept the message, and
- synchronized communication, in which sender and receiver must both reach the communication point before either can proceed.

Asynchronous communication allows computation and communication to overlap, while synchronized communication is simpler and is also used to enforce the synchronization points mentioned above (e.g. before a quantity that all processes need becomes available).
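The difference between the two modes can be made concrete with the message-passing routines introduced in Section 4. The following minimal FORTRAN sketch (not part of the original text; it assumes at least two processes) performs the same transfer once with a blocking, synchronized call and once with a non-blocking, asynchronous one:

! Sketch contrasting synchronized (blocking) and asynchronous (non-blocking)
! communication in MPI. Run with at least two processes.
Program CommModes
Implicit None
Include 'mpif.h'
Integer :: iRank, iErr, iRequest, status(MPI_STATUS_SIZE)
Double Precision :: fOut(1000), fIn(1000)
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
fOut = Dble(iRank)
if (iRank.EQ.0) then
   ! Blocking send: the call returns only when the buffer fOut may be reused
   Call MPI_Send(fOut, 1000, MPI_DOUBLE_PRECISION, 1, 10, MPI_COMM_WORLD, iErr)
else if (iRank.EQ.1) then
   ! Non-blocking receive: the call returns immediately ...
   Call MPI_Irecv(fIn, 1000, MPI_DOUBLE_PRECISION, 0, 10, MPI_COMM_WORLD, iRequest, iErr)
   ! ... so useful local computation could overlap the transfer here ...
   Call MPI_Wait(iRequest, status, iErr)   ! explicit synchronization point
end if
Call MPI_Finalize(iErr)
End Program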

3.2 Types of parallelism

With respect to its origin, the parallelism of a problem can be characterized as:

- natural parallelism, when the problem itself consists of independent parts that can be processed concurrently without any reformulation, or
- artificial or computational parallelism, when concurrency has to be created by reformulating the algorithm and/or partitioning the data, e.g. by domain decomposition (domain decompositioning), in which the domain of the problem is divided into subdomains that are processed concurrently.

With respect to the way the work is distributed among the processes, the following models are commonly distinguished:

- Pipelining or processor farming: the computation is organized as a sequence of stages, each assigned to a different process, through which the data flow in a production-line fashion.
- Geometric parallelism: the data of the problem (e.g. the finite element mesh) are partitioned geometrically; each process performs the same operations on its own part and exchanges boundary information with its neighbours.
- Algorithmic parallelism: the algorithm itself is split into different operations, which are assigned to different processes and executed concurrently.
- Task farming (master-slave model): a master process creates and distributes independent tasks to a number of slave processes, collects their results and assigns new tasks to them; the slaves perform the actual computation and, in the simplest case, communicate only with the master.

An important characteristic of any such decomposition is its granularity, i.e. the size of the individual tasks relative to the communication they require, because it determines the achievable load balancing and the communication overhead. One speaks of:

- fine-grain parallelism, with many small tasks,
- medium-grain parallelism, and
- large or coarse-grain parallelism, with few large tasks.

Coarse tasks minimize the communication overhead but make it harder to keep all processes equally busy, whereas fine tasks balance the load easily but communicate a lot; the best compromise depends on the problem and on the parallel system used.

3.3 Finite element analysis with the master-slave model

The task farming (master-slave) model maps naturally onto finite element analysis, and a parallel run is typically organized as follows.

The first step is the creation of the finite element mesh (mesh generation). This step does not parallelize easily and is carried out by the master process.

The mesh is then divided (mesh partitioning) into submeshes; each submesh constitutes a substructure or subdomain. The partitioning is also performed by the master. If there are more subdomains than processes, several subdomains are assigned to the same process, forming a subdomain cluster.

Each slave process receives from the master the data of its subdomain(s): nodal coordinates, element connectivity, material properties, loads and boundary conditions. With these data every slave works independently, forming the stiffness matrices and load vectors of its own elements and assembling its local part of the global system.

The solution phase is where the processes must cooperate, because the subdomains are coupled through the degrees of freedom on their common boundaries. Each slave computes the contributions of its own subdomain and sends to the master the quantities that refer to the interfaces; the master combines them and returns the result to the slaves. When, for example, the stiffness matrix is stored in skyline form (skyline storage), each slave stores and processes only the part of the skyline that corresponds to its own equations, and only interface quantities travel between slave and master.

In this way the memory requirements are also distributed: no process ever needs to hold the complete model, which allows problems to be solved that would not fit in the memory of a single machine.

After the solution, the post-processing of the results (result post-processing), i.e. the computation of stresses, strains and other derived quantities, is again performed by each slave independently on its own subdomain. Each slave sends to the master only the results that must be collected for output, and the master writes them in the proper order.

The efficiency of the scheme depends on how evenly the subdomains load the slave processes and on how much interface information has to pass through the master; a good mesh partitioning therefore aims at subdomains of equal computational weight with boundaries as small as possible. A minimal sketch of this control flow is given after this section.
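The control flow of the master-slave organization can be sketched with the MPI routines of Section 4. The toy program below is not the original implementation: the "mesh" is simply a vector of element contributions that the master partitions, the slaves process, and the master combines. It assumes at least two processes and, for simplicity, a number of elements divisible by the number of slaves.

! Toy master-slave sketch: the master partitions a work vector, the slaves
! process their parts and return partial results, the master combines them.
Program MasterSlaveSketch
Implicit None
Include 'mpif.h'
Integer, Parameter :: iNoOfElems = 1000
Integer :: iRank, iNoOfProcs, iErr, iSlave, iCount, I
Integer :: status(MPI_STATUS_SIZE)
Double Precision :: pfWork(iNoOfElems), fPartial, fTotal
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
Call MPI_Comm_size(MPI_COMM_WORLD, iNoOfProcs, iErr)
iCount = iNoOfElems / (iNoOfProcs - 1)        ! elements per slave ("subdomain")
if (iRank.EQ.0) then
   ! Master: "mesh generation" and "partitioning", then distribution to the slaves
   Do I = 1, iNoOfElems
      pfWork(I) = Dble(I)
   End Do
   Do iSlave = 1, iNoOfProcs - 1
      Call MPI_Send(pfWork((iSlave-1)*iCount+1), iCount, MPI_DOUBLE_PRECISION, &
                    iSlave, 1, MPI_COMM_WORLD, iErr)
   End Do
   ! Master: collect and combine the partial results of the slaves
   fTotal = 0D0
   Do iSlave = 1, iNoOfProcs - 1
      Call MPI_Recv(fPartial, 1, MPI_DOUBLE_PRECISION, iSlave, 2, &
                    MPI_COMM_WORLD, status, iErr)
      fTotal = fTotal + fPartial
   End Do
   Write(*,*) "Combined result:", fTotal
else
   ! Slave: receive the subdomain data, process it locally, return the result
   Call MPI_Recv(pfWork, iCount, MPI_DOUBLE_PRECISION, 0, 1, MPI_COMM_WORLD, status, iErr)
   fPartial = Sum(pfWork(1:iCount))
   Call MPI_Send(fPartial, 1, MPI_DOUBLE_PRECISION, 0, 2, MPI_COMM_WORLD, iErr)
end if
Call MPI_Finalize(iErr)
End Program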

4. Parallel programming with MPI

On distributed memory systems and on workstation networks, a parallel program runs as a set of cooperating tasks (or processes) which exchange data over the interconnection network. The main mechanisms available to the programmer for this exchange are the following:

- The socket interface: system calls that give direct access to the TCP/IP and UDP protocols. It offers maximum flexibility, but the programmer must handle all the networking details.
- Remote procedure calls (RPC): a layer built on top of TCP/IP or UDP with which a process calls a procedure that is actually executed in another process. In a task farming (master-slave) scheme, RPC lets the master invoke routines that run on the slaves, which return their results to the master when they finish.
- Message passing APIs (message passing APIs): libraries that hide the networking details behind a small set of routines for sending and receiving messages between processes, which is exactly what a parallel numerical application needs.

Message passing libraries such as PVM and MPI have prevailed in scientific computing. Their decisive advantage is portability: a program written on top of such an API runs without modification on any system, from a multiprocessor to a network of workstations, for which the API is implemented.

In the sections that follow, the basic features of the MPI API are presented through small examples written in FORTRAN 90/95.

4.1 Hello world! with MPI

Following tradition, the first example is a Hello world! program, which shows the basic structure of every MPI program.

In MPI the same executable is started on all participating processors; each running copy is a separate process. The processes are distinguished by an identification number, their rank: if n processes are started, they receive the ranks 0, 1, ..., n-1. In the FORTRAN 95 program below, the processes with ranks 1 to n-1 each send a greeting message to process 0, which receives the messages and prints them.

Program MPIHelloWorld
Implicit None
Include 'mpif.h'
Integer :: iRank, iNoOfProcs, iErr, iSource, iDest, iTag=50
Character(Len=50) :: cMessage
Integer :: status(MPI_STATUS_SIZE)
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
Call MPI_Comm_size(MPI_COMM_WORLD, iNoOfProcs, iErr)
if (iRank.GT.0) then
   iDest = 0
   Write(cMessage, *) "Greetings from process:", iRank
   Call MPI_Send(cMessage, 50, MPI_CHARACTER, iDest, iTag, &
                 MPI_COMM_WORLD, iErr)
else
   Do iSource = 1, iNoOfProcs - 1
      Call MPI_Recv(cMessage, 50, MPI_CHARACTER, iSource, &
                    iTag, MPI_COMM_WORLD, status, iErr)
      Write(*, *) cMessage
   End Do
end if
Call MPI_Finalize(iErr)
End Program

Every process with rank greater than zero therefore sends to process 0 a message of the form:

Greetings from process: <rank>

If the program is run with five processes, process 0 receives and prints four such messages:

Greetings from process: 1
Greetings from process: 2
Greetings from process: 3
Greetings from process: 4

4.2 Initializing and finalizing MPI

Every program unit that calls MPI routines must include the MPI header file, which defines the constants and interfaces the compiler needs during compilation:

Include 'mpif.h'

Furthermore, MPI must be initialized before any other MPI routine is called and finalized before the program terminates. This is done with the calls:

Call MPI_Init(iErr)

Call MPI_Finalize(iErr)

The argument iErr is an Integer returned by every MPI routine; the value 0 denotes successful completion, while any other value indicates an error. The corresponding Integer variable must of course be declared in the program.

The skeleton of every MPI program in FORTRAN is therefore:

Program ...
Implicit None
Include 'mpif.h'
Integer :: iErr, ...
Call MPI_Init(iErr)
...
Call MPI_Finalize(iErr)
End Program

4.3 Process rank and number of processes in MPI

Each MPI process learns its own identity and the size of the computation through two routines:

Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)

Call MPI_Comm_size(MPI_COMM_WORLD, iNoOfProcs, iErr)

The first returns in the Integer variable iRank the identification number (rank) of the calling process. As always, iErr is the error code returned by MPI; in the examples it is not checked, but in production code it should be.

The argument MPI_COMM_WORLD is the default communicator, i.e. the group of all the processes that were started together; it is defined in the MPI header file and appears in practically every MPI call.

The second routine returns in iNoOfProcs the total number of processes in the communicator, i.e. the number of processes among which the ranks returned by MPI_Comm_rank are distributed.
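A typical use of these two values, beyond the master-slave pattern of the Hello world! example, is to split the iterations of a loop among the processes. A small sketch (not from the original text):

! Each process handles the iterations I = iRank+1, iRank+1+iNoOfProcs, ...
Program RankExample
Implicit None
Include 'mpif.h'
Integer, Parameter :: N = 100
Integer :: iRank, iNoOfProcs, iErr, I
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
Call MPI_Comm_size(MPI_COMM_WORLD, iNoOfProcs, iErr)
Do I = iRank + 1, N, iNoOfProcs
   ! ... work on item I ...
End Do
Write(*,*) "Process", iRank, "of", iNoOfProcs, "finished"
Call MPI_Finalize(iErr)
End Program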

4.4 Sending and receiving messages in MPI

The basic point-to-point communication in MPI is performed with the routines MPI_Send and MPI_Recv, which appeared in the Hello world! example:

Call MPI_Send(cMessage, 50, MPI_CHARACTER, iDest, iTag, &
              MPI_COMM_WORLD, iErr)

Call MPI_Recv(cMessage, 50, MPI_CHARACTER, iSource, iTag, &
              MPI_COMM_WORLD, status, iErr)

The call to MPI_Send transmits the contents of the buffer cMessage (here a character variable), consisting of 50 elements of type CHARACTER (MPI datatype MPI_CHARACTER), to the process with rank iDest. The call to MPI_Recv correspondingly receives into cMessage a message of up to 50 elements of type MPI_CHARACTER coming from the process with rank iSource; the argument status returns information about the message that was actually received.

The MPI datatypes that correspond to the FORTRAN types are:

MPI datatype (MPI_DATATYPE)    FORTRAN type
MPI_COMPLEX                    COMPLEX
MPI_DOUBLE_COMPLEX             DOUBLE COMPLEX
MPI_LOGICAL                    LOGICAL
MPI_REAL                       REAL
MPI_DOUBLE_PRECISION           DOUBLE PRECISION
MPI_INTEGER                    INTEGER
MPI_CHARACTER                  CHARACTER

The argument iTag is the tag of the message, an integer label that accompanies it. A receive operation accepts only messages whose tag matches its own iTag, so by using different tag values a process can distinguish between different kinds of messages arriving from the same source. Finally, the last arguments are the communicator MPI_COMM_WORLD, the status of the received message, and the error code iErr, as in MPI_Comm_rank.
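As a further illustration of the argument lists (a sketch, not from the original text), the following program sends an array of ten INTEGER values from process 0 to process 1 with tag 25; the receive succeeds because source, datatype and tag all match. Run with at least two processes:

! Minimal send/receive pair with MPI_INTEGER data and an explicit tag.
Program SendRecvExample
Implicit None
Include 'mpif.h'
Integer :: iRank, iErr, piData(10), status(MPI_STATUS_SIZE)
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
if (iRank.EQ.0) then
   piData = 7
   Call MPI_Send(piData, 10, MPI_INTEGER, 1, 25, MPI_COMM_WORLD, iErr)
else if (iRank.EQ.1) then
   Call MPI_Recv(piData, 10, MPI_INTEGER, 0, 25, MPI_COMM_WORLD, status, iErr)
   Write(*,*) "Process 1 received:", piData(1)
end if
Call MPI_Finalize(iErr)
End Program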

4.5 Collective communication in MPI

In the programming models discussed earlier, data must often be sent from one process to all the others (e.g. from the master process to the slave processes), or quantities computed by all processes must be combined into a single result on one of them. For such patterns MPI provides collective communication routines; the two most important are:

Call MPI_BCast(Message, iSize, MPI_DATATYPE, iRoot, &
               MPI_COMM_WORLD, iErr)

Call MPI_Reduce(Operand, Message, iSize, MPI_DATATYPE, MPI_OPERAND, &
                iRoot, MPI_COMM_WORLD, iErr)

MPI_BCast broadcasts the buffer Message of the process with rank iRoot to all processes of the communicator. MPI_Reduce combines the buffers Operand of all processes element by element, using the operation MPI_OPERAND, and stores the result in the buffer Message of process iRoot. The available reduction operations (MPI_OPERAND) are:

MPI_MAX     maximum
MPI_MIN     minimum
MPI_SUM     sum
MPI_PROD    product
MPI_LAND    Logical AND
MPI_BAND    Bitwise AND
MPI_LOR     Logical OR
MPI_BOR     Bitwise OR
MPI_LXOR    Logical Exclusive OR
MPI_BXOR    Bitwise Exclusive OR

For example, the following call broadcasts from process 0 to all processes the double precision variable fValue:

Call MPI_BCast(fValue, 1, MPI_DOUBLE_PRECISION, 0, &
               MPI_COMM_WORLD, iErr)

Correspondingly, the following call adds the values fValue of all processes and stores the sum in the variable fSum of process 0:

Call MPI_Reduce(fValue, fSum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
                0, MPI_COMM_WORLD, iErr)
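In many cases the result of the reduction is needed by all processes; in the code of Section 5.2 this is obtained with an MPI_Reduce followed by an MPI_BCast. MPI also offers the combined routine MPI_Allreduce for this purpose; a minimal sketch (not from the original text):

! MPI_Allreduce combines a reduction with a broadcast, so every process
! obtains the global sum directly.
Program AllreduceExample
Implicit None
Include 'mpif.h'
Integer :: iRank, iErr
Double Precision :: fValue, fSum
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
fValue = Dble(iRank + 1)
Call MPI_Allreduce(fValue, fSum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
Write(*,*) "Process", iRank, "sees the global sum", fSum
Call MPI_Finalize(iErr)
End Program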

A full description of MPI, together with freely available implementations, can be found at:
http://www-unix.mcs.anl.gov/mpi/

An implementation for Windows NT/2000/XP is available at:
http://www-unix.mcs.anl.gov/mpi/mpich/mpich-nt/

5. The conjugate gradient method

The conjugate gradient method was proposed by Hestenes and Stiefel in 1952 for the iterative solution of linear systems with a symmetric, positive definite coefficient matrix, such as the systems

[K]{u} = {f}

that arise from the finite element discretization of structural problems, where [K] is the stiffness matrix, {u} the vector of unknown displacements and {f} the load vector. Writing {g} for the gradient (residual) vector and {d} for the search direction, the algorithm is:

Initialization:
    {u}^(0) = 0,    {d}^(0) = -{g}^(0) = {f} - [K]{u}^(0) = {f}

For k = 0, 1, ... until convergence:

    α_k = ( {g}^(k)T {g}^(k) ) / ( {d}^(k)T [K]{d}^(k) )

    {u}^(k+1) = {u}^(k) + α_k {d}^(k)

    {g}^(k+1) = {g}^(k) + α_k [K]{d}^(k)

    β_(k+1) = ( {g}^(k+1)T {g}^(k+1) ) / ( {g}^(k)T {g}^(k) )

    {d}^(k+1) = -{g}^(k+1) + β_(k+1) {d}^(k)

The dominant cost of each iteration is one matrix-vector product [K]{d}^(k), two inner products and three vector updates; the iteration is terminated when a suitable measure of the residual (in the implementation of Section 5.2, the ratio |{d}^(k+1)| / |{d}^(0)|) drops below a prescribed tolerance.
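For reference, the algorithm above can be transcribed almost literally into FORTRAN. The following serial sketch (not part of the original text) works on a small dense symmetric positive definite matrix and uses the same convergence criterion as the parallel solver of Section 5.2:

! Serial conjugate gradient sketch for a dense SPD matrix fK (illustrative only,
! not the skyline/parallel version of Section 5.2).
Subroutine SerialCG(iN, fK, fF, fU, fTolerance)
Implicit None
Integer, Intent(IN) :: iN
Double Precision, Intent(IN) :: fK(iN,iN), fF(iN), fTolerance
Double Precision, Intent(OUT) :: fU(iN)
Double Precision :: fG(iN), fD(iN), fKd(iN)
Double Precision :: fAlpha, fBeta, fGG, fGGNext, fD0
fU = 0D0                                     ! u(0) = 0
fG = -fF                                     ! g(0) = [K]u(0) - f = -f
fD = fF                                      ! d(0) = -g(0)
fGG = Dot_Product(fG, fG)
fD0 = dsqrt(Dot_Product(fD, fD))             ! |d(0)|
Do While (dsqrt(Dot_Product(fD, fD)) / fD0 .GT. fTolerance)
   fKd = MatMul(fK, fD)                      ! [K]d(k)
   fAlpha = fGG / Dot_Product(fD, fKd)       ! alpha(k)
   fU = fU + fAlpha * fD                     ! u(k+1)
   fG = fG + fAlpha * fKd                    ! g(k+1)
   fGGNext = Dot_Product(fG, fG)
   fBeta = fGGNext / fGG                     ! beta(k+1)
   fD = -fG + fBeta * fD                     ! d(k+1)
   fGG = fGGNext
End Do
End Subroutine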

5.1 Parallel implementation of the conjugate gradient method

The conjugate gradient method parallelizes naturally, because all of its operations (the matrix-vector product, the inner products and the vector updates) can be carried out row by row. If the system has m equations and p processes are used, each process v is assigned a block of approximately m/p consecutive equations: it stores the corresponding rows of [K] and the corresponding parts {f_v}, {u_v}, {g_v}, {d_v} of the vectors {f}, {u}, {g}, {d}.

Initialization (every process, for its own block only):
    {u_v}^(0) = 0,    {d_v}^(0) = -{g_v}^(0) = {f_v}

For k = 0, 1, ...:

    every process v computes the local part of the numerator
        n_v^(k) = {g_v}^(k)T {g_v}^(k)
    and the master accumulates the total  n_0^(k) = Σ_a n_a^(k)

    the complete direction vector {d}^(k) is assembled and distributed to all
    processes; every process computes its rows of the product [K]{d}^(k) and
    the local part of the denominator
        d_v^(k) = {d_v}^(k)T ([K]{d}^(k))_v
    the master accumulates  d_0^(k) = Σ_a d_a^(k),  forms
        α_k = n_0^(k) / d_0^(k)
    and broadcasts it to all processes

    every process updates its own blocks
        {u_v}^(k+1) = {u_v}^(k) + α_k {d_v}^(k)
        {g_v}^(k+1) = {g_v}^(k) + α_k ([K]{d}^(k))_v

    the partial products {g_v}^(k+1)T {g_v}^(k+1) are again accumulated on the
    master, which forms
        β_(k+1) = n_0^(k+1) / n_0^(k)
    and broadcasts it; every process then updates
        {d_v}^(k+1) = -{g_v}^(k+1) + β_(k+1) {d_v}^(k)

The only communication required in each iteration is therefore the accumulation (reduction) of a few scalar partial sums on the master, the assembly and redistribution of the direction vector for the matrix-vector product, and the broadcast of the scalars α_k and β_(k+1). The convergence check |{d}^(k+1)| / |{d}^(0)| < tolerance is handled in the same way, by accumulating the partial sums of squares of {d_v}^(k+1).

5.2 Parallel conjugate gradient code in FORTRAN with MPI

The subroutine below implements the parallel algorithm of Section 5.1 with MPI. Every process calls SolveCG with the total number of equations iEqNo, its own equation range iStart to iEnd, the convergence tolerance, its part of the stiffness matrix in skyline storage (piKIx, pfK) and the right-hand side pfRHS, which on exit contains the solution. The auxiliary routines DotProduct and MulSkylineVec (not listed) compute a partial dot product and a partial skyline matrix-vector product over the local equation range.
Subroutine SolveCG(iEqNo, iStart, iEnd, fTolerance, piKIx, pfK, pfRHS)
Implicit None
Include 'mpif.h'
Integer, Intent(IN) :: iEqNo, iStart, iEnd, piKIx(iEqNo)
Double Precision, Intent(IN) :: fTolerance, pfK(*)
Double Precision, Intent(INOUT) :: pfRHS(iEqNo)
Logical :: bIsMaster
Integer :: I, iErr
Double Precision, External :: DotProduct
Double Precision :: fResidual, fd, fdNext, fn, fNumerator, fDenominator, fTemp
Double Precision, Pointer :: pfu(:), pfd(:), pfg(:), pfdK(:), pfvTemp(:)
! Main routine
Call MPI_Comm_rank(MPI_COMM_WORLD, I, iErr)
bIsMaster = I.EQ.0
fd = 0D0
fTemp = 0D0
Allocate(pfu(iEqNo), pfd(iEqNo), pfg(iEqNo), pfdK(iEqNo), pfvTemp(iEqNo))
! Initialization (local equations only): u=0, d=-g=RHS-K*u=RHS
Do I = iStart, iEnd
   pfu(I) = 0D0
   pfd(I) = pfRHS(I)
   pfg(I) = -pfRHS(I)
   fTemp = fTemp + pfRHS(I) * pfRHS(I)
End Do
! Collect and add the partial sums of squares from all processes and calculate fd=|d(0)|
Call MPI_Reduce(fTemp, fd, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, iErr)
fd = dsqrt(fd)
Call MPI_BCast(fd, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
fResidual = 10D20
Do While (fResidual.GT.fTolerance)
   ! Calculate alpha=(g'*g)/(d'*K*d)
   ! Collect and add all parts of g'*g
   fNumerator = 0D0
   fDenominator = 0D0
   fTemp = DotProduct(iStart, iEnd, pfg, pfg)
   Call MPI_Reduce(fTemp, fNumerator, 1, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   ! Assemble the WHOLE d vector (stored in pfdK), since K*d needs all of it
   Do I = 1, iEqNo
      pfdK(I) = 0D0
      pfvTemp(I) = 0D0
   End Do
   Call MPI_Reduce(pfd, pfdK, iEqNo, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   Call MPI_BCast(pfdK, iEqNo, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
   Call MulSkylineVec(iStart, iEnd, piKIx, pfK, pfdK, pfvTemp)
   ! Collect and add all parts of the K*d product and send the result back to all processes
   Do I = 1, iEqNo
      pfdK(I) = 0D0
   End Do
   Call MPI_Reduce(pfvTemp, pfdK, iEqNo, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   Call MPI_BCast(pfdK, iEqNo, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
   ! Collect and add all parts of d'*(K*d)
   fTemp = DotProduct(iStart, iEnd, pfdK, pfd)
   Call MPI_Reduce(fTemp, fDenominator, 1, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   ! The master performs the division to calculate alpha and sends the result to all processes
   if (bIsMaster) fn = fNumerator / fDenominator
   Call MPI_BCast(fn, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
   ! Update uNext=u+alpha*d (stored in pfRHS) and gNext=g+alpha*K*d
   Do I = iStart, iEnd
      pfRHS(I) = pfu(I) + fn * pfd(I)
      pfg(I) = pfg(I) + fn * pfdK(I)
   End Do
   ! Calculate beta=(gNext'*gNext)/(g'*g)
   if (bIsMaster) fDenominator = fNumerator
   ! Collect and add all parts of gNext'*gNext
   fTemp = DotProduct(iStart, iEnd, pfg, pfg)
   fNumerator = 0D0
   Call MPI_Reduce(fTemp, fNumerator, 1, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   ! The master performs the division to calculate beta and sends the result to all processes
   if (bIsMaster) fn = fNumerator / fDenominator
   Call MPI_BCast(fn, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
   ! Update dNext=-gNext+beta*d and accumulate the local part of |dNext|
   fdNext = 0D0
   fTemp = 0D0
   Do I = iStart, iEnd
      pfd(I) = fn * pfd(I) - pfg(I)
      pfu(I) = pfRHS(I)
      fTemp = fTemp + pfd(I) * pfd(I)
   End Do
   ! Collect and add the partial sums of squares from all processes and calculate fdNext=|dNext|
   Call MPI_Reduce(fTemp, fdNext, 1, MPI_DOUBLE_PRECISION, &
                   MPI_SUM, 0, MPI_COMM_WORLD, iErr)
   fdNext = dsqrt(fdNext)
   Call MPI_BCast(fdNext, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, iErr)
   ! Calculate the relative residual fdNext/fd
   fResidual = fdNext / fd
End Do
DeAllocate(pfu, pfd, pfg, pfdK, pfvTemp)
End Subroutine
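For completeness, a hypothetical driver is sketched below (it is not part of the original text): every process computes its own equation range from its rank, fills its local part of the skyline matrix through an assumed routine LoadLocalSkyline, and then calls SolveCG. The array sizes and the routine name are placeholders:

Program CGDriver
Implicit None
Include 'mpif.h'
Integer :: iRank, iNoOfProcs, iErr, iEqNo, iStart, iEnd, iBlock
Integer, Allocatable :: piKIx(:)
Double Precision, Allocatable :: pfK(:), pfRHS(:)
Call MPI_Init(iErr)
Call MPI_Comm_rank(MPI_COMM_WORLD, iRank, iErr)
Call MPI_Comm_size(MPI_COMM_WORLD, iNoOfProcs, iErr)
iEqNo = 10000                               ! total number of equations (placeholder)
iBlock = iEqNo / iNoOfProcs                 ! equations per process
iStart = iRank * iBlock + 1
iEnd = iStart + iBlock - 1
if (iRank.EQ.iNoOfProcs-1) iEnd = iEqNo     ! the last process takes the remainder
Allocate(piKIx(iEqNo), pfRHS(iEqNo), pfK(1000000))   ! nominal local skyline size (placeholder)
! Hypothetical routine that fills the local rows of the skyline matrix and the load vector
Call LoadLocalSkyline(iEqNo, iStart, iEnd, piKIx, pfK, pfRHS)
Call SolveCG(iEqNo, iStart, iEnd, 1D-6, piKIx, pfK, pfRHS)
! pfRHS(iStart:iEnd) now holds the local part of the solution vector
Call MPI_Finalize(iErr)
End Program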
