Commit 62072ed5 authored by acloirec

Modify hello world mpi_omp_cuda

parent e77d4d52
@@ -315,14 +315,11 @@ hello_mpi_omp_cuda.o:hello_mpi_omp_cuda.cu
hello_mpi_omp_cuda: hello_mpi_omp_cuda_mpi.o hello_mpi_omp_cuda.o
	$(EXEC) $(MPICXX) $(MPI_LDFLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
	$(EXEC) ./hello_mpi_omp_cuda
clean:
	rm -f hello_mpi_omp_cuda hello_mpi_omp_cuda_mpi.o hello_mpi_omp_cuda.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/hello_mpi_omp_cuda
clobber: clean
#!/bin/bash
#SBATCH -J comp_hello_world
#SBATCH -C SKL224
#SBATCH -t 00:30:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --threads-per-core=1
#SBATCH --gres=gpu:1
#SBATCH --mem=100GB
module purge
module load intel/18.1 intelmpi/2018.1.163 gcc/6.2.0 cuda/10.1
make -f Makefile_mpi_omp_cuda
@@ -19,14 +19,15 @@ int main(int argc, char *argv[])
int mpiRank, mpiSize;
char hostname[128];
int nthreads, tid, cpuid;
int i, j=0;
int igpu,i, j=0;
int k =0;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
gethostname(hostname, sizeof hostname);
char DGPU[100];
int DGPU;
int nb_GPU;
nb_GPU=hello_countGPU();
@@ -35,80 +36,44 @@ int main(int argc, char *argv[])
nthreads = omp_get_num_threads();
}
if ( mpiRank== 0)
printf("Run executed using %d MPI processes, with %d threads per process \n", mpiSize, nthreads);
if (nb_GPU == 1)
if ( mpiRank== 0)
{
for(i = 0; i < mpiSize; i++)
printf("Run executed using %d MPI processes, with %d threads per process on %d GPUs\n", mpiSize, nthreads,nb_GPU);
if (mpiSize < nb_GPU || (mpiSize%nb_GPU) != 0)
{
MPI_Barrier(MPI_COMM_WORLD);
if (i == mpiRank)
{
hello_displayGPU(0, DGPU, sizeof(DGPU));
printf("%s: MPI n° %d -> cpuid %d -> on GPU n°%s\n",hostname, mpiRank,sched_getcpu(),DGPU);
#pragma omp parallel private(tid, nthreads, cpuid) shared(i)
{
tid=omp_get_thread_num();
nthreads = omp_get_num_threads();
cpuid = sched_getcpu();
while(j < tid){
#pragma omp flush(j)
}
printf("\t thread n° %d -> cpuid %d on MPI n° %d on %s\n", tid, cpuid, mpiRank,hostname);
j++;
#pragma omp flush(j)
}
}
printf("nb of mpi tasks must be >= nb of GPUs \n");
printf("or \n");
printf("nb of mpi tasks must be divisible by nb of GPUs \n");
return 0;
}
}
else if (nb_GPU == 2)
{
for(i = 0; i < mpiSize/2; i++)
}
for(igpu = 0; igpu < nb_GPU; igpu++)
{
MPI_Barrier(MPI_COMM_WORLD);
if (i == mpiRank)
for(i = k; i < (igpu+1)*(mpiSize/nb_GPU); i++)
{
hello_displayGPU(0, DGPU, sizeof(DGPU));
printf("%s: MPI n° %d -> cpuid %d -> on GPU n°%s\n",hostname, mpiRank,sched_getcpu(),DGPU);
MPI_Barrier(MPI_COMM_WORLD);
if (i == mpiRank)
{
hello_displayGPU(igpu, &DGPU);
printf("%s: MPI n° %d -> cpuid %d -> on GPU busId n°%d\n",hostname, mpiRank,sched_getcpu(),DGPU);
#pragma omp parallel private(tid, nthreads, cpuid) shared(i)
{
tid=omp_get_thread_num();
nthreads = omp_get_num_threads();
cpuid = sched_getcpu();
while(j < tid){
{
tid=omp_get_thread_num();
nthreads = omp_get_num_threads();
cpuid = sched_getcpu();
while(j < tid)
{
#pragma omp flush(j)
}
printf("\t thread n° %d -> cpuid %d on MPI n° %d on %s\n", tid, cpuid, mpiRank,hostname);
j++;
#pragma omp flush(j)
}
}
}
for(i = mpiSize/2; i < mpiSize; i++)
{
MPI_Barrier(MPI_COMM_WORLD);
if (i == mpiRank)
{
hello_displayGPU(1, DGPU, sizeof(DGPU));
printf("%s: MPI n° %d -> cpuid %d -> on GPU n°%s\n",hostname, mpiRank,sched_getcpu(),DGPU);
#pragma omp parallel private(tid, nthreads, cpuid) shared(i)
{
tid=omp_get_thread_num();
nthreads = omp_get_num_threads();
cpuid = sched_getcpu();
while(j < tid){
}
printf("\t thread n° %d -> cpuid %d on MPI n° %d on %s\n", tid, cpuid, mpiRank,hostname);
j++;
#pragma omp flush(j)
}
}
printf("\t thread n° %d -> cpuid %d on MPI n° %d on %s\n", tid, cpuid, mpiRank,hostname);
j++;
#pragma omp flush(j)
}
}
k=k+(mpiSize/nb_GPU);
}
}
MPI_Finalize();
MPI_Finalize();
}
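The new igpu/i loops hand out consecutive blocks of MPI ranks to the available GPUs and serialize the printing with barriers. A minimal sketch of the equivalent per-rank mapping (illustrative names ranks_per_gpu and my_gpu; not in the source), assuming mpiSize is a multiple of nb_GPU as the new check enforces:

    /* sketch only: each rank computes its GPU index directly */
    int ranks_per_gpu = mpiSize / nb_GPU;   /* block size; > 0 after the earlier check */
    int my_gpu = mpiRank / ranks_per_gpu;   /* ranks 0..block-1 -> GPU 0, next block -> GPU 1, ... */
    hello_displayGPU(my_gpu, &DGPU);        /* DGPU receives that GPU's PCI bus id */
    printf("%s: MPI %d -> GPU %d (busId %d)\n", hostname, mpiRank, my_gpu, DGPU);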
@@ -7,11 +7,6 @@ using std::endl;
// CUDA runtime
#include <cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include <helper_functions.h>
#include <helper_cuda.h>
#include <timer.h>
// Error handling macro
#define CUDA_CHECK(call) \
@@ -34,19 +29,12 @@ int hello_countGPU()
return deviceCount;
}
int hello_displayGPU(int gpuid, char* DGPU, size_t len)
int hello_displayGPU(int gpuid, int *DGPU)
{
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, gpuid);
if (deviceProp.pciBusID == 27)
{
strcpy(DGPU," 0, pciBusID 27");
}
else if (deviceProp.pciBusID == 56)
{
strcpy(DGPU," 1, pciBusID 56");
}
*DGPU=deviceProp.pciBusID;
return 0;
}
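For reference, a self-contained sketch of how the updated helpers could look after this change; the CUDA_CHECK macro body is assumed here, since the hunk above truncates it, and the real file may differ:

    // sketch only: count visible CUDA devices and report a device's PCI bus id
    #include <cstdio>
    #include <cuda_runtime.h>

    #define CUDA_CHECK(call)                                              \
        do {                                                              \
            cudaError_t err_ = (call);                                    \
            if (err_ != cudaSuccess)                                      \
                fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                        cudaGetErrorString(err_), __FILE__, __LINE__);    \
        } while (0)

    extern "C" int hello_countGPU()
    {
        int deviceCount = 0;
        CUDA_CHECK(cudaGetDeviceCount(&deviceCount));   // GPUs visible to this process
        return deviceCount;
    }

    extern "C" int hello_displayGPU(int gpuid, int *DGPU)
    {
        cudaDeviceProp deviceProp;
        CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, gpuid));
        *DGPU = deviceProp.pciBusID;   // return the PCI bus id instead of a fixed string
        return 0;
    }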
......
extern "C" {
int hello_countGPU();
int hello_displayGPU(int gpuid, char* DGPU, size_t len);
int hello_displayGPU(int gpuid, int *DGPU);
}
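Caller-side usage, sketched (hypothetical snippet, not part of the commit): the C translation unit only needs these two declarations, since DGPU is now a plain int:

    int nb_GPU = hello_countGPU();   /* CUDA devices visible to this MPI process */
    int DGPU;                        /* receives the PCI bus id */
    if (nb_GPU > 0)
        hello_displayGPU(0, &DGPU);  /* query device 0, for example */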