Commit 5fa6910a authored by Gab

Merge branch 'dev' into 'master'

Release

See merge request !2
parents 2377a9c7 2c36f100
@@ -7,12 +7,12 @@ Hello World hybrid (MPI and/or OpenMP) for process binding verification
Clone the repo:
```
git clone https://dci-gitlab.cines.fr/hautreux/hello\_world.git
git clone https://dci-gitlab.cines.fr/hautreux/hello_world.git
```
and enter the folder:
```
cd hello\_world
cd hello_world
```
### Prerequisites
@@ -39,14 +39,14 @@ Tests are available in the test_scripts directory
You can run them using Slurm:
```
cd test_scripts/occigen/hsw24/
sbatch hsw24_hybrid.sh
cd test_scripts/occigen/mpi/
sbatch hsw24.sh
```
You can then inspect the output file that Slurm creates to verify the process and thread binding; see the example below.
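For instance, a hybrid run with 2 MPI processes and 2 OpenMP threads per process prints one line per process and one line per thread, along these lines (hostnames and cpuids are illustrative, taken from the printf formats in the source):
```
Run executed using 2 MPI processes, with 2 threads per process
node001: MPI n° 0 -> cpuid 0
	 thread n° 0 -> cpuid 0 on MPI n° 0 on node001
	 thread n° 1 -> cpuid 1 on MPI n° 0 on node001
node001: MPI n° 1 -> cpuid 14
	 thread n° 0 -> cpuid 14 on MPI n° 1 on node001
	 thread n° 1 -> cpuid 15 on MPI n° 1 on node001
```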
## Authors
* **Gabriel Hautreux**
* **CINES support team**
## License
#define _GNU_SOURCE   /* required for sched_getcpu() */
#include <omp.h>
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int mpiRank, mpiSize;
    char hostname[128];
    int nthreads, tid, cpuid;
    int i, j = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
    gethostname(hostname, sizeof hostname);

    /* Query the number of OpenMP threads per MPI process. */
    #pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }

    if (mpiRank == 0)
        printf("Run executed using %d MPI processes, with %d threads per process \n", mpiSize, nthreads);

    /* Print the binding of each MPI rank in turn; within each rank,
     * the threads print in order of their thread id. */
    for (i = 0; i < mpiSize; i++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if (i == mpiRank) {
            printf("%s: MPI n° %d -> cpuid %d \n", hostname, mpiRank, sched_getcpu());
            #pragma omp parallel private(tid, nthreads, cpuid) shared(i)
            {
                tid = omp_get_thread_num();
                nthreads = omp_get_num_threads();
                cpuid = sched_getcpu();
                /* Spin until it is this thread's turn: j counts the
                 * threads of this rank that have already printed. */
                while (j < tid) {
                    #pragma omp flush(j)
                }
                printf("\t thread n° %d -> cpuid %d on MPI n° %d on %s\n", tid, cpuid, mpiRank, hostname);
                j++;
                #pragma omp flush(j)
            }
        }
    }

    MPI_Finalize();
    return 0;
}
#!/bin/bash
#SBATCH -J omp
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --time=0:40:00
#SBATCH -C flat|cache
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output omp.output.slurm
set -e
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=2T
export OMP_NUM_THREADS=136
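#Worked example of the rule above (assumption: KNL nodes with 68 physical
#cores, which the flat|cache constraint suggests): KMP_HW_SUBSET=2T selects
#2 hardware threads per core, so OMP_NUM_THREADS = 68 cores * 2 = 136.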
export KMP_AFFINITY=compact,1,0,granularity=fine
module load intel
ulimit -s unlimited
rm -f *.out
srun --cpus-per-task=$SLURM_CPUS_ON_NODE ../../../bin/hello_omp
#!/bin/bash
#SBATCH -J mpi_hello
#SBATCH --ntasks=48
#SBATCH --ntasks-per-node=16
#SBATCH --time=0:40:00
#SBATCH -C BDW28
#SBATCH --exclusive
#SBATCH --output mpi.output.slurm.%J
set -e
module load intel openmpi/icc/2.0.2
ulimit -s unlimited
export MXM_LOG_LEVEL=error
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi_hello
#SBATCH --ntasks=48
#SBATCH --ntasks-per-node=24
#SBATCH --time=0:40:00
#SBATCH -C HSW24
#SBATCH --exclusive
#SBATCH --output mpi.output.slurm.%J
set -e
module load intel openmpi/icc/2.0.2
ulimit -s unlimited
export MXM_LOG_LEVEL=error
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi
#SBATCH --ntasks=176
#SBATCH --time=0:40:00
#EXPLICIT AND
#SBATCH -C [BDW28*8&HSW24*2]
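#A counted AND: the bracketed constraint asks Slurm for exactly 8 nodes with
#the BDW28 feature and 2 nodes with the HSW24 feature. Assuming those names
#mean 28-core Broadwell and 24-core Haswell nodes, that is 8*28 + 2*24 = 272
#cores for the 176 tasks.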
#SBATCH --exclusive
#SBATCH --output mpi.output.slurm
set -e
module load intel openmpi/icc/2.0.2
export MXM_LOG_LEVEL=error
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi_hello
#SBATCH --ntasks=32
#SBATCH --ntasks-per-node=16
#SBATCH --time=0:40:00
#INCLUSIVE OR
#SBATCH -C HSW24|BDW28
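#Plain OR (no brackets): each allocated node only needs one of the listed
#features, so the two nodes of this job may be a mix of HSW24 and BDW28.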
#SBATCH --exclusive
#SBATCH --output mpi.output.slurm.%J
set -e
module load intel openmpi/icc/2.0.2
ulimit -s unlimited
export MXM_LOG_LEVEL=error
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi
#SBATCH --ntasks=32
#SBATCH --ntasks-per-node=16
#SBATCH --time=0:40:00
#EXCLUSIVE OR
#SBATCH -C [HSW24|BDW28]
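#Bracketed OR acts as an exclusive OR: all allocated nodes must carry the
#same feature, i.e. both nodes HSW24 or both nodes BDW28, never a mix.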
#SBATCH --exclusive
#SBATCH --output mpi.output.slurm.%J
set -e
module load intel openmpi/icc/2.0.2
export MXM_LOG_LEVEL=error
srun ../../../bin/hello_mpi
@@ -4,28 +4,30 @@
#SBATCH --ntasks=12
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=14
#SBATCH --time=0:40:00
#SBATCH --time=0:30:00
#SBATCH -C BDW28
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output bdw_hybrid.output.slurm
#SBATCH --output bdw_hybrid.output.slurm.%J
set -e
export I_MPI_DOMAIN=auto
export I_MPI_PIN_RESPECT_CPUSET=0
export I_MPI_DEBUG=4
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=14
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=14
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
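#Worked example of the rule above: with KMP_HW_SUBSET=1T (one hardware thread
#per core) and cpus-per-task=14, OMP_NUM_THREADS = 14 * 1 = 14; the two ranks
#per node then fill a BDW28 node (assuming 28 cores per node).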
module load intel intelmpi
ulimit -s unlimited
rm -f *.out
srun ../../../bin/hello_hybrid
@@ -12,20 +12,22 @@
set -e
export I_MPI_DOMAIN=auto
export I_MPI_PIN_RESPECT_CPUSET=0
export I_MPI_DEBUG=4
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=12
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=12
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
module load intel intelmpi
ulimit -s unlimited
rm -f *.out
srun ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J and_hybrid
#SBATCH --threads-per-core=1
#SBATCH --cpus-per-task=12
#SBATCH --time=0:40:00
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output and_hybrid.output.slurm
#EXPLICIT AND
#SBATCH --constraint="[HSW24*4&BDW28*3]"
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=12
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=12
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
ulimit -s unlimited
srun ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J or_hybrid
#SBATCH -N 3
#SBATCH --threads-per-core=1
#SBATCH --cpus-per-task=4
#SBATCH --time=0:30:00
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output or_hybrid.output.slurm.%J
#OR
#SBATCH -C HSW24|BDW28
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=4
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# export NCPUS=$((($SLURM_CPUS_ON_NODE/$OMP_NUM_THREADS)*$SLURM_NNODES))
# ulimit -s unlimited
# srun -n $NCPUS ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=4
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
export NCPUS=$((($SLURM_CPUS_ON_NODE/$OMP_NUM_THREADS)*$SLURM_NNODES))
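#Worked example of the NCPUS formula (assumption: each node exposes 24 CPUs
#to the job, i.e. HSW24 nodes with --threads-per-core=1): 24 / 4 threads =
#6 ranks per node, times SLURM_NNODES=3 nodes = 18 MPI ranks launched by srun.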
ulimit -s unlimited
srun -n $NCPUS ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J or_hybrid
#SBATCH --ntasks=10
#SBATCH --threads-per-core=1
#SBATCH --cpus-per-task=4
#SBATCH --time=0:30:00
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output or_hybrid.output.slurm.%J
#OR
#SBATCH -C BDW28|HSW24
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=4
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=4
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
ulimit -s unlimited
srun ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J xor_hybrid
#SBATCH -N 3
#SBATCH --threads-per-core=1
#SBATCH --cpus-per-task=4
#SBATCH --time=0:40:00
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output xor_hybrid.output.slurm
#EXCLUSIVE OR
#SBATCH -C [HSW24|BDW28]
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=4
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# export NCPUS=$((($SLURM_CPUS_ON_NODE/$OMP_NUM_THREADS)*$SLURM_NNODES))
# ulimit -s unlimited
# srun -n $NCPUS ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=4
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
export NCPUS=$((($SLURM_CPUS_ON_NODE/$OMP_NUM_THREADS)*$SLURM_NNODES))
ulimit -s unlimited
srun -n $NCPUS ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J xor_hybrid
#SBATCH --ntasks=10
#SBATCH --threads-per-core=1
#SBATCH --cpus-per-task=4
#SBATCH --time=0:40:00
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output xor_hybrid.output.slurm
#EXCLUSIVE OR
#SBATCH -C [HSW24|BDW28]
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=4
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=4
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
ulimit -s unlimited
srun ../../../bin/hello_hybrid
#!/bin/bash
#SBATCH -J skl224_hybrid
#SBATCH --nodes=1
#SBATCH --ntasks=8
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=28
#SBATCH --time=0:40:00
#SBATCH -C SKL224
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output skl_hybrid.output.slurm
set -e
#####Intelmpi
# module load intel intelmpi
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# #Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
# export KMP_HW_SUBSET=1T
# export OMP_NUM_THREADS=12
# export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
# ulimit -s unlimited
# srun ../../../bin/hello_hybrid
#####Openmpi
module load intel/18.1 openmpi/intel/2.0.2
#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=28
export KMP_AFFINITY=verbose,compact,1,0,granularity=fine
ulimit -s unlimited
srun ../../../bin/hello_hybrid
@@ -11,15 +11,27 @@
set -e
export I_MPI_DOMAIN=auto
export I_MPI_PIN_RESPECT_CPUSET=0
export I_MPI_DEBUG=4
module load intel intelmpi
#####Intelmpi automatic placement
# module load intel/18.1 intelmpi/2018.1.163
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# ulimit -s unlimited
# srun ../../../bin/hello_mpi
#####Intelmpi with explicit placement for mpirun
# module load intel/18.1 intelmpi/2018.1.163
# export SLURM_CPU_BIND=NONE
# export I_MPI_PIN=1
# export I_MPI_PIN_PROCESSOR_LIST=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
# ulimit -s unlimited
# mpirun ../../../bin/hello_mpi
####Openmpi automatic placement
module load intel/18.1 openmpi/intel/2.0.2
ulimit -s unlimited
srun ../../../bin/hello_mpi
@@ -4,7 +4,7 @@
#SBATCH --ntasks=168
#SBATCH --ntasks-per-node=24
#SBATCH --cpus-per-task=1
#SBATCH --time=0:40:00
#SBATCH --time=0:30:00
#SBATCH -C HSW24
#SBATCH --exclusive
#SBATCH --mem=50GB
@@ -12,15 +12,14 @@
set -e
export I_MPI_DOMAIN=auto
export I_MPI_PIN_RESPECT_CPUSET=0
export I_MPI_DEBUG=4
module load intel intelmpi
#####Intelmpi automatic placement
# module load intel/18.1 intelmpi/2018.1.163
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# ulimit -s unlimited
# srun ../../../bin/hello_mpi
####Openmpi automatic placement
module load intel/18.1 openmpi/intel/2.0.2
ulimit -s unlimited
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi_hello
#SBATCH --ntasks=128
#SBATCH --threads-per-core=1
#SBATCH --time=0:30:00
#EXPLICIT AND
#SBATCH --constraint="[HSW24*3&BDW28*2]"
#SBATCH --exclusive
#SBATCH --mem=50GB
#SBATCH --output h_and_b_mpi.output.slurm
set -e
#####Intelmpi automatic placement
# module load intel/18.1 intelmpi/2018.1.163
# export I_MPI_DOMAIN=auto
# export I_MPI_PIN_RESPECT_CPUSET=0
# ulimit -s unlimited
# srun ../../../bin/hello_mpi
####Openmpi automatic placement
module load intel/18.1 openmpi/intel/2.0.2
ulimit -s unlimited
srun ../../../bin/hello_mpi
#!/bin/bash
#SBATCH -J mpi_hello
#SBATCH -N 20
#SBATCH --threads-per-core=1
#SBATCH --time=0:40:00
#OR
#SBATCH -C HSW24|BDW28
#SBATCH --exclusive