#!/bin/bash
#SBATCH -N 64
#SBATCH -n 128               # Number of MPI processes
#SBATCH -c 14              # Number of threads
#SBATCH -t 2:0:0
#SBATCH -J abinit_test_ti255
#SBATCH -C BDW28
#SBATCH --cpu-freq=2600000   # fixed CPU frequency at 2.6GHz

# Run from the directory the job was submitted from. Abort if Slurm did not
# provide it: with an unset/empty value a bare `cd` silently switches to $HOME
# and the run would read and write its files in the wrong place.
cd "${SLURM_SUBMIT_DIR:?SLURM_SUBMIT_DIR is not set; submit this script with sbatch}" || exit 1

# Start from a clean module environment, then load the Intel compiler runtime
# and Intel MPI versions this job uses. Stop early if they cannot be loaded
# instead of failing later on every MPI rank.
module purge
module load intel/19.4 intelmpi/2019.4.243 || {
  printf 'ERROR: could not load modules intel/19.4 intelmpi/2019.4.243\n' >&2
  exit 1
}

# --- Stack limits -----------------------------------------------------------
# Raise the process stack limit (bash `ulimit -s` takes units of 1024 bytes,
# so 2000000 is roughly 2 GB) and give each OpenMP thread a 500 MB stack.
ulimit -s 2000000
export OMP_STACKSIZE=500M
#export KMP_DETERMINISTIC_REDUCTION=1

# --- MPI / interconnect settings --------------------------------------------
# Drop any inherited Intel MPI DAPL direct-copy threshold so the library
# default applies.
unset I_MPI_DAPL_DIRECT_COPY_THRESHOLD
# NOTE(review): presumably keeps the Mellanox verbs libraries in their
# thread-safe (locking) mode for the hybrid MPI+OpenMP run -- confirm.
export MLX5_SINGLE_THREADED=0 MLX4_SINGLE_THREADED=0

# Alternative toolchain (disabled): a locally built OpenMPI under $SCRATCHDIR.
#module load openmpi/intel
#export PATH=$SCRATCHDIR/AO2020/OPENMPI/bin:$PATH
#export LD_LIBRARY_PATH=$SCRATCHDIR/AO2020/OPENMPI/lib:$LD_LIBRARY_PATH
#export LIBRARY_PATH=$SCRATCHDIR/AO2020/OPENMPI/lib:$LIBRARY_PATH
#export CPATH=$SCRATCHDIR/AO2020/OPENMPI/include:$CPATH
#export KMP_DETERMINISTIC_REDUCTION=1

# OCCIGEN OMP_NUM_THREADS=1 - Proc.   0 individual time (sec): cpu=      18945.1  wall=      18962.5
# OCCIGEN OMP_NUM_THREADS=2 - Proc.   0 individual time (sec): cpu=      36255.7  wall=      18135.0
# OCCIGEN OMP_NUM_THREADS=12 - Proc.   0 individual time (sec): cpu=      40857.9  wall=       3407.8


# --- Intel OpenMP runtime behaviour -----------------------------------------
# turnaround + infinite blocktime: worker threads keep spinning between
# parallel regions instead of going to sleep (suited to dedicated nodes).
export KMP_LIBRARY=turnaround
export KMP_BLOCKTIME=infinite
# Pin each thread to a single hardware thread, packed compactly
# (permute=1, offset=0); `verbose` prints the resulting binding at start-up.
export KMP_AFFINITY=verbose,granularity=fine,compact,1,0

# Threads per MPI rank, for both OpenMP regions and threaded MKL calls.
# NOTE(review): the job header requests 14 CPUs per task (#SBATCH -c 14) but
# only 12 threads are used here -- confirm this is intentional.
export OMP_NUM_THREADS=12
export MKL_NUM_THREADS=12

# --- ABINIT installation ----------------------------------------------------
abinit_root=/scratch/cnusc/dci/cirou/AO2020/ABINIT
export PATH="${abinit_root}/bin:${PATH}"
# Append the previous value only when it is non-empty: a trailing ':' would
# create an empty entry, which the dynamic loader treats as the current
# working directory.
export LD_LIBRARY_PATH="${abinit_root}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Set up the environment for the `aps` profiling wrapper shipped with
# VTune Amplifier 2019. Abort if the script is missing, otherwise every rank
# would fail with "aps: command not found".
aps_env=/opt/software/common/intel/vtune_amplifier_2019/apsvars.sh
# shellcheck source=/dev/null
source "${aps_env}" || {
  printf 'ERROR: cannot source %s\n' "${aps_env}" >&2
  exit 1
}

# Slurm >= 22.05: srun no longer inherits --cpus-per-task from sbatch, so pass
# it on explicitly through the environment (harmless on older versions).
if [[ -n "${SLURM_CPUS_PER_TASK:-}" ]]; then
  export SRUN_CPUS_PER_TASK="${SLURM_CPUS_PER_TASK}"
fi

# Launch ABINIT under aps on all ranks at the fixed 2.6 GHz frequency;
# stdout and stderr both go to a per-job log file. This is the last command,
# so the job's exit status is srun's.
srun --cpu-freq=2600000 aps abinit -i abinit_test_ti255.files \
  > "abinit_test_ti255.log.${SLURM_JOBID}" 2>&1

# Post-processing (disabled): generate the aps report from the result directory.
#aps --report=aps_result_*