Parallel Computing with MPI
mpiexec -x -np 2 xterm -e cuda-gdb ./myapp
mpirun -np 2 nvprof --log-file profile.out.%p
nvprof ./addMatrices -n 4000
cuda-memcheck ./memcheck_demo
MV2_USE_CUDA=1 mpirun -np 4 nvprof --output-profile profile.%p.nvprof ./main [args]
Setting MV2_USE_CUDA=1 makes MVAPICH2 CUDA-aware.
MV2_USE_CUDA=1 mpirun -np 1 nvprof --kernels gpu_GEMM --analysis-metrics --output-profile GEMMmetrics.out.%p.nvprof ./main [args]
nvvp &