Skip to content

Commit 1636262

Browse files
committed
Improved device affinity handling to allow using CUDA_VISIBLE_DEVICES to handle device affinity.
1 parent 4c9806c commit 1636262

File tree

6 files changed

+58
-36
lines changed

6 files changed

+58
-36
lines changed

mpi/jacobi.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ int main(int argc, char* argv[]) {
158158
MPI_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
159159
int size;
160160
MPI_CALL(MPI_Comm_size(MPI_COMM_WORLD, &size));
161+
int num_devices = 0;
162+
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
161163
162164
const int iter_max = get_argval<int>(argv, argv + argc, "-niter", 1000);
163165
const int nccheck = get_argval<int>(argv, argv + argc, "-nccheck", 1);
@@ -176,7 +178,7 @@ int main(int argc, char* argv[]) {
176178
MPI_CALL(MPI_Comm_free(&local_comm));
177179
}
178180
179-
CUDA_RT_CALL(cudaSetDevice(local_rank));
181+
CUDA_RT_CALL(cudaSetDevice(local_rank%num_devices));
180182
CUDA_RT_CALL(cudaFree(0));
181183
182184
real* a_ref_h;

mpi_overlap/jacobi.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ int main(int argc, char* argv[]) {
158158
MPI_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
159159
int size;
160160
MPI_CALL(MPI_Comm_size(MPI_COMM_WORLD, &size));
161+
int num_devices = 0;
162+
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
161163
162164
const int iter_max = get_argval<int>(argv, argv + argc, "-niter", 1000);
163165
const int nccheck = get_argval<int>(argv, argv + argc, "-nccheck", 1);
@@ -177,7 +179,7 @@ int main(int argc, char* argv[]) {
177179
MPI_CALL(MPI_Comm_free(&local_comm));
178180
}
179181
180-
CUDA_RT_CALL(cudaSetDevice(local_rank));
182+
CUDA_RT_CALL(cudaSetDevice(local_rank%num_devices));
181183
CUDA_RT_CALL(cudaFree(0));
182184
183185
real* a_ref_h;

nccl/jacobi.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ int main(int argc, char* argv[]) {
161161
MPI_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
162162
int size;
163163
MPI_CALL(MPI_Comm_size(MPI_COMM_WORLD, &size));
164+
int num_devices = 0;
165+
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
164166

165167
ncclUniqueId nccl_uid;
166168
if (rank == 0) NCCL_CALL(ncclGetUniqueId(&nccl_uid));
@@ -173,17 +175,29 @@ int main(int argc, char* argv[]) {
173175
const bool csv = get_arg(argv, argv + argc, "-csv");
174176

175177
int local_rank = -1;
178+
int local_size = 1;
176179
{
177180
MPI_Comm local_comm;
178181
MPI_CALL(MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL,
179182
&local_comm));
180183

181184
MPI_CALL(MPI_Comm_rank(local_comm, &local_rank));
185+
MPI_CALL(MPI_Comm_size(local_comm, &local_size));
182186

183187
MPI_CALL(MPI_Comm_free(&local_comm));
184188
}
185-
186-
CUDA_RT_CALL(cudaSetDevice(local_rank));
189+
if ( 1 < num_devices && num_devices < local_size )
190+
{
191+
fprintf(stderr,"ERROR Number of visible devices (%d) is less than number of ranks on the node (%d)!\n", num_devices, local_size);
192+
MPI_CALL(MPI_Finalize());
193+
return 1;
194+
}
195+
if ( 1 == num_devices ) {
196+
// Only 1 device visible; assuming GPU affinity is handled via CUDA_VISIBLE_DEVICES
197+
CUDA_RT_CALL(cudaSetDevice(0));
198+
} else {
199+
CUDA_RT_CALL(cudaSetDevice(local_rank));
200+
}
187201
CUDA_RT_CALL(cudaFree(0));
188202

189203
ncclComm_t nccl_comm;

nccl_overlap/jacobi.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ int main(int argc, char* argv[]) {
161161
MPI_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
162162
int size;
163163
MPI_CALL(MPI_Comm_size(MPI_COMM_WORLD, &size));
164+
int num_devices = 0;
165+
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
164166

165167
ncclUniqueId nccl_uid;
166168
if (rank == 0) NCCL_CALL(ncclGetUniqueId(&nccl_uid));
@@ -173,17 +175,29 @@ int main(int argc, char* argv[]) {
173175
const bool csv = get_arg(argv, argv + argc, "-csv");
174176

175177
int local_rank = -1;
178+
int local_size = 1;
176179
{
177180
MPI_Comm local_comm;
178181
MPI_CALL(MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL,
179182
&local_comm));
180183

181184
MPI_CALL(MPI_Comm_rank(local_comm, &local_rank));
185+
MPI_CALL(MPI_Comm_size(local_comm, &local_size));
182186

183187
MPI_CALL(MPI_Comm_free(&local_comm));
184188
}
185-
186-
CUDA_RT_CALL(cudaSetDevice(local_rank));
189+
if ( 1 < num_devices && num_devices < local_size )
190+
{
191+
fprintf(stderr,"ERROR Number of visible devices (%d) is less than number of ranks on the node (%d)!\n", num_devices, local_size);
192+
MPI_CALL(MPI_Finalize());
193+
return 1;
194+
}
195+
if ( 1 == num_devices ) {
196+
// Only 1 device visible; assuming GPU affinity is handled via CUDA_VISIBLE_DEVICES
197+
CUDA_RT_CALL(cudaSetDevice(0));
198+
} else {
199+
CUDA_RT_CALL(cudaSetDevice(local_rank));
200+
}
187201
CUDA_RT_CALL(cudaFree(0));
188202

189203
ncclComm_t nccl_comm;

nvshmem/jacobi.cu

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -231,28 +231,23 @@ int main(int argc, char* argv[]) {
231231
int num_devices;
232232
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
233233

234-
int local_rank = -1, local_size = 1;
234+
int local_rank = -1;
235+
int local_size = 1;
235236
{
236237
MPI_Comm local_comm;
237-
MPI_Info info;
238-
MPI_CALL(MPI_Info_create(&info));
239-
MPI_CALL(
240-
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, info, &local_comm));
238+
MPI_CALL(MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL,
239+
&local_comm));
241240

242241
MPI_CALL(MPI_Comm_rank(local_comm, &local_rank));
243242
MPI_CALL(MPI_Comm_size(local_comm, &local_size));
244-
if (num_devices < local_size) {
245-
fprintf(stderr,
246-
"ERROR: Number of devices is less numer of PEs \
247-
on the node!\n");
248-
MPI_CALL(MPI_Comm_free(&local_comm));
249-
MPI_CALL(MPI_Info_free(&info));
250-
MPI_CALL(MPI_Finalize());
251-
return -1;
252-
}
253243

254244
MPI_CALL(MPI_Comm_free(&local_comm));
255-
MPI_CALL(MPI_Info_free(&info));
245+
}
246+
if ( num_devices < local_size )
247+
{
248+
fprintf(stderr,"ERROR Number of visible devices (%d) is less than number of ranks on the node (%d)!\n", num_devices, local_size);
249+
MPI_CALL(MPI_Finalize());
250+
return 1;
256251
}
257252
CUDA_RT_CALL(cudaSetDevice(local_rank));
258253
CUDA_RT_CALL(cudaFree(0));

nvshmem_opt/jacobi.cu

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -256,28 +256,23 @@ int main(int argc, char* argv[]) {
256256
int num_devices;
257257
CUDA_RT_CALL(cudaGetDeviceCount(&num_devices));
258258

259-
int local_rank = -1, local_size = 1;
259+
int local_rank = -1;
260+
int local_size = 1;
260261
{
261262
MPI_Comm local_comm;
262-
MPI_Info info;
263-
MPI_CALL(MPI_Info_create(&info));
264-
MPI_CALL(
265-
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, info, &local_comm));
263+
MPI_CALL(MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL,
264+
&local_comm));
266265

267266
MPI_CALL(MPI_Comm_rank(local_comm, &local_rank));
268267
MPI_CALL(MPI_Comm_size(local_comm, &local_size));
269-
if (num_devices < local_size) {
270-
fprintf(stderr,
271-
"ERROR: Number of devices is less numer of PEs \
272-
on the node!\n");
273-
MPI_CALL(MPI_Comm_free(&local_comm));
274-
MPI_CALL(MPI_Info_free(&info));
275-
MPI_CALL(MPI_Finalize());
276-
return -1;
277-
}
278268

279269
MPI_CALL(MPI_Comm_free(&local_comm));
280-
MPI_CALL(MPI_Info_free(&info));
270+
}
271+
if ( num_devices < local_size )
272+
{
273+
fprintf(stderr,"ERROR Number of visible devices (%d) is less than number of ranks on the node (%d)!\n", num_devices, local_size);
274+
MPI_CALL(MPI_Finalize());
275+
return 1;
281276
}
282277
CUDA_RT_CALL(cudaSetDevice(local_rank));
283278
CUDA_RT_CALL(cudaFree(0));

0 commit comments

Comments
 (0)