Skip to content

Commit

Permalink
Properly build the nidmap
Browse files: browse the repository at this point in the history
When operating in a managed environment where the
user subdivides the allocation (e.g., by providing
a hostfile that contains only some of the allocated
nodes), building the nidmap overran the array
allocated for daemon vpids. Skip nodes that have no
daemon when creating the nidmap so that every recorded
node has a valid daemon vpid and the vpid entries are
indexed properly, avoiding the memory corruption.

Signed-off-by: Ralph Castain <[email protected]>
(cherry picked from commit cfda800)
  • Loading branch information
rhc54 committed Nov 22, 2024
1 parent 2dc54cc commit 9c901be
Showing 1 changed file with 17 additions and 20 deletions.
37 changes: 17 additions & 20 deletions src/util/nidmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ int prte_util_nidmap_create(pmix_pointer_array_t *pool, pmix_data_buffer_t *buff
if (NULL == (nptr = (prte_node_t *) pmix_pointer_array_get_item(pool, n))) {
continue;
}
if (NULL == nptr->daemon) {
continue;
}
/* add the hostname to the argv */
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&names, nptr->name);
als = NULL;
Expand All @@ -101,11 +104,7 @@ int prte_util_nidmap_create(pmix_pointer_array_t *pool, pmix_data_buffer_t *buff
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&aliases, "PRTENONE");
}
/* store the vpid */
if (NULL == nptr->daemon) {
vpids[ndaemons] = PMIX_RANK_INVALID;
} else {
vpids[ndaemons] = nptr->daemon->name.rank;
}
vpids[ndaemons] = nptr->daemon->name.rank;
++ndaemons;
}

Expand Down Expand Up @@ -398,22 +397,20 @@ int prte_util_decode_nidmap(pmix_data_buffer_t *buf)
/* set the topology - always default to homogeneous
* as that is the most common scenario */
nd->topology = t;
/* see if it has a daemon on it */
if (PMIX_RANK_INVALID != vpid[n]) {
proc = (prte_proc_t *) pmix_pointer_array_get_item(daemons->procs, vpid[n]);
if (NULL == proc) {
proc = PMIX_NEW(prte_proc_t);
PMIX_LOAD_PROCID(&proc->name, PRTE_PROC_MY_NAME->nspace, vpid[n]);
proc->state = PRTE_PROC_STATE_RUNNING;
PRTE_FLAG_SET(proc, PRTE_PROC_FLAG_ALIVE);
daemons->num_procs++;
pmix_pointer_array_set_item(daemons->procs, proc->name.rank, proc);
}
PMIX_RETAIN(nd);
proc->node = nd;
PMIX_RETAIN(proc);
nd->daemon = proc;
/* record the daemon on it */
proc = (prte_proc_t *) pmix_pointer_array_get_item(daemons->procs, vpid[n]);
if (NULL == proc) {
proc = PMIX_NEW(prte_proc_t);
PMIX_LOAD_PROCID(&proc->name, PRTE_PROC_MY_NAME->nspace, vpid[n]);
proc->state = PRTE_PROC_STATE_RUNNING;
PRTE_FLAG_SET(proc, PRTE_PROC_FLAG_ALIVE);
daemons->num_procs++;
pmix_pointer_array_set_item(daemons->procs, proc->name.rank, proc);
}
PMIX_RETAIN(nd);
proc->node = nd;
PMIX_RETAIN(proc);
nd->daemon = proc;
}

/* update num procs */
Expand Down

0 comments on commit 9c901be

Please sign in to comment.