Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] ENH: DAOS and DFS modules #1014

Open
wants to merge 60 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
e2073ce
initial stubbed out DAOS DFS module
Jan 19, 2021
daace3c
first cut at entire dfs runtime/util code
Jan 27, 2021
c2d99f1
autoconf/automake support for daos module
Apr 29, 2022
8d3beca
adopt new darshan-core module api
Apr 29, 2022
6762c06
fix up new compile errors/warnings
Apr 29, 2022
b2b239b
teach automake about daos ld-opts
Apr 29, 2022
6240488
updated comments on missing functionality
May 11, 2022
7e16b75
comment out move/exchange wrappers, need more work
Jun 24, 2022
a090647
changes to support instrumenting obj_global2local
Jun 24, 2022
7ae1129
add example log to temporarily test with
Aug 15, 2022
da89496
MAINT: PR 739 revisions
tylerjereddy Aug 17, 2022
073f639
added new IOR example log files
Aug 18, 2022
7c0979b
MAINT: PR 739 revisions
tylerjereddy Aug 18, 2022
0f46d6a
MAINT: PR 739 revisions
tylerjereddy Aug 18, 2022
0aa838f
MAINT: PR 739 revisions
tylerjereddy Aug 18, 2022
4b85588
MAINT: PR 739 revisions
tylerjereddy Sep 4, 2022
804a2b7
rename existing DAOS files to DFS
Jun 21, 2023
e76b437
fix header guard
Jun 21, 2023
7bf1463
instrument initial DAOS obj/array routines
Dec 5, 2023
e00d27a
more instrumentation of native daos APIs
Jan 9, 2024
bbd8000
more includes needed for DAOS header ac checks
Jan 10, 2024
0365cef
use filename rather than OID to generate DFS IDs
Jan 24, 2024
3f6f845
Revert "use filename rather than OID to generate DFS IDs"
Jan 30, 2024
3f02d83
make sure to set object oid in daos redux
Feb 13, 2024
5b12999
add DAOS module access size histogram
Feb 14, 2024
98fe560
add wrapper for dfs_remove
Feb 14, 2024
aec7a23
only instrument DFS calls if no error
Feb 14, 2024
073075a
move locking for DAOS module
Feb 14, 2024
542fae7
add daos kv api instrumentation
Feb 15, 2024
1275d03
drop support for dfs_stat, dfs_move, dfs_exchange
Feb 16, 2024
3462d49
add more DAOS counters
Feb 19, 2024
da8f909
generate record id properly for dfs_remove
Feb 19, 2024
3eddf10
bug fix in DAOS IOD size calculation
Feb 19, 2024
ce7c416
don't count kv_get size if no buf given
Feb 20, 2024
c393674
don't get oclass_name
Feb 20, 2024
ebec3fb
store DFS pool/cont UUIDs in file records
Feb 21, 2024
e845c8f
update DAOS module to include pool/cont uuids
Feb 27, 2024
53e52a2
refactor darshan_core_register_record
Feb 27, 2024
d74235f
add darshan-util code to handle multiple namerecs
Feb 27, 2024
db2c2f6
improved comments
Feb 28, 2024
7bfd7fc
finish implementing DAOS/DFS logutils functions
Feb 28, 2024
15bef37
add accumulator and advanced parser support
Feb 29, 2024
b7102e6
updated comments about global2local support
Mar 5, 2024
ff0d0ac
proper size calculation for array API
Mar 8, 2024
331dc95
drop DFS_USE_DTX counter
Mar 8, 2024
8d9fb59
filter out DFS records that do no I/O operations
Mar 8, 2024
8a4cc35
cleanup some runtime daos/dfs code
Nov 4, 2024
4d573cb
updated darshan-runtime docs for daos
shanedsnyder Nov 4, 2024
2adec07
updated darshan-util docs for daos
shanedsnyder Nov 4, 2024
31f45f2
small darshan-util tweaks
Nov 5, 2024
a3f2b32
add checks for libuuid to darshan-util configure
shanedsnyder Nov 6, 2024
c83718f
updated pydarshan for DFS module
shanedsnyder Nov 9, 2024
1f0532b
pydarshan updates for DAOS module
shanedsnyder Nov 11, 2024
4f99f64
forgot to compare DFS vs DAOS values
shanedsnyder Nov 11, 2024
b14c589
drop DAOS records with no real I/O activity
shanedsnyder Nov 11, 2024
47a1a97
updated pydarshan to support DFS/DAOS heatmaps
shanedsnyder Nov 11, 2024
1320444
enforce order for DAOS heatmag figs
shanedsnyder Nov 12, 2024
24b1891
add more DAOS ops to opcount plots
shanedsnyder Nov 12, 2024
e44ea71
generate module overview table for DAOS
shanedsnyder Nov 12, 2024
9d96289
update DAOS opcount tests
shanedsnyder Nov 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions darshan-runtime/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,46 @@ if test "x$enable_darshan_runtime" = xyes ; then
use --with-pnetcdf to provide the PnetCDF install prefix, if needed.]))
fi

# inform about DAOS installs not found in default locations
# The option takes an optional DIR value; with_daos is set to "no" when the
# option is not given on the command line.
# NOTE(review): with_daos is not consumed in this section; presumably it is
# turned into compiler/linker flags elsewhere in this file -- verify.
AC_ARG_WITH([daos],
[AS_HELP_STRING([--with-daos@<:@=DIR@:>@],
[Installation directory for DAOS.])],
[], [with_daos=no]
)

# DAOS module (disabled by default)
# enable_daos_mod is set to "no" when the option is not given.
AC_ARG_ENABLE([daos-mod],
[AS_HELP_STRING([--enable-daos-mod],
[Enables compilation and use of DAOS module])],
[], [enable_daos_mod=no]
)
# When the module is enabled, each listed DAOS header is checked and configure
# stops with an error if one is not found. The last argument of the check is
# the include prologue compiled ahead of each tested header.
# NOTE(review): the prologue has stanzas for daos_types.h, daos_prop.h,
# daos_cont.h, daos_obj.h and daos_array.h but none for daos_pool.h, although
# daos_pool.h is in the checked list -- confirm the omission is intentional.
if test "x$enable_daos_mod" = xyes ; then
AC_CHECK_HEADERS(m4_normalize([daos_types.h daos_prop.h daos_pool.h daos_cont.h
daos_obj.h daos_array.h daos_fs.h]),
[],
[AC_MSG_ERROR([Cannot find required DAOS headers])],
[[
#ifdef HAVE_DAOS_TYPES_H
# include <daos_types.h>
#endif
#ifdef HAVE_DAOS_PROP_H
# include <daos_prop.h>
#endif
#ifdef HAVE_DAOS_CONT_H
# include <daos_cont.h>
#endif
#ifdef HAVE_DAOS_OBJ_H
# include <daos_obj.h>
#endif
#ifdef HAVE_DAOS_ARRAY_H
# include <daos_array.h>
#endif
]])
# Any value other than "yes" or "no" is rejected: the enable switch is a plain
# on/off flag, and an install prefix belongs to the separate with-daos option.
elif test "x$enable_daos_mod" != xno ; then
AC_MSG_ERROR(m4_normalize([--enable-daos-mod does not take any argument,
use --with-daos to provide the DAOS install prefix, if needed.]))
fi

# BG/Q module
AC_ARG_ENABLE([bgq-mod],
[AS_HELP_STRING([--disable-bgq-mod],
Expand Down Expand Up @@ -809,6 +849,7 @@ AM_CONDITIONAL(BUILD_MDHIM_MODULE, [test "x$enable_mdhim_mod" = xyes])
AM_CONDITIONAL(BUILD_APMPI_MODULE, [test "x$enable_apmpi_mod" = xyes])
AM_CONDITIONAL(BUILD_APXC_MODULE, [test "x$enable_apxc_mod" = xyes])
AM_CONDITIONAL(BUILD_HEATMAP_MODULE,[test "x$enable_heatmap_mod" = xyes])
AM_CONDITIONAL(BUILD_DAOS_MODULE, [test "x$enable_daos_mod" = xyes])
AM_CONDITIONAL(HAVE_LDMS, [test "x$enable_ldms_mod" = xyes])

AC_CONFIG_FILES(Makefile \
Expand Down Expand Up @@ -884,6 +925,7 @@ if test "x$enable_darshan_runtime" = xyes ; then
AUTOPERF MPI module support - $enable_apmpi_mod
AUTOPERF XC module support - $enable_apxc_mod
HDF5 module support - $enable_hdf5_mod
DAOS module support - $enable_daos_mod
PnetCDF module support - $enable_pnetcdf_mod
BG/Q module support - $enable_bgq_mod
Lustre module support - $enable_lustre_mod
Expand Down
4 changes: 4 additions & 0 deletions darshan-runtime/doc/darshan-runtime.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ make install
** NOTE: PnetCDF instrumentation only works on PnetCDF library versions >=1.8
* `--disable-lustre-mod`: disables compilation and use of Darshan's Lustre
module (default=enabled)
* `--enable-daos-mod`: enables compilation and use of Darshan's DAOS module
(default=disabled)
* `--with-daos=DIR`: installation directory for DAOS
** NOTE: Users must pass `--enable-daos-mod` to enable the DAOS modules; `--with-daos` does not enable them and is only used to additionally provide a DAOS install prefix.
* `--enable-mdhim-mod`: enables compilation and use of Darshan's MDHIM module
(default=disabled)
* `--enable-ldms-mod`: enables compilation and use of Darshan’s LDMS runtime module (default=disabled)
Expand Down
9 changes: 8 additions & 1 deletion darshan-runtime/lib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ if BUILD_HEATMAP_MODULE
AM_CPPFLAGS += -DDARSHAN_HEATMAP
endif

# When the DAOS module is enabled, compile both the DFS and the DAOS wrapper
# sources and define DARSHAN_DAOS for the preprocessor.
if BUILD_DAOS_MODULE
C_SRCS += darshan-dfs.c darshan-daos.c
AM_CPPFLAGS += -DDARSHAN_DAOS
endif

.m4.c:
$(M4) $(AM_M4FLAGS) $(M4FLAGS) $< >$@

Expand Down Expand Up @@ -138,5 +143,7 @@ EXTRA_DIST = $(H_SRCS) \
darshan-bgq.c \
darshan-lustre.c \
darshan-mdhim.c \
darshan-heatmap.c
darshan-heatmap.c \
darshan-dfs.c \
darshan-daos.c

145 changes: 83 additions & 62 deletions darshan-runtime/lib/darshan-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static int darshan_should_instrument_rank(
struct darshan_core_runtime *core);
static void darshan_fs_info_from_path(
const char *path, struct darshan_fs_info *fs_info);
static int darshan_add_name_record_ref(
static int darshan_update_name_record_ref(
struct darshan_core_runtime *core, darshan_record_id rec_id,
const char *name, darshan_module_id mod_id);
static void darshan_get_user_name(
Expand Down Expand Up @@ -1301,51 +1301,87 @@ static void darshan_fs_info_from_path(const char *path, struct darshan_fs_info *
return;
}

static int darshan_add_name_record_ref(struct darshan_core_runtime *core,
static int darshan_update_name_record_ref(struct darshan_core_runtime *core,
darshan_record_id rec_id, const char *name, darshan_module_id mod_id)
{
struct darshan_core_name_record_ref *ref;
struct darshan_core_name_record_ref *check_ref;
int record_size = sizeof(darshan_record_id) + strlen(name) + 1;
int is_new_rec = 0;
struct darshan_core_name_record_ref *ref, *check_ref;

if((record_size + core->name_mem_used) > core->config.name_mem)
return(0);
/* if no name given, use the empty string */
if(!name) name = "";

/* drop core lock while we allocate reference. Note that
* this means we must check for existence again in hash table once we
* re-acquire the lock, but this code path will only happen once per
* file.
*/
__DARSHAN_CORE_UNLOCK();
ref = malloc(sizeof(*ref));
__DARSHAN_CORE_LOCK();
/* check to see if we've already stored the id->name mapping for this record */
HASH_FIND(hlink, core->name_hash, &rec_id, sizeof(rec_id), ref);
if(!ref)
{
return(0);
}
memset(ref, 0, sizeof(*ref));
/* drop core lock while we allocate reference. Note that
* this means we must check for existence again in hash table once we
* re-acquire the lock, but this code path will only happen once per
* file.
*/
__DARSHAN_CORE_UNLOCK();
ref = malloc(sizeof(*ref));
__DARSHAN_CORE_LOCK();
if(!ref)
{
return(0);
}
memset(ref, 0, sizeof(*ref));

/* make sure no one else added it while we dropped the lock */
HASH_FIND(hlink, core->name_hash, &rec_id,
sizeof(darshan_record_id), check_ref);
if(check_ref)
return(1);
HASH_FIND(hlink, core->name_hash, &rec_id, sizeof(rec_id), check_ref);
if(check_ref)
{
/* someone else added the ref while we dropped the lock */
free(ref);
ref = check_ref;
}
else
{
/* we need to allocate and add a new record ref */
is_new_rec = 1;
}
}

/* initialize the name record */
ref->name_record = (struct darshan_name_record *)
((char *)core->log_name_p + core->name_mem_used);
memset(ref->name_record, 0, record_size);
ref->name_record->id = rec_id;
strcpy(ref->name_record->name, name);
DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
/* set a new name record reference in 2 scenarios:
* 1.) creation of a new record ref
* 2.) detecting zero-length name on an existing record ref
* (i.e., initial creator of the ref didn't specify a name)
*/
if(is_new_rec || ((strlen(ref->name_record->name) == 0) && strlen(name) > 0))
{
int record_size = sizeof(darshan_record_id) + strlen(name) + 1;
if((record_size + core->name_mem_used) > core->config.name_mem)
{
/* no more room for this name record */
if(is_new_rec) free(ref);
return(0);
}
else
{
/* initialize new name record structure */
ref->name_record = (struct darshan_name_record *)
((char *)core->log_name_p + core->name_mem_used);
memset(ref->name_record, 0, record_size);
ref->name_record->id = rec_id;
strcpy(ref->name_record->name, name);

HASH_ADD(hlink, core->name_hash, name_record->id,
sizeof(darshan_record_id), ref);
core->name_mem_used += record_size;
core->name_mem_used += record_size;
#ifdef __DARSHAN_ENABLE_MMAP_LOGS
core->log_hdr_p->name_map.len += record_size;
core->log_hdr_p->name_map.len += record_size;
#endif
}
}

DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);

if(is_new_rec)
{
/* add new record reference */
HASH_ADD(hlink, core->name_hash, name_record->id,
sizeof(darshan_record_id), ref);
}

/* successfully updated core record ref */
return(1);
}

Expand Down Expand Up @@ -2207,6 +2243,9 @@ static int darshan_core_name_is_excluded(const char *name, darshan_module_id mod
int tmp_index = 0;
struct darshan_core_regex *regex;

if(!name)
return(0);

/* set flag if this module's record names are based on file paths */
name_is_path = 1;
if((mod_id == DARSHAN_APMPI_MOD) || (mod_id == DARSHAN_APXC_MOD) ||
Expand Down Expand Up @@ -2606,9 +2645,7 @@ void *darshan_core_register_record(
size_t rec_size,
struct darshan_fs_info *fs_info)
{
struct darshan_core_name_record_ref *ref;
void *rec_buf;
int ret;

__DARSHAN_CORE_LOCK();
if(!__darshan_core)
Expand All @@ -2625,35 +2662,19 @@ void *darshan_core_register_record(
return(NULL);
}

/* register a name record if a name is given for this record */
if(name)
if(darshan_core_name_is_excluded(name, mod_id))
{
if(darshan_core_name_is_excluded(name, mod_id))
{
/* do not register record if name matches any exclusion rules */
__DARSHAN_CORE_UNLOCK();
return(NULL);
}
/* do not register record if name matches any exclusion rules */
__DARSHAN_CORE_UNLOCK();
return(NULL);
}

/* check to see if we've already stored the id->name mapping for
* this record, and add a new name record if not
*/
HASH_FIND(hlink, __darshan_core->name_hash, &rec_id,
sizeof(darshan_record_id), ref);
if(!ref)
if(!darshan_update_name_record_ref(__darshan_core, rec_id, name, mod_id))
{
ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id);
if(ret == 0)
{
DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
__DARSHAN_CORE_UNLOCK();
return(NULL);
}
}
else
{
DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
/* unable to update record ref, fail and set this module's partial flag */
DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
__DARSHAN_CORE_UNLOCK();
return(NULL);
}

__darshan_core->mod_array[mod_id]->rec_mem_avail -= rec_size;
Expand Down Expand Up @@ -2683,7 +2704,7 @@ void *darshan_core_register_record(
if(fs_info)
darshan_fs_info_from_path(name, fs_info);

return(rec_buf);;
return(rec_buf);
}

char *darshan_core_lookup_record_name(darshan_record_id rec_id)
Expand Down
Loading
Loading