[WholeGraph] Rename the flag for wholegraph sparse embedding (#744)
This addresses the concern raised in #737 (comment). It renames the flag `use-wholegraph-sparse-emb` to `use-wholegraph-embed`, for consistency with #737.
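
For context, a rough sketch of the consolidated option, assuming the `--use-wholegraph-embed` flag introduced in #737 follows the same argparse pattern as the flag removed below (the parser/group names and help wording here are illustrative only):

import argparse

# Illustrative stand-in for the hyperparameter group defined in
# python/graphstorm/config/argument.py.
parser = argparse.ArgumentParser()
group = parser.add_argument_group(title="gsgnn hyperparameters")

# Replaces the removed --use-wholegraph-sparse-emb flag; the exact help text
# and semantics come from #737 and are assumed here.
group.add_argument(
    "--use-wholegraph-embed",
    type=lambda x: (str(x).lower() in ['true', '1']),
    default=argparse.SUPPRESS,
    help="Whether to use the WholeGraph library to update learnable node embeddings")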


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
chang-l authored and jalencato committed Feb 21, 2024
1 parent ae7b4c2 commit 58a229d
Showing 12 changed files with 14 additions and 39 deletions.
18 changes: 0 additions & 18 deletions python/graphstorm/config/argument.py
@@ -290,7 +290,6 @@ def verify_arguments(self, is_train):
_ = self.grad_norm_type
_ = self.gnn_norm
_ = self.sparse_optimizer_lr
_ = self.use_wholegraph_sparse_emb
_ = self.num_epochs
_ = self.save_model_path
_ = self.save_model_frequency
@@ -1191,18 +1190,6 @@ def sparse_optimizer_lr(self): # pylint: disable=invalid-name

return self.lr

@property
def use_wholegraph_sparse_emb(self):
""" Whether to use wholegraph for updating learnable node embeddings
"""
# pylint: disable=no-member
if hasattr(self, "_use_wholegraph_sparse_emb"):
assert self._use_wholegraph_sparse_emb in [True, False], \
"Invalid value for _use_wholegraph_sparse_emb. Must be either True or False."
return self._use_wholegraph_sparse_emb
# By default do not use wholegraph for learnable node embeddings
return False

@property
def use_node_embeddings(self):
""" Whether to use extra learnable node embeddings
@@ -2431,11 +2418,6 @@ def _add_hyperparam_args(parser):
type=lambda x: (str(x).lower() in ['true', '1']),
default=argparse.SUPPRESS,
help="Whether to use extra learnable node embeddings")
group.add_argument(
"--use-wholegraph-sparse-emb",
type=lambda x: (str(x).lower() in ['true', '1']),
default=argparse.SUPPRESS,
help="Whether to use WholeGraph library to update learnable node embeddings")
group.add_argument("--construct-feat-ntype", type=str, nargs="+",
help="The node types whose features are constructed from neighbors' features.")
group.add_argument("--construct-feat-encoder", type=str, default=argparse.SUPPRESS,
2 changes: 1 addition & 1 deletion python/graphstorm/gsf.py
@@ -536,7 +536,7 @@ def set_encoder(model, g, config, train_task):
use_node_embeddings=config.use_node_embeddings,
force_no_embeddings=config.construct_feat_ntype,
num_ffn_layers_in_input=config.num_ffn_layers_in_input,
use_wholegraph_sparse_emb=config.use_wholegraph_sparse_emb)
use_wholegraph_sparse_emb=config.use_wholegraph_embed)
# The number of feature dimensions can change. For example, the feature dimensions
# of BERT embeddings are determined when the input encoder is created.
feat_size = encoder.in_dims
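
Only the GSConfig property is renamed here; the encoder keyword argument keeps its old name, so the call site maps the new property onto the existing parameter. A minimal, hypothetical illustration of that mapping (not part of the change):

def encoder_sparse_emb_kwargs(config):
    """Map the renamed config property onto the unchanged encoder keyword.

    `config` is assumed to be a GSConfig-like object exposing the
    `use_wholegraph_embed` property introduced in #737.
    """
    return {"use_wholegraph_sparse_emb": config.use_wholegraph_embed}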
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
@@ -35,9 +35,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
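
The same simplification repeats in the remaining run and inference scripts below: with the old flag removed, the intermediate `use_wg_embed` variable is redundant, and WholeGraph is initialized whenever either WholeGraph-backed node features or WholeGraph learnable embeddings are requested. A compact sketch of that decision, assuming a GSConfig-like `config` object:

def needs_wholegraph(config, use_wg_feats):
    """Return True when WholeGraph must be initialized, either because the
    node features are stored in WholeGraph format (use_wg_feats) or because
    learnable node embeddings should be backed by WholeGraph."""
    return config.use_wholegraph_embed or use_wg_feats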
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_ep/ep_infer_gnn.py
@@ -44,9 +44,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnEdgeInferData(config.graph_name,
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_ep/gsgnn_ep.py
@@ -58,9 +58,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_lp/gsgnn_lp.py
@@ -98,9 +98,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_lp/lp_infer_gnn.py
@@ -40,9 +40,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnEdgeInferData(config.graph_name,
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_np/gsgnn_np.py
@@ -62,9 +62,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_np/np_infer_gnn.py
@@ -43,9 +43,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnNodeInferData(config.graph_name,
4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-ec/mgpu_test.sh
@@ -226,7 +226,7 @@ error_and_exit $?
rm -fr /data/gsgnn_ec/*

echo "**************dataset: Generated multilabel MovieLens EC, RGCN layer: 1, node feat: generated feature, inference: full graph, exclude-training-targets: True, wholegraph learnable emb"
python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --use-node-embeddings true --multilabel true --num-classes 5 --use-mini-batch-infer false --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_ec/emb/ --save-model-path /data/gsgnn_wg_ec/ --save-model-frequency 1000 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --use-node-embeddings true --multilabel true --num-classes 5 --use-mini-batch-infer false --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_ec/emb/ --save-model-path /data/gsgnn_wg_ec/ --save-model-frequency 1000 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-embed True

error_and_exit $?

@@ -259,7 +259,7 @@ echo "The best model is saved in epoch $best_epoch"
rm /tmp/train_log.txt

echo "**************dataset: Generated multilabel MovieLens EC, do inference on saved model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 5 --use-node-embeddings true --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_ec/infer-emb/ --restore-model-path /data/gsgnn_wg_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_ec/prediction/ --logging-file /tmp/log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 5 --use-node-embeddings true --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_ec/infer-emb/ --restore-model-path /data/gsgnn_wg_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_ec/prediction/ --logging-file /tmp/log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-embed True

error_and_exit $?

4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-lp/mgpu_test.sh
@@ -551,7 +551,7 @@ rm -fr /data/gsgnn_lp_ml_hard_dot/*

# wholegraph sparse embedding
echo "**************dataset: Movielens, RGCN layer 2, node feat: fixed HF BERT & sparse embed, BERT nodes: movie, inference: full-graph, negative_sampler: joint, exclude_training_targets: true, save model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_link_prediction --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --save-model-path /data/gsgnn_lp_ml_wg_dot/ --topk-model-to-save 1 --save-model-frequency 1000 --save-embed-path /data/gsgnn_lp_ml_wg_dot/emb/ --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --use-wholegraph-sparse-emb True --backend nccl
python3 -m graphstorm.run.gs_link_prediction --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --save-model-path /data/gsgnn_lp_ml_wg_dot/ --topk-model-to-save 1 --save-model-frequency 1000 --save-embed-path /data/gsgnn_lp_ml_wg_dot/emb/ --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --use-wholegraph-embed True --backend nccl

error_and_exit $?

@@ -595,7 +595,7 @@ then
fi

echo "**************dataset: Movielens, do inference on saved model, decoder: dot, wholegraph learnable emb"
python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_wg_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_wg_dot/epoch-$best_epoch_dot/ --logging-file /tmp/log.txt --preserve-input True --use-wholegraph-sparse-emb True --backend nccl
python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_wg_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_wg_dot/epoch-$best_epoch_dot/ --logging-file /tmp/log.txt --preserve-input True --use-wholegraph-embed True --backend nccl

error_and_exit $?

4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-nc/mgpu_test.sh
@@ -465,7 +465,7 @@ error_and_exit $?


echo "**************dataset: MovieLens classification, RGCN layer: 1, node feat: fixed HF BERT, BERT nodes: movie, inference: mini-batch save model save emb node, wholegraph learnable emb"
python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --save-model-path /data/gsgnn_wg_nc_ml/ --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_nc_ml/emb/ --num-epochs 3 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --save-model-path /data/gsgnn_wg_nc_ml/ --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_nc_ml/emb/ --num-epochs 3 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-embed True

error_and_exit $?

@@ -497,7 +497,7 @@ echo "The best model is saved in epoch $best_epoch"
rm /tmp/train_log.txt

echo "**************dataset: Movielens, do inference on saved model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_nc_ml/infer-emb/ --restore-model-path /data/gsgnn_wg_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_nc_ml/prediction/ --logging-file /tmp/log.txt --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_nc_ml/infer-emb/ --restore-model-path /data/gsgnn_wg_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_nc_ml/prediction/ --logging-file /tmp/log.txt --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-embed True

error_and_exit $?

