[WholeGraph] Rename the flag for wholegraph sparse embedding (#744)
This addresses the concern raised in #737 (comment). It renames the flag `use-wholegraph-sparse-emb` to `use-wholegraph-embed`, for consistency with #737.
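
For context, a rough sketch of the consolidated option, assuming the `--use-wholegraph-embed` flag introduced in #737 follows the same argparse pattern as the flag removed below (the parser/group names and help wording here are illustrative only):

import argparse

# Illustrative stand-in for the hyperparameter group defined in
# python/graphstorm/config/argument.py.
parser = argparse.ArgumentParser()
group = parser.add_argument_group(title="gsgnn hyperparameters")

# Replaces the removed --use-wholegraph-sparse-emb flag; the exact help text
# and semantics come from #737 and are assumed here.
group.add_argument(
    "--use-wholegraph-embed",
    type=lambda x: (str(x).lower() in ['true', '1']),
    default=argparse.SUPPRESS,
    help="Whether to use the WholeGraph library to update learnable node embeddings")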


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
chang-l authored and jalencato committed Feb 21, 2024
1 parent ae7b4c2 commit 58a229d
Showing 12 changed files with 14 additions and 39 deletions.
18 changes: 0 additions & 18 deletions python/graphstorm/config/argument.py
@@ -290,7 +290,6 @@ def verify_arguments(self, is_train):
_ = self.grad_norm_type
_ = self.gnn_norm
_ = self.sparse_optimizer_lr
_ = self.use_wholegraph_sparse_emb
_ = self.num_epochs
_ = self.save_model_path
_ = self.save_model_frequency
@@ -1191,18 +1190,6 @@ def sparse_optimizer_lr(self): # pylint: disable=invalid-name

return self.lr

@property
def use_wholegraph_sparse_emb(self):
""" Whether to use wholegraph for updating learnable node embeddings
"""
# pylint: disable=no-member
if hasattr(self, "_use_wholegraph_sparse_emb"):
assert self._use_wholegraph_sparse_emb in [True, False], \
"Invalid value for _use_wholegraph_sparse_emb. Must be either True or False."
return self._use_wholegraph_sparse_emb
# By default do not use wholegraph for learnable node embeddings
return False

@property
def use_node_embeddings(self):
""" Whether to use extra learnable node embeddings
@@ -2431,11 +2418,6 @@ def _add_hyperparam_args(parser):
type=lambda x: (str(x).lower() in ['true', '1']),
default=argparse.SUPPRESS,
help="Whether to use extra learnable node embeddings")
group.add_argument(
"--use-wholegraph-sparse-emb",
type=lambda x: (str(x).lower() in ['true', '1']),
default=argparse.SUPPRESS,
help="Whether to use WholeGraph library to update learnable node embeddings")
group.add_argument("--construct-feat-ntype", type=str, nargs="+",
help="The node types whose features are constructed from neighbors' features.")
group.add_argument("--construct-feat-encoder", type=str, default=argparse.SUPPRESS,
2 changes: 1 addition & 1 deletion python/graphstorm/gsf.py
@@ -536,7 +536,7 @@ def set_encoder(model, g, config, train_task):
use_node_embeddings=config.use_node_embeddings,
force_no_embeddings=config.construct_feat_ntype,
num_ffn_layers_in_input=config.num_ffn_layers_in_input,
use_wholegraph_sparse_emb=config.use_wholegraph_sparse_emb)
use_wholegraph_sparse_emb=config.use_wholegraph_embed)
# The number of feature dimensions can change. For example, the feature dimensions
# of BERT embeddings are determined when the input encoder is created.
feat_size = encoder.in_dims
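
Only the GSConfig property is renamed here; the encoder keyword argument keeps its old name, so the call site maps the new property onto the existing parameter. A minimal, hypothetical illustration of that mapping (not part of the change):

def encoder_sparse_emb_kwargs(config):
    """Map the renamed config property onto the unchanged encoder keyword.

    `config` is assumed to be a GSConfig-like object exposing the
    `use_wholegraph_embed` property introduced in #737.
    """
    return {"use_wholegraph_sparse_emb": config.use_wholegraph_embed}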
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
@@ -35,9 +35,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
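
The same simplification repeats in the remaining run and inference scripts below: with the old flag removed, the intermediate `use_wg_embed` variable is redundant, and WholeGraph is initialized whenever either WholeGraph-backed node features or WholeGraph learnable embeddings are requested. A compact sketch of that decision, assuming a GSConfig-like `config` object:

def needs_wholegraph(config, use_wg_feats):
    """Return True when WholeGraph must be initialized, either because the
    node features are stored in WholeGraph format (use_wg_feats) or because
    learnable node embeddings should be backed by WholeGraph."""
    return config.use_wholegraph_embed or use_wg_feats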
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_ep/ep_infer_gnn.py
@@ -44,9 +44,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnEdgeInferData(config.graph_name,
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_ep/gsgnn_ep.py
@@ -58,9 +58,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_lp/gsgnn_lp.py
@@ -98,9 +98,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_lp/lp_infer_gnn.py
@@ -40,9 +40,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnEdgeInferData(config.graph_name,
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_np/gsgnn_np.py
@@ -62,9 +62,8 @@ def main(config_args):
config.verify_arguments(True)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
rt_profiler.init(config.profile_path, rank=gs.get_rank())
sys_tracker.init(config.verbose, rank=gs.get_rank())
device = setup_device(config.local_rank)
3 changes: 1 addition & 2 deletions python/graphstorm/run/gsgnn_np/np_infer_gnn.py
@@ -43,9 +43,8 @@ def main(config_args):
config.verify_arguments(False)

use_wg_feats = use_wholegraph(config.part_config)
use_wg_embed = config.use_wholegraph_sparse_emb or config.use_wholegraph_embed
gs.initialize(ip_config=config.ip_config, backend=config.backend,
use_wholegraph=use_wg_embed or use_wg_feats)
use_wholegraph=config.use_wholegraph_embed or use_wg_feats)
device = setup_device(config.local_rank)

infer_data = GSgnnNodeInferData(config.graph_name,
4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-ec/mgpu_test.sh
@@ -226,7 +226,7 @@ error_and_exit $?
rm -fr /data/gsgnn_ec/*

echo "**************dataset: Generated multilabel MovieLens EC, RGCN layer: 1, node feat: generated feature, inference: full graph, exclude-training-targets: True, wholegraph learnable emb"
python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --use-node-embeddings true --multilabel true --num-classes 5 --use-mini-batch-infer false --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_ec/emb/ --save-model-path /data/gsgnn_wg_ec/ --save-model-frequency 1000 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --use-node-embeddings true --multilabel true --num-classes 5 --use-mini-batch-infer false --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_ec/emb/ --save-model-path /data/gsgnn_wg_ec/ --save-model-frequency 1000 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-embed True

error_and_exit $?

@@ -259,7 +259,7 @@ echo "The best model is saved in epoch $best_epoch"
rm /tmp/train_log.txt

echo "**************dataset: Generated multilabel MovieLens EC, do inference on saved model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 5 --use-node-embeddings true --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_ec/infer-emb/ --restore-model-path /data/gsgnn_wg_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_ec/prediction/ --logging-file /tmp/log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 5 --use-node-embeddings true --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_ec/infer-emb/ --restore-model-path /data/gsgnn_wg_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_ec/prediction/ --logging-file /tmp/log.txt --logging-level debug --preserve-input True --backend nccl --use-wholegraph-embed True

error_and_exit $?

4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-lp/mgpu_test.sh
@@ -551,7 +551,7 @@ rm -fr /data/gsgnn_lp_ml_hard_dot/*

# wholegraph sparse embedding
echo "**************dataset: Movielens, RGCN layer 2, node feat: fixed HF BERT & sparse embed, BERT nodes: movie, inference: full-graph, negative_sampler: joint, exclude_training_targets: true, save model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_link_prediction --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --save-model-path /data/gsgnn_lp_ml_wg_dot/ --topk-model-to-save 1 --save-model-frequency 1000 --save-embed-path /data/gsgnn_lp_ml_wg_dot/emb/ --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --use-wholegraph-sparse-emb True --backend nccl
python3 -m graphstorm.run.gs_link_prediction --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --save-model-path /data/gsgnn_lp_ml_wg_dot/ --topk-model-to-save 1 --save-model-frequency 1000 --save-embed-path /data/gsgnn_lp_ml_wg_dot/emb/ --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --use-wholegraph-embed True --backend nccl

error_and_exit $?

@@ -595,7 +595,7 @@ then
fi

echo "**************dataset: Movielens, do inference on saved model, decoder: dot, wholegraph learnable emb"
python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_wg_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_wg_dot/epoch-$best_epoch_dot/ --logging-file /tmp/log.txt --preserve-input True --use-wholegraph-sparse-emb True --backend nccl
python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_wg_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_wg_dot/epoch-$best_epoch_dot/ --logging-file /tmp/log.txt --preserve-input True --use-wholegraph-embed True --backend nccl

error_and_exit $?

4 changes: 2 additions & 2 deletions tests/end2end-tests/graphstorm-nc/mgpu_test.sh
@@ -465,7 +465,7 @@ error_and_exit $?


echo "**************dataset: MovieLens classification, RGCN layer: 1, node feat: fixed HF BERT, BERT nodes: movie, inference: mini-batch save model save emb node, wholegraph learnable emb"
python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --save-model-path /data/gsgnn_wg_nc_ml/ --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_nc_ml/emb/ --num-epochs 3 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --save-model-path /data/gsgnn_wg_nc_ml/ --topk-model-to-save 1 --save-embed-path /data/gsgnn_wg_nc_ml/emb/ --num-epochs 3 --logging-file /tmp/train_log.txt --logging-level debug --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-embed True

error_and_exit $?

@@ -497,7 +497,7 @@ echo "The best model is saved in epoch $best_epoch"
rm /tmp/train_log.txt

echo "**************dataset: Movielens, do inference on saved model, wholegraph learnable emb"
python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_nc_ml/infer-emb/ --restore-model-path /data/gsgnn_wg_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_nc_ml/prediction/ --logging-file /tmp/log.txt --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-sparse-emb True
python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer false --save-embed-path /data/gsgnn_wg_nc_ml/infer-emb/ --restore-model-path /data/gsgnn_wg_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_wg_nc_ml/prediction/ --logging-file /tmp/log.txt --preserve-input True --backend nccl --use-node-embeddings true --use-wholegraph-embed True

error_and_exit $?

