diff --git a/README.md b/README.md
index 2c5b88c92..23ed163d6 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,32 @@ then
 #### Guide for Helmholtz GPU cluster
 ```
+conda create --name domainlab_py39 python=3.9
+conda activate domainlab_py39
 conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge
 conda install torchmetric==0.10.3
+git checkout fbopt
 pip install -r requirements_notorch.txt
 conda install tensorboard
 ```
+#### Download PACS
+
+Step 1:
+
+Use the following script to download PACS to your local machine, then upload it to your cluster:
+
+https://github.com/marrlab/DomainLab/blob/fbopt/data/script/download_pacs.py
+
+Step 2:
+Create a symbolic link, following the example script in https://github.com/marrlab/DomainLab/blob/master/sh_pacs.sh,
+
+where `mkdir -p data/pacs` is executed in the repository directory and
+
+`ln -s /dir/to/yourdata/pacs/raw ./data/pacs/PACS`
+creates the symbolic link inside the repository directory.
+
+
 #### Windows installation details
 
 To install DomainLab on Windows, please remove the `snakemake` dependency from the `requirements.txt` file.
diff --git a/ci_run_examples.sh b/ci_run_examples.sh
index 23249327f..3c00bef61 100644
--- a/ci_run_examples.sh
+++ b/ci_run_examples.sh
@@ -8,6 +8,8 @@ sed -n '/```shell/,/```/ p' docs/doc_examples.md | sed '/^```/ d' >> ./sh_temp_e
 bash -x -v -e sh_temp_example.sh
 echo "general examples done"
+rm -r zoutput
+
 
 echo "#!/bin/bash -x -v" > sh_temp_mnist.sh
 sed -n '/```shell/,/```/ p' docs/doc_MNIST_classification.md | sed '/^```/ d' >> ./sh_temp_mnist.sh
 bash -x -v -e sh_temp_mnist.sh
diff --git a/domainlab/algos/builder_diva.py b/domainlab/algos/builder_diva.py
index d13de99d8..e3e5256cd 100644
--- a/domainlab/algos/builder_diva.py
+++ b/domainlab/algos/builder_diva.py
@@ -35,7 +35,7 @@ def init_business(self, exp):
         request = RequestVAEBuilderCHW(
             task.isize.c, task.isize.h, task.isize.w, args)
         node = VAEChainNodeGetter(request)()
-        model = mk_diva()(node,
+        model = mk_diva(str_mu=args.str_mu)(node,
                           zd_dim=args.zd_dim,
                           zy_dim=args.zy_dim,
                           zx_dim=args.zx_dim,
diff --git a/domainlab/algos/msels/a_model_sel.py b/domainlab/algos/msels/a_model_sel.py
index 1f20ff912..f6ae57799 100644
--- a/domainlab/algos/msels/a_model_sel.py
+++ b/domainlab/algos/msels/a_model_sel.py
@@ -25,7 +25,7 @@ def accept(self, trainer, tr_obs):
         self.tr_obs = tr_obs
 
     @abc.abstractmethod
-    def update(self):
+    def update(self, clear_counter=False):
         """
         observer + visitor pattern to trainer
         if the best model should be updated
diff --git a/domainlab/algos/msels/c_msel_oracle.py b/domainlab/algos/msels/c_msel_oracle.py
index eb672f46e..299a9e48b 100644
--- a/domainlab/algos/msels/c_msel_oracle.py
+++ b/domainlab/algos/msels/c_msel_oracle.py
@@ -18,7 +18,7 @@ def __init__(self, msel=None):
         self.best_oracle_acc = 0
         self.msel = msel
 
-    def update(self):
+    def update(self, clear_counter=False):
         """
         if the best model should be updated
         """
@@ -35,7 +35,7 @@ def update(self):
                 logger.info("new oracle model saved")
             flag = True
         if self.msel is not None:
-            return self.msel.update()
+            return self.msel.update(clear_counter)
         return flag
 
     def if_stop(self):
diff --git a/domainlab/algos/msels/c_msel_tr_loss.py b/domainlab/algos/msels/c_msel_tr_loss.py
index c42f324b8..3b9d4581e 100644
--- a/domainlab/algos/msels/c_msel_tr_loss.py
+++ b/domainlab/algos/msels/c_msel_tr_loss.py
@@ -17,7 +17,7 @@ def __init__(self, max_es):
         self.max_es = max_es
         super().__init__()
 
-    def update(self):
+    def update(self, clear_counter=False):
""" if the best model should be updated """ @@ -34,6 +34,9 @@ def update(self): logger.info(f"early stop counter: {self.es_c}") logger.info(f"loss:{loss}, best loss: {self.best_loss}") flag = False # do not update best model + if clear_counter: + logger.info("clearing counter") + self.es_c = 0 return flag def if_stop(self): diff --git a/domainlab/algos/msels/c_msel_val.py b/domainlab/algos/msels/c_msel_val.py index 939cc47a0..01497f0c4 100644 --- a/domainlab/algos/msels/c_msel_val.py +++ b/domainlab/algos/msels/c_msel_val.py @@ -16,19 +16,19 @@ def __init__(self, max_es): self.best_te_metric = 0.0 super().__init__(max_es) # construct self.tr_obs (observer) - def update(self): + def update(self, clear_counter=False): """ if the best model should be updated """ flag = True if self.tr_obs.metric_val is None or self.tr_obs.str_msel == "loss_tr": - return super().update() + return super().update(clear_counter) metric = self.tr_obs.metric_val[self.tr_obs.str_metric4msel] if self.tr_obs.metric_te is not None: metric_te_current = self.tr_obs.metric_te[self.tr_obs.str_metric4msel] self.best_te_metric = max(self.best_te_metric, metric_te_current) - if metric > self.best_val_acc: # observer + if metric > self.best_val_acc: # update hat{model} # different from loss, accuracy should be improved: the bigger the better self.best_val_acc = metric self.es_c = 0 # restore counter @@ -45,5 +45,7 @@ def update(self): f"corresponding to test acc: \ {self.sel_model_te_acc} / {self.best_te_metric}") flag = False # do not update best model - + if clear_counter: + logger.info("clearing counter") + self.es_c = 0 return flag diff --git a/domainlab/algos/trainers/args_fbopt.py b/domainlab/algos/trainers/args_fbopt.py index a84969efd..c144f3d58 100644 --- a/domainlab/algos/trainers/args_fbopt.py +++ b/domainlab/algos/trainers/args_fbopt.py @@ -36,6 +36,9 @@ def add_args2parser_fbopt(parser): parser.add_argument('--no_setpoint_update', action='store_true', default=False, help='disable setpoint update') + parser.add_argument('--str_mu', type=str, default="default", help='which penalty to tune') + + # the following hyperparamters do not need to be tuned parser.add_argument('--beta_mu', type=float, default=1.1, help='how much to multiply mu each time') diff --git a/domainlab/algos/trainers/fbopt_alternate.py b/domainlab/algos/trainers/fbopt_alternate.py index a7d795626..67fecec23 100644 --- a/domainlab/algos/trainers/fbopt_alternate.py +++ b/domainlab/algos/trainers/fbopt_alternate.py @@ -98,7 +98,7 @@ def cal_delta4control(self, list1, list_setpoint): def cal_delta_integration(self, list_old, list_new, coeff): return [(1-coeff)*a + coeff*b for a, b in zip(list_old, list_new)] - def search_mu(self, epo_reg_loss, epo_task_loss, dict_theta=None, miter=None): + def search_mu(self, epo_reg_loss, epo_task_loss, epo_loss_tr, dict_theta=None, miter=None): """ start from parameter dictionary dict_theta: {"layer":tensor}, enlarge mu w.r.t. 
         its current value
@@ -137,9 +137,6 @@ def search_mu(self, epo_reg_loss, epo_task_loss, dict_theta=None, miter=None):
                 f'reg/setpoint{i}': reg_set,
             }, miter)
             self.writer.add_scalar(f'x-axis=task vs y-axis=reg/dyn{i}', reg_dyn, epo_task_loss)
-
-        epo_loss_tr = epo_task_loss + torch.inner(
-            torch.Tensor(list(self.mmu.values())), torch.Tensor(epo_reg_loss))
         self.writer.add_scalar('loss_penalized', epo_loss_tr, miter)
         self.writer.add_scalar('task', epo_task_loss, miter)
         acc_te = 0
diff --git a/domainlab/algos/trainers/train_mu_controller.py b/domainlab/algos/trainers/train_mu_controller.py
index b0c982e4a..c015930c4 100644
--- a/domainlab/algos/trainers/train_mu_controller.py
+++ b/domainlab/algos/trainers/train_mu_controller.py
@@ -45,24 +45,27 @@ def eval_r_loss(self):
         # mock the model hyper-parameter to be from dict4mu
         epo_reg_loss = []
         epo_task_loss = 0
+        epo_p_loss = 0
         counter = 0.0
         with torch.no_grad():
             for _, (tensor_x, vec_y, vec_d, *_) in enumerate(self.loader_tr_no_drop):
                 tensor_x, vec_y, vec_d = \
                     tensor_x.to(self.device), vec_y.to(self.device), vec_d.to(self.device)
                 tuple_reg_loss = self.model.cal_reg_loss(tensor_x, vec_y, vec_d)
+                p_loss, *_ = self.model.cal_loss(tensor_x, vec_y, vec_d)
                 # NOTE: first [0] extract the loss, second [0] get the list
                 list_b_reg_loss = tuple_reg_loss[0]
-                list_b_reg_loss_sumed = [ele.sum().item() for ele in list_b_reg_loss]
+                list_b_reg_loss_sumed = [ele.sum().detach().item() for ele in list_b_reg_loss]
                 if len(epo_reg_loss) == 0:
                     epo_reg_loss = list_b_reg_loss_sumed
                 else:
                     epo_reg_loss = list(map(add, epo_reg_loss, list_b_reg_loss_sumed))
-                b_task_loss = self.model.cal_task_loss(tensor_x, vec_y).sum()
+                b_task_loss = self.model.cal_task_loss(tensor_x, vec_y).sum().detach().item()
                 # sum will kill the dimension of the mini batch
                 epo_task_loss += b_task_loss
+                epo_p_loss += p_loss.sum().detach().item()
                 counter += 1.0
-        return list_divide(epo_reg_loss, counter), epo_task_loss/counter
+        return list_divide(epo_reg_loss, counter), epo_task_loss/counter, epo_p_loss / counter
 
     def before_batch(self, epoch, ind_batch):
         """
@@ -77,7 +80,7 @@ def before_batch(self, epoch, ind_batch):
     def before_tr(self):
         self.set_scheduler(scheduler=HyperSchedulerFeedbackAlternave)
         self.model.hyper_update(epoch=None, fun_scheduler=HyperSetter(self.hyper_scheduler.mmu))
-        self.epo_reg_loss_tr, self.epo_task_loss_tr = self.eval_r_loss()
+        self.epo_reg_loss_tr, self.epo_task_loss_tr, self.epo_loss_tr = self.eval_r_loss()
         self.hyper_scheduler.set_setpoint(
             [ele * self.aconf.ini_setpoint_ratio for ele in self.epo_reg_loss_tr],
             self.epo_task_loss_tr)
@@ -90,6 +93,7 @@ def tr_epoch(self, epoch):
         self.hyper_scheduler.search_mu(
             self.epo_reg_loss_tr,
             self.epo_task_loss_tr,
+            self.epo_loss_tr,
             dict(self.model.named_parameters()),
             miter=epoch)
         self.hyper_scheduler.update_setpoint(self.epo_reg_loss_tr, self.epo_task_loss_tr)
diff --git a/domainlab/models/a_model_classif.py b/domainlab/models/a_model_classif.py
index f5ec1034d..ae1814b95 100644
--- a/domainlab/models/a_model_classif.py
+++ b/domainlab/models/a_model_classif.py
@@ -197,4 +197,6 @@ def cal_reg_loss(self, tensor_x, tensor_y, tensor_d, others=None):
         """
         for ERM to adapt to the interface of other regularized learners
         """
-        return [torch.Tensor([0])], [0.0]
\ No newline at end of file
+        device = tensor_x.device
+        bsize = tensor_x.shape[0]
+        return [torch.zeros(bsize, 1).to(device)], [0.0]
\ No newline at end of file
diff --git a/domainlab/models/model_diva.py b/domainlab/models/model_diva.py
index 752d5acf2..305be1162 100644
--- a/domainlab/models/model_diva.py
+++ b/domainlab/models/model_diva.py
@@ -9,7 +9,7 @@ from domainlab.utils.utils_class import store_args
 
 
-def mk_diva(parent_class=VAEXYDClassif):
+def mk_diva(parent_class=VAEXYDClassif, str_mu="default"):
     """
     Instantiate a domain invariant variational autoencoder (DIVA) with arbitrary task loss.
 
@@ -89,8 +89,6 @@ def hyper_update(self, epoch, fun_scheduler):
             self.beta_d = dict_rst["beta_d"]
             self.beta_y = dict_rst["beta_y"]
             self.beta_x = dict_rst["beta_x"]
-            self.gamma_d = dict_rst["gamma_d"]
-            self.mu_recon = dict_rst["mu_recon"]
 
         def hyper_init(self, functor_scheduler, trainer=None):
             """
@@ -100,11 +98,9 @@ def hyper_init(self, functor_scheduler, trainer=None):
             """
             return functor_scheduler(
                 trainer=trainer,
-                mu_recon=self.mu_recon,
                 beta_d=self.beta_d,
                 beta_y=self.beta_y,
                 beta_x=self.beta_x,
-                gamma_d=self.gamma_d,
             )
 
         def get_list_str_y(self):
@@ -142,4 +138,71 @@ def cal_reg_loss(self, tensor_x, tensor_y, tensor_d, others=None):
             lc_d = F.cross_entropy(logit_d, d_target, reduction="none")
             return [loss_recon_x, zd_p_minus_zd_q, zx_p_minus_zx_q, zy_p_minus_zy_q, lc_d], \
                 [self.mu_recon, -self.beta_d, -self.beta_x, -self.beta_y, -self.gamma_d]
-    return ModelDIVA
+
+    class ModelDIVAGammadRecon(ModelDIVA):
+        def hyper_update(self, epoch, fun_scheduler):
+            """hyper_update.
+
+            :param epoch:
+            :param fun_scheduler:
+            """
+            dict_rst = fun_scheduler(epoch)
+            self.beta_d = dict_rst["beta_d"]
+            self.beta_y = dict_rst["beta_y"]
+            self.beta_x = dict_rst["beta_x"]
+            self.gamma_d = dict_rst["gamma_d"]
+            self.mu_recon = dict_rst["mu_recon"]
+
+        def hyper_init(self, functor_scheduler, trainer=None):
+            """
+            initiate a scheduler object via class name and things inside this model
+
+            :param functor_scheduler: the class name of the scheduler
+            """
+            return functor_scheduler(
+                trainer=trainer,
+                mu_recon=self.mu_recon,
+                beta_d=self.beta_d,
+                beta_y=self.beta_y,
+                beta_x=self.beta_x,
+                gamma_d=self.gamma_d,
+            )
+
+
+    class ModelDIVAGammad(ModelDIVA):
+        def hyper_update(self, epoch, fun_scheduler):
+            """hyper_update.
+
+            :param epoch:
+            :param fun_scheduler:
+            """
+            dict_rst = fun_scheduler(epoch)
+            self.beta_d = dict_rst["beta_d"]
+            self.beta_y = dict_rst["beta_y"]
+            self.beta_x = dict_rst["beta_x"]
+            self.gamma_d = dict_rst["gamma_d"]
+
+        def hyper_init(self, functor_scheduler, trainer=None):
+            """
+            initiate a scheduler object via class name and things inside this model
+
+            :param functor_scheduler: the class name of the scheduler
+            """
+            return functor_scheduler(
+                trainer=trainer,
+                beta_d=self.beta_d,
+                beta_y=self.beta_y,
+                beta_x=self.beta_x,
+                gamma_d=self.gamma_d,
+            )
+
+    class ModelDIVADefault(ModelDIVA):
+        """DIVA variant that keeps the default set of tunable penalty weights.
+        """
+    if str_mu == "gammad_recon":
+        return ModelDIVAGammadRecon
+    if str_mu == "gammad":
+        return ModelDIVAGammad
+    if str_mu == "default":
+        return ModelDIVADefault
+    raise RuntimeError("unsupported argument candidate for str_mu; allowed: default, gammad_recon, gammad")
diff --git a/domainlab/utils/generate_fbopt_phase_portrait.py b/domainlab/utils/generate_fbopt_phase_portrait.py
new file mode 100644
index 000000000..88b2bd97a
--- /dev/null
+++ b/domainlab/utils/generate_fbopt_phase_portrait.py
@@ -0,0 +1,63 @@
+import glob
+import os
+
+import matplotlib.pyplot as plt
+from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
+
+
+# FIXME: maybe adjust the output path where the png is saved
+output_dir = "../.."
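+# NOTE (assumption, not part of the original script): the relative default "../.." resolves
+# to the repository root only when the script is launched from its own folder domainlab/utils/,
+# which is also what the glob pattern "../../runs/*/events*" in __main__ below assumes.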
+
+def get_xy_from_event_file(event_file, tf_size_guidance=None):
+    if tf_size_guidance is None:
+        # settings for which/how much data is loaded from the tensorboard event files
+        tf_size_guidance = {
+            'compressedHistograms': 0,
+            'images': 0,
+            'scalars': 1e10,  # keep an effectively unlimited number of scalars
+            'histograms': 0
+        }
+    # load event file
+    event = EventAccumulator(event_file, tf_size_guidance)
+    event.Reload()
+    # extract the reg/dyn0 values
+    y_event = event.Scalars('x-axis=task vs y-axis=reg/dyn0')
+    y = [s.value for s in y_event]
+    x_int = [s.step for s in y_event]  # the .step data are saved as ints in tensorboard, so we will re-extract from 'task'
+    # extract the corresponding 'task' values
+    x_event = event.Scalars('task')
+    x = [s.value for s in x_event]
+    # sanity check:
+    for i in range(len(x)):
+        assert int(x[i]) == x_int[i]
+    return x, y
+
+def phase_portrait_combined(event_files, colors):
+    plt.figure()
+
+    for event_i in range(len(event_files)):
+        x, y = get_xy_from_event_file(event_files[event_i])
+
+        assert len(x) == len(y)
+        for i in range(len(x)-1):
+            plt.arrow(x[i], y[i], (x[i+1]-x[i]), (y[i+1]-y[i]),
+                      head_width=0.2, head_length=0.2, length_includes_head=True,
+                      fc=colors[event_i], ec=colors[event_i], alpha=0.4)
+
+        plt.plot(x[0], y[0], 'ko')
+        plt.scatter(x, y, s=1, c='black')
+
+    plt.xlabel("task")
+    plt.ylabel("reg/dyn0")
+    plt.title("x-axis=task vs y-axis=reg/dyn0")
+
+    plt.savefig(os.path.join(output_dir, 'phase_portrait_combined.png'), dpi=300)
+
+
+if __name__ == "__main__":
+    event_files = glob.glob("../../runs/*/events*")
+    print("Using the following tensorboard event files:\n{}".format("\n".join(event_files)))
+    cmap = plt.get_cmap('tab10')  # Choose a colormap
+    colors = [cmap(i) for i in range(len(event_files))]  # Different colors for the different runs
+    phase_portrait_combined(event_files, colors)
+
diff --git a/examples/benchmark/benchmark_fbopt_mnist_diva.yaml b/examples/benchmark/benchmark_fbopt_mnist_diva.yaml
index f705f4da1..12878acb0 100644
--- a/examples/benchmark/benchmark_fbopt_mnist_diva.yaml
+++ b/examples/benchmark/benchmark_fbopt_mnist_diva.yaml
@@ -4,7 +4,7 @@ output_dir: zoutput/benchmarks/benchmark_fbopt
 sampling_seed: 0
 
 startseed: 0
-endseed: 2
+endseed: 4
 
 test_domains:
   - 0
@@ -21,7 +21,6 @@ domainlab_args:
   zx_dim: 0
   zy_dim: 32
   zd_dim: 32
-  gamma_y: 1.0
   nname: conv_bn_pool_2
   nname_dom: conv_bn_pool_2
   nname_topic_distrib_img2topic: conv_bn_pool_2
@@ -48,23 +47,60 @@ Shared params:
     distribution: loguniform
 
   mu_init:
-    min: 0.01
-    max: 0.05
+    min: 0.000001
+    max: 0.00001
     num: 5
     distribution: loguniform
 
+  gamma_y:
+    min: 1.0
+    max: 1e6
+    num: 3
+    distribution: loguniform
+
+  gamma_d:
+    min: 1.0
+    max: 1e6
+    step: 100
+    num: 3
+    distribution: loguniform
 
 # Test fbopt with different hyperparameter configurations
-diva_fbopt:
+diva_fbopt_a:
   aname: diva
   trainer: fbopt
+  str_mu: gammad_recon
+  gamma_y: 1.0
+  ini_setpoint_ratio: 0.99
   shared:
-    - ini_setpoint_ratio
     - k_i_gain
     - mu_init
+
+diva_feedforward_a:
+  aname: diva
+  trainer: hyperscheduler
+  str_mu: gammad_recon
+  gamma_y: 1.0
+  shared:
+    - gamma_d
+
+diva_default:
+  aname: diva
+  trainer: hyperscheduler
+  str_mu: default
+  shared:
+    - gamma_d
+    - gamma_y
+
+diva_fixed_penalty:
+  aname: diva
+  trainer: basic
+  str_mu: default
+  shared:
+    - gamma_d
+    - gamma_y
+
 erm:
   aname: deepall
diff --git a/examples/benchmark/benchmark_fbopt_mnist_jigen.yaml b/examples/benchmark/benchmark_fbopt_mnist_jigen.yaml
index c0f3e25e5..c58868965 100644
--- a/examples/benchmark/benchmark_fbopt_mnist_jigen.yaml
+++ b/examples/benchmark/benchmark_fbopt_mnist_jigen.yaml
@@ -4,7 +4,7 @@ output_dir: zoutput/benchmarks/benchmark_fbopt
 sampling_seed: 0
 
 startseed: 0
-endseed: 2
+endseed: 4
 
 test_domains:
   - 0
@@ -24,52 +24,52 @@ domainlab_args:
   mu_clip: 10
   coeff_ma: 0.5
   no_tensorboard: False
+  pperm: 0.5
 
 
 Shared params:
-  ini_setpoint_ratio:
-    min: 0.95
-    max: 0.99
-    num: 2
-    distribution: uniform
-
   k_i_gain:
     min: 0.0001
     max: 0.01
     num: 2
-    step: 0.0001
     distribution: uniform
 
   mu_init:
-    min: 0.001
-    max: 0.1
+    min: 0.000001
+    max: 0.00001
     num: 3
     distribution: loguniform
 
-  pperm:
-    min: 0.1
-    max: 0.9
-    num: 3
-    distribution: uniform
+  gamma_reg:
+    min: 0.01
+    max: 10_000
+    num: 10
+    distribution: loguniform
+
 
 # Test fbopt with different hyperparameter configurations
 jigen_feedback:
   aname: jigen
   trainer: fbopt
-
+  ini_setpoint_ratio: 0.99
   shared:
-    - ini_setpoint_ratio
     - k_i_gain
     - mu_init
-    - pperm
 
 jigen_feedforward:
   aname: jigen
-  trainer: fbopt
+  trainer: hyperscheduler
   shared:
-    - pperm
-
+    - gamma_reg
+
+jigen_fixed_penalty:
+  aname: jigen
+  trainer: basic
+  shared:
+    - gamma_reg
+
 erm:
   aname: deepall
diff --git a/examples/benchmark/benchmark_fbopt_pacs_diva.yaml b/examples/benchmark/benchmark_fbopt_pacs_diva.yaml
index 68d9d55e2..81a6a4e9a 100644
--- a/examples/benchmark/benchmark_fbopt_pacs_diva.yaml
+++ b/examples/benchmark/benchmark_fbopt_pacs_diva.yaml
@@ -23,7 +23,7 @@ domainlab_args:
   npath_topic_distrib_img2topic: examples/nets/resnet50domainbed.py
   npath_encoder_sandwich_layer_img2h4zd: examples/nets/resnet50domainbed.py
   exp_shoulder_clip: 10
-  mu_clip: 10_000
+  mu_clip: 1_000_000
   coeff_ma: 0.5
   zx_dim: 0
   zy_dim: 64
@@ -41,13 +41,27 @@ Shared params:
 
   k_i_gain:
     min: 0.0001
-    max: 0.1
+    max: 0.01
     num: 3
     distribution: uniform
 
   mu_init:
-    min: 0.0001
-    max: 1.0
+    min: 0.000001
+    max: 0.00001
+    num: 3
+    distribution: loguniform
+
+  gamma_y:
+    min: 1.0
+    max: 1e6
+    step: 100
+    num: 3
+    distribution: loguniform
+
+  gamma_d:
+    min: 1.0
+    max: 1e6
+    step: 100
     num: 3
     distribution: loguniform
 
@@ -55,11 +69,39 @@
 
 # Test fbopt with different hyperparameter configurations
-diva_fbopt:
+diva_fbopt_full:
   aname: diva
   trainer: fbopt
+  str_mu: gammad_recon
   gamma_y: 1.0
+  ini_setpoint_ratio: 0.99
   shared:
-    - ini_setpoint_ratio
     - k_i_gain
     - mu_init
+
+diva_feedforward_full:
+  aname: diva
+  trainer: hyperscheduler
+  str_mu: gammad_recon
+  gamma_y: 1.0
+  shared:
+    - gamma_d
+
+diva_default:
+  aname: diva
+  trainer: hyperscheduler
+  str_mu: default
+  shared:
+    - gamma_d
+    - gamma_y
+
+diva_fixed_penalty:
+  aname: diva
+  trainer: basic
+  str_mu: default
+  shared:
+    - gamma_d
+    - gamma_y
+
+erm:
+  aname: deepall
diff --git a/examples/benchmark/benchmark_fbopt_pacs_jigen.yaml b/examples/benchmark/benchmark_fbopt_pacs_jigen.yaml
index 2ee334078..aa9d56696 100644
--- a/examples/benchmark/benchmark_fbopt_pacs_jigen.yaml
+++ b/examples/benchmark/benchmark_fbopt_pacs_jigen.yaml
@@ -5,7 +5,7 @@ output_dir: zoutput/benchmarks/benchmark_fbopt_pacs_full
 sampling_seed: 0
 
 startseed: 0
-endseed: 1
+endseed: 4
 
 test_domains:
   - sketch
@@ -28,6 +28,7 @@ domainlab_args:
   zx_dim: 0
   zy_dim: 64
   zd_dim: 64
+  pperm: 0.5
 
 
 Shared params:
@@ -38,9 +39,9 @@
     distribution: loguniform
 
   mu_init:
-    min: 0.00001
-    max: 1.0
-    num: 4
+    min: 0.000001
+    max: 0.00005
+    num: 3
     distribution: loguniform
 
   pperm:
@@ -49,13 +50,33 @@
     num: 3
     distribution: uniform
 
+  gamma_reg:
+    min: 0.01
+    max: 10_000
+    num: 10
+    distribution: loguniform
+
 
 # Test fbopt with different hyperparameter configurations
-jigen_fbopt:
+jigen_feedback:
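+  # JiGen with the feedback penalty controller (trainer: fbopt); compared against the
+  # feed-forward scheduler (jigen_feedforward) and fixed-penalty (jigen_fixed_penalty) nodes below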
   aname: jigen
-  init_setpoint_ratio: 0.99
   trainer: fbopt
+  ini_setpoint_ratio: 0.99
   shared:
     - k_i_gain
     - mu_init
-    - pperm
+
+jigen_feedforward:
+  aname: jigen
+  trainer: hyperscheduler
+  shared:
+    - gamma_reg
+
+jigen_fixed_penalty:
+  aname: jigen
+  trainer: basic
+  shared:
+    - gamma_reg
+
+erm:
+  aname: deepall
diff --git a/pyproject.toml b/pyproject.toml
index 30419abf9..c08701030 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ python = "^3.9"
 numpy = "^1.23.4"
 matplotlib = "^3.6.1"
 seaborn = "0.12.2"
-snakemake = "7.21.0"
+snakemake = "^7.32.4"
 torchmetrics = "^0.10.0"
 torch = "^1.12.0"
 torchvision = "^0.13.0"
diff --git a/requirements.txt b/requirements.txt
index ef70b5567..df7bf73a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -61,7 +61,7 @@ setuptools==68.0.0 ; python_version >= "3.9" and python_version < "4.0"
 six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
 smart-open==6.3.0 ; python_version >= "3.9" and python_version < "4.0"
 smmap==5.0.0 ; python_version >= "3.9" and python_version < "4.0"
-snakemake==7.21.0 ; python_version >= "3.9" and python_version < "4.0"
+snakemake==7.32.4 ; python_version >= "3.9" and python_version < "4.0"
 soupsieve==2.4.1 ; python_version >= "3.9" and python_version < "4.0"
 stopit==1.1.2 ; python_version >= "3.9" and python_version < "4.0"
 tabulate==0.9.0 ; python_version >= "3.9" and python_version < "4.0"
diff --git a/run_erm.sh b/run_erm.sh
new file mode 100644
index 000000000..e5d44a151
--- /dev/null
+++ b/run_erm.sh
@@ -0,0 +1 @@
+python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --aname=deepall --nname=conv_bn_pool_2 --epos=10
diff --git a/run_fbopt.sh b/run_fbopt.sh
index ed7873414..33226f290 100644
--- a/run_fbopt.sh
+++ b/run_fbopt.sh
@@ -3,4 +3,4 @@
 # although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error
 # so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring
 # pytest -s tests/test_fbopt.py
-python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --aname=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=100 --init_mu=1.0
+python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --aname=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=5 --mu_init=1.0
diff --git a/run_fbopt_mnist.sh b/run_fbopt_mnist.sh
index 212244e1d..55434f380 100644
--- a/run_fbopt_mnist.sh
+++ b/run_fbopt_mnist.sh
@@ -3,4 +3,4 @@
 # although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error
 # so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring
 # pytest -s tests/test_fbopt.py
-python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --aname=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=200 --es=100 --mu_init=0.00001
+python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --aname=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=50 --mu_init=0.00001
diff --git a/tests/test_observer.py b/tests/test_observer.py
new file mode 100644
index 000000000..46e2cfd87
--- /dev/null
+++ b/tests/test_observer.py
@@ -0,0 +1,46 @@
+"""
+unit and end-to-end tests of the model-selection observer update with deepall
+"""
+import gc
+import torch
+from domainlab.compos.exp.exp_main import Exp
+from domainlab.arg_parser import mk_parser_main
+
+
+def test_deepall():
+    """
+    deepall with the default model selection
+    """
+    parser = mk_parser_main()
+    margs = parser.parse_args(["--te_d", "caltech",
+                               "--task", "mini_vlcs",
"--aname", "deepall", "--bs", "2", + "--nname", "conv_bn_pool_2" + ]) + exp = Exp(margs) + exp.trainer.before_tr() + exp.trainer.tr_epoch(0) + exp.trainer.observer.update(True) + del exp + torch.cuda.empty_cache() + gc.collect() + + +def test_deepall_trloss(): + """ + unit deep all + """ + parser = mk_parser_main() + margs = parser.parse_args(["--te_d", "caltech", + "--task", "mini_vlcs", + "--aname", "deepall", "--bs", "2", + "--nname", "conv_bn_pool_2", + "--msel", "loss_tr" + ]) + exp = Exp(margs) + exp.trainer.before_tr() + exp.trainer.tr_epoch(0) + exp.trainer.observer.update(True) + del exp + torch.cuda.empty_cache() + gc.collect()