From d7a4ed39d3ad94cba5304d53b1a8a8a0afb14080 Mon Sep 17 00:00:00 2001
From: smilesun
Date: Thu, 25 Jul 2024 16:37:45 +0200
Subject: [PATCH 01/12] first version causal irl

---
 domainlab/algos/trainers/train_causIRL.py | 75 +++++++++++++++++++++++
 domainlab/algos/trainers/zoo_trainer.py   |  2 +
 tests/test_causal_irl.py                  | 13 ++++
 3 files changed, 90 insertions(+)
 create mode 100644 domainlab/algos/trainers/train_causIRL.py
 create mode 100644 tests/test_causal_irl.py

diff --git a/domainlab/algos/trainers/train_causIRL.py b/domainlab/algos/trainers/train_causIRL.py
new file mode 100644
index 000000000..139223c58
--- /dev/null
+++ b/domainlab/algos/trainers/train_causIRL.py
@@ -0,0 +1,75 @@
+"""
+Alex, Xudong
+"""
+import numpy as np
+import torch
+from domainlab.algos.trainers.train_basic import TrainerBasic
+
+
+class TrainerCausIRL(TrainerBasic):
+    def my_cdist(self, x1, x2):
+        x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
+        x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
+        res = torch.addmm(x2_norm.transpose(-2, -1),
+                          x1,
+                          x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
+        return res.clamp_min_(1e-30)
+
+    def gaussian_kernel(self, x, y, gamma=[0.001, 0.01, 0.1, 1, 10, 100,
+                                           1000]):
+        D = self.my_cdist(x, y)
+        K = torch.zeros_like(D)
+
+        for g in gamma:
+            K.add_(torch.exp(D.mul(-g)))
+
+        return K
+
+    def mmd(self, x, y):
+        if self.kernel_type == "gaussian":
+            Kxx = self.gaussian_kernel(x, x).mean()
+            Kyy = self.gaussian_kernel(y, y).mean()
+            Kxy = self.gaussian_kernel(x, y).mean()
+            return Kxx + Kyy - 2 * Kxy
+        else:
+            mean_x = x.mean(0, keepdim=True)
+            mean_y = y.mean(0, keepdim=True)
+            cent_x = x - mean_x
+            cent_y = y - mean_y
+            cova_x = (cent_x.t() @ cent_x) / (len(x) - 1)
+            cova_y = (cent_y.t() @ cent_y) / (len(y) - 1)
+
+            mean_diff = (mean_x - mean_y).pow(2).mean()
+            cova_diff = (cova_x - cova_y).pow(2).mean()
+
+            return mean_diff + cova_diff
+
+
+    def tr_batch(self, tensor_x, tensor_y, tensor_d, others, ind_batch, epoch):
+        """
+        optimize neural network one step upon a mini-batch of data
+        """
+        self.kernel_type = "gaussian"
+        self.before_batch(epoch, ind_batch)
+        tensor_x, tensor_y, tensor_d = (
+            tensor_x.to(self.device),
+            tensor_y.to(self.device),
+            tensor_d.to(self.device),
+        )
+        self.optimizer.zero_grad()
+
+        features = self.get_model().extract_semantic_feat(tensor_x)
+
+        pos_batch_break = np.random.randint(0, tensor_x.shape[0])
+        first = features[:pos_batch_break]
+        second = features[pos_batch_break:]
+        if len(first) > 1 and len(second) > 1:
+            penalty = torch.nan_to_num(self.mmd(first, second))
+        else:
+            penalty = torch.tensor(0)
+        loss = self.cal_loss(tensor_x, tensor_y, tensor_d, others)
+        loss = loss + penalty
+        loss.backward()
+        self.optimizer.step()
+        self.after_batch(epoch, ind_batch)
+        self.counter_batch += 1
diff --git a/domainlab/algos/trainers/zoo_trainer.py b/domainlab/algos/trainers/zoo_trainer.py
index e4a8c7bd5..22445c76d 100644
--- a/domainlab/algos/trainers/zoo_trainer.py
+++ b/domainlab/algos/trainers/zoo_trainer.py
@@ -9,6 +9,7 @@
 from domainlab.algos.trainers.train_mldg import TrainerMLDG
 from domainlab.algos.trainers.train_fishr import TrainerFishr
 from domainlab.algos.trainers.train_irm import TrainerIRM
+from domainlab.algos.trainers.train_causIRL import TrainerCausIRL
 
 
 class TrainerChainNodeGetter(object):
@@ -54,6 +55,7 @@ def __call__(self, lst_candidates=None, default=None, lst_excludes=None):
         chain = TrainerFishr(chain)
         chain = TrainerIRM(chain)
         chain = TrainerHyperScheduler(chain)
+        chain = TrainerCausIRL(chain)
         node = chain.handle(self.request)
         head = node
         while self._list_str_trainer:
diff --git a/tests/test_causal_irl.py b/tests/test_causal_irl.py
new file mode 100644
index 000000000..0c1ea513d
--- /dev/null
+++ b/tests/test_causal_irl.py
@@ -0,0 +1,13 @@
+"""
+  end-end test
+"""
+from tests.utils_test import utils_test_algo
+
+
+def test_causal_irl():
+    """
+    causal irl
+    """
+    args = "--te_d 0 --tr_d 3 7 --bs=32 --debug --task=mnistcolor10 \
+           --model=erm --nname=conv_bn_pool_2 --trainer=causirl"
+    utils_test_algo(args)
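The penalty computed in `tr_batch` above is a multi-bandwidth Gaussian-kernel MMD between the features of two randomly split halves of the mini-batch. A minimal self-contained sketch of that computation in plain PyTorch, outside the DomainLab trainer API (helper names here are illustrative, not part of the patch):

```python
import torch

def pairwise_sq_dists(x1, x2):
    # squared Euclidean distances; the clamp mirrors my_cdist's clamp_min_(1e-30)
    return torch.cdist(x1, x2).pow(2).clamp_min(1e-30)

def mmd_gaussian(x, y, gammas=(0.001, 0.01, 0.1, 1, 10, 100, 1000)):
    # sum of Gaussian kernels exp(-g * d^2) over several bandwidths
    def kern(a, b):
        return sum(torch.exp(-g * pairwise_sq_dists(a, b)) for g in gammas).mean()
    return kern(x, x) + kern(y, y) - 2 * kern(x, y)

features = torch.randn(32, 64)         # stand-in for extract_semantic_feat output
pos = int(torch.randint(2, 30, (1,)))  # random split point, both halves > 1
penalty = mmd_gaussian(features[:pos], features[pos:])
print(float(penalty))
```

Because both halves are drawn from the same distribution here, the penalty stays near zero; during training it grows only when the two random slices of a batch carry different feature distributions, which is what the trainer penalizes.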
From d7365f46dcaaf42b6e9c6b35dbfc837297ba6c9e Mon Sep 17 00:00:00 2001
From: smilesun
Date: Fri, 26 Jul 2024 01:01:39 +0200
Subject: [PATCH 02/12] codacy

---
 domainlab/algos/trainers/train_causIRL.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/domainlab/algos/trainers/train_causIRL.py b/domainlab/algos/trainers/train_causIRL.py
index 139223c58..bb8fa8399 100644
--- a/domainlab/algos/trainers/train_causIRL.py
+++ b/domainlab/algos/trainers/train_causIRL.py
@@ -7,7 +7,13 @@
 
 
 class TrainerCausIRL(TrainerBasic):
+    """
+    causal matching
+    """
     def my_cdist(self, x1, x2):
+        """
+        distance for Gaussian
+        """
         x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
         x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
         res = torch.addmm(x2_norm.transpose(-2, -1),
@@ -17,15 +23,21 @@ class TrainerCausIRL(TrainerBasic):
 
     def gaussian_kernel(self, x, y, gamma=[0.001, 0.01, 0.1, 1, 10, 100,
                                            1000]):
-        D = self.my_cdist(x, y)
-        K = torch.zeros_like(D)
+        """
+        kernel for MMD
+        """
+        dist = self.my_cdist(x, y)
+        K = torch.zeros_like(dist)
 
         for g in gamma:
-            K.add_(torch.exp(D.mul(-g)))
+            K.add_(torch.exp(dist.mul(-g)))
 
         return K
 
     def mmd(self, x, y):
+        """
+        maximum mean discrepancy
+        """
         if self.kernel_type == "gaussian":
             Kxx = self.gaussian_kernel(x, x).mean()
             Kyy = self.gaussian_kernel(y, y).mean()
             Kxy = self.gaussian_kernel(x, y).mean()

From 2c1cdc939c732a629dec601f1f7a233dbc6ed47e Mon Sep 17 00:00:00 2001
From: smilesun
Date: Thu, 1 Aug 2024 16:14:30 +0200
Subject: [PATCH 03/12] coverage

---
 domainlab/algos/trainers/train_causIRL.py | 44 +++++++++--------------
 domainlab/algos/trainers/zoo_trainer.py   |  7 ++--
 tests/test_causal_irl.py                  |  2 +-
 3 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/domainlab/algos/trainers/train_causIRL.py b/domainlab/algos/trainers/train_causIRL.py
index bb8fa8399..085abcbfa 100644
--- a/domainlab/algos/trainers/train_causIRL.py
+++ b/domainlab/algos/trainers/train_causIRL.py
@@ -6,7 +6,7 @@
 from domainlab.algos.trainers.train_basic import TrainerBasic
 
 
-class TrainerCausIRL(TrainerBasic):
+class TrainerCausalIRL(TrainerBasic):
     """
     causal matching
     """
@@ -14,54 +14,44 @@ def my_cdist(self, x1, x2):
         """
         distance for Gaussian
         """
+        # along the last dimension
         x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
         x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
+        # x_2_norm is [batchsize, 1]
+        # matrix multiplication (2nd, 3rd) and addition to first argument
+        # X1[batchsize, dimfeat] * X2[dimfeat, batchsize)
+        # alpha: Scaling factor for the matrix product (default: 1)
+        # x2_norm.transpose(-2, -1) is row vector
+        # x_1_norm is column vector
         res = torch.addmm(x2_norm.transpose(-2, -1),
                           x1,
                           x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
         return res.clamp_min_(1e-30)
 
-    def gaussian_kernel(self, x, y, gamma=[0.001, 0.01, 0.1, 1, 10, 100,
-                                           1000]):
+    def gaussian_kernel(self, x, y):
         """
         kernel for MMD
         """
+        gamma=[0.001, 0.01, 0.1, 1, 10, 100, 1000]
         dist = self.my_cdist(x, y)
-        K = torch.zeros_like(dist)
-
+        tensor = torch.zeros_like(dist)
         for g in gamma:
-            K.add_(torch.exp(dist.mul(-g)))
-
-        return K
+            tensor.add_(torch.exp(dist.mul(-g)))
+        return tensor
 
     def mmd(self, x, y):
         """
         maximum mean discrepancy
         """
-        if self.kernel_type == "gaussian":
-            Kxx = self.gaussian_kernel(x, x).mean()
-            Kyy = self.gaussian_kernel(y, y).mean()
-            Kxy = self.gaussian_kernel(x, y).mean()
-            return Kxx + Kyy - 2 * Kxy
-        else:
-            mean_x = x.mean(0, keepdim=True)
-            mean_y = y.mean(0, keepdim=True)
-            cent_x = x - mean_x
-            cent_y = y - mean_y
-            cova_x = (cent_x.t() @ cent_x) / (len(x) - 1)
-            cova_y = (cent_y.t() @ cent_y) / (len(y) - 1)
-
-            mean_diff = (mean_x - mean_y).pow(2).mean()
-            cova_diff = (cova_x - cova_y).pow(2).mean()
-
-            return mean_diff + cova_diff
-
+        kxx = self.gaussian_kernel(x, x).mean()
+        kyy = self.gaussian_kernel(y, y).mean()
+        kxy = self.gaussian_kernel(x, y).mean()
+        return kxx + kyy - 2 * kxy
 
     def tr_batch(self, tensor_x, tensor_y, tensor_d, others, ind_batch, epoch):
         """
         optimize neural network one step upon a mini-batch of data
         """
-        self.kernel_type = "gaussian"
         self.before_batch(epoch, ind_batch)
         tensor_x, tensor_y, tensor_d = (
             tensor_x.to(self.device),
diff --git a/domainlab/algos/trainers/zoo_trainer.py b/domainlab/algos/trainers/zoo_trainer.py
index 22445c76d..d1eccb59b 100644
--- a/domainlab/algos/trainers/zoo_trainer.py
+++ b/domainlab/algos/trainers/zoo_trainer.py
@@ -4,12 +4,13 @@
 from domainlab.algos.trainers.train_basic import TrainerBasic
 from domainlab.algos.trainers.train_ema import TrainerMA
 from domainlab.algos.trainers.train_dial import TrainerDIAL
-from domainlab.algos.trainers.train_hyper_scheduler import TrainerHyperScheduler
+from domainlab.algos.trainers.train_hyper_scheduler \
+    import TrainerHyperScheduler
 from domainlab.algos.trainers.train_matchdg import TrainerMatchDG
 from domainlab.algos.trainers.train_mldg import TrainerMLDG
 from domainlab.algos.trainers.train_fishr import TrainerFishr
 from domainlab.algos.trainers.train_irm import TrainerIRM
-from domainlab.algos.trainers.train_causIRL import TrainerCausIRL
+from domainlab.algos.trainers.train_causIRL import TrainerCausalIRL
 
 
 class TrainerChainNodeGetter(object):
@@ -55,7 +56,7 @@ def __call__(self, lst_candidates=None, default=None, lst_excludes=None):
         chain = TrainerFishr(chain)
         chain = TrainerIRM(chain)
         chain = TrainerHyperScheduler(chain)
-        chain = TrainerCausIRL(chain)
+        chain = TrainerCausalIRL(chain)
         node = chain.handle(self.request)
         head = node
         while self._list_str_trainer:
diff --git a/tests/test_causal_irl.py b/tests/test_causal_irl.py
index 0c1ea513d..5292ef18c 100644
--- a/tests/test_causal_irl.py
+++ b/tests/test_causal_irl.py
@@ -9,5 +9,5 @@ def test_causal_irl():
     causal irl
     """
     args = "--te_d 0 --tr_d 3 7 --bs=32 --debug --task=mnistcolor10 \
-           --model=erm --nname=conv_bn_pool_2 --trainer=causirl"
+           --model=erm --nname=conv_bn_pool_2 --trainer=causalirl"
     utils_test_algo(args)
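The comments added to `my_cdist` in this patch describe the usual expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, fused into a single `torch.addmm` call. A quick standalone check of that identity against `torch.cdist` (plain PyTorch, illustration only):

```python
import torch

x1, x2 = torch.randn(5, 3), torch.randn(7, 3)
x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)  # column vector, shape [5, 1]
x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)  # shape [7, 1], transposed below into a row vector
# addmm computes x2_norm^T + alpha * (x1 @ x2^T) with alpha=-2; adding x1_norm
# afterwards yields the full matrix of squared pairwise distances
res = torch.addmm(x2_norm.transpose(-2, -1), x1,
                  x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
assert torch.allclose(res, torch.cdist(x1, x2).pow(2), atol=1e-4)
```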
From 6f6169ec9aa4be6fa680bcf8f4d383426bbad6c9 Mon Sep 17 00:00:00 2001
From: smilesun
Date: Tue, 6 Aug 2024 14:30:16 +0200
Subject: [PATCH 04/12] split examples into algo doc

---
 scripts/ci_run_examples.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scripts/ci_run_examples.sh b/scripts/ci_run_examples.sh
index 9f6b4e041..837d47433 100644
--- a/scripts/ci_run_examples.sh
+++ b/scripts/ci_run_examples.sh
@@ -3,6 +3,20 @@
 set -e  # exit upon first error
 # >> append content
 # > erase original content
+
+files=("docs/docDIAL.md" "docs/docFishr.md")
+
+for file in "${files[@]}"
+do
+echo "Processing $file"
+echo "#!/bin/bash -x -v" > sh_temp_algo.sh
+sed -n '/```shell/,/```/ p' $file | sed '/^```/ d' >> ./sh_temp_algo.sh
+bash -x -v -e sh_temp_also.sh
+# Add your commands to process each file here
+done
+
+
+
 # echo "#!/bin/bash -x -v" > sh_temp_example.sh
 sed -n '/```shell/,/```/ p' docs/doc_examples.md | sed '/^```/ d' >> ./sh_temp_example.sh
 split -l 5 sh_temp_example.sh sh_example_split
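The loop added above collects every fenced `shell` code block of a markdown file into one throwaway script and executes it; note that `sh_temp_also.sh` on the `bash` line is a typo for `sh_temp_algo.sh`, which the next patch corrects. A rough Python equivalent of the sed extraction step, for illustration only (not part of the repository):

```python
import re
import sys

def extract_shell_blocks(path):
    """Return the bodies of all ```shell fenced blocks in a markdown file."""
    text = open(path, encoding="utf-8").read()
    # non-greedy match between the opening ```shell and the closing ```
    return re.findall(r"```shell\n(.*?)```", text, flags=re.DOTALL)

if __name__ == "__main__":
    # roughly: sed -n '/```shell/,/```/ p' FILE | sed '/^```/ d'
    for block in extract_shell_blocks(sys.argv[1]):
        sys.stdout.write(block)
```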
From 1e2ff25da0c87779719c7a04e5a5fb4d2608e00b Mon Sep 17 00:00:00 2001
From: smilesun
Date: Thu, 22 Aug 2024 15:58:55 +0200
Subject: [PATCH 05/12] .

---
 docs/docDIAL.md            |  6 ++++++
 docs/docFishr.md           |  5 +++++
 docs/docIRM.md             |  4 ++++
 scripts/ci_run_examples.sh | 17 ++++++-----------
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/docs/docDIAL.md b/docs/docDIAL.md
index 8b8111de8..600f8b2cb 100644
--- a/docs/docDIAL.md
+++ b/docs/docDIAL.md
@@ -72,3 +72,9 @@ This procedure yields to the following availability of hyperparameter:
 - `--dial_epsilon`: pixel wise threshold to perturb images
 - `--gamma_reg`: ? ($\epsilon$ in the paper)
 - `--lr`: learning rate ($\alpha$ in the paper)
+
+# Examples
+
+```
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=dial --nname=conv_bn_pool_2
+```
diff --git a/docs/docFishr.md b/docs/docFishr.md
index 08580d9fe..e2ba4c1b9 100644
--- a/docs/docFishr.md
+++ b/docs/docFishr.md
@@ -72,6 +72,10 @@ For more details, see the reference below or the domainlab code.
 
 
 
+# Examples
+```
+python main_out.py --te_d=0 --task=mini_vlcs --model=erm --trainer=fishr --nname=alexnet --bs=2 --nocu
+```
 
 
 _Reference:_
 Rame, Alexandre, Corentin Dancette, and Matthieu Cord.
 "Fishr: Invariant gradient variances for out-of-distribution generalization."
 International Conference on Machine Learning. PMLR, 2022.
+
diff --git a/docs/docIRM.md b/docs/docIRM.md
index 955ff14a3..c8a78a346 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -26,4 +26,8 @@ where $\lambda$ is a hyperparameter that controls the trade-off between the empi
 In practice, one could simply divide one mini-batch into two subsets, let $i$ and $j$ to index these two subsets, multiply subset $i$ and subset $j$ forms an unbiased estimation of the L2 norm of gradient.
 In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \ell(w \circ \Phi(X^{(d, i)}), Y^{(d, i)})$ of dimension dim(Grad) with $\nabla_{w|w=1} \ell(w \circ \Phi(X^{(d, j)}), Y^{(d, j)})$ of dimension dim(Grad)
 For more details, see section 3.2 and Appendix D of : Arjovsky et al., “Invariant Risk Minimization.”
+# Examples
+```
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2
+```
 
diff --git a/scripts/ci_run_examples.sh b/scripts/ci_run_examples.sh
index 837d47433..0433bd79c 100644
--- a/scripts/ci_run_examples.sh
+++ b/scripts/ci_run_examples.sh
@@ -4,28 +4,23 @@ set -e  # exit upon first error
 # >> append content
 # > erase original content
 
-files=("docs/docDIAL.md" "docs/docFishr.md")
+files=("docs/docDIAL.md" "docs/docIRM.md")
 
 for file in "${files[@]}"
 do
 echo "Processing $file"
+# no need to remove sh_temp_algo.sh since the following line overwrite it each time
 echo "#!/bin/bash -x -v" > sh_temp_algo.sh
+# remove code marker ```
+# we use >> here to append to keep the header #!/bin/bash -x -v
 sed -n '/```shell/,/```/ p' $file | sed '/^```/ d' >> ./sh_temp_algo.sh
-bash -x -v -e sh_temp_also.sh
+bash -x -v -e sh_temp_algo.sh
 # Add your commands to process each file here
+echo "finished with $file"
 done
 
 
 
-# echo "#!/bin/bash -x -v" > sh_temp_example.sh
-sed -n '/```shell/,/```/ p' docs/doc_examples.md | sed '/^```/ d' >> ./sh_temp_example.sh
-split -l 5 sh_temp_example.sh sh_example_split
-for file in sh_example_split*;
-do (echo "#!/bin/bash -x -v" > "$file"_exe && cat "$file" >> "$file"_exe && bash -x -v "$file"_exe && rm -r zoutput);
-done
-# bash -x -v -e sh_temp_example.sh
-echo "general examples done"
-
 echo "#!/bin/bash -x -v" > sh_temp_mnist.sh
 sed -n '/```shell/,/```/ p' docs/doc_MNIST_classification.md | sed '/^```/ d' >> ./sh_temp_mnist.sh
 bash -x -v -e sh_temp_mnist.sh
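The docIRM.md hunk above states the mini-batch estimator in prose. Written out in the notation that file already uses (my transcription of the same statement, not text from the repository), the squared gradient norm for domain $d$ is estimated by the inner product of the two subset gradients:

```latex
\left\| \nabla_{w|w=1}\,\ell\left(w \circ \Phi(X^{(d)}), Y^{(d)}\right) \right\|^2
\approx
\left\langle
\nabla_{w|w=1}\,\ell\left(w \circ \Phi(X^{(d,i)}), Y^{(d,i)}\right),\;
\nabla_{w|w=1}\,\ell\left(w \circ \Phi(X^{(d,j)}), Y^{(d,j)}\right)
\right\rangle
```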
From ea97541633eafa032396d8afc3ec5ec95894895b Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Mon, 2 Sep 2024 16:56:44 +0200
Subject: [PATCH 06/12] Update ci_run_examples.sh

---
 scripts/ci_run_examples.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci_run_examples.sh b/scripts/ci_run_examples.sh
index 0433bd79c..850fad0f4 100644
--- a/scripts/ci_run_examples.sh
+++ b/scripts/ci_run_examples.sh
@@ -4,7 +4,7 @@ set -e  # exit upon first error
 # >> append content
 # > erase original content
 
-files=("docs/docDIAL.md" "docs/docIRM.md")
+files=("docs/docDIAL.md" "docs/docIRM.md" "docs/doc_examples.md")
 
 for file in "${files[@]}"
 do

From 77330913ff70a5bdcbb3dd60844e4cdf6d853482 Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Thu, 5 Sep 2024 15:23:44 +0200
Subject: [PATCH 07/12] Update docIRM.md to check if CI fails

---
 docs/docIRM.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docIRM.md b/docs/docIRM.md
index c8a78a346..5dbab424a 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -28,6 +28,6 @@ In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \
 
 # Examples
 ```
-python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2 --agas
 ```
 
From bb1500b68d3c6240b970a7fe12bc3a353e0e53ee Mon Sep 17 00:00:00 2001
From: smilesun
Date: Thu, 5 Sep 2024 15:42:00 +0200
Subject: [PATCH 08/12] split hduva

---
 docs/docHDUVA.md           | 12 ++++++++++++
 docs/docIRM.md             |  2 +-
 docs/doc_examples.md       | 10 ----------
 scripts/ci_run_examples.sh |  2 +-
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/docs/docHDUVA.md b/docs/docHDUVA.md
index 4abcb71e9..47af4d344 100644
--- a/docs/docHDUVA.md
+++ b/docs/docHDUVA.md
@@ -52,6 +52,18 @@ Alternatively, one could use an existing neural network in DomainLab using `nnam
 ## Hyperparameter for warmup
 Finally, the number of epochs for hyper-parameter warm-up can be specified via the argument `warmup`.
 
+## Examples
+### use hduva on color mnist, train on 2 domains
+```shell
+python main_out.py --tr_d 0 1 2 --te_d 3 --bs=2 --task=mnistcolor10 --model=hduva --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2
+```
+
+### hduva is domain-unsupervised, so it works also with a single domain
+```shell
+python main_out.py --tr_d 0 --te_d 3 4 --bs=2 --task=mnistcolor10 --model=hduva --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2
+```
+
+
 Please cite our paper if you find it useful!
 
 ```text
diff --git a/docs/docIRM.md b/docs/docIRM.md
index 5dbab424a..c8a78a346 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -28,6 +28,6 @@ In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \
 
 # Examples
 ```
-python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2 --agas
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2
 ```
 
diff --git a/docs/doc_examples.md b/docs/doc_examples.md
index 21d0b2eb2..0dddc5f5b 100755
--- a/docs/doc_examples.md
+++ b/docs/doc_examples.md
@@ -26,16 +26,6 @@ python main_out.py --te_d 0 1 --tr_d 3 5 --task=mnistcolor10 --debug --bs=2 --mo
 python main_out.py --te_d=0 --task=mnistcolor10 --keep_model --model=diva --nname=conv_bn_pool_2 --nname_dom=conv_bn_pool_2 --gamma_y=10e5 --gamma_d=1e5 --gen
 ```
 
-### use hduva on color mnist, train on 2 domains
-```shell
-python main_out.py --tr_d 0 1 2 --te_d 3 --bs=2 --task=mnistcolor10 --model=hduva --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2
-```
-
-### hduva is domain-unsupervised, so it works also with a single domain
-```shell
-python main_out.py --tr_d 0 --te_d 3 4 --bs=2 --task=mnistcolor10 --model=hduva --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2
-```
-
 
 ## Larger images:
 
diff --git a/scripts/ci_run_examples.sh b/scripts/ci_run_examples.sh
index 850fad0f4..6cf1db18f 100644
--- a/scripts/ci_run_examples.sh
+++ b/scripts/ci_run_examples.sh
@@ -4,7 +4,7 @@ set -e  # exit upon first error
 # >> append content
 # > erase original content
 
-files=("docs/docDIAL.md" "docs/docIRM.md" "docs/doc_examples.md")
+files=("docs/docDIAL.md" "docs/docIRM.md" "docs/doc_examples.md" "docs/docHDUVA.md")
 
 for file in "${files[@]}"
 do

From b0983bf7ce480a2edce7f4887c46cce931a9d208 Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Tue, 10 Sep 2024 17:49:02 +0200
Subject: [PATCH 09/12] Update ci_run_examples.sh, cat command from each
 markdown file

---
 scripts/ci_run_examples.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/ci_run_examples.sh b/scripts/ci_run_examples.sh
index 6cf1db18f..98e0046a6 100644
--- a/scripts/ci_run_examples.sh
+++ b/scripts/ci_run_examples.sh
@@ -14,6 +14,7 @@ echo "#!/bin/bash -x -v" > sh_temp_algo.sh
 # remove code marker ```
 # we use >> here to append to keep the header #!/bin/bash -x -v
 sed -n '/```shell/,/```/ p' $file | sed '/^```/ d' >> ./sh_temp_algo.sh
+cat sh_temp_algo.sh
 bash -x -v -e sh_temp_algo.sh
 # Add your commands to process each file here
 echo "finished with $file"
From 05e41fb91876e87045ba7a0917dc37dc6777c821 Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Tue, 10 Sep 2024 17:50:48 +0200
Subject: [PATCH 10/12] Update docIRM.md

---
 docs/docIRM.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docIRM.md b/docs/docIRM.md
index c8a78a346..5dbab424a 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -28,6 +28,6 @@ In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \
 
 # Examples
 ```
-python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2 --agas
 ```
 

From 0ff6ae7d90254856fd18b2ba13b6b67e14618732 Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Wed, 11 Sep 2024 16:26:11 +0200
Subject: [PATCH 11/12] Update docIRM.md

---
 docs/docIRM.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docIRM.md b/docs/docIRM.md
index 5dbab424a..621ef6ab8 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -27,7 +27,7 @@ In practice, one could simply divide one mini-batch into two subsets, let $i$ an
 In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \ell(w \circ \Phi(X^{(d, i)}), Y^{(d, i)})$ of dimension dim(Grad) with $\nabla_{w|w=1} \ell(w \circ \Phi(X^{(d, j)}), Y^{(d, j)})$ of dimension dim(Grad)
 For more details, see section 3.2 and Appendix D of : Arjovsky et al., “Invariant Risk Minimization.”
 # Examples
-```
+```shell
 python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2 --agas
 ```
 

From 6b01a3aadb0a2d6b69852911366ebbdbdc30461a Mon Sep 17 00:00:00 2001
From: Xudong Sun
Date: Wed, 11 Sep 2024 16:39:19 +0200
Subject: [PATCH 12/12] Update docIRM.md, remove agas to see if it works

---
 docs/docIRM.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docIRM.md b/docs/docIRM.md
index 621ef6ab8..0b8114de8 100644
--- a/docs/docIRM.md
+++ b/docs/docIRM.md
@@ -28,6 +28,6 @@ In detail: the squared gradient norm via inner product between $\nabla_{w|w=1} \
 
 # Examples
 ```shell
-python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2 --agas
+python main_out.py --te_d=0 --task=mnistcolor10 --model=erm --trainer=irm --nname=conv_bn_pool_2
 ```
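Taken together, the series registers the new trainer under `--trainer=causalirl` and guards it with an end-to-end test. A minimal way to exercise the merged result, mirroring tests/test_causal_irl.py as of patch 03 (this assumes a DomainLab checkout with the patches above applied):

```python
from tests.utils_test import utils_test_algo

# end-to-end smoke test: train on color-MNIST domains 3 and 7, test on domain 0
utils_test_algo(
    "--te_d 0 --tr_d 3 7 --bs=32 --debug --task=mnistcolor10 "
    "--model=erm --nname=conv_bn_pool_2 --trainer=causalirl"
)
```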