adapter-hub · calpt · Jun 30, 2020
diff --git a/src/transformers/adapter_bert.py b/src/transformers/adapter_bert.py
@@ -38,7 +38,7 @@ def add_adapter(self, adapter_name: str, adapter_type: AdapterType):
         if adapter_config and adapter_config["mh_adapter"]:
             adapter = Adapter(
                 input_size=self.config.hidden_size,
-                down_sample=self.config.hidden_size // adapter_config["reduction_factor"],
+                down_sample=adapter_config["bottleneck_size"],
                 add_layer_norm_before=adapter_config["ln_before"],
                 add_layer_norm_after=adapter_config["ln_after"],
                 non_linearity=adapter_config["non_linearity"],
@@ -220,7 +220,7 @@ def add_adapter(self, adapter_name: str, adapter_type: AdapterType):
         if adapter_config and adapter_config["output_adapter"]:
             adapter = Adapter(
                 input_size=self.config.hidden_size,
-                down_sample=self.config.hidden_size // adapter_config["reduction_factor"],
+                down_sample=adapter_config["bottleneck_size"],
                 add_layer_norm_before=adapter_config["ln_before"],
                 add_layer_norm_after=adapter_config["ln_after"],
                 non_linearity=adapter_config["non_linearity"],
@@ -480,13 +480,13 @@ def add_invertible_lang_adapter(self, language):
             inv_adap = NICECouplingBlock(
                 [[self.config.hidden_size]],
                 non_linearity=inv_adap_config["non_linearity"],
-                reduction_factor=inv_adap_config["reduction_factor"],
+                bottleneck_size=inv_adap_config["bottleneck_size"],
             )
         elif inv_adap_config["block_type"] == "glow":
             inv_adap = GLOWCouplingBlock(
                 [[self.config.hidden_size]],
                 non_linearity=inv_adap_config["non_linearity"],
-                reduction_factor=inv_adap_config["reduction_fector"],
+                bottleneck_size=inv_adap_config["bottleneck_size"],
             )
         else:
             raise ValueError(f"Invalid invertible adapter type '{inv_adap_config['block_type']}'.")

diff --git a/src/transformers/adapter_config.py b/src/transformers/adapter_config.py
@@ -16,7 +16,7 @@
 class InvertibleAdapterConfig(Mapping):
     block_type: str
     non_linearity: str
-    reduction_factor: int
+    bottleneck_size: int
 
     def __getitem__(self, key):
         return self.__dict__[key]
@@ -41,7 +41,7 @@ class AdapterConfig(Mapping):
     mh_adapter: bool
     output_adapter: bool
     non_linearity: str
-    reduction_factor: int
+    bottleneck_size: int
     invertible_adapter: Optional[InvertibleAdapterConfig] = None
     leave_out: List[int] = field(default_factory=list)
 
@@ -83,9 +83,9 @@ class PfeifferConfig(AdapterConfig):
     mh_adapter: bool = False
     output_adapter: bool = True
     non_linearity: str = "relu"
-    reduction_factor: int = 16
+    bottleneck_size: int = 48
     invertible_adapter: Optional[dict] = InvertibleAdapterConfig(
-        block_type="nice", non_linearity="relu", reduction_factor=2
+        block_type="nice", non_linearity="relu", bottleneck_size=384
     )
 
 
@@ -105,7 +105,7 @@ class HoulsbyConfig(AdapterConfig):
     mh_adapter: bool = True
     output_adapter: bool = True
     non_linearity: str = "swish"
-    reduction_factor: int = 16
+    bottleneck_size: int = 48
 
 
 ADAPTER_CONFIG_MAP = {"pfeiffer": PfeifferConfig(), "houlsby": HoulsbyConfig()}

diff --git a/src/transformers/adapter_modeling.py b/src/transformers/adapter_modeling.py
@@ -429,12 +429,12 @@ def forward(self, query, key, value):
         return result
 
 
-def get_subnet_constructor(non_linearity, reduction_factor):
+def get_subnet_constructor(non_linearity, bottleneck_size):
     def subnet(dims_in, dims_out):
         return nn.Sequential(
-            nn.Linear(dims_in, dims_in // reduction_factor),
+            nn.Linear(dims_in, bottleneck_size),
             Activation_Function_Class(non_linearity),
-            nn.Linear(dims_in // reduction_factor, dims_out),
+            nn.Linear(bottleneck_size, dims_out),
         )
 
     return subnet
@@ -443,7 +443,7 @@ def subnet(dims_in, dims_out):
 class NICECouplingBlock(nn.Module):
     """Coupling Block following the NICE design."""
 
-    def __init__(self, dims_in, dims_c=[], non_linearity="relu", reduction_factor=2):
+    def __init__(self, dims_in, dims_c=[], non_linearity="relu", bottleneck_size=384):
         super().__init__()
 
         channels = dims_in[0][0]
@@ -456,7 +456,7 @@ def __init__(self, dims_in, dims_c=[], non_linearity="relu", reduction_factor=2)
         self.conditional = len(dims_c) > 0
         condition_length = sum([dims_c[i][0] for i in range(len(dims_c))])
 
-        subnet_constructor = get_subnet_constructor(non_linearity, reduction_factor)
+        subnet_constructor = get_subnet_constructor(non_linearity, bottleneck_size)
         self.F = subnet_constructor(self.split_len2 + condition_length, self.split_len1)
         self.G = subnet_constructor(self.split_len1 + condition_length, self.split_len2)
 
@@ -493,7 +493,7 @@ class GLOWCouplingBlock(nn.Module):
     clamp:              Soft clamping for the multiplicative component. The amplification or attenuation
                         of each input dimension can be at most ±exp(clamp)."""
 
-    def __init__(self, dims_in, dims_c=[], non_linearity="relu", reduction_factor=2, clamp=5.0):
+    def __init__(self, dims_in, dims_c=[], non_linearity="relu", bottleneck_size=384, clamp=5.0):
         super().__init__()
 
         channels = dims_in[0][0]
@@ -511,7 +511,7 @@ def __init__(self, dims_in, dims_c=[], non_linearity="relu", reduction_factor=2,
         self.conditional = len(dims_c) > 0
         condition_length = sum([dims_c[i][0] for i in range(len(dims_c))])
 
-        subnet_constructor = get_subnet_constructor(non_linearity, reduction_factor)
+        subnet_constructor = get_subnet_constructor(non_linearity, bottleneck_size)
         self.s1 = subnet_constructor(self.split_len1 + condition_length, self.split_len2 * 2)
         self.s2 = subnet_constructor(self.split_len2 + condition_length, self.split_len1 * 2)