Skip to content

Commit

Permalink
test_reuse_params_map_custom_transitive_dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
Zettelkasten committed Feb 22, 2022
1 parent b71bea9 commit 25f1773
Showing 1 changed file with 188 additions and 0 deletions.
188 changes: 188 additions & 0 deletions tests/test_TFNetworkLayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,194 @@ def test_name_scope_share_params():
assert_equal(set(network.get_trainable_params()), {l1.params["W"], l1.params["b"]})


def test_reuse_params_map_custom_transitive_dependency():
  # Exercises a transitive `reuse_params` chain:
  #   - target_embed_raw shares "W" from base:source_embed_raw,
  #   - output_prob shares "W" from target_embed_raw (via a custom getter which transposes it).
  # There are no explicit assertions: the test passes when the network can be constructed,
  # once with the training flags and once with the search flags, without raising.
  config = Config()
  n_in, n_out = 3, 3
  # Every `forward_weights_init` below uses this very same initializer string, so spell it out once.
  weights_init = "variance_scaling_initializer(mode='fan_in', distribution='uniform', scale=1.0)"
  net_dict = {
    'dec_01_att_key': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_key0']},
    'dec_01_att_key0': {
      'activation': None,
      'class': 'linear',
      'forward_weights_init': weights_init,
      'from': ['encoder'],
      'n_out': 512,
      'with_bias': False},
    'dec_01_att_value': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_value0']},
    'dec_01_att_value0': {
      'activation': None,
      'class': 'linear',
      'forward_weights_init': weights_init,
      'from': ['encoder'],
      'n_out': 512,
      'with_bias': False},
    'decision': {
      'class': 'decide', 'from': ['output'], 'loss': 'edit_distance', 'loss_opts': {}, 'target': 'classes'},
    'enc_01': {'class': 'copy', 'from': ['enc_01_ff_out']},
    'enc_01_ff_conv1': {
      'activation': 'relu',
      'class': 'linear',
      'forward_weights_init': weights_init,
      'from': ['enc_01_ff_laynorm'],
      'n_out': 2048,
      'with_bias': True},
    'enc_01_ff_conv2': {
      'activation': None,
      'class': 'linear',
      'dropout': 0.3,
      'forward_weights_init': weights_init,
      'from': ['enc_01_ff_conv1'],
      'n_out': 512,
      'with_bias': True},
    'enc_01_ff_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['enc_01_ff_conv2']},
    'enc_01_ff_laynorm': {'class': 'layer_norm', 'from': ['enc_01_self_att_out']},
    'enc_01_ff_out': {
      'class': 'combine', 'from': ['enc_01_self_att_out', 'enc_01_ff_drop'], 'kind': 'add',
      'n_out': 512},
    'enc_01_self_att_att': {
      'attention_dropout': 0.3,
      'attention_left_only': False,
      'class': 'self_attention',
      'forward_weights_init': weights_init,
      'from': ['enc_01_self_att_laynorm'],
      'n_out': 512,
      'num_heads': 8,
      'total_key_dim': 512},
    'enc_01_self_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['enc_01_self_att_lin']},
    'enc_01_self_att_laynorm': {'class': 'layer_norm', 'from': ['source_embed']},
    'enc_01_self_att_lin': {
      'activation': None,
      'class': 'linear',
      'forward_weights_init': weights_init,
      'from': ['enc_01_self_att_att'],
      'n_out': 512,
      'with_bias': False},
    'enc_01_self_att_out': {
      'class': 'combine', 'from': ['source_embed', 'enc_01_self_att_drop'], 'kind': 'add',
      'n_out': 512},
    'encoder': {'class': 'layer_norm', 'from': ['enc_01']},
    'output': {
      'class': 'rec',
      'from': [],
      'max_seq_len': "max_len_from('base:encoder') * 3",
      'target': 'classes',
      'unit': {
        'dec_01': {'class': 'copy', 'from': ['dec_01_ff_out']},
        'dec_01_att0': {
          'base': 'base:dec_01_att_value', 'class': 'generic_attention',
          'weights': 'dec_01_att_weights_drop'},
        'dec_01_att_att': {'axes': ['dim:8', 'dim:64'], 'class': 'merge_dims', 'from': ['dec_01_att0']},
        'dec_01_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_att_lin']},
        'dec_01_att_energy': {
          'class': 'dot',
          'from': ['base:dec_01_att_key', 'dec_01_att_query'],
          'red1': 'F', 'red2': 'F', 'var1': 'T', 'var2': 'T?'},
        'dec_01_att_laynorm': {'class': 'layer_norm', 'from': ['dec_01_self_att_out']},
        'dec_01_att_lin': {
          'activation': None,
          'class': 'linear',
          'forward_weights_init': weights_init,
          'from': ['dec_01_att_att'],
          'n_out': 512,
          'with_bias': False},
        'dec_01_att_out': {
          'class': 'combine', 'from': ['dec_01_self_att_out', 'dec_01_att_drop'], 'kind': 'add',
          'n_out': 512},
        'dec_01_att_query': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_query0']},
        'dec_01_att_query0': {
          'activation': None,
          'class': 'linear',
          'forward_weights_init': weights_init,
          'from': ['dec_01_att_laynorm'],
          'n_out': 512,
          'with_bias': False},
        'dec_01_att_weights': {
          'axis': 'stag:extern_data:data',
          'class': 'softmax_over_spatial',
          'energy_factor': 0.125,
          'from': ['dec_01_att_energy']},
        'dec_01_att_weights_drop': {
          'class': 'dropout',
          'dropout': 0.3,
          'dropout_noise_shape': {'*': None},
          'from': ['dec_01_att_weights']},
        'dec_01_ff_conv1': {
          'activation': 'relu',
          'class': 'linear',
          'forward_weights_init': weights_init,
          'from': ['dec_01_ff_laynorm'],
          'n_out': 2048,
          'with_bias': True},
        'dec_01_ff_conv2': {
          'activation': None,
          'class': 'linear',
          'dropout': 0.3,
          'forward_weights_init': weights_init,
          'from': ['dec_01_ff_conv1'],
          'n_out': 512,
          'with_bias': True},
        'dec_01_ff_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_ff_conv2']},
        'dec_01_ff_laynorm': {'class': 'layer_norm', 'from': ['dec_01_att_out']},
        'dec_01_ff_out': {
          'class': 'combine', 'from': ['dec_01_att_out', 'dec_01_ff_drop'], 'kind': 'add',
          'n_out': 512},
        'dec_01_self_att_att': {
          'attention_dropout': 0.3,
          'attention_left_only': True,
          'class': 'self_attention',
          'forward_weights_init': weights_init,
          'from': ['dec_01_self_att_laynorm'],
          'n_out': 512,
          'num_heads': 8,
          'total_key_dim': 512},
        'dec_01_self_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_self_att_lin']},
        'dec_01_self_att_laynorm': {'class': 'layer_norm', 'from': ['target_embed']},
        'dec_01_self_att_lin': {
          'activation': None,
          'class': 'linear',
          'forward_weights_init': weights_init,
          'from': ['dec_01_self_att_att'],
          'n_out': 512,
          'with_bias': False},
        'dec_01_self_att_out': {
          'class': 'combine',
          'from': ['target_embed', 'dec_01_self_att_drop'],
          'kind': 'add',
          'n_out': 512},
        'decoder': {'class': 'layer_norm', 'from': ['dec_01']},
        'end': {'class': 'compare', 'from': ['output'], 'value': 0},
        'output': {
          'beam_size': 12, 'class': 'choice', 'from': ['output_prob'], 'initial_output': 0,
          'target': 'classes'},
        'output_prob': {
          'class': 'softmax',
          'dropout': 0.0,
          'forward_weights_init': weights_init,
          'from': ['decoder'],
          'loss': 'ce',
          'loss_opts': {'label_smoothing': 0.2, 'use_normalized_loss': True},
          # Second link of the chain: "W" is the transposed "W" of target_embed_raw,
          # which itself reuses "W" from base:source_embed_raw.
          'reuse_params': {
            'map': {
              'W': {
                'custom': (lambda reuse_layer, **kwargs: tf.transpose(reuse_layer.params["W"])),
                'reuse_layer': 'target_embed_raw'},
              'b': None}},
          'target': 'classes',
          'with_bias': True},
        'target_embed': {'class': 'dropout', 'dropout': 0.0, 'from': ['target_embed_with_pos']},
        'target_embed_raw': {
          'activation': None,
          'class': 'linear',
          'forward_weights_init': weights_init,
          'from': ['prev:output'],
          'n_out': 512,
          # First link of the chain: "W" comes from a layer outside of the rec unit.
          'reuse_params': {'map': {'W': {'reuse_layer': 'base:source_embed_raw'}, 'b': None}},
          'with_bias': False},
        'target_embed_weighted': {'class': 'eval', 'eval': 'source(0) * 22.627417', 'from': ['target_embed_raw']},
        'target_embed_with_pos': {
          'add_to_input': True, 'class': 'positional_encoding',
          'from': ['target_embed_weighted']}}},
    'source_embed': {'class': 'dropout', 'dropout': 0.0, 'from': ['source_embed_with_pos']},
    'source_embed_raw': {
      'activation': None,
      'class': 'linear',
      'forward_weights_init': weights_init,
      'n_out': 512,
      'with_bias': False, 'from': 'data:data'},
    'source_embed_weighted': {'class': 'eval', 'eval': 'source(0) * 22.627417', 'from': ['source_embed_raw']},
    'source_embed_with_pos': {
      'add_to_input': True, 'class': 'positional_encoding', 'from': ['source_embed_weighted']}}
  config.update({
    "num_outputs": n_out,
    "num_inputs": n_in,
    "network": net_dict})
  # Each construction gets its own fresh scope.
  with make_scope():
    print("Construct for training")
    train_net = TFNetwork(config=config, train_flag=True)
    train_net.construct_from_dict(config.typed_dict["network"])
  with make_scope():
    print("Construct for search")
    search_net = TFNetwork(config=config, train_flag=False, eval_flag=True, search_flag=True)
    search_net.construct_from_dict(config.typed_dict["network"])


def test_SliceLayer_output_placeholder():
with make_scope() as session:
net = TFNetwork(extern_data=ExternData())
Expand Down

0 comments on commit 25f1773

Please sign in to comment.