Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test case for chained reuse_params #968

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions tests/test_TFNetworkLayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,194 @@ def test_name_scope_share_params():
assert_equal(set(network.get_trainable_params()), {l1.params["W"], l1.params["b"]})


def test_reuse_params_map_custom_transitive_dependency():
  """
  Tests a chained (transitive) ``reuse_params`` setup in a Transformer-like net:

  * ``target_embed_raw`` (inside the rec unit) shares its weight matrix ``W``
    from ``base:source_embed_raw`` via ``reuse_params``.
  * ``output_prob`` in turn shares from ``target_embed_raw`` via a ``custom``
    function (transposed embedding matrix, i.e. tied input/output embeddings),
    so parameter reuse must resolve through two levels of indirection.

  The network must construct cleanly both in training and in search mode.
  """
  config = Config()
  n_in, n_out = 3, 3
  # Weight init used by all linear-like layers below.
  fw_init = "variance_scaling_initializer(mode='fan_in', distribution='uniform', scale=1.0)"
  net_dict = {
    'dec_01_att_key': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_key0']},
    'dec_01_att_key0': {
      'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
      'from': ['encoder'], 'n_out': 512, 'with_bias': False},
    'dec_01_att_value': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_value0']},
    'dec_01_att_value0': {
      'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
      'from': ['encoder'], 'n_out': 512, 'with_bias': False},
    'decision': {'class': 'decide', 'from': ['output'], 'loss': 'edit_distance', 'loss_opts': {}, 'target': 'classes'},
    'enc_01': {'class': 'copy', 'from': ['enc_01_ff_out']},
    'enc_01_ff_conv1': {
      'activation': 'relu', 'class': 'linear', 'forward_weights_init': fw_init,
      'from': ['enc_01_ff_laynorm'], 'n_out': 2048, 'with_bias': True},
    'enc_01_ff_conv2': {
      'activation': None, 'class': 'linear', 'dropout': 0.3, 'forward_weights_init': fw_init,
      'from': ['enc_01_ff_conv1'], 'n_out': 512, 'with_bias': True},
    'enc_01_ff_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['enc_01_ff_conv2']},
    'enc_01_ff_laynorm': {'class': 'layer_norm', 'from': ['enc_01_self_att_out']},
    'enc_01_ff_out': {
      'class': 'combine', 'from': ['enc_01_self_att_out', 'enc_01_ff_drop'], 'kind': 'add', 'n_out': 512},
    'enc_01_self_att_att': {
      'attention_dropout': 0.3, 'attention_left_only': False, 'class': 'self_attention',
      'forward_weights_init': fw_init, 'from': ['enc_01_self_att_laynorm'],
      'n_out': 512, 'num_heads': 8, 'total_key_dim': 512},
    'enc_01_self_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['enc_01_self_att_lin']},
    'enc_01_self_att_laynorm': {'class': 'layer_norm', 'from': ['source_embed']},
    'enc_01_self_att_lin': {
      'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
      'from': ['enc_01_self_att_att'], 'n_out': 512, 'with_bias': False},
    'enc_01_self_att_out': {
      'class': 'combine', 'from': ['source_embed', 'enc_01_self_att_drop'], 'kind': 'add', 'n_out': 512},
    'encoder': {'class': 'layer_norm', 'from': ['enc_01']},
    'output': {
      'class': 'rec', 'from': [], 'max_seq_len': "max_len_from('base:encoder') * 3", 'target': 'classes',
      'unit': {
        'dec_01': {'class': 'copy', 'from': ['dec_01_ff_out']},
        'dec_01_att0': {
          'base': 'base:dec_01_att_value', 'class': 'generic_attention', 'weights': 'dec_01_att_weights_drop'},
        'dec_01_att_att': {'axes': ['dim:8', 'dim:64'], 'class': 'merge_dims', 'from': ['dec_01_att0']},
        'dec_01_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_att_lin']},
        'dec_01_att_energy': {
          'class': 'dot', 'from': ['base:dec_01_att_key', 'dec_01_att_query'],
          'red1': 'F', 'red2': 'F', 'var1': 'T', 'var2': 'T?'},
        'dec_01_att_laynorm': {'class': 'layer_norm', 'from': ['dec_01_self_att_out']},
        'dec_01_att_lin': {
          'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
          'from': ['dec_01_att_att'], 'n_out': 512, 'with_bias': False},
        'dec_01_att_out': {
          'class': 'combine', 'from': ['dec_01_self_att_out', 'dec_01_att_drop'], 'kind': 'add', 'n_out': 512},
        'dec_01_att_query': {'axis': 'F', 'class': 'split_dims', 'dims': (8, 64), 'from': ['dec_01_att_query0']},
        'dec_01_att_query0': {
          'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
          'from': ['dec_01_att_laynorm'], 'n_out': 512, 'with_bias': False},
        'dec_01_att_weights': {
          'axis': 'stag:extern_data:data', 'class': 'softmax_over_spatial',
          'energy_factor': 0.125, 'from': ['dec_01_att_energy']},
        'dec_01_att_weights_drop': {
          'class': 'dropout', 'dropout': 0.3, 'dropout_noise_shape': {'*': None},
          'from': ['dec_01_att_weights']},
        'dec_01_ff_conv1': {
          'activation': 'relu', 'class': 'linear', 'forward_weights_init': fw_init,
          'from': ['dec_01_ff_laynorm'], 'n_out': 2048, 'with_bias': True},
        'dec_01_ff_conv2': {
          'activation': None, 'class': 'linear', 'dropout': 0.3, 'forward_weights_init': fw_init,
          'from': ['dec_01_ff_conv1'], 'n_out': 512, 'with_bias': True},
        'dec_01_ff_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_ff_conv2']},
        'dec_01_ff_laynorm': {'class': 'layer_norm', 'from': ['dec_01_att_out']},
        'dec_01_ff_out': {
          'class': 'combine', 'from': ['dec_01_att_out', 'dec_01_ff_drop'], 'kind': 'add', 'n_out': 512},
        'dec_01_self_att_att': {
          'attention_dropout': 0.3, 'attention_left_only': True, 'class': 'self_attention',
          'forward_weights_init': fw_init, 'from': ['dec_01_self_att_laynorm'],
          'n_out': 512, 'num_heads': 8, 'total_key_dim': 512},
        'dec_01_self_att_drop': {'class': 'dropout', 'dropout': 0.3, 'from': ['dec_01_self_att_lin']},
        'dec_01_self_att_laynorm': {'class': 'layer_norm', 'from': ['target_embed']},
        'dec_01_self_att_lin': {
          'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
          'from': ['dec_01_self_att_att'], 'n_out': 512, 'with_bias': False},
        'dec_01_self_att_out': {
          'class': 'combine', 'from': ['target_embed', 'dec_01_self_att_drop'], 'kind': 'add', 'n_out': 512},
        'decoder': {'class': 'layer_norm', 'from': ['dec_01']},
        'end': {'class': 'compare', 'from': ['output'], 'value': 0},
        'output': {
          'beam_size': 12, 'class': 'choice', 'from': ['output_prob'], 'initial_output': 0, 'target': 'classes'},
        # Second link of the reuse chain: W is the transposed embedding matrix
        # of target_embed_raw (which itself reuses base:source_embed_raw).
        'output_prob': {
          'class': 'softmax', 'dropout': 0.0, 'forward_weights_init': fw_init,
          'from': ['decoder'], 'loss': 'ce',
          'loss_opts': {'label_smoothing': 0.2, 'use_normalized_loss': True},
          'reuse_params': {
            'map': {
              'W': {
                'custom': (lambda reuse_layer, **kwargs: tf.transpose(reuse_layer.params["W"])),
                'reuse_layer': 'target_embed_raw'},
              'b': None}},
          'target': 'classes', 'with_bias': True},
        'target_embed': {'class': 'dropout', 'dropout': 0.0, 'from': ['target_embed_with_pos']},
        # First link of the reuse chain: shares W with the encoder embedding.
        'target_embed_raw': {
          'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
          'from': ['prev:output'], 'n_out': 512,
          'reuse_params': {'map': {'W': {'reuse_layer': 'base:source_embed_raw'}, 'b': None}},
          'with_bias': False},
        'target_embed_weighted': {'class': 'eval', 'eval': 'source(0) * 22.627417', 'from': ['target_embed_raw']},
        'target_embed_with_pos': {
          'add_to_input': True, 'class': 'positional_encoding', 'from': ['target_embed_weighted']}}},
    'source_embed': {'class': 'dropout', 'dropout': 0.0, 'from': ['source_embed_with_pos']},
    'source_embed_raw': {
      'activation': None, 'class': 'linear', 'forward_weights_init': fw_init,
      'n_out': 512, 'with_bias': False, 'from': 'data:data'},
    'source_embed_weighted': {'class': 'eval', 'eval': 'source(0) * 22.627417', 'from': ['source_embed_raw']},
    'source_embed_with_pos': {
      'add_to_input': True, 'class': 'positional_encoding', 'from': ['source_embed_weighted']}}
  config.update({
    "num_outputs": n_out,
    "num_inputs": n_in,
    "network": net_dict})
  # Construction must succeed in both modes; reuse resolution differs because
  # the rec layer is unrolled differently for training vs. beam search.
  with make_scope():
    print("Construct for training")
    train_net = TFNetwork(config=config, train_flag=True)
    train_net.construct_from_dict(config.typed_dict["network"])
  with make_scope():
    print("Construct for search")
    search_net = TFNetwork(config=config, train_flag=False, eval_flag=True, search_flag=True)
    search_net.construct_from_dict(config.typed_dict["network"])


def test_SliceLayer_output_placeholder():
with make_scope() as session:
net = TFNetwork(extern_data=ExternData())
Expand Down