-
Notifications
You must be signed in to change notification settings - Fork 0
/
seq2seq_lib.py
132 lines (112 loc) · 5.39 KB
/
seq2seq_lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import tensorflow as tf
# Adapted to support sampled_softmax loss function, which accepts activations instead of logits.
def sequence_loss_by_example(inputs, targets, weights, loss_function, average_across_timesteps=True, name=None):
    """Weighted loss for a sequence of inputs, computed per example.

    Unlike a logits-based sequence loss, `loss_function` is handed the raw
    `inputs` of each time step, so it can be e.g. a sampled softmax loss that
    works on activations instead of logits.

    Args:
        inputs: Sequence (list or tuple) of 2D Tensors of shape
            [batch_size x hid_dim], one entry per time step.
        targets: Sequence of 1D batch-sized int32 Tensors of the same length
            as `inputs`.
        weights: Sequence of 1D batch-sized float Tensors of the same length
            as `inputs`.
        loss_function: Callable (inputs, labels) -> loss, called once per time
            step; its result is multiplied by that step's weight.
        average_across_timesteps: If set, divide the summed loss by the total
            label weight (the sum of `weights` over time steps).
        name: Optional name for this operation, default:
            'sequence_loss_by_example'.

    Returns:
        1D batch-sized float Tensor: The log-perplexity for each sequence.

    Raises:
        ValueError: If len(inputs) is different from len(targets) or
            len(weights), or if the sequences are empty.

    Information:
        Perplexity is a measurement of how well a probability distribution or
        probability model predicts a sample. It may be used to compare
        probability models. A low perplexity indicates the probability
        distribution is good at predicting the sample.
    """
    if len(targets) != len(inputs) or len(weights) != len(inputs):
        raise ValueError('Lengths of logits, weights, and targets must be the same '
                         '%d, %d, %d.' % (len(inputs), len(weights), len(targets)))
    if len(inputs) == 0:
        # Summing zero tensors is undefined; fail here with a clear message
        # instead of a less helpful error from deep inside the graph builder.
        raise ValueError('inputs, targets, and weights must contain at least one time step.')

    # Normalize to lists so that the concatenation below also works when the
    # caller passes tuples (or a mix of tuples and lists).
    inputs, targets, weights = list(inputs), list(targets), list(weights)

    with tf.name_scope(name, 'sequence_loss_by_example', inputs + targets + weights):
        # One weighted loss term per time step, summed element-wise.
        log_perps = tf.add_n([
            loss_function(inp, target) * weight
            for inp, target, weight in zip(inputs, targets, weights)
        ])
        if average_across_timesteps:
            total_size = tf.add_n(weights)
            # Just to avoid division by 0 for all-0 weights.
            total_size += 1e-12
            log_perps /= total_size
    return log_perps
def sampled_sequence_loss(inputs, targets, weights, loss_function, average_across_timesteps=True,
                          average_across_batch=True, name=None):
    """Weighted loss for a sequence of inputs, collapsed over the batch.

    Sums the per-example losses returned by `sequence_loss_by_example` into a
    single value and optionally divides it by the batch size.

    Args:
        inputs: Sequence (list or tuple) of 2D Tensors of shape
            [batch_size x hid_dim].
        targets: Sequence of 1D batch-sized int32 Tensors of the same length
            as `inputs`.
        weights: Sequence of 1D batch-sized float Tensors of the same length
            as `inputs`.
        loss_function: Callable (inputs, labels) -> loss, e.g. a sampled
            softmax loss; forwarded to `sequence_loss_by_example`.
        average_across_timesteps: If set, divide the returned cost by the total
            label weight.
        average_across_batch: If set, divide the returned cost by the batch
            size, taken from the first dimension of `targets[0]`.
        name: Optional name for this operation, defaults to 'sequence_loss'.

    Returns:
        A scalar float Tensor: The average log-perplexity per symbol (weighted).

    Raises:
        ValueError: If len(inputs) is different from len(targets) or
            len(weights) (raised by `sequence_loss_by_example`).

    Information:
        tf.reduce_sum without an axis sums every element, e.g. for
        'x' = [[1, 1, 1], [1, 1, 1]], tf.reduce_sum(x) ==> 6.
    """
    # Normalize to lists so the concatenation for the name scope (here and in
    # sequence_loss_by_example) also works when tuples are passed.
    inputs, targets, weights = list(inputs), list(targets), list(weights)

    # Use the documented default scope name rather than re-using the name of
    # the per-example helper, which would yield a nested duplicate scope.
    with tf.name_scope(name, 'sequence_loss', inputs + targets + weights):
        cost = tf.reduce_sum(
            sequence_loss_by_example(inputs, targets, weights, loss_function,
                                     average_across_timesteps=average_across_timesteps))
        if average_across_batch:
            batch_size = tf.shape(targets[0])[0]
            # Cast to the loss dtype so the division also works when the loss
            # is not float32 (e.g. float64).
            cost /= tf.cast(batch_size, cost.dtype)
    return cost
# def linear(args, output_size, bias, bias_start=0.0, scope=None):
# """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
#
# Args:
# args: a 2D Tensor or a list of 2D, batch x n, Tensors.
# output_size: int, second dimension of W[i].
# bias: boolean, whether to add a bias term or not.
# bias_start: starting value to initialize the bias; 0 by default.
# scope: VariableScope for the created subgraph; defaults to "Linear".
#
# Returns:
# A 2D Tensor with shape [batch x output_size] equal to
# sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
#
# Raises:
# ValueError: if some of the arguments has unspecified or wrong shape.
# """
#
# if args is None or (isinstance(args, (list, tuple)) and not args):
# raise ValueError('`args` must be specified')
# if not isinstance(args, (list, tuple)):
# args = [args]
#
# # Calculate the total size of arguments on dimension 1.
#
# total_arg_size = 0
# shapes = [a.get_shape().as_list() for a in args]
# for shape in shapes:
# if len(shape) != 2:
# raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes))
# if not shape[1]:
# raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes))
# else:
# total_arg_size += shape[1]
#
# # Now the computation.
#
# with tf.variable_scope(scope or 'Linear'):
# matrix = tf.get_variable('Matrix', [total_arg_size, output_size])
# if len(args) == 1:
# res = tf.matmul(args[0], matrix)
# else:
# res = tf.matmul(tf.concat(1, args), matrix)
# if not bias:
# return res
# bias_term = tf.get_variable(
# 'Bias', [output_size],
# initializer=tf.constant_initializer(bias_start))
# return res + bias_term