forked from GoogleCloudPlatform/tensorflow-without-a-phd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mnist_1.0_softmax.py
183 lines (145 loc) · 7.7 KB
/
mnist_1.0_softmax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# encoding: UTF-8
# Copyright 2016 Google.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import tensorflowvisu
import mnistdata
import math
# Report which TensorFlow build is in use, then pin the graph-level random seed
# so that repeated runs of this script are comparable.
print("Tensorflow version {}".format(tf.__version__))
tf.set_random_seed(0)
# neural network with 1 layer of 10 softmax neurons
#
# · · · · · · · · · · (input data, flattened pixels) X [batch, 784] # 784 = 28 * 28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax) W [784, 10] b[10]
# · · · · · · · · Y [batch, 10]
# The model is:
#
# Y = softmax( X * W + b)
# X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch)
# W: weight matrix with 784 lines and 10 columns
# b: bias vector with 10 dimensions
# +: add with broadcasting: adds the vector to each line of the matrix (numpy)
# softmax(matrix) applies softmax on each line
# softmax(line) applies an exp to each value then divides by the sum of the resulting line (its L1 norm), so each line sums to 1
# Y: output matrix with 100 lines and 10 columns
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
# "data" is the directory argument handed to the project-local helper
# (presumably the download/cache location -- confirm in mnistdata.py).
# NOTE(review): one_hot=True / reshape=False are assumed to yield 10-wide one-hot labels and
# 28x28x1 images, because the batches are later fed unchanged into Y_ [None, 10] and
# X [None, 28, 28, 1] -- confirm in mnistdata.py.
mnist = mnistdata.read_data_sets("data", one_hot=True, reshape=False)
## helper module: /tensorflow-without-a-phd/tensorflow-mnist-tutorial/mnistdata.py
print(type(mnist))
## recorded output: <class 'mnistdata.Mnist'>
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# X_1 is an exploratory duplicate of X: in this file it is only printed, never fed or wired into the model.
X_1 = tf.placeholder(tf.float32, [None, 28, 28, 1])
print("---------X_1-----------",type(X_1))
## recorded output: <class 'tensorflow.python.framework.ops.Tensor'>
# correct answers (labels) will go here: one row of 10 values per image
Y_ = tf.placeholder(tf.float32, [None, 10])
print("---------Y_-----------",type(Y_))
## recorded output: <class 'tensorflow.python.framework.ops.Tensor'>
# weights W[784, 10] 784=28*28
# W is the trainable weight matrix of the model, initialised to all zeros.
W = tf.Variable(tf.zeros([784, 10]))
print("---------W-----------",type(W))
## recorded output: <class 'tensorflow.python.ops.variables.Variable'>
"""
The weights being passed in as INITIAL Weights
"""
# weights_1 is an exploratory duplicate of W: in this file it is only printed, never used by the model.
weights_1 = tf.Variable(tf.zeros([784, 10]))
print("The weights being passed in as INITIAL Weights ---->>",type(weights_1))
## recorded output: <class 'tensorflow.python.ops.variables.Variable'>
print("The weights being passed in as INITIAL Weights ---->>",weights_1)
## recorded output: <tf.Variable 'Variable_1:0' shape=(784, 10) dtype=float32_ref>
"""
The Python Class of the Weights Variable is ```# <class 'tensorflow.python.ops.variables.Variable'>``` .
If we were to Print() the Weights variable in the
terminal - we get to see ```<tf.Variable 'Variable_1:0' shape=(784, 10) dtype=float32_ref> ```.
We dont see a ARRAY of any sort ?? Why ?? Nothing ASSIGNED yet ??
"""
# Answer to the question above: printing a tf.Variable shows the graph handle (name, shape, dtype),
# not its contents. The values only exist inside a session, after the initializer has been run;
# to see the array, evaluate it there, e.g. sess.run(weights_1).
# biases b[10]
# b is the trainable bias vector of the model, initialised to all zeros.
b = tf.Variable(tf.zeros([10]))
# biases_1 is an exploratory duplicate of b: in this file it is only printed, never used by the model.
biases_1 = tf.Variable(tf.zeros([10]))
print("biases_1-----------",biases_1)
## recorded output: <tf.Variable 'Variable_3:0' shape=(10,) dtype=float32_ref>
# The auto-generated names (Variable_1, Variable_3, ...) follow the order in which the
# variables are created, so reordering the statements above changes them.
"""
# Source == https://stackoverflow.com/questions/37958706/in-tensorflow-what-is-the-difference-between-a-tensor-that-has-a-type-ending-in
# https://github.com/mrry?tab=repositories
In addition, variants of these types with the _ref suffix are defined for reference-typed tensors.
#
A reference-typed tensor is mutable. The most common way to create a reference-typed tensor
is to define a tf.Variable: defining a tf.Variable whose initial value has dtype tf.float32 will
create a reference-typed tensor with dtype tf.float32_ref.
You can mutate a reference-typed tensor by passing it as the first argument to tf.assign().
"""
# Prints a line of 90 spaces: a visual separator in the console output.
print(" "*90)
# Flatten each 28x28x1 image into one row of 784 pixel values.
# A -1 entry lets TensorFlow infer that dimension (here: the number of images in the batch)
# so that the total number of elements is preserved.
XX = tf.reshape(X, shape=[-1, 28 * 28])
print("type-----------XX---------", type(XX))
## recorded output: <class 'tensorflow.python.framework.ops.Tensor'>
print("tf.reshape-----------XX---------", XX)
## recorded output: Tensor("Reshape_2:0", shape=(?, 784), dtype=float32)
## (the numeric suffix of the op name depends on how many reshape ops the graph already holds)
# Blank separator line (90 spaces) in the console output.
print(90 * " ")
# The model
# Ylogits holds the raw scores X*W + b; Y is the softmax of those scores (class probabilities).
# The logits keep their own name because the loss below is computed directly from them.
Ylogits = tf.matmul(XX, W) + b
Y = tf.nn.softmax(Ylogits)
print("type--------MODEl == Y-----",type(Y))
## recorded output: <class 'tensorflow.python.framework.ops.Tensor'>
### DHANKAR --- cross-entropy ---- https://github.com/digital-cognition-co-in/Tensors_et_al/blob/master/README_tfNotes_.md
# loss function: cross-entropy = - sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired output vector
#
# The loss is deliberately NOT written as -Y_ * tf.log(Y): once softmax saturates and outputs an
# exact 0, log(0) = -inf and 0 * -inf = NaN, which poisons the loss and every gradient.
# tf.nn.softmax_cross_entropy_with_logits evaluates the same formula from the logits in a
# numerically stable way and returns one cross-entropy value per image.
# NOTE(review): newer TF 1.x releases flag this function as deprecated in favour of the
# `_v2` variant; with placeholder labels both compute the same value.
per_image_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
# reduce_mean AVERAGES over the images of the batch; * 100 rescales that average to the total
# cross-entropy of a batch of 100 images. This is mathematically equal to
# -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0, where the mean additionally divided by the
# 10 classes (hence the extra factor 10 in that form).
cross_entropy = tf.reduce_mean(per_image_cross_entropy) * 100.0
# accuracy of the trained model, between 0 (worst) and 1 (best):
# fraction of images whose most probable predicted class matches the label
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cross_entropy)
# matplotlib visualisation
# W and b flattened to 1-D; training_step fetches them and passes them to datavis.append_data_histograms
allweights = tf.reshape(W, [-1])
allbiases = tf.reshape(b, [-1])
# I and It are built by the project-local tensorflowvisu helper from inputs, predictions and labels;
# training_step fetches I on training batches and It on the full test set.
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_) # assembles 10x10 images by default
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25) # 1000 images on 25 lines
# datavis collects the curves, histograms and images pushed by training_step and drives the animation
datavis = tensorflowvisu.MnistDataVis()
# init
# The initializer op must run once in the session before any variable is read or trained.
init = tf.global_variables_initializer()
# sess is the single module-level session shared by every training_step call
sess = tf.Session()
sess.run(init)
# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Train the model on one mini-batch of 100 images, optionally logging metrics first.

    Args:
        i: index of the current iteration; used in the log lines, as the x-value
            handed to the visualiser, and to derive the epoch number.
        update_test_data: when truthy, evaluate accuracy/loss on the whole test
            set and push the results to the visualiser.
        update_train_data: when truthy, evaluate accuracy/loss, weights and
            biases on the current training batch and push them to the visualiser.

    Returns:
        None. The function works through side effects on the module-level
        session, visualiser and console output.
    """
    # Next 100 training images with their 100 labels; the same batch is used for
    # the optional training metrics and for the gradient step at the end.
    batch_X, batch_Y = mnist.train.next_batch(100)
    train_feed = {X: batch_X, Y_: batch_Y}

    # Training metrics are measured BEFORE this batch is used for the update.
    if update_train_data:
        train_acc, train_loss, train_img, weights, biases = sess.run(
            [accuracy, cross_entropy, I, allweights, allbiases],
            feed_dict=train_feed)
        datavis.append_training_curves_data(i, train_acc, train_loss)
        datavis.append_data_histograms(i, weights, biases)
        datavis.update_image1(train_img)
        print("{!s}: accuracy:{!s} loss: {!s}".format(i, train_acc, train_loss))

    # Test metrics are computed on the complete test set in a single run.
    if update_test_data:
        test_feed = {X: mnist.test.images, Y_: mnist.test.labels}
        test_acc, test_loss, test_img = sess.run(
            [accuracy, cross_entropy, It], feed_dict=test_feed)
        datavis.append_test_curves_data(i, test_acc, test_loss)
        datavis.update_image2(test_img)
        # 100 images per iteration; epochs are reported 1-based.
        epoch = i * 100 // mnist.train.images.shape[0] + 1
        print("{!s}: ********* epoch {!s} ********* test accuracy:{!s} test loss: {!s}".format(
            i, epoch, test_acc, test_loss))

    # The backpropagation training step on the current batch.
    sess.run(train_step, feed_dict=train_feed)
# Hand the training function to the visualiser, which drives the loop:
# 2000 + 1 iterations, training curves refreshed every 10 iterations,
# test curves every 50.
datavis.animate(
    training_step,
    iterations=2000 + 1,
    train_data_update_freq=10,
    test_data_update_freq=50,
    more_tests_at_start=True,
)
# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
# for i in range(2000+1): training_step(i, i % 50 == 0, i % 10 == 0)
print("max test accuracy: {!s}".format(datavis.get_max_test_accuracy()))
# final max test accuracy = 0.9268 (10K iterations). Accuracy should peak above 0.92 in the first 2000 iterations.