tensorflow library
please help me to solve my assignment asap!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: tarodz
"""
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow as tf;
# Seed NumPy's global random generator for reproducibility between runs
# (every later np.random.* draw follows the same sequence each time).
np.random.seed(123)
def makeFakeDataset(sCnt=10000, sCnt2=2000, numberOfFeatures=784):
    """Generate a synthetic linear-regression dataset with known parameters.

    Targets are produced as ``Y = X @ true_w + true_b + noise`` where only the
    first three entries of ``true_w`` are non-zero (-0.5, 1.3, 1.3),
    ``true_b`` is -0.3 and the noise is Gaussian with standard deviation 0.01.

    Args:
        sCnt: Number of training samples.
        sCnt2: Number of test samples.
        numberOfFeatures: Number of feature columns; must be at least 3
            because the first three weights are set explicitly.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. The x arrays are
        float32 with shape ``(samples, numberOfFeatures)``; the y arrays have
        shape ``(samples, 1)``.

    Raises:
        ValueError: If ``numberOfFeatures`` is smaller than 3.
    """
    if numberOfFeatures < 3:
        raise ValueError("numberOfFeatures must be at least 3")

    total = sCnt + sCnt2

    # true parameters w and b
    true_w = np.zeros((numberOfFeatures, 1))
    true_w[0] = -0.5
    true_w[1] = 1.3
    true_w[2] = 1.3
    true_b = -0.3

    # sample some random points in feature space
    # (X is drawn before the noise so the random stream order is unchanged)
    X = np.random.randn(total, numberOfFeatures).astype(dtype='float32')

    # calculate u = w^T x + b
    true_u = np.dot(X, true_w) + true_b

    # add gaussian noise
    Y = true_u + 0.01 * np.random.randn(total, 1)

    # split into training and test set
    return X[:sCnt, :], Y[:sCnt, :], X[sCnt:total, :], Y[sCnt:total, :]
## Least-squares linear regression trained with mini-batch gradient descent.
## NOTE(review): tf.placeholder / tf.Session / tf.train.* are TensorFlow 1.x
## graph-mode APIs -- confirm the installed version (under TensorFlow 2.x they
## are only available through tf.compat.v1).

## we use the dataset with x_train being the matrix "n by fCnt"
## with samples as rows, and the features as columns
## y_train is the true value of dependent variable, we have it as a matrix "n by 1"
x_train, y_train, x_test, y_test = makeFakeDataset()
n_train = x_train.shape[0]
fCnt = x_train.shape[1]

#### START OF LEARNING

# number of epochs. 1 epoch is when all training data is seen
n_epochs = 100

# number of samples to present as micro-batch
# could be just n_train
# or, if the dataset is large, could be a small chunk of it
batch_size = 128
# ^^^ says: present the training set in chunks (micro-batches) of 128 samples

## define variables for tensorflow

## define and initialize shared variables
## (the variables persist, they encode the state of the model throughout
## learning via gradient descent)

# w is the feature weights, a [fCnt x 1] vector
initialW = np.random.rand(fCnt, 1).astype(dtype='float32')
w = tf.Variable(initialW, name="w")

# b is the bias, so just a single number
initialB = 0.0
b = tf.Variable(initialB, name="b")

## define non-shared/placeholder variable types
# x will be our [#samples x #features] matrix of samples fed at run time
x = tf.placeholder(dtype=tf.float32, name='x')
# y will be our vector of dependent variable for the fed samples
y = tf.placeholder(dtype=tf.float32, name='y')

## set up new variables that are functions/transformations of the above

# predicted value for each sample (a vector)
# tf.matmul(x,w) is a vector with #samples entries
# even though b is just a number, + will work (through "broadcasting")
# b will be "replicated" #samples times to make both sides of + have same dimension
# the result is a vector with #samples entries
predictions = tf.matmul(x, w) + b

# loss (square error of prediction) for each sample (a vector)
loss = tf.square(y - predictions)

# risk over all samples (a number)
risk = tf.reduce_mean(loss)

# define which optimizer to use
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(risk)

# create a tensorflow session (closed automatically when the block exits)
# and initialize the variables
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # calculate and print Mean Squared Error on the full test set,
    # using initial (random) w,b; axis=0 yields a 1-element array here
    y_pred = sess.run(predictions, feed_dict={x: x_test, y: y_test})
    MSE = np.mean(np.square(y_pred - y_test), axis=0)
    print(MSE)

    # start the iterations of training
    # 1 epoch == all data samples were presented
    for i in range(0, n_epochs):
        # if dataset is large, we want to present it in chunks (called micro-batches)
        for j in range(0, n_train, batch_size):
            jS = j
            jE = min(n_train, j + batch_size)  # last batch may be shorter
            x_batch = x_train[jS:jE, :]
            y_batch = y_train[jS:jE, :]
            # do a step of gradient descent on the micro-batch
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

        # training done in this epoch
        # but, just so that the user can monitor progress, try current w,b on full test set
        y_pred, curr_w, curr_b = sess.run([predictions, w, b],
                                          feed_dict={x: x_test, y: y_test})
        # calculate and print Mean Squared Error
        MSE = np.mean(np.mean(np.square(y_pred - y_test), axis=1), axis=0)
        print(MSE)

# report the learned parameters from the last epoch
print(np.transpose(curr_w))
print(curr_b)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: tarodz
"""
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow as tf;
# Location of the unconstrained minimum of f (f is zero there).
minimum = [-.25, 2]


# THIS SCRIPT PERFORMS PROJECTED GRADIENT DESCENT ON FUNCTION F,
# ASSUMING Q (the feasible region) is w1>=0, w2>=0
def f(w):
    """Return the sum of squared differences between ``w`` and ``minimum``.

    Args:
        w: Tensor-like value from which ``np.array(minimum)`` can be
            subtracted elementwise (the script passes a 2-element
            tf.Variable).

    Returns:
        A scalar TensorFlow tensor: the result of ``tf.reduce_sum`` over the
        elementwise square of ``w - minimum``.
    """
    shiftedW = w - np.array(minimum)
    return tf.reduce_sum(tf.multiply(shiftedW, shiftedW))
# Projected gradient descent driver: alternate a gradient step on f(w) with a
# projection of w onto the feasible region (all coordinates non-negative).

# define starting value of W for gradient descent
# here, W is a 2D vector
initialW = np.random.randn(2)

# create a shared variable (i.e. a variable that persists between calls to a
# tensorflow function)
w = tf.Variable(initialW, name="w")

# define output of applying f to w
# our goal will be to minimize f(w), i.e. find w with lowest possible f(w)
z = f(w)

# if you want more accurate result, replace step size 0.01 with something smaller
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(z)

# Build the "write the projected value back into w" op ONCE and feed the value
# through a placeholder. Calling tf.assign inside the loop would add a new
# assign op (plus a constant) to the graph on every iteration.
projected_w = tf.placeholder(dtype=w.dtype.base_dtype, shape=w.shape,
                             name="projected_w")
new_w_assign = tf.assign(w, projected_w)

# initialize tensorflow session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# `with sess` makes it the default session (needed by train.run() / w.eval())
# and closes it on exit
with sess:
    # hard-coded number of steps, could be too little, may need to be increased
    for i in range(300):
        # perform gradient step
        train.run()
        # get the numpy vector with current value of w
        w_value = w.eval()
        # run proximal operator (here it's simple, just replace negative values with 0)
        new_w_value = np.maximum(w_value, 0)
        print((w_value, new_w_value))
        # update tensorflow value using numpy value
        sess.run(new_w_assign, feed_dict={projected_w: new_w_value})

print("True minimum: " + str(np.maximum(minimum, 0)))
print("Found minimum:" + str(new_w_value))
CMSC 510 – Fall 2020
Homework Assignment 3
Announced: 10/6
Due: Tuesday, 10/27, noon
The problem
• Implement and test:
• Logistic regression (LR) with L1 regularization
• LR is differentiable
• But L1 norm is not
• Use proximal gradient descent
• For L1 norm, that’s soft-thresholding
• Use tensorflow library
• Dataset – the same as in HW2:
• Classify two digits from MNIST dataset
Hints about tensorflow
• See: tensorflow_minimizeF.py
• Performs projected gradient descent on a simple
function
• The function has global minimum at
• w1=-0.25, w2=2
• But the feasible set Q is: w1>=0, w2>=0
• For this function, the best solution is w1=0, w2=2
• The code does the following, in a loop:
• Gradient step on the function, followed up by proximal step
• Here, the proximal step is just “make w nonnegative” by
replacing negative values with 0, the closest non-negative value
• Feasible set Q is set of all vectors with nonnegative
coordinates, i.e., for 2D, w1>=0, w2>=0
• In your actual code, you should use soft-thresholding
instead
Hints about tensorflow
• See: tensorflow_leastSquares.py
• Performs gradient descent on a function based on data
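• We have some fake data x,y, where
$y = w \cdot x + b + \text{small\_gaussian\_noise}$
• The code tries to find the best $w_{\text{best}}$, $b_{\text{best}}$ that predict y
• It uses the loss: $(y - y_{\text{predicted}})^2$
• $y_{\text{predicted}} = w_{\text{best}} \cdot x + b_{\text{best}}$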
• In your code:
• x,y will be taken from the MNIST dataset
• the loss should be logistic loss
• you need to add the proximal step / soft-thresholding
• Constant L is unknown, you should try several gradient step sizes
• Constant in front of L1 penalty is unknown, you should try several values
Returning the Assignment
• Solution code should be written by you and you
only (no web/book/friend/etc. code)
• You can freely use the code provided on BB as your
starting point
• Upload through Blackboard
• A report in PDF
• Results of tests of the method on MNIST dataset, for decreasing training set
sizes (include your V#, and what are your two digits defining the two-class
problem).
• Code in python for solving the MNIST classification problem (for
full size of the training set):
• The file should have your name
in a comment at the top