-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
124 lines (105 loc) · 4.75 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Jacob Morrison
import tensorflow as tf
import numpy as np
import data_helpers
import os
import time
import datetime
# parameters
learning_rate = 0.001
training_iters = 500000
batch_size = 64
display_step = 10
# network parameters
n_input = 20 # truncate sentences (pad sentences with <PAD> tokens if less than this, cut off if larger)
sen_dim = 300
n_classes = 2 # 15 total senses
dropout = 0.70 # dropout probability
# tf graph input
x1 = tf.placeholder(tf.float32, [None, sen_dim])
x2 = tf.placeholder(tf.float32, [None, sen_dim])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32],dtype=tf.float32)),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64],dtype=tf.float32)),
# fully connected, 7*7*64 inputs, 64 outputs
'wd1': tf.Variable(tf.random_normal([104*256, 64],dtype=tf.float32)),
# 128 inputs, 15 outputs (class prediction)
'out': tf.Variable(tf.random_normal([300, n_classes],dtype=tf.float32))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32],dtype=tf.float32)),
'bc2': tf.Variable(tf.random_normal([64],dtype=tf.float32)),
'bd1': tf.Variable(tf.random_normal([64],dtype=tf.float32)),
'out': tf.Variable(tf.random_normal([n_classes],dtype=tf.float32))
}
# need to get a prediction for each sentence
# get the vector representation of each word
#pred1 = np.mean(x1, axis=1)#conv_net(x1, weights, biases, keep_prob)
#pred2 = np.mean(x2, axis=1)#conv_net(x2, weights, biases, keep_prob)
# concatenate both representations
#out = tf.concat(1, [x1, x2])#[pred1, pred2])
out = x2
# predict the relation class
pred = tf.add(tf.batch_matmul(out, weights['out']), biases['out'])
print(tf.shape(pred))
# define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# initializing all variables
init = tf.global_variables_initializer()
# launch the graph
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
tf.add_to_collection('accuracy', accuracy)
tf.add_to_collection('x1', x1)
tf.add_to_collection('x2', x2)
tf.add_to_collection('y', y)
tf.add_to_collection('keep_prob', keep_prob)
with tf.Session() as sess:
sess.run(init)
step = 1
sentences1, sentences2, labels = data_helpers.load_labels_and_data('../discourse-cnn/Data/GoogleNews-vectors-negative300.bin', './A5.train_set.labeled', True)
# keep training until we reach max iterations
print(len(sentences1))
while step * batch_size < training_iters:
start = (step * batch_size) % len(sentences1)
end = ((step + 1) * batch_size) % len(sentences1)
if end < start:
end = len(sentences1)
batch_x1 = sentences1[start : end]
batch_x2 = sentences2[start : end]
batch_y = labels[start : end]
#print(len(batch_x1))
#print(len(batch_x2))
#print(len(batch_y))
sess.run(optimizer, feed_dict={x1: batch_x1, x2: batch_x2, y: batch_y, keep_prob: dropout})
if step % display_step == 0:
#calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x1: batch_x1, x2: batch_x2, y: batch_y, keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Training finished!"
#then save model
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
if not os.path.exists(checkpoint_dir):
os.makedirs(checkpoint_dir)
path = saver.save(sess, checkpoint_prefix, global_step=1)
print("Saved model checkpoint to {}\n".format(path))
accc = sess.run(accuracy, feed_dict={x1: sentences1, x2: sentences2, y: labels, keep_prob: 1.})
print("testing accuracy on training set: " + str(accc))
sentences12, sentences22, labels2 = data_helpers.load_labels_and_data('../discourse-cnn/Data/GoogleNews-vectors-negative300.bin', './A5.dev_set.labeled', True)
accc = sess.run(accuracy, feed_dict={x1: sentences12, x2: sentences22, y: labels2, keep_prob: 1.})
print("testing accuracy on dev set: " + str(accc))