2

I am training the "Show and tell" model using tensorflow in which the model automatically generates the captions of the images. How ever I am getting this error.

This is the traceback:

TypeError                                 Traceback (most recent call 
last)
<ipython-input-15-b6da0a27b701> in <module>()
  1 try:
  2     #train(.001,False,False) #train from scratch
----> 3     train(.001,True,True)    #continue training from pretrained weights @epoch500
  4     #train(.001)  #train from previously saved weights
  5 except KeyboardInterrupt:

<ipython-input-14-39693d0edd0a> in train(learning_rate, continue_training, transfer)
 23     n_words = len(wordtoix)
 24     maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
---> 25     caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
 26 
 27     loss, image, sentence, mask = caption_generator.build_model()

<ipython-input-12-7ef491a16183> in __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b)
 11         # declare the variables to be used for our word embeddings
 12         with tf.device("/cpu:0"):
---> 13             self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))
 14 
 15             self.embedding_bias = tf.get_variable("embedding_bias", tf.zeros([dim_embed]))

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
1063       collections=collections, caching_device=caching_device,
1064       partitioner=partitioner, validate_shape=validate_shape,
-> 1065       use_resource=use_resource, custom_getter=custom_getter)
1066 get_variable_or_local_docstring = (
1067     """%s

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
960           collections=collections, caching_device=caching_device,
961           partitioner=partitioner, validate_shape=validate_shape,
--> 962           use_resource=use_resource, custom_getter=custom_getter)
963 
964   def _get_partitioned_variable(self,

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
365           reuse=reuse, trainable=trainable, collections=collections,
366           caching_device=caching_device, partitioner=partitioner,
--> 367           validate_shape=validate_shape, use_resource=use_resource)
368 
369   def _get_partitioned_variable(

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource)
301                      trainable=True, collections=None, caching_device=None,
302                      partitioner=None, validate_shape=True, use_resource=None):
--> 303       is_scalar = shape is not None and not shape
304       # Partitioned variable case
305       if partitioner is not None and not is_scalar:

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in __nonzero__(self)
511       `TypeError`.
512     """
--> 513     raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. "
514                     "Use `if t is not None:` instead of `if t:` to test if a "
515                     "tensor is defined, and use TensorFlow ops such as "

TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.

Here is the code:

def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk
print('preprocessing %d word vocab' % (word_count_threshold, ))
word_counts = {}
nsents = 0
for sent in sentence_iterator:
  nsents += 1
  for w in sent.lower().split(' '):
    word_counts[w] = word_counts.get(w, 0) + 1
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))


ixtoword = {}
ixtoword[0] = '.'  
wordtoix = {}
wordtoix['#START#'] = 0 
ix = 1
for w in vocab:
  wordtoix[w] = ix
  ixtoword[ix] = w
  ix += 1

word_counts['.'] = nsents
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword])
bias_init_vector /= np.sum(bias_init_vector) 
bias_init_vector = np.log(bias_init_vector)
bias_init_vector -= np.max(bias_init_vector) 
return wordtoix, ixtoword, bias_init_vector.astype(np.float32)

class Caption_Generator():
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):

    self.dim_in = dim_in
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.n_words = n_words

    # declare the variables to be used for our word embeddings
    with tf.device("/cpu:0"):
        self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))

        self.embedding_bias = tf.get_variable("embedding_bias", tf.zeros([dim_embed]))

    # declare the LSTM itself
        self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)

    # declare the variables to be used to embed the image feature embedding to the word embedding space
        self.img_embedding = tf.get_variable("img_embedding", tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1))
        self.img_embedding_bias = tf.get_variable("img_embedding_bias", tf.zeros([dim_hidden]))

    # declare the variables to go from an LSTM output to a word encoding output
        self.word_encoding = tf.get_variable("word_encoding", tf.random_uniform([dim_hidden, n_words], -0.1, 0.1))
    # initialize this bias variable from the preProBuildWordVocab output
        self.word_encoding_bias = tf.get_variable("word_encoding_bias", init_b)

def build_model(self):
    # declaring the placeholders for our extracted image feature vectors, our caption, and our mask
    # (describes how long our caption is with an array of 0/1 values of length `maxlen`  
    img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
    caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
    mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

    # getting an initial LSTM embedding from our image_imbedding
    image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias

    # setting initial state of our LSTM
    state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)

    total_loss = 0.0
    with tf.variable_scope("RNN"):
        for i in range(self.n_lstm_steps): 
            if i > 0:
               #if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding
               # to the (i-1)th word in our caption 
                with tf.device("/cpu:0"):
                    current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
            else:
                 #if this is the first iteration of our LSTM we utilize the embedded image as our input 
                current_embedding = image_embedding
            if i > 0: 
                # allows us to reuse the LSTM tensor variable on each iteration
                tf.get_variable_scope().reuse_variables()

                out, state = self.lstm(current_embedding, state)
                    #out, state = self.tf.nn.dynamic_rnn(current_embedding, state)


            if i > 0:
                #get the one-hot representation of the next word in our caption 
                labels = tf.expand_dims(caption_placeholder[:, i], 1)
                ix_range=tf.range(0, self.batch_size, 1)
                ixs = tf.expand_dims(ix_range, 1)
                concat = tf.concat([ixs, labels],1)
                onehot = tf.sparse_to_dense(
                concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)


                #perform a softmax classification to generate the next word in the caption
                logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
                xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
                xentropy = xentropy * mask[:,i]

                loss = tf.reduce_sum(xentropy)
                total_loss += loss

        total_loss = total_loss / tf.reduce_sum(mask[:,1:])
        return total_loss, img,  caption_placeholder, mask

### Parameters ###
dim_embed = 256
dim_hidden = 256
dim_in = 4096
batch_size = 128
momentum = 0.9
n_epochs = 150

def train(learning_rate=0.001, continue_training=False, transfer=True):

tf.reset_default_graph()

feats, captions = get_data(annotation_path, feature_path)
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)

np.save('data/ixtoword', ixtoword)

index = (np.arange(len(feats)).astype(int))
np.random.shuffle(index)


sess = tf.InteractiveSession()
n_words = len(wordtoix)
maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)

loss, image, sentence, mask = caption_generator.build_model()

saver = tf.train.Saver(max_to_keep=100)
global_step=tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate, global_step,
                                   int(len(index)/batch_size), 0.95)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
tf.global_variables_initializer().run()

if continue_training:
    if not transfer:
        saver.restore(sess,tf.train.latest_checkpoint(model_path))
    else:
        saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer))
losses=[]
for epoch in range(n_epochs):
    for start, end in zip( range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):

        current_feats = feats[index[start:end]]
        current_captions = captions[index[start:end]]
        current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]

        current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
        current_caption_matrix = np.hstack( [np.full( (len(current_caption_matrix),1), 0), current_caption_matrix] )

        current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
        nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix )])

        for ind, row in enumerate(current_mask_matrix):
            row[:nonzeros[ind]] = 1

        _, loss_value = sess.run([train_op, loss], feed_dict={
            image: current_feats.astype(np.float32),
            sentence : current_caption_matrix.astype(np.int32),
            mask : current_mask_matrix.astype(np.float32)
            })

        print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats)))
    print("Saving the model from epoch: ", epoch)
    saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

1 Answer 1

2

The problem stems from passing a tf.Tensor as the shape argument of tf.get_variable(name, shape=None, ...) on this line:

self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))

I suspect you meant to pass the random tensor as the initializer argument. The easiest way to fix this is by specifying a name for the argument:

self.word_embedding = tf.get_variable(
    "word_embedding",
    initializer=tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))

It looks like all of your calls to tf.get_variable() will need a similar fix.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.