vgg16 in pytorch got size error

Question

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms, models
import time
import argparse
import os

batch_size = 64

train_dataset = datasets.CIFAR10(root='./data/cifar10/',
                                 train=True,
                                 transform=transforms.ToTensor(),
                                 download=True)
test_dataset = datasets.CIFAR10(root='./data/cifar10/',
                                train=False,
                                transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


class Vgg16(nn.Module):
    def __init__(self, num_classes=10):
        super(Vgg16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1)
        )
        self.classifier = nn.Sequential(
            nn.Linear(25088, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return F.softmax(x)


model = Vgg16()

# print(model)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss().cuda()


if torch.cuda.device_count() > 0:
    # os.environ["CUDA_VISIBLE_DEVICES"]= '0'
    print("USE", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
else:
    print("USE ONLY CPU!")

if torch.cuda.is_available():
    model.cuda()


def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if torch.cuda.is_available():
            data, target = Variable(data.cuda()), Variable(target.cuda())
        else:
            data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        # loss = F.nll_loss(output, target)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))


def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if torch.cuda.is_available():
            data, target = Variable(data.cuda(), volatile=True), Variable(target.cuda())
        else:
            data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]
        pred = output.data.max(1, keepdim=True)[1]  # [0] : value, [1]: index
        correct += pred.eq(target.data.view_as(pred)).sum()
        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


for epoch in range(0, 200):
    train(epoch)
    test()

When I run this codes.. this error occured.

-> RuntimeError: size mismatch at /pytorch/torch/lib/THC/generic/THCTensorMathBlas.cu:243

When I print(model) in other vgg codes in pytorch, There is a 25088 input size of FC layer... So, I tried to set this parameter 25088,There is size mismatch error. When I change this input_size from 25088 to 512, there is no error but training is not works well.(Never changed loss in training process and Always have 10% accuracy in test process) So I think this input size of FC layer is the problem.. What can I do in this situation? Thanks in advance;

how is your data preprocessing done? I suspect you are feeding the network with an image with wrong size — Manuel Lagunas
– Manuel Lagunas, Commented Feb 12, 2018 at 9:06
@ManuelLagunas I just use DataLoader in pytorch. I upload my full codes. — Nazzzz
– Nazzzz, Commented Feb 13, 2018 at 4:41

Manuel Lagunas · Accepted Answer · 2018-02-13 11:19:56Z

I identified the problem. When you define the classifier, you are defining a fully-connected layer nn.Linear(25088, 4096) while the output from the convolutional part after doing x = x.view(x.size(0), -1) is (batch_size, 512). To match the size of the output of the convolutional part with the beginning of the classifier, you should change the classifier definition to:.

self.classifier = nn.Sequential(
        nn.Linear(512, 128),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(128, 128),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(128, num_classes)
)

Like this, it should work.

Note that I put 128 out_features in each fully-connected but you can change those numbers as you prefer.

Also adding the BatchNorm layer after the convolution might help the training to converge

class Vgg16(nn.Module):
    def __init__(self, num_classes=10):
        super(Vgg16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2, dilation=1)
        )
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

thanks for your comment but, as I mentioned I tried to change from 25088 to 512. then training is working without any errors. but training is not works fine. In training process, loss is not decrease(always 4.xxxxx) and in test process auccracy is always 10%... t.t
Thanks! there is no problem with the structure i build. After I add BatchNorm, it works fine.

Gadosey P. · Accepted Answer · 2018-02-13 04:46:59Z

0

Your problem is here : def init(self, num_classes=1000):

change num_classes=10

Cifar10 only has 10 classes, 1000 is meant for the imagenet dataset

answered Feb 13, 2018 at 4:46

Gadosey P.

859 bronze badges

2 Comments

Nazzzz Over a year ago

sorry I changed that 1000 to 10 for CIFAR10, but It can help this problem..I think the issue is in self.classfifier = nn.Sequential( nn.Linear(512,4096),... which one is right as input_channel 512 or 512*7*7...

Gadosey P. Over a year ago

for cifar 10 , try 512*2*2

takethelongsh0t · Accepted Answer · 2018-05-23 16:43:14Z

-1

You need to commit a forward pass in your train loop if you are not having any accuracy updates.

CNN require a forward pass followed by back propagation to successfully train.

instead of:

output = model(data)

Try:

output = model.forward(data)

Let me know how that goes.

answered May 23, 2018 at 16:43

takethelongsh0t

561 gold badge1 silver badge11 bronze badges

Collectives™ on Stack Overflow

vgg16 in pytorch got size error

3 Answers 3

2 Comments

2 Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

3 Answers 3

2 Comments

2 Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related