In [1]:
# PyTorch implementation of LeNet, from https://medium.datadriveninvestor.com/architecture-implementation-of-lenet-from-scratch-in-pytorch-709cc38c00a9

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from torchsummaryX import summary as summaryX
from torchsummaryX import summary

In [2]:
class LeNet(nn.Module):
    """LeNet-style CNN: three 5x5 convolutions followed by two linear layers.

    Submodules are registered in the order conv1, conv2, conv3, linear1,
    linear2, tanh, avgpool. One ``Tanh`` and one 2x2/stride-2 ``AvgPool2d``
    instance are shared by every stage that needs them.

    For a ``(N, 1, 32, 32)`` input the spatial size shrinks
    32 -> 28 -> 14 -> 10 -> 5 -> 1, so conv3 produces ``(N, 120, 1, 1)``,
    which flattens to the 120 features ``linear1`` expects. The network
    returns ``(N, 10)`` raw scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: unpadded, stride-1 5x5 convolutions.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)

        # Classifier head.
        self.linear1 = nn.Linear(in_features=120, out_features=84)
        self.linear2 = nn.Linear(in_features=84, out_features=10)

        # Parameter-free layers, reused across stages.
        self.tanh = nn.Tanh()
        self.avgpool = nn.AvgPool2d(2, stride=2)

    def forward(self, x):
        """Map a batch of images ``x`` to a ``(batch, 10)`` tensor of scores."""
        stage1 = self.avgpool(self.tanh(self.conv1(x)))
        stage2 = self.avgpool(self.tanh(self.conv2(stage1)))
        stage3 = self.tanh(self.conv3(stage2))

        # Collapse everything except the batch dimension before the linear layers.
        flat = stage3.reshape(stage3.shape[0], -1)
        hidden = self.tanh(self.linear1(flat))
        return self.linear2(hidden)

# Smoke test: build an untrained network and push one random batch of 64
# single-channel 32x32 inputs through it to check the output shape.
model = LeNet()
x = torch.randn(64,1,32,32)
output = model(x)  # call the module itself rather than forward() directly

print(model)
# The torchsummaryX layer summary is left disabled.
#summary(model, (1,32,32))
print("output.shape : ",output.shape)


LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (linear1): Linear(in_features=120, out_features=84, bias=True)
  (linear2): Linear(in_features=84, out_features=10, bias=True)
  (tanh): Tanh()
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
output.shape :  torch.Size([64, 10])


In [6]:
# Load the dataset

# Hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
learning_rate = 1e-3
num_epochs = 10
batch_size = 64

# FashionMNIST images are 28x28; Pad(2) adds 2 pixels on every side so the
# network receives the 32x32 input its layer sizes are built for.
# The same (stateless) pipeline is shared by both splits.
transform = transforms.Compose([transforms.Pad(2), transforms.ToTensor()])

train_dataset = datasets.FashionMNIST(root='dataset/', train=True,
                                      transform=transform, download=True)
test_dataset = datasets.FashionMNIST(root='dataset/', train=False,
                                     transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not benefit from shuffling: a fixed order keeps the per-batch
# composition deterministic and avoids drawing from the global RNG between
# training epochs.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
dataset_sizes = {'train': len(train_dataset), 'test': len(test_dataset)}

# Fresh model on the selected device, with its loss and optimizer.
model = LeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [4]:
# Does one epoch of training

def train_epoch(dataloader, model, criterion, optimizer):
    """Run one optimisation pass over every mini-batch in ``dataloader``.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches; must expose a
            ``dataset`` attribute with a length (used only for progress output).
        model: the network to train. It is called as ``model(X)``.
        criterion: loss function called as ``criterion(predictions, targets)``.
        optimizer: optimizer whose ``zero_grad``/``step`` update the model.

    Returns:
        None. The loss of the current mini-batch is printed every 100 batches.
    """
    size = len(dataloader.dataset)

    # Each batch is moved to wherever the model's parameters live; without this
    # a model placed on CUDA fails with a device mismatch on the first batch.
    # Callables without parameters are supported by skipping the move.
    param_iter = model.parameters() if hasattr(model, "parameters") else iter(())
    first_param = next(param_iter, None)
    target_device = None if first_param is None else first_param.device

    for batchnum, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction and loss on the minibatch
        yhat = model(X)  # this calls forward on the minibatch (do not call forward directly)
        loss = criterion(yhat, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batchnum % 100 == 0:
            # report the loss on current minibatch every 100 minibatches
            current = batchnum * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")
            
# Computes loss and accuracy on the validation set

def validate(dataloader, model, criterion):
    """Report average loss and accuracy of ``model`` over ``dataloader``.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches; must expose a
            ``dataset`` attribute with a length and support ``len()`` itself.
        model: the network to evaluate. It is called as ``model(X)`` and its
            output is arg-maxed over dimension 1 to obtain predicted classes.
        criterion: loss function called as ``criterion(predictions, targets)``.

    Returns:
        None. Accuracy (percent of samples) and the mean of the per-batch
        losses are printed.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0.0, 0

    # Each batch is moved to wherever the model's parameters live; without this
    # a model placed on CUDA fails with a device mismatch on the first batch.
    # Callables without parameters are supported by skipping the move.
    param_iter = model.parameters() if hasattr(model, "parameters") else iter(())
    first_param = next(param_iter, None)
    target_device = None if first_param is None else first_param.device

    with torch.no_grad():  # we don't want to accumulate the gradients during validation
        for X, y in dataloader:
            if target_device is not None:
                X, y = X.to(target_device), y.to(target_device)
            yhat = model(X)
            test_loss += criterion(yhat, y).item()
            correct += (yhat.argmax(1) == y).sum().item()

    test_loss /= num_batches  # mean of per-batch losses
    correct /= size           # fraction of samples classified correctly
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [7]:
# Display which device was selected by the setup cell.
device

device(type='cpu')

In [8]:
# Train: for each epoch, run one optimisation pass over the training loader
# and then report loss/accuracy on the test loader.

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")

    # Training mode (matters for layers such as dropout).
    model.train()
    train_epoch(train_loader, model, criterion, optimizer)

    # Evaluation mode for the validation pass.
    model.eval()
    validate(test_loader, model, criterion)

print("Done!")

Epoch 1
-------------------------------
loss: 2.308832  [    0/60000]
loss: 0.888915  [ 6400/60000]
loss: 0.592438  [12800/60000]
loss: 0.558781  [19200/60000]
loss: 0.706380  [25600/60000]
loss: 0.450450  [32000/60000]
loss: 0.376392  [38400/60000]
loss: 0.652220  [44800/60000]
loss: 0.399252  [51200/60000]
loss: 0.462861  [57600/60000]
Test Error: 
 Accuracy: 82.9%, Avg loss: 0.467550 

Epoch 2
-------------------------------
loss: 0.443425  [    0/60000]
loss: 0.333417  [ 6400/60000]
loss: 0.299488  [12800/60000]
loss: 0.557332  [19200/60000]
loss: 0.474349  [25600/60000]
loss: 0.501709  [32000/60000]
loss: 0.372465  [38400/60000]
loss: 0.327966  [44800/60000]
loss: 0.451039  [51200/60000]
loss: 0.258897  [57600/60000]
Test Error: 
 Accuracy: 85.0%, Avg loss: 0.405115 

Epoch 3
-------------------------------
loss: 0.373407  [    0/60000]
loss: 0.370548  [ 6400/60000]
loss: 0.504661  [12800/60000]
loss: 0.512634  [19200/60000]
loss: 0.630447  [25600/60000]
loss: 0.395045  [32000/600

KeyboardInterrupt: 

In [10]:
# Serialise the whole module object (not just its state_dict) to the file
# "lenet" in the current working directory.
# NOTE(review): a fully pickled module can only be loaded where the LeNet class
# is defined/importable; saving model.state_dict() is the more portable option,
# but would require the loading cell to rebuild the model first.
torch.save(model, "lenet")

In [11]:
# "lenet" was written with torch.save(model, ...), i.e. it is a fully pickled
# nn.Module rather than a state_dict. Recent PyTorch releases default
# torch.load to weights_only=True, which refuses to unpickle such a file, so
# the full unpickler is requested explicitly (keyword available since
# PyTorch 1.13).
# SECURITY: weights_only=False can execute arbitrary code during unpickling.
# Only use it on files you created yourself, as is the case here.
# The LeNet class must be defined in the session before this cell runs.
m2 = torch.load("lenet", weights_only=False)

In [12]:
# Display the reloaded module so its layer listing can be compared with the original model.
m2

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (linear1): Linear(in_features=120, out_features=84, bias=True)
  (linear2): Linear(in_features=84, out_features=10, bias=True)
  (tanh): Tanh()
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)