Convolutional Neural Networks
In this notebook, you will learn about Convolutional Neural Networks (CNNs), a type of neural network designed for processing structured grid data like images. CNNs use convolution operations to automatically learn spatial features, making them effective for tasks like image classification.
You will:
- Understand how convolution works as a feature detector
- See how convolution reduces parameters compared to fully connected networks
- Build CNNs using both `nn.Sequential` and custom `nn.Module` classes
Setup
Import Libraries
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from matplotlib.animation import FuncAnimation, PillowWriter
import ipywidgets as widgets
import owncloud
Utility Functions
def convolve1d_interactive(kernel_type: str = 'boxcar') -> None:
    a = np.zeros(100)
    a[30:70] = 1
    if kernel_type == 'boxcar':
        kernel = np.zeros(100)
        kernel[45:55] = 1
    elif kernel_type == 'gaussian':
        mu, sigma = 5, 0.5
        x = np.linspace(0, 10, 100)
        kernel = np.zeros_like(x)
        kernel[35:65] = np.exp(-(x[35:65] - mu)**2 / (2 * sigma**2))
    elif kernel_type == 'exponential':
        mu, tau = 5, 0.3
        x = np.linspace(0, 10, 100)
        kernel = np.zeros_like(x)
        kernel[35:50] = np.exp((x[35:50] - mu) / tau)  # rising exponential on the left, drops to 0 at the peak
    else:
        raise ValueError(f"Unknown kernel_type: {kernel_type}. Use 'boxcar', 'gaussian', or 'exponential'.")
    result = np.zeros_like(a)

    def do_and_plot_convolution(idx):
        shifted_kernel = np.roll(kernel, idx - len(kernel) // 2)
        result[:idx + 1] = [(a * np.roll(kernel, i - len(kernel) // 2)).sum() for i in range(idx + 1)]
        fig, axes = plt.subplots(nrows=2, figsize=(5, 5))
        axes[0].plot(a, color='b', label='Original vector')
        axes[0].plot(shifted_kernel, color='g', label=f'Kernel at index {idx}')
        axes[1].plot(np.arange(len(a)), result)
        axes[1].set_ylabel('Result of Convolution')
        fig.legend(bbox_to_anchor=(1.6, 0.8))
        plt.tight_layout()
        plt.show()

    slider = widgets.IntSlider(min=10, max=len(a) - 1, step=1, description='index')
    widgets.interact(do_and_plot_convolution, idx=slider);
def convolve2d_interactive(original_image, kernel=nn.Conv2d(1, 1, kernel_size=(2, 2))):
    img_h, img_w = original_image.shape[1], original_image.shape[2]
    kernel_shape = kernel.weight[0, 0, :].shape
    stride_h, stride_w = kernel.stride
    k_h, k_w = kernel_shape[0], kernel_shape[1]
    positions_per_row = (img_w - k_w) // stride_w + 1
    positions_per_col = (img_h - k_h) // stride_h + 1
    max_steps = positions_per_row * positions_per_col - 1

    def do_plot(step=0):
        plt.imshow(original_image[0])
        for i in range(original_image.shape[1]):
            for j in range(original_image.shape[2]):
                plt.text(j, i, f'{int(original_image[0, i, j])}', ha='center', va='center', color='white', fontsize=12)
        row = (step // positions_per_row) * stride_h
        col = (step % positions_per_row) * stride_w
        rect = plt.Rectangle((col - 0.5, row - 0.5), k_w, k_h,
                             fill=False, edgecolor='red', linewidth=2)
        plt.gca().add_patch(rect)
        plt.show()

    step_slider = widgets.IntSlider(min=0, max=max_steps, step=1, value=0, description='step')
    widgets.interact(do_plot, step=step_slider)
def plot_toy_image_data(original_image):
    plt.imshow(original_image[0])
    for i in range(original_image.shape[1]):
        for j in range(original_image.shape[2]):
            plt.text(j, i, f'{int(original_image[0, i, j])}', ha='center', va='center', color='white', fontsize=12)
def train(model: nn.Module, images: torch.Tensor, labels: torch.Tensor, nepochs: int = 100) -> None:
    optimizer = torch.optim.Adam(model.parameters())
    loss_function = nn.CrossEntropyLoss()
    for epoch in range(nepochs):
        optimizer.zero_grad()
        output = model(images)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()
class utils:
    convolve1d_interactive = convolve1d_interactive
    convolve2d_interactive = convolve2d_interactive
    plot_toy_image_data = plot_toy_image_data
    train = train
Download Data
Path('data').mkdir(exist_ok=True, parents=True)
owncloud.Client.from_public_link('https://uni-bonn.sciebo.de/s/6rBtQcJy7GeseaZ').get_file('/', 'data/stripe_dataset.pt')
True
data = torch.load('data/stripe_dataset.pt')
images = data['features']
labels = data['labels']
Section 1: Convolution is a Weighted Filter and Can Work as a Feature Detector
Convolution is a mathematical operation that slides a small matrix (called a kernel or filter) across an input and computes a weighted sum at each position. Where the local shape of the data matches the shape of the kernel, the output has a high amplitude. This property means that convolution with kernels can be used to detect features in images, from low-level features like edges or corners to more complex, high-level features like faces or buildings. Convolutional neural networks take advantage of this property and combine convolutional layers with fully connected layers (the kind of network we have worked with up until now).
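To see this numerically before trying the interactive plots, here is a minimal sketch (our addition, using np.convolve rather than the notebook's utilities): convolving a signal that contains a bump with a boxcar kernel of the same width gives the largest response where kernel and bump fully overlap, and zero where they do not overlap at all.
import numpy as np

signal = np.zeros(20)
signal[8:12] = 1.0   # a 4-sample bump
kernel = np.ones(4)  # a boxcar kernel of the same width

response = np.convolve(signal, kernel, mode='same')
# largest value (4.0) where the kernel fully overlaps the bump,
# 0.0 wherever kernel and bump do not overlap
print(response)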
Exercises
Exercise: Run the cell below to create an interactive plot where a vector is convolved with a boxcar. Drag the slider to perform the 1D convolution. The resulting line in the bottom plot is the weighted sum of the original vector and the kernel at that index. When is the result (the bottom plot) 0 and when is it at its max value?
utils.convolve1d_interactive('boxcar')
Solution
Exercise: Run the cell below to create an interactive plot where a vector is convolved with a gaussian. Drag the slider to perform the 1D convolution. The resulting line in the bottom plot is the weighted sum of the original vector and the kernel at that index. Notice how the shape of the kernel affects the shape of the output here compared to the previous example.
utils.convolve1d_interactive('gaussian')
Solution
Run the cell below to create toy image data.
original_image = torch.tensor([
    [1.0, 2.0, 3.0, 4.0],
    [5.0, 6.0, 7.0, 8.0],
    [9.0, 10.0, 11.0, 12.0],
    [13.0, 14.0, 15.0, 16.0]
])
original_image = original_image.unsqueeze(0)
utils.plot_toy_image_data(original_image)
Example: Create a 2x2 convolutional kernel (kernel_size=(2,2)) with a single input and output channel. Then, run the cell that creates an interactive plot of the original image with the kernel (the red box) overlaid.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,2))
When you drag the slider, the kernel moves along the image, just like in the interactive plots in the previous exercises. When a convolution is carried out, the pixels inside the red box are multiplied with the weights of the kernel.
utils.convolve2d_interactive(original_image, conv2d)
Solution
Exercise: Create a 2x3 convolutional kernel (kernel_size=(2,3)) with a single input and output channel. Then, run the cell that creates an interactive plot of the original image with the kernel (the red box) overlaid.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,3), stride=1)
utils.convolve2d_interactive(original_image, conv2d)
Solution
Exercise: Create a 3x3 convolutional kernel (kernel_size=(3,3)) with a single input and output channel. Then, run the cell that creates an interactive plot of the original image with the kernel (the red box) overlaid.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3,3))
utils.convolve2d_interactive(original_image, conv2d)
Solution
Section 2: Convolution Lets Us Reduce the Number of Parameters and Save Computation
One of the main benefits of convolution is that it can reduce the number of parameters that need to be optimized in our model. The underlying assumption is that neighboring data are related; they contain similar information. For example, neighboring pixels in an image are often from the same object in the image. Therefore, we can aggregate the values in neighboring pixels of the image, and this reduces the number of parameters that need to be trained, which makes training faster.
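To make the saving concrete, here is a minimal sketch (our addition, not one of the exercises): mapping an 8x8 image to a 6x6 output with a fully connected layer takes 64 × 36 weights plus 36 biases = 2340 parameters, while a 3x3 convolution producing the same 6x6 output needs only 9 shared weights plus 1 bias.
# fully connected: one weight per (input pixel, output unit) pair
fc = nn.Linear(8 * 8, 6 * 6)
# convolution: 9 shared weights slide over the whole image
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)

print(sum(p.numel() for p in fc.parameters()))    # 2340
print(sum(p.numel() for p in conv.parameters()))  # 10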
| Code | Description |
|---|---|
| `nn.Conv2d(in_channels, out_channels, kernel_size)` | Creates a 2D convolution layer with the specified number of input channels, output channels, and kernel size. |
| `nn.Conv2d(in_channels, out_channels, kernel_size, stride)` | Creates a 2D convolution layer with a specified stride (how many pixels the kernel jumps between steps). |
| `output = conv2d(image)` | Applies the convolution layer to an image tensor and stores the result. |
Run the cell below to create toy image data.
original_image = torch.tensor([
    [1.0, 2.0, 3.0, 4.0],
    [5.0, 6.0, 7.0, 8.0],
    [9.0, 10.0, 11.0, 12.0],
    [13.0, 14.0, 15.0, 16.0]
])
original_image = original_image.unsqueeze(0)
utils.plot_toy_image_data(original_image)
Example: Create a 2x2 convolutional kernel (kernel_size=(2,2)) with a single input and output channel. Then, convolve the original image with the kernel and check the shape of the output. Run the cell that illustrates the convolution with the kernel below. Can you figure out why the output has the shape that it has?
Note: The first dimension when you check the shape is just the number of output channels. Focus on the last two numbers in the shape.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,2))
output = conv2d(original_image)
print(f'original image shape: {original_image.shape}')
print(f'output shape: {output.shape}')
original image shape: torch.Size([1, 4, 4])
output shape: torch.Size([1, 3, 3])
utils.convolve2d_interactive(original_image, conv2d)
Exercise: Create a 3x3 convolutional kernel (kernel_size=(3,3)) with a single input and output channel. Then, convolve the original image with the kernel and check the shape of the output. Run the cell that illustrates the convolution with the kernel below. Can you figure out why the output has the shape that it has?
Note: The first dimension is just the number of output channels.
Solution
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3,3))
output = conv2d(original_image)
print(f'original image shape: {original_image.shape}')
print(f'output shape: {output.shape}')
original image shape: torch.Size([1, 4, 4])
output shape: torch.Size([1, 2, 2])
utils.convolve2d_interactive(original_image, conv2d)
Solution
Exercise: Create a 2x3 convolutional kernel kernel_size=(2,3) with a single input and output channel. Then, convolve the original image with the kernel and check the shape of the output. Run the cell that illustrates the convolution with the kernel below. Can you figure out why the output has the shape that it has?
Note: The first dimension in the shape is just the number of output channels.
Solution
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,3))
output = conv2d(original_image)
print(f'original image shape: {original_image.shape}')
print(f'output shape: {output.shape}')
original image shape: torch.Size([1, 4, 4])
output shape: torch.Size([1, 3, 2])
utils.convolve2d_interactive(original_image, conv2d)
Solution
Unless you have a 1x1 kernel, the output has fewer rows and/or columns than the original image. In other words, we have reduced the size of the data, and this, in turn, is what reduces the computational resources needed when a model is trained on the convolved image rather than the original image.
The size of the convolved image can be reduced further by introducing “stride”. Stride is how many pixels the kernel should jump between each step in the convolution.
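For the unpadded convolutions used in this notebook, the output size along each dimension is floor((input − kernel) / stride) + 1. The helper below is a sketch of our own (not a notebook utility) that you can check against the shapes printed in the examples that follow.
def conv_output_size(input_size: int, kernel_size: int, stride: int = 1) -> int:
    # valid (unpadded) convolution: floor((input - kernel) / stride) + 1
    return (input_size - kernel_size) // stride + 1

# the 2x1 kernel with stride 2 on a 4x4 image from the next example:
print(conv_output_size(4, 2, stride=2), conv_output_size(4, 1, stride=2))  # 2 2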
Example: Create a 2x1 convolutional kernel kernel_size=(2,1) with stride = 2. Then, convolve the original image with the kernel and check the shape of the output. If you run the cell with the interactive plot below you can see the effect of stride.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,1), stride = 2)
output = conv2d(original_image)
print(f'original image shape: {original_image.shape}')
print(f'output shape {output.shape}')
original image shape: torch.Size([1, 4, 4])
output shape torch.Size([1, 2, 2])
When you drag the slider, how many pixels, either column-wise or row-wise, does the kernel jump with each step?
utils.convolve2d_interactive(original_image, conv2d)
Solution
Exercise: Create a 2x1 convolutional kernel kernel_size=(2,1) with stride = 3. Then, convolve the original image with the kernel and check the shape of the output. If you run the cell with the interactive plot below you can see the effect of stride.
Solution
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,1), stride = 3)
output = conv2d(original_image)
print(f'output shape {output.shape}')
output shape torch.Size([1, 1, 2])
When you drag the slider, how many pixels, either column-wise or row-wise, does the kernel jump with each step? Note that the kernel can't jump outside of the image (the way we have defined it here); that's why it doesn't jump down.
utils.convolve2d_interactive(original_image, conv2d)
Solution
Exercise: Below, a 2x2 convolutional kernel kernel_size=(2,2) with stride = 2 is created and the weights of the kernel are set equal to 1. Convolve the kernel with the original image and assign the result to a variable named output.
conv2d = nn.Conv2d(1, 1, (2,2), stride=2, bias=False)
with torch.no_grad():
    conv2d.weight[:] = torch.ones_like(conv2d.weight)
Solution
output = conv2d(original_image)
Run the cell below to visualize the output. The number inside each square is the result of convolving the kernel with the pixels inside it. In a cell further down, an interactive visualization is created. Try summing up the numbers inside the red box in each step and see if it matches the numbers in the output plot.
utils.plot_toy_image_data(output.detach())
utils.convolve2d_interactive(original_image, conv2d)
Section 3: Making a CNN using nn.Sequential and Using it To Classify Images
In this section, we’ll build Convolutional Neural Networks (CNNs) using nn.Sequential, which allows us to define networks as a sequence of layers. We’ll apply these networks to a simple toy dataset to understand how CNNs process images.
| Code | Description |
|---|---|
| `nn.Sequential(nn.Conv2d(...), nn.Flatten(), ...)` | Creates a container that passes input through the layers in order. |
| `nn.Conv2d(in_channels, out_channels, kernel_size=(N, M))` | 2D convolution layer. `in_channels` is the input depth (e.g., 1 for grayscale), `out_channels` is the number of filters. |
| `nn.ReLU()` | Creates a ReLU activation function. |
| `nn.Flatten()` | Flattens all dimensions except the batch dimension into a single vector. Required before `nn.Linear` in CNNs. |
| `nn.Linear(in_features, out_features)` | Fully connected layer. Maps `in_features` inputs to `out_features` outputs. |
Visualize Sample Images
Run the cell below to see examples of the toy image data. We have two classes of images: One with horizontal stripes and one with vertical stripes. (The grey pixels are just added noise.)
# Visualize sample images from the toy dataset
fig, axes = plt.subplots(2, 5, figsize=(10, 4))
class_names_toy = ['Horizontal Stripes', 'Vertical Stripes']

# Show 5 horizontal stripe images
horizontal_idx = torch.where(labels == 0)[0][:5]
for i, idx in enumerate(horizontal_idx):
    axes[0, i].imshow(images[idx, 0], cmap='gray', vmin=0, vmax=1)
    axes[0, i].set_title(f'{class_names_toy[0]}')
    axes[0, i].axis('off')

# Show 5 vertical stripe images
vertical_idx = torch.where(labels == 1)[0][:5]
for i, idx in enumerate(vertical_idx):
    axes[1, i].imshow(images[idx, 0], cmap='gray', vmin=0, vmax=1)
    axes[1, i].set_title(f'{class_names_toy[1]}')
    axes[1, i].axis('off')

plt.tight_layout()
plt.show()
Exercises
To create a (simple) convolutional neural network, we need three basic ingredients:
- A convolutional layer: `Conv2d`
- A `Flatten` layer. This flattens the output from the convolutional layer so that it can be fed to a dense neural network.
- A dense neural network: a `Linear` layer plus optional activation functions
Example: Create the simplest possible CNN using nn.Sequential. It should have:
- A single `Conv2d` layer: 1 input channel, 1 output channel, 3x3 kernel
- A `Flatten` layer
- A `Linear` layer that outputs 2 classes

For images of shape 8x8, the output of a convolution with a 3x3 kernel has shape 6x6. With 1 output channel, the flattened size is 1 × 6 × 6 = 36.
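Before looking at the model, a quick sanity check (a sketch of ours, not part of the example) verifies that arithmetic by pushing a dummy image through a convolution and a Flatten layer:
x = torch.randn(1, 1, 8, 8)               # batch of one grayscale 8x8 image
feat = nn.Conv2d(1, 1, kernel_size=3)(x)  # shape: (1, 1, 6, 6)
print(nn.Flatten()(feat).shape)           # torch.Size([1, 36])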
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3),  # 8x8 -> 6x6
    nn.Flatten(),  # flattens the output from the convolution layer from 1*6*6 to 36
    nn.Linear(6 * 6, 2)  # 36 -> 2 classes
)
print(model)
Sequential(
(0): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
(1): Flatten(start_dim=1, end_dim=-1)
(2): Linear(in_features=36, out_features=2, bias=True)
)
Run the cell below to apply the untrained CNN to a single image.
# Run on a single image (no training yet)
single_image = images[0:1]
single_label = labels[0:1]
with torch.no_grad():
    output = model(single_image)
print(f"Predicted class: {output.argmax(dim=1).item()}")
print(f"True class: {single_label.item()} ({class_names_toy[single_label.item()]})")
Predicted class: 1
True class: 1 (Vertical Stripes)
Run the cell below to check how accurate the untrained CNN is on the entire dataset. Since it has random weights, the accuracy should be close to 50% (random guessing for 2 classes).
with torch.no_grad():
    outputs = model(images)
    predictions = outputs.argmax(dim=1)
    accuracy = (predictions == labels).float().mean()
print(f"Accuracy without training: {accuracy:.2%}")
Accuracy without training: 50.00%
Exercise: Create a CNN using nn.Sequential. It should have:
- A single `Conv2d` layer: 1 input channel, 4 output channels, 3x3 kernel
- A `Flatten` layer
- A `Linear` layer that outputs 2 classes
The number of output channels represents the number of filters, or kernels, used. You can use different filters that can detect different things in the image. In this exercise, we use 4 filters.
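As a quick illustration (our sketch, not part of the exercise), the weight tensor of such a layer confirms that four output channels mean four independent kernels:
conv = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3)
# shape is (out_channels, in_channels, kernel_height, kernel_width)
print(conv.weight.shape)  # torch.Size([4, 1, 3, 3]): four separate 3x3 filters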
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3),  # 1x8x8 -> 4x6x6
    ___,  # UPDATE ME - flattens the output from the convolution layer from 4x6x6 to 144
    nn.Linear(___, 2)  # UPDATE ME - 144 -> 2 classes
)
print(model)
Solution
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3),  # 1x8x8 -> 4x6x6
    nn.Flatten(),  # flattens the output from the convolution layer from 4x6x6 to 144
    nn.Linear(4 * 6 * 6, 2)  # 144 -> 2 classes
)
print(model)
Sequential(
(0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
(1): Flatten(start_dim=1, end_dim=-1)
(2): Linear(in_features=144, out_features=2, bias=True)
)
Run the cell below to train the model.
utils.train(model, images, labels)
Run the cell below to check how accurate the trained CNN is on the entire dataset.
Note: It’s to be expected that the accuracy is still not very high.
with torch.no_grad():
    outputs = model(images)
    predictions = outputs.argmax(dim=1)
    accuracy = (predictions == labels).float().mean()
print(f"Accuracy with training: {accuracy:.2%}")
Accuracy with training: 56.00%
Exercise: Add a ReLU activation after the convolutional layer.
Create a CNN with:
- `Conv2d`: 1 input channel, 4 output channels, 3x3 kernel
- `ReLU`: activation function
- `Flatten`
- `Linear`: outputs 2 classes
Solution
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3,3)),  # 8x8 -> 6x6
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(4 * 6 * 6, 2)
)
print(model)
Sequential(
(0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
(1): ReLU()
(2): Flatten(start_dim=1, end_dim=-1)
(3): Linear(in_features=144, out_features=2, bias=True)
)
Run the cell below to train the model.
utils.train(model, images, labels)
Run the cell below to check how accurate the trained CNN is on the entire dataset. How high is the accuracy now?
with torch.no_grad():
    outputs = model(images)
    predictions = outputs.argmax(dim=1)
    accuracy = (predictions == labels).float().mean()
print(f"Accuracy with training: {accuracy:.2%}")
Accuracy with training: 100.00%
Exercise: Create a model with two convolutional layers. Deeper networks can learn more complex features.
Create a CNN with:
- `Conv2d`: 1 input channel and 4 output channels, 3x3 kernel
- `ReLU`
- `Conv2d`: 4 input channels and 2 output channels, 2x2 kernel
- `ReLU`
- `Flatten`
- `Linear`: outputs 2 classes
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3,3)),  # 1x8x8 -> 4x6x6
    nn.ReLU(),
    ___,  # UPDATE ME - 4x6x6 -> 2x5x5
    ___,  # UPDATE ME
    nn.Flatten(),  # 2x5x5 = 50
    nn.Linear(2 * 5 * 5, 2)
)
print(model)
Solution
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3,3)),  # 1x8x8 -> 4x6x6
    nn.ReLU(),
    nn.Conv2d(in_channels=4, out_channels=2, kernel_size=(2,2)),  # 4x6x6 -> 2x5x5
    nn.ReLU(),
    nn.Flatten(),  # 2x5x5 = 50
    nn.Linear(2 * 5 * 5, 2)
)
print(model)
Sequential(
(0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
(1): ReLU()
(2): Conv2d(4, 2, kernel_size=(2, 2), stride=(1, 1))
(3): ReLU()
(4): Flatten(start_dim=1, end_dim=-1)
(5): Linear(in_features=50, out_features=2, bias=True)
)
Section 4: Create CNNs with Object-Oriented Programming
While nn.Sequential is convenient for simple networks, PyTorch’s object-oriented approach using nn.Module gives you full control over your model architecture. By defining a custom class, you can implement complex behaviors like skip connections, conditional logic, and multiple outputs that aren’t possible with nn.Sequential.
| Code | Description |
|---|---|
| `class Model(nn.Module): ...` | Defines a class named Model that inherits from nn.Module. |
| `def __init__(self): super().__init__()` | The constructor; `super().__init__()` initializes the parent nn.Module. |
| `self.conv = nn.Conv2d(1, 4, 3)` | Adds a convolutional layer with 1 input channel, 4 output channels, and a 3x3 kernel to the model. |
| `self.fc = nn.Linear(144, 2)` | Adds a fully connected layer to the model. |
| `def forward(self, x): out = self.conv(x); return self.fc(out)` | Adds a method forward that carries out the forward pass of the input x through the network. |
| `model = Model()` | Creates an instance of Model and assigns it to a variable named model. |
Example: Create the model below with a custom class that inherits from nn.Module instead of using nn.Sequential.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3),  # 8x8 -> 6x6
    nn.Flatten(),  # flattens the output from the convolution layer from 1*6*6 to 36
    nn.Linear(6 * 6, 2)  # 36 -> 2 classes
)
model
Sequential(
(0): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
(1): Flatten(start_dim=1, end_dim=-1)
(2): Linear(in_features=36, out_features=2, bias=True)
)
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # define the layers
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(6 * 6, 2)

    def forward(self, x):
        x = self.conv(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# create an instance of the model
model = Model()
Exercise: Create the model below with a custom class that inherits from nn.Module instead of using nn.Sequential.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3,3)),  # 8x8 -> 6x6
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(4 * 6 * 6, 2)
)
Solution
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # Define the layers
        self.conv = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3, 3))
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(4 * 6 * 6, 2)

    def forward(self, x):
        out = self.conv(x)
        out = self.relu(out)
        out = self.flatten(out)
        out = self.fc(out)
        return out
Exercise: Create the model below with a custom class that inherits from nn.Module instead of using nn.Sequential.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3,3)),  # 1x8x8 -> 4x6x6
    nn.ReLU(),
    nn.Conv2d(in_channels=4, out_channels=2, kernel_size=(2,2)),  # 4x6x6 -> 2x5x5
    nn.ReLU(),
    nn.Flatten(),  # 2x5x5 = 50
    nn.Linear(2 * 5 * 5, 2)
)
print(model)
Sequential(
(0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
(1): ReLU()
(2): Conv2d(4, 2, kernel_size=(2, 2), stride=(1, 1))
(3): ReLU()
(4): Flatten(start_dim=1, end_dim=-1)
(5): Linear(in_features=50, out_features=2, bias=True)
)
Solution
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # define the layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(3, 3))
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=2, kernel_size=(2, 2))
        self.relu2 = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(2 * 5 * 5, 2)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.flatten(out)
        out = self.fc(out)
        return out
Demo: The following models cannot be created using nn.Sequential:
CNN with conditional logic in the forward pass. We might be applying the model to data that has 2 classes or data that has 10 classes; the if-statement makes sure that the right fully connected layer is used for whichever case the data presents.
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 4, 3)
        self.fc1 = nn.Linear(144, 2)
        self.fc2 = nn.Linear(144, 10)

    def forward(self, x, num_classes=2):
        out = self.conv(x)
        out = out.flatten(1)
        # different paths based on condition
        if num_classes == 2:
            return self.fc1(out)
        else:
            return self.fc2(out)
CNN with skip connections. A real-world example of a network with this kind of architecture is the U-Net, a model used for biomedical image segmentation.
class SkipModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=4, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=2, kernel_size=1)  # 8 = 4 + 4 from skip
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(2 * 8 * 8, 2)

    def forward(self, x):
        # first conv block
        skip = self.relu1(self.conv1(x))  # (skip connection)
        # second conv block
        x = self.relu2(self.conv2(skip))
        # concatenate skip connection along the channel dimension
        x = torch.cat([x, skip], dim=1)  # This is why we can't use nn.Sequential
        # final layers
        x = self.conv3(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x
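As a closing sanity check (our sketch, not part of the demo), running SkipModel on a dummy batch confirms that the concatenated channels and the final layers produce two class scores per image:
# dummy batch of one 8x8 grayscale image (the shape SkipModel expects)
dummy = torch.randn(1, 1, 8, 8)
skip_model = SkipModel()
with torch.no_grad():
    out = skip_model(dummy)
print(out.shape)  # torch.Size([1, 2]): two class scores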