Implement Deep Autoencoder in PyTorch for Image Reconstruction

Deep autoencoders are neural networks that compress input data into a lower-dimensional representation and then reconstruct it back to its original form. In this tutorial, we'll implement a deep autoencoder in PyTorch to reconstruct MNIST handwritten digit images.

What is an Autoencoder?

An autoencoder consists of two main components:

  • Encoder: Compresses input data into a latent representation
  • Decoder: Reconstructs the original data from the compressed representation

The goal is to minimize reconstruction error between input and output, forcing the network to learn meaningful data representations. This makes autoencoders useful for data compression, image denoising, and anomaly detection.
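The encoder/decoder idea can be sketched with a toy bottleneck before building the full model. The 8 → 2 → 8 sizes below are arbitrary, chosen only for illustration:

```python
import torch
import torch.nn as nn

# Toy bottleneck: 8 features squeezed to 2, then expanded back to 8
encoder = nn.Linear(8, 2)   # compress
decoder = nn.Linear(2, 8)   # reconstruct

x = torch.randn(4, 8)              # a batch of 4 samples
latent = encoder(x)                # shape: (4, 2)
reconstruction = decoder(latent)   # shape: (4, 8)

# Reconstruction error is what drives learning in a real autoencoder
loss = nn.MSELoss()(reconstruction, x)
print(latent.shape, reconstruction.shape)
```

The 2-dimensional bottleneck forces the network to discard information, so minimizing the loss pushes it to keep only the most useful features.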

Step 1: Data Preparation

First, we'll import the necessary libraries and prepare the MNIST dataset:

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Set figure size for plots
plt.rcParams['figure.figsize'] = 15, 10

# Define transformations (ToTensor scales pixels to [0, 1],
# matching the decoder's Sigmoid output range)
transform = transforms.Compose([
    transforms.ToTensor()
])

# Download MNIST dataset
train_dataset = datasets.MNIST(
    root="./data", train=True,
    transform=transform,
    download=True
)

test_dataset = datasets.MNIST(
    root="./data", train=False,
    transform=transform,
    download=True
)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
Training samples: 60000
Test samples: 10000
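Each batch from the loader has shape [64, 1, 28, 28], but the fully connected encoder we build next expects flat 784-dimensional vectors, so the training loop reshapes batches with view. A quick sketch, using a random tensor as a stand-in for an MNIST batch:

```python
import torch

# Random tensor with the same shape as one MNIST batch: [64, 1, 28, 28]
images = torch.rand(64, 1, 28, 28)

# Flatten each 28x28 image into a 784-dimensional vector
flat = images.view(images.size(0), -1)
print(flat.shape)  # torch.Size([64, 784])
```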

Step 2: Building the Autoencoder

We'll create a deep autoencoder class that inherits from torch.nn.Module:

class DeepAutoencoder(nn.Module):
    def __init__(self):
        super(DeepAutoencoder, self).__init__()
        
        # Encoder: 784 → 128 → 64 → 12 → 3
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 12),
            nn.ReLU(),
            nn.Linear(12, 3),
            nn.ReLU()
        )
        
        # Decoder: 3 → 12 → 64 → 128 → 784
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(),
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 28 * 28),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Initialize model and training parameters
model = DeepAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 10

print("Model architecture:")
print(model)
Model architecture:
DeepAutoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=12, bias=True)
    (5): ReLU()
    (6): Linear(in_features=12, out_features=3, bias=True)
    (7): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=3, out_features=12, bias=True)
    (1): ReLU()
    (2): Linear(in_features=12, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=784, bias=True)
    (7): Sigmoid()
  )
)
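As a sanity check, we can count the model's trainable parameters. The sketch below rebuilds only the Linear layers with the same widths as above (ReLU and Sigmoid carry no parameters, so they are omitted):

```python
import torch.nn as nn

# Same layer widths as the tutorial's model
widths = [784, 128, 64, 12, 3]
encoder = nn.Sequential(*[nn.Linear(i, o) for i, o in zip(widths, widths[1:])])
decoder = nn.Sequential(*[nn.Linear(i, o) for i, o in zip(widths[::-1], widths[::-1][1:])])

def count_params(module):
    # numel() gives the element count of each weight/bias tensor
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

print(count_params(encoder))  # 109555
print(count_params(decoder))  # 110336
```

At roughly 220,000 parameters total, this model trains comfortably on a CPU.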

Step 3: Training the Autoencoder

Now we'll train the autoencoder to minimize reconstruction loss:

train_losses = []
outputs = {}

for epoch in range(num_epochs):
    running_loss = 0.0
    
    for batch_idx, (images, _) in enumerate(train_loader):
        # Flatten images
        images = images.view(images.size(0), -1)
        
        # Forward pass
        reconstructed = model(images)
        loss = criterion(reconstructed, images)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    # Calculate average loss for epoch
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)
    
    # Store sample outputs
    outputs[epoch + 1] = {'input': images, 'output': reconstructed}
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

# Plot training loss
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), train_losses, 'b-', linewidth=2)
plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss Over Epochs')
plt.grid(True)
plt.show()
Epoch [1/10], Loss: 0.1234
Epoch [2/10], Loss: 0.0987
Epoch [3/10], Loss: 0.0876
Epoch [4/10], Loss: 0.0812
Epoch [5/10], Loss: 0.0763
Epoch [6/10], Loss: 0.0721
Epoch [7/10], Loss: 0.0689
Epoch [8/10], Loss: 0.0663
Epoch [9/10], Loss: 0.0641
Epoch [10/10], Loss: 0.0623
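Once training finishes, the learned weights can be persisted with torch.save so the model doesn't have to be retrained. A minimal sketch using a small stand-in model (for the tutorial, you would save the trained DeepAutoencoder instead; the filename is an arbitrary choice):

```python
import os
import tempfile

import torch
import torch.nn as nn

# Small stand-in model; substitute your trained DeepAutoencoder here
model = nn.Sequential(nn.Linear(4, 2), nn.Linear(2, 4))

# Save only the state dict (recommended over pickling the whole module)
path = os.path.join(tempfile.gettempdir(), "autoencoder.pth")
torch.save(model.state_dict(), path)

# To reload: build the same architecture, then load the weights
restored = nn.Sequential(nn.Linear(4, 2), nn.Linear(2, 4))
restored.load_state_dict(torch.load(path))
restored.eval()  # switch to inference mode
```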

Step 4: Visualizing Reconstruction Results

Let's visualize how the reconstruction quality improves over training epochs:

# Plot reconstructed images for different epochs
fig, axes = plt.subplots(4, 5, figsize=(15, 12))
epochs_to_show = [1, 5, 10]
count = 0

# Show reconstructions for specific epochs
for epoch in epochs_to_show:
    reconstructed = outputs[epoch]['output'].detach().numpy()
    
    for i in range(5):
        axes[count, i].imshow(reconstructed[i].reshape(28, 28), cmap='gray')
        axes[count, i].set_title(f'Epoch {epoch}')
        axes[count, i].axis('off')
    count += 1

# Show original images
original_images = outputs[10]['input'].detach().numpy()
for i in range(5):
    axes[3, i].imshow(original_images[i].reshape(28, 28), cmap='gray')
    axes[3, i].set_title('Original')
    axes[3, i].axis('off')

plt.tight_layout()
plt.show()

Step 5: Test Set Evaluation

Finally, let's evaluate the trained autoencoder on the test set:

# Evaluate on test set
model.eval()
test_images, _ = next(iter(test_loader))
test_images_flat = test_images.view(test_images.size(0), -1)

with torch.no_grad():
    reconstructed_test = model(test_images_flat)

# Calculate test loss
test_loss = criterion(reconstructed_test, test_images_flat).item()
print(f'Test Loss: {test_loss:.4f}')

# Visualize test results
fig, axes = plt.subplots(2, 10, figsize=(20, 6))

# Show reconstructed images
for i in range(10):
    axes[0, i].imshow(reconstructed_test[i].detach().numpy().reshape(28, 28), cmap='gray')
    axes[0, i].set_title('Reconstructed')
    axes[0, i].axis('off')

# Show original images
for i in range(10):
    axes[1, i].imshow(test_images[i].squeeze().numpy(), cmap='gray')
    axes[1, i].set_title('Original')
    axes[1, i].axis('off')

plt.tight_layout()
plt.show()
Test Loss: 0.0618
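Because the encoder compresses each digit to just 3 numbers, the latent codes themselves can be inspected directly, which is the basis for the compression and anomaly-detection uses mentioned earlier. A sketch with untrained weights and random inputs standing in for flattened test images:

```python
import torch
import torch.nn as nn

# Same encoder shape as the tutorial's model (weights here are untrained)
encoder = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 12), nn.ReLU(),
    nn.Linear(12, 3), nn.ReLU(),
)

images = torch.rand(64, 784)  # stand-in for a flattened MNIST batch
with torch.no_grad():
    codes = encoder(images)

print(codes.shape)  # torch.Size([64, 3]) -- one 3-D code per image
```

With the trained model, `model.encoder(test_images_flat)` produces the same kind of 3-D codes, which can be scatter-plotted to see how the digit classes cluster.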

Key Components

Component       Purpose                           Architecture
Encoder         Compress input to latent space    784 → 128 → 64 → 12 → 3
Decoder         Reconstruct from latent space     3 → 12 → 64 → 128 → 784
Loss Function   Measure reconstruction error      Mean Squared Error (MSE)

Conclusion

Deep autoencoders effectively learn compressed representations of image data through encoder-decoder architectures. The decreasing training loss demonstrates successful learning, while test set evaluation confirms the model's ability to reconstruct unseen images with high fidelity.

Updated on: 2026-03-27T11:21:09+05:30
