| name | deep-learning |
| description | Neural networks, CNNs, RNNs, Transformers with TensorFlow and PyTorch. Use for image classification, NLP, sequence modeling, or complex pattern recognition. |
| sasmp_version | 1.3.0 |
| bonded_agent | 04-machine-learning-ai |
| bond_type | PRIMARY_BOND |
# Deep Learning
Build neural networks for computer vision, NLP, and complex data patterns.
## Quick Start with PyTorch
```python
import torch
import torch.nn as nn
import torch.optim as optim

# Define model
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Initialize
model = NeuralNet(input_size=784, hidden_size=128, num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (train_loader: a DataLoader yielding (images, labels) batches)
for epoch in range(10):
    for images, labels in train_loader:
        images = images.view(images.size(0), -1)  # flatten e.g. 28x28 MNIST images to 784-dim vectors

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```
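After training, switch the model to evaluation mode and disable gradient tracking. A minimal accuracy check, assuming a `test_loader` DataLoader that is not defined above:

```python
# Evaluate on held-out data
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:            # test_loader: assumed DataLoader
        images = images.view(images.size(0), -1)  # flatten to match input_size=784
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
print(f'Accuracy: {100 * correct / total:.2f}%')
```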
## CNN for Image Classification
```python
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)  # assumes 32x32 inputs (e.g. CIFAR-10)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 32x32 -> 16x16
        x = self.pool(F.relu(self.conv2(x)))  # 16x16 -> 8x8
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
```
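The flattened size `64 * 8 * 8` assumes 32x32 RGB inputs such as CIFAR-10 (two 2x2 poolings halve each spatial dimension twice). A quick shape check with a dummy batch, purely illustrative:

```python
# Sanity-check output shape with a CIFAR-10-sized dummy batch
model = CNN(num_classes=10)
dummy = torch.randn(4, 3, 32, 32)  # (batch, channels, height, width)
print(model(dummy).shape)          # expected: torch.Size([4, 10])
```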
## Transfer Learning
```python
import torchvision.models as models

# Load pre-trained ResNet (torchvision >= 0.13 prefers weights=models.ResNet50_Weights.DEFAULT)
model = models.resnet50(pretrained=True)

# Freeze layers
for param in model.parameters():
    param.requires_grad = False

# Replace final layer (num_classes: the number of classes in your dataset)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)

# Only train final layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
```
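A common next step is to unfreeze the last residual stage and fine-tune it with a smaller learning rate than the new head. A hedged sketch, with learning rates chosen only for illustration:

```python
# Unfreeze the last ResNet stage for fine-tuning
for param in model.layer4.parameters():
    param.requires_grad = True

# Smaller learning rate for pre-trained weights, larger for the new head
optimizer = optim.Adam([
    {'params': model.layer4.parameters(), 'lr': 1e-4},
    {'params': model.fc.parameters(), 'lr': 1e-3},
])
```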
## LSTM for Sequences
```python
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # dropout is applied between LSTM layers, so it only takes effect when num_layers > 1
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initial hidden and cell states, created on the same device as the input
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # use the last time step's output
        return out
```
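With `batch_first=True`, the model expects input shaped `(batch, seq_len, input_size)`. A small usage sketch with made-up dimensions:

```python
# 8 sequences, 30 time steps, 5 features each, predicting a single value
model = LSTMModel(input_size=5, hidden_size=64, num_layers=2, output_size=1)
x = torch.randn(8, 30, 5)  # (batch, seq_len, input_size)
print(model(x).shape)      # torch.Size([8, 1])
```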
## Transformers with Hugging Face
```python
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

trainer.train()
```
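The Trainer above assumes `train_dataset` and `eval_dataset` are already tokenized. A hedged sketch of building them with the `datasets` library; the texts and labels are placeholders:

```python
from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

def tokenize(batch):
    # Pad/truncate each text to a fixed length so examples batch cleanly
    return tokenizer(batch['text'], truncation=True, padding='max_length', max_length=128)

# Placeholder data; replace with your own texts and integer labels
raw = Dataset.from_dict({'text': ['great movie', 'terrible plot'], 'label': [1, 0]})
train_dataset = raw.map(tokenize, batched=True)
eval_dataset = train_dataset  # use a proper held-out split in practice
```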
## Tips & Tricks
Regularization:
- Dropout: `nn.Dropout(0.5)`
- Batch Normalization: `nn.BatchNorm2d(channels)`
- Weight Decay: `optimizer = optim.Adam(params, weight_decay=0.01)`
- Early Stopping: monitor validation loss and stop when it stops improving (see the sketch below)
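A minimal early-stopping loop, assuming `train_one_epoch` and `evaluate` helpers (both hypothetical) that run one epoch and return the validation loss:

```python
best_val_loss = float('inf')
patience, epochs_without_improvement = 5, 0

for epoch in range(100):
    train_one_epoch(model, train_loader, optimizer, criterion)  # hypothetical helper
    val_loss = evaluate(model, val_loader, criterion)           # hypothetical helper

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict(), 'best_model.pt')  # keep the best checkpoint
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f'Early stopping at epoch {epoch}')
            break
```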
Optimization:
- Learning Rate Scheduling: `scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)`
- Gradient Clipping: `torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)` (see the training-loop sketch below)
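A sketch of where the scheduler step and gradient clipping sit in a standard training loop (the loop itself mirrors the quick-start example above):

```python
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(90):
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        # Clip gradients before the optimizer step to avoid exploding updates
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
    # StepLR is stepped once per epoch, not once per batch
    scheduler.step()
```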
Data Augmentation:

```python
from torchvision import transforms

transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```
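Random augmentations belong on the training split only; evaluation data should get a deterministic pipeline. A sketch using ImageFolder-style datasets with placeholder directory names:

```python
from torchvision import datasets
from torch.utils.data import DataLoader

eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# 'data/train' and 'data/val' are placeholder paths
train_ds = datasets.ImageFolder('data/train', transform=transform)
val_ds = datasets.ImageFolder('data/val', transform=eval_transform)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)
```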
## Common Issues
Overfitting:
- Add dropout
- Data augmentation
- Early stopping
- Reduce model complexity
Underfitting:
- Increase model capacity
- Train longer
- Reduce regularization
- Better features
Vanishing Gradients:
- Use ReLU activation
- Batch normalization
- Residual connections (see the sketch after this list)
- Proper initialization
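A minimal residual block sketch combining skip connections, batch normalization, and He (Kaiming) initialization, three of the remedies above; the exact block is illustrative, not from the original:

```python
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)
        # He initialization works well with ReLU activations
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return F.relu(out + x)  # skip connection keeps gradients flowing
```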