| name | model-optimization |
| description | Quantization, pruning, AutoML, hyperparameter tuning, and performance optimization. Use for improving model performance, reducing size, or automated ML. |
| sasmp_version | 1.3.0 |
| bonded_agent | 06-mlops-deployment |
| bond_type | SECONDARY_BOND |
Model Optimization
Optimize models for better predictive performance, smaller size, and faster inference.
Hyperparameter Tuning
Grid Search
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(),
    param_grid,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1
)
grid_search.fit(X_train, y_train)
print(f"Best params: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")
Bayesian Optimization
import xgboost as xgb
from skopt import BayesSearchCV

param_space = {
    'n_estimators': (100, 500),
    'max_depth': (5, 50),
    'learning_rate': (0.01, 0.3, 'log-uniform')
}

bayes_search = BayesSearchCV(
    xgb.XGBClassifier(),
    param_space,
    n_iter=50,
    cv=5,
    scoring='f1_weighted'
)
bayes_search.fit(X_train, y_train)
Optuna
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3)
    }
    model = xgb.XGBClassifier(**params)
    score = cross_val_score(model, X_train, y_train,
                            cv=5, scoring='f1').mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
print(f"Best params: {study.best_params}")
print(f"Best score: {study.best_value:.3f}")
Model Compression
Quantization (PyTorch)
import torch

# Post-training dynamic quantization: weights stored as int8,
# activations quantized on the fly at inference time
model_fp32 = MyModel()
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32,
    {torch.nn.Linear},
    dtype=torch.qint8
)
# Roughly 4x smaller weights; inference speedup depends on hardware and workload

# Quantization-aware training: simulate quantization effects during training
model = MyModel()
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model_prepared = torch.quantization.prepare_qat(model.train())
for epoch in range(epochs):
    train(model_prepared)
model_quantized = torch.quantization.convert(model_prepared.eval())
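To check the size reduction, one option is to serialize both models and compare file sizes; a rough sketch (the temporary file name is illustrative):

import os

def model_size_mb(model, path="tmp_model.pt"):
    # Serialize the state dict and report its size in MB
    torch.save(model.state_dict(), path)
    size = os.path.getsize(path) / 1e6
    os.remove(path)
    return size

print(f"fp32: {model_size_mb(model_fp32):.1f} MB")
print(f"int8: {model_size_mb(model_int8):.1f} MB")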
Pruning
import torch
import torch.nn.utils.prune as prune

# Global unstructured pruning across all Linear layers
parameters_to_prune = [
    (module, 'weight') for module in model.modules()
    if isinstance(module, torch.nn.Linear)
]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2  # Remove 20% of weights globally
)
# Make the pruning permanent by removing the reparametrization
for module, _ in parameters_to_prune:
    prune.remove(module, 'weight')
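To confirm how much was actually pruned, one can count zeroed weights after removal; a minimal sketch:

# Fraction of zero weights across the pruned Linear layers
total = sum(m.weight.nelement() for m, _ in parameters_to_prune)
zeros = sum(int((m.weight == 0).sum()) for m, _ in parameters_to_prune)
print(f"Global sparsity: {zeros / total:.1%}")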
Knowledge Distillation
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.5):
    """
    Distillation loss: a weighted combination of soft targets from the
    teacher and hard targets from the ground-truth labels.
    """
    # Soft targets (knowledge from the teacher, softened by temperature T)
    soft_targets = F.softmax(teacher_logits / T, dim=1)
    soft_prob = F.log_softmax(student_logits / T, dim=1)
    soft_loss = F.kl_div(soft_prob, soft_targets, reduction='batchmean') * (T ** 2)
    # Hard targets (ground truth)
    hard_loss = F.cross_entropy(student_logits, labels)
    # Combined loss
    return alpha * soft_loss + (1 - alpha) * hard_loss

# Train the student model against the frozen teacher
teacher_model.eval()
student_model.train()
for images, labels in train_loader:
    with torch.no_grad():
        teacher_logits = teacher_model(images)
    student_logits = student_model(images)
    loss = distillation_loss(student_logits, teacher_logits, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
AutoML
Auto-sklearn
import autosklearn.classification

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=3600,  # 1 hour total budget
    per_run_time_limit=300,
    memory_limit=3072
)
automl.fit(X_train, y_train)
predictions = automl.predict(X_test)
print(automl.leaderboard())
print(automl.show_models())
H2O AutoML
import h2o
import pandas as pd
from h2o.automl import H2OAutoML

h2o.init()
# Assumes the label column/Series is named 'target'
train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

aml = H2OAutoML(max_runtime_secs=3600, max_models=20)
aml.train(x=X_train.columns.tolist(), y='target', training_frame=train)

# Leaderboard
lb = aml.leaderboard
print(lb)

# Best model
best_model = aml.leader
predictions = best_model.predict(test)
TPOT
from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
    n_jobs=-1
)
tpot.fit(X_train, y_train)
print(f"Score: {tpot.score(X_test, y_test):.3f}")
# Export the best pipeline as a standalone Python script
tpot.export('best_pipeline.py')
Feature Selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import (
    SelectKBest, f_classif, RFE, SelectFromModel
)

# Univariate selection: keep the 10 features with the highest ANOVA F-score
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)

# Recursive Feature Elimination
estimator = RandomForestClassifier()
rfe = RFE(estimator, n_features_to_select=10)
X_new = rfe.fit_transform(X, y)

# Model-based selection using feature importances
selector = SelectFromModel(RandomForestClassifier(), max_features=10)
X_new = selector.fit_transform(X, y)
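When X is a pandas DataFrame it is often useful to recover which columns survived the selection; a short sketch using get_support() (assumes the DataFrame case):

# Boolean mask of selected features mapped back to column names
mask = selector.get_support()
selected_columns = X.columns[mask]
print(list(selected_columns))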
Performance Optimization
Inference Optimization (ONNX)
import torch
import torch.onnx

# Export PyTorch to ONNX (set the model to eval mode first)
model.eval()
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=11,
    input_names=['input'],
    output_names=['output']
)

# Run with ONNX Runtime (input_data: numpy float32 array matching the input shape)
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name
output = session.run(None, {input_name: input_data})
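A quick sanity check is to compare the ONNX Runtime output against the original PyTorch output on the same input; a sketch, assuming the model returns a single tensor:

import numpy as np

input_data = dummy_input.numpy()
with torch.no_grad():
    torch_out = model(dummy_input).numpy()
ort_out = session.run(None, {input_name: input_data})[0]
# Allow small numerical differences between the two runtimes
print("Outputs match:", np.allclose(torch_out, ort_out, atol=1e-4))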
TensorRT (NVIDIA GPU)
import tensorrt as trt

# Convert ONNX to a TensorRT engine (API shown for TensorRT 8.x; newer releases
# replace max_workspace_size/build_engine with config.set_memory_pool_limit
# and builder.build_serialized_network)
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# The ONNX parser requires an explicit-batch network
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GB workspace
engine = builder.build_engine(network, config)
# Speedups on NVIDIA GPUs are often substantial but depend on the model and precision
Learning Rate Scheduling
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

# Step decay
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Cosine annealing
scheduler = CosineAnnealingLR(optimizer, T_max=100)

# Training loop
for epoch in range(epochs):
    train(model, optimizer)
    scheduler.step()
Early Stopping
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            # No meaningful improvement: count towards patience
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: record the new best loss and reset the counter
            self.best_loss = val_loss
            self.counter = 0

# Usage
early_stopping = EarlyStopping(patience=10)
for epoch in range(epochs):
    train_loss = train(model)
    val_loss = validate(model)
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break
Best Practices
- Start simple: establish a baseline model before optimizing
- Profile before optimizing: find the actual bottlenecks first (see the latency sketch after this list)
- Measure everything: track accuracy, latency, and model size
- Mind the trade-offs: accuracy vs. speed vs. size
- Validate improvements: confirm gains with A/B testing
- Automate: use AutoML for initial exploration
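A minimal latency-profiling sketch in PyTorch; the input shape is illustrative, and model_fp32/model_int8 are assumed to be the fp32 and dynamically quantized models from the quantization example above:

import time
import torch

def measure_latency(model, input_shape=(1, 3, 224, 224), runs=100):
    # Average forward-pass latency in milliseconds on CPU
    model.eval()
    x = torch.randn(*input_shape)
    with torch.no_grad():
        for _ in range(10):  # warm-up iterations
            model(x)
        start = time.perf_counter()
        for _ in range(runs):
            model(x)
    return (time.perf_counter() - start) / runs * 1000

print(f"fp32 latency: {measure_latency(model_fp32):.2f} ms")
print(f"int8 latency: {measure_latency(model_int8):.2f} ms")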