| name | model-optimization |
| description | Quantization, pruning, AutoML, hyperparameter tuning, and performance optimization. Use for improving model performance, reducing size, or automated ML. |
| sasmp_version | 1.3.0 |
| bonded_agent | 06-mlops-deployment |
| bond_type | SECONDARY_BOND |
Model Optimization
Optimize models for better predictive performance, smaller size, and faster inference.
Hyperparameter Tuning
Grid Search
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(),
    param_grid,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1
)
grid_search.fit(X_train, y_train)
print(f"Best params: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")
Bayesian Optimization
import xgboost as xgb
from skopt import BayesSearchCV

param_space = {
    'n_estimators': (100, 500),
    'max_depth': (5, 50),
    'learning_rate': (0.01, 0.3, 'log-uniform')
}

bayes_search = BayesSearchCV(
    xgb.XGBClassifier(),
    param_space,
    n_iter=50,
    cv=5,
    scoring='f1_weighted'
)
bayes_search.fit(X_train, y_train)
Optuna
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3)
    }
    model = xgb.XGBClassifier(**params)
    score = cross_val_score(model, X_train, y_train,
                            cv=5, scoring='f1').mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
print(f"Best params: {study.best_params}")
print(f"Best score: {study.best_value:.3f}")
Model Compression
Quantization (PyTorch)
import torch

# Post-training dynamic quantization: weights stored as int8,
# activations quantized on the fly at inference time
model_fp32 = MyModel()
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32,
    {torch.nn.Linear},
    dtype=torch.qint8
)
# Roughly 4x smaller weights; inference speedup depends on hardware and workload

# Quantization-aware training: simulate quantization effects during training
model = MyModel()
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model_prepared = torch.quantization.prepare_qat(model.train())
for epoch in range(epochs):
    train(model_prepared)
model_quantized = torch.quantization.convert(model_prepared.eval())
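To check the size reduction, one option is to serialize both models and compare file sizes; a rough sketch (the temporary file name is illustrative):

import os

def model_size_mb(model, path="tmp_model.pt"):
    # Serialize the state dict and report its size in MB
    torch.save(model.state_dict(), path)
    size = os.path.getsize(path) / 1e6
    os.remove(path)
    return size

print(f"fp32: {model_size_mb(model_fp32):.1f} MB")
print(f"int8: {model_size_mb(model_int8):.1f} MB")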
Pruning
import torch
import torch.nn.utils.prune as prune

# Global unstructured pruning across all Linear layers
parameters_to_prune = [
    (module, 'weight') for module in model.modules()
    if isinstance(module, torch.nn.Linear)
]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2  # Remove 20% of weights globally
)
# Make the pruning permanent by removing the reparametrization
for module, _ in parameters_to_prune:
    prune.remove(module, 'weight')
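To confirm how much was actually pruned, one can count zeroed weights after removal; a minimal sketch:

# Fraction of zero weights across the pruned Linear layers
total = sum(m.weight.nelement() for m, _ in parameters_to_prune)
zeros = sum(int((m.weight == 0).sum()) for m, _ in parameters_to_prune)
print(f"Global sparsity: {zeros / total:.1%}")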
Knowledge Distillation
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.5):
    """
    Distillation loss: a weighted combination of soft targets from the
    teacher and hard targets from the ground-truth labels.
    """
    # Soft targets (knowledge from the teacher, softened by temperature T)
    soft_targets = F.softmax(teacher_logits / T, dim=1)
    soft_prob = F.log_softmax(student_logits / T, dim=1)
    soft_loss = F.kl_div(soft_prob, soft_targets, reduction='batchmean') * (T ** 2)
    # Hard targets (ground truth)
    hard_loss = F.cross_entropy(student_logits, labels)
    # Combined loss
    return alpha * soft_loss + (1 - alpha) * hard_loss

# Train the student model against the frozen teacher
teacher_model.eval()
student_model.train()
for images, labels in train_loader:
    with torch.no_grad():
        teacher_logits = teacher_model(images)
    student_logits = student_model(images)
    loss = distillation_loss(student_logits, teacher_logits, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
AutoML
Auto-sklearn
import autosklearn.classification

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=3600,  # 1 hour total budget
    per_run_time_limit=300,
    memory_limit=3072
)
automl.fit(X_train, y_train)
predictions = automl.predict(X_test)
print(automl.leaderboard())
print(automl.show_models())
H2O AutoML
import h2o
import pandas as pd
from h2o.automl import H2OAutoML

h2o.init()
# Assumes the label column/Series is named 'target'
train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

aml = H2OAutoML(max_runtime_secs=3600, max_models=20)
aml.train(x=X_train.columns.tolist(), y='target', training_frame=train)

# Leaderboard
lb = aml.leaderboard
print(lb)

# Best model
best_model = aml.leader
predictions = best_model.predict(test)
TPOT
from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
    n_jobs=-1
)
tpot.fit(X_train, y_train)
print(f"Score: {tpot.score(X_test, y_test):.3f}")
# Export the best pipeline as a standalone Python script
tpot.export('best_pipeline.py')
Feature Selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import (
    SelectKBest, f_classif, RFE, SelectFromModel
)

# Univariate selection: keep the 10 features with the highest ANOVA F-score
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)

# Recursive Feature Elimination
estimator = RandomForestClassifier()
rfe = RFE(estimator, n_features_to_select=10)
X_new = rfe.fit_transform(X, y)

# Model-based selection using feature importances
selector = SelectFromModel(RandomForestClassifier(), max_features=10)
X_new = selector.fit_transform(X, y)
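When X is a pandas DataFrame it is often useful to recover which columns survived the selection; a short sketch using get_support() (assumes the DataFrame case):

# Boolean mask of selected features mapped back to column names
mask = selector.get_support()
selected_columns = X.columns[mask]
print(list(selected_columns))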
Performance Optimization
Inference Optimization (ONNX)
import torch
import torch.onnx

# Export PyTorch to ONNX (set the model to eval mode first)
model.eval()
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=11,
    input_names=['input'],
    output_names=['output']
)

# Run with ONNX Runtime (input_data: numpy float32 array matching the input shape)
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name
output = session.run(None, {input_name: input_data})
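A quick sanity check is to compare the ONNX Runtime output against the original PyTorch output on the same input; a sketch, assuming the model returns a single tensor:

import numpy as np

input_data = dummy_input.numpy()
with torch.no_grad():
    torch_out = model(dummy_input).numpy()
ort_out = session.run(None, {input_name: input_data})[0]
# Allow small numerical differences between the two runtimes
print("Outputs match:", np.allclose(torch_out, ort_out, atol=1e-4))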
TensorRT (NVIDIA GPU)
import tensorrt as trt

# Convert ONNX to a TensorRT engine (API shown for TensorRT 8.x; newer releases
# replace max_workspace_size/build_engine with config.set_memory_pool_limit
# and builder.build_serialized_network)
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# The ONNX parser requires an explicit-batch network
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)
with open('model.onnx', 'rb') as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GB workspace
engine = builder.build_engine(network, config)
# Speedups on NVIDIA GPUs are often substantial but depend on the model and precision
Learning Rate Scheduling
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

# Step decay
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Cosine annealing
scheduler = CosineAnnealingLR(optimizer, T_max=100)

# Training loop
for epoch in range(epochs):
    train(model, optimizer)
    scheduler.step()
Early Stopping
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            # No meaningful improvement: count towards patience
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: record the new best loss and reset the counter
            self.best_loss = val_loss
            self.counter = 0

# Usage
early_stopping = EarlyStopping(patience=10)
for epoch in range(epochs):
    train_loss = train(model)
    val_loss = validate(model)
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break
Best Practices
- Start simple: establish a baseline model before optimizing
- Profile before optimizing: find the actual bottlenecks first (see the latency sketch after this list)
- Measure everything: track accuracy, latency, and model size
- Mind the trade-offs: accuracy vs. speed vs. size
- Validate improvements: confirm gains with A/B testing
- Automate: use AutoML for initial exploration
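A minimal latency-profiling sketch in PyTorch; the input shape is illustrative, and model_fp32/model_int8 are assumed to be the fp32 and dynamically quantized models from the quantization example above:

import time
import torch

def measure_latency(model, input_shape=(1, 3, 224, 224), runs=100):
    # Average forward-pass latency in milliseconds on CPU
    model.eval()
    x = torch.randn(*input_shape)
    with torch.no_grad():
        for _ in range(10):  # warm-up iterations
            model(x)
        start = time.perf_counter()
        for _ in range(runs):
            model(x)
    return (time.perf_counter() - start) / runs * 1000

print(f"fp32 latency: {measure_latency(model_fp32):.2f} ms")
print(f"int8 latency: {measure_latency(model_int8):.2f} ms")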