Claude Code Plugins

Quantization, pruning, AutoML, hyperparameter tuning, and performance optimization. Use for improving model performance, reducing size, or automated ML.

Install Skill

1. Download skill
2. Enable skills in Claude: open claude.ai/settings/capabilities and find the "Skills" section
3. Upload to Claude: click "Upload skill" and select the downloaded ZIP file

Note: Please verify the skill by reading through its instructions before using it.

SKILL.md

name: model-optimization
description: Quantization, pruning, AutoML, hyperparameter tuning, and performance optimization. Use for improving model performance, reducing size, or automated ML.
sasmp_version: 1.3.0
bonded_agent: 06-mlops-deployment
bond_type: SECONDARY_BOND

Model Optimization

Optimize models for better performance, efficiency, and faster inference.

Hyperparameter Tuning

Grid Search

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(),
    param_grid,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)
print(f"Best params: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")

Bayesian Optimization

import xgboost as xgb
from skopt import BayesSearchCV

param_space = {
    'n_estimators': (100, 500),
    'max_depth': (5, 50),
    'learning_rate': (0.01, 0.3, 'log-uniform')
}

bayes_search = BayesSearchCV(
    xgb.XGBClassifier(),
    param_space,
    n_iter=50,
    cv=5,
    scoring='f1_weighted'
)

bayes_search.fit(X_train, y_train)

Optuna

import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3)
    }

    model = xgb.XGBClassifier(**params)
    score = cross_val_score(model, X_train, y_train,
                           cv=5, scoring='f1').mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print(f"Best params: {study.best_params}")
print(f"Best score: {study.best_value:.3f}")

Model Compression

Quantization (PyTorch)

import torch

# Post-training dynamic quantization
model_fp32 = MyModel()  # your trained FP32 model
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32,
    {torch.nn.Linear},  # layer types to quantize
    dtype=torch.qint8
)

# Weights shrink roughly 4x; CPU inference is typically 2-4x faster

# Quantization-aware training
model = MyModel()
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model.train()  # prepare_qat expects training mode
model_prepared = torch.quantization.prepare_qat(model)

# Train with fake-quantization ops inserted
for epoch in range(epochs):
    train(model_prepared)

model_prepared.eval()
model_quantized = torch.quantization.convert(model_prepared)
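
A quick way to sanity-check the size claim is to serialize both models and compare their on-disk footprint; a minimal sketch (the temporary path is a placeholder):

import os
import torch

def model_size_mb(model, path='tmp_model.pt'):
    """Serialize the state_dict and report its size on disk."""
    torch.save(model.state_dict(), path)
    size_mb = os.path.getsize(path) / 1e6
    os.remove(path)
    return size_mb

print(f"FP32: {model_size_mb(model_fp32):.1f} MB")
print(f"INT8: {model_size_mb(model_int8):.1f} MB")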

Pruning

import torch
import torch.nn.utils.prune as prune

# Global unstructured pruning
parameters_to_prune = [
    (module, 'weight') for module in model.modules()
    if isinstance(module, torch.nn.Linear)
]

prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2  # Zero out the 20% of weights with smallest L1 magnitude
)

# Remove pruning reparametrization
for module, _ in parameters_to_prune:
    prune.remove(module, 'weight')
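
To confirm how sparse the network actually became, count the zeroed weights after pruning; a minimal sketch over the same Linear layers:

import torch

zeros, total = 0, 0
for module in model.modules():
    if isinstance(module, torch.nn.Linear):
        zeros += int(torch.sum(module.weight == 0))
        total += module.weight.nelement()

print(f"Global sparsity: {zeros / total:.1%}")

Note that unstructured pruning only zeroes entries; the tensors keep their shape, so size and speed gains require sparse-aware storage or kernels.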

Knowledge Distillation

import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.5):
    """
    Distillation loss: combination of soft targets from teacher
    and hard targets from ground truth
    """
    # Soft targets (knowledge from teacher)
    soft_targets = F.softmax(teacher_logits / T, dim=1)
    soft_prob = F.log_softmax(student_logits / T, dim=1)
    soft_loss = F.kl_div(soft_prob, soft_targets, reduction='batchmean') * (T ** 2)

    # Hard targets (ground truth)
    hard_loss = F.cross_entropy(student_logits, labels)

    # Combined loss
    return alpha * soft_loss + (1 - alpha) * hard_loss

# Train student model
teacher_model.eval()
student_model.train()

for images, labels in train_loader:
    with torch.no_grad():
        teacher_logits = teacher_model(images)

    student_logits = student_model(images)
    loss = distillation_loss(student_logits, teacher_logits, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

AutoML

Auto-sklearn

import autosklearn.classification

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=3600,  # 1 hour
    per_run_time_limit=300,
    memory_limit=3072
)

automl.fit(X_train, y_train)
predictions = automl.predict(X_test)

print(automl.leaderboard())
print(automl.show_models())

H2O AutoML

import h2o
import pandas as pd
from h2o.automl import H2OAutoML

h2o.init()

train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

aml = H2OAutoML(max_runtime_secs=3600, max_models=20)
aml.train(x=X_train.columns.tolist(), y='target',  # 'target' must match the label column name
          training_frame=train)

# Leaderboard
lb = aml.leaderboard
print(lb)

# Best model
best_model = aml.leader
predictions = best_model.predict(test)

TPOT

from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
    n_jobs=-1
)

tpot.fit(X_train, y_train)
print(f"Score: {tpot.score(X_test, y_test):.3f}")

# Export best pipeline
tpot.export('best_pipeline.py')

Feature Selection

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import (
    SelectKBest, f_classif, RFE, SelectFromModel
)

# Univariate selection
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)

# Recursive Feature Elimination
estimator = RandomForestClassifier()
rfe = RFE(estimator, n_features_to_select=10)
X_new = rfe.fit_transform(X, y)

# Model-based selection
selector = SelectFromModel(RandomForestClassifier(), max_features=10)
X_new = selector.fit_transform(X, y)
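
Whichever selector you use, the kept columns can be recovered with get_support(); a minimal sketch assuming X is a pandas DataFrame:

# Boolean mask over the original columns
mask = selector.get_support()
print(f"Selected features: {list(X.columns[mask])}")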

Performance Optimization

Inference Optimization (ONNX)

import torch
import torch.onnx

# Export PyTorch to ONNX
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=11,
    input_names=['input'],
    output_names=['output']
)

# Run with ONNX Runtime
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name
output = session.run(None, {input_name: input_data})  # input_data: float32 NumPy array shaped like dummy_input
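
It is worth checking that the exported graph is numerically faithful to the PyTorch model; a minimal sketch reusing the dummy input from the export step:

import numpy as np

# Compare PyTorch and ONNX Runtime outputs on the same input
with torch.no_grad():
    torch_out = model(dummy_input).numpy()

ort_out = session.run(None, {input_name: dummy_input.numpy()})[0]
assert np.allclose(torch_out, ort_out, atol=1e-4), "ONNX output diverges"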

TensorRT (NVIDIA GPU)

import tensorrt as trt

# Convert ONNX to TensorRT (the ONNX parser requires an explicit-batch network)
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)

with open('model.onnx', 'rb') as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1GB (use set_memory_pool_limit on TensorRT >= 8.4)

engine = builder.build_engine(network, config)

# Often up to ~10x faster inference on NVIDIA GPUs, depending on the workload

Learning Rate Scheduling

from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

# Step decay
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Cosine annealing
scheduler = CosineAnnealingLR(optimizer, T_max=100)

# Training loop: step the scheduler once per epoch, after the optimizer updates
for epoch in range(epochs):
    train(model, optimizer)
    scheduler.step()
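
If you already track a validation metric, a plateau-based schedule is a common alternative; a minimal sketch with ReduceLROnPlateau (validate() is a placeholder):

from torch.optim.lr_scheduler import ReduceLROnPlateau

# Halve the LR when validation loss stalls for 5 epochs
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

for epoch in range(epochs):
    train(model, optimizer)
    val_loss = validate(model)
    scheduler.step(val_loss)  # this scheduler needs the monitored metric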

Early Stopping

class EarlyStopping:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

# Usage
early_stopping = EarlyStopping(patience=10)

for epoch in range(epochs):
    train_loss = train(model)
    val_loss = validate(model)

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break
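
A common companion to early stopping is checkpointing the best weights so they can be restored after the loop; a minimal sketch (the checkpoint path is a placeholder):

import torch

best_val = float('inf')

for epoch in range(epochs):
    train(model)
    val_loss = validate(model)

    if val_loss < best_val:
        best_val = val_loss
        torch.save(model.state_dict(), 'best_model.pt')  # keep best weights

    early_stopping(val_loss)
    if early_stopping.early_stop:
        break

model.load_state_dict(torch.load('best_model.pt'))  # restore best weights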

Best Practices

  1. Start simple: establish a baseline model before optimizing anything
  2. Profile before optimizing: find the real bottlenecks first (see the sketch below)
  3. Measure everything: track metrics before and after each change
  4. Mind the trade-offs: accuracy vs. speed vs. size
  5. Validate improvements: confirm gains with A/B testing
  6. Automate: use AutoML for initial exploration
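
For point 2, even a crude latency benchmark beats guessing; here is a minimal sketch for a PyTorch model on CPU (the input shape and model names are placeholders):

import time
import torch

def benchmark(model, input_shape=(1, 3, 224, 224), n_runs=100):
    """Return average forward-pass latency in milliseconds."""
    model.eval()
    x = torch.randn(*input_shape)
    with torch.no_grad():
        for _ in range(10):  # warm-up runs
            model(x)
        start = time.perf_counter()
        for _ in range(n_runs):
            model(x)
    return (time.perf_counter() - start) / n_runs * 1000

print(f"FP32 latency: {benchmark(model_fp32):.2f} ms")
print(f"INT8 latency: {benchmark(model_int8):.2f} ms")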