Claude Code Plugins

Community-maintained marketplace

Linear algebra, calculus, probability, and statistical analysis

Install Skill

1. Download the skill.
2. Enable skills in Claude: open claude.ai/settings/capabilities and find the "Skills" section.
3. Upload to Claude: click "Upload skill" and select the downloaded ZIP file.

Note: Please verify the skill by reviewing its instructions before using it.

SKILL.md

---
name: statistics-math
description: Statistics, probability, linear algebra, and mathematical foundations for data science
sasmp_version: 1.3.0
bonded_agent: 04-data-scientist
bond_type: PRIMARY_BOND
skill_version: 2.0.0
last_updated: 2025-01
complexity: foundational
estimated_mastery_hours: 120
prerequisites:
unlocks: machine-learning, deep-learning, data-engineering
---

Statistics & Mathematics

Mathematical foundations for data science, machine learning, and statistical analysis.

Quick Start

import numpy as np
import scipy.stats as stats
from sklearn.linear_model import LinearRegression

# Descriptive Statistics
data = np.array([23, 45, 67, 32, 45, 67, 89, 12, 34, 56])
print(f"Mean: {np.mean(data):.2f}")
print(f"Median: {np.median(data):.2f}")
print(f"Std Dev: {np.std(data, ddof=1):.2f}")
print(f"IQR: {np.percentile(data, 75) - np.percentile(data, 25):.2f}")

# Hypothesis Testing
sample_a = [23, 45, 67, 32, 45]
sample_b = [56, 78, 45, 67, 89]
t_stat, p_value = stats.ttest_ind(sample_a, sample_b)
print(f"T-statistic: {t_stat:.4f}, p-value: {p_value:.4f}")

if p_value < 0.05:
    print("Reject null hypothesis: significant difference")
else:
    print("Fail to reject null hypothesis")

Core Concepts

1. Probability Distributions

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Normal Distribution
mu, sigma = 100, 15
normal_dist = stats.norm(loc=mu, scale=sigma)
x = np.linspace(50, 150, 100)  # grid for plotting the PDF, e.g. plt.plot(x, normal_dist.pdf(x))

# PDF, CDF calculations
print(f"P(X < 85): {normal_dist.cdf(85):.4f}")
print(f"P(X > 115): {1 - normal_dist.cdf(115):.4f}")
print(f"95th percentile: {normal_dist.ppf(0.95):.2f}")

# Binomial Distribution (discrete)
n, p = 100, 0.3
binom_dist = stats.binom(n=n, p=p)
print(f"P(X = 30): {binom_dist.pmf(30):.4f}")
print(f"P(X <= 30): {binom_dist.cdf(30):.4f}")

# Poisson Distribution (events per time)
lambda_param = 5
poisson_dist = stats.poisson(mu=lambda_param)
print(f"P(X = 3): {poisson_dist.pmf(3):.4f}")

# Central Limit Theorem demonstration
population = np.random.exponential(scale=10, size=100000)
sample_means = [np.mean(np.random.choice(population, 30)) for _ in range(1000)]
print(f"Sample means are approximately normal: mean={np.mean(sample_means):.2f}")

2. Hypothesis Testing Framework

from scipy import stats
import numpy as np

class HypothesisTest:
    """Framework for statistical hypothesis testing."""

    @staticmethod
    def two_sample_ttest(group_a, group_b, alpha=0.05):
        """Independent samples t-test."""
        t_stat, p_value = stats.ttest_ind(group_a, group_b)
        n_a, n_b = len(group_a), len(group_b)
        # Cohen's d using the pooled sample variance (ddof=1), valid for unequal n
        pooled_var = ((n_a - 1) * np.var(group_a, ddof=1)
                      + (n_b - 1) * np.var(group_b, ddof=1)) / (n_a + n_b - 2)
        effect_size = (np.mean(group_a) - np.mean(group_b)) / np.sqrt(pooled_var)
        return {
            "t_statistic": t_stat,
            "p_value": p_value,
            "significant": p_value < alpha,
            "effect_size_cohens_d": effect_size
        }

    @staticmethod
    def chi_square_test(observed, expected=None, alpha=0.05):
        """Chi-square test for categorical data."""
        if expected is None:
            chi2, p_value, dof, expected = stats.chi2_contingency(observed)
        else:
            chi2, p_value = stats.chisquare(observed, expected)
            dof = len(observed) - 1
        return {
            "chi2_statistic": chi2,
            "p_value": p_value,
            "degrees_of_freedom": dof,
            "significant": p_value < alpha
        }

    @staticmethod
    def ab_test_proportion(conversions_a, total_a, conversions_b, total_b, alpha=0.05):
        """Two-proportion z-test for A/B testing."""
        p_a = conversions_a / total_a
        p_b = conversions_b / total_b
        p_pooled = (conversions_a + conversions_b) / (total_a + total_b)

        se = np.sqrt(p_pooled * (1 - p_pooled) * (1/total_a + 1/total_b))
        z_stat = (p_a - p_b) / se
        p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))

        return {
            "conversion_a": p_a,
            "conversion_b": p_b,
            "lift": (p_b - p_a) / p_a * 100,
            "z_statistic": z_stat,
            "p_value": p_value,
            "significant": p_value < alpha
        }

# Usage
result = HypothesisTest.ab_test_proportion(
    conversions_a=120, total_a=1000,
    conversions_b=150, total_b=1000
)
print(f"Lift: {result['lift']:.1f}%, p-value: {result['p_value']:.4f}")

3. Linear Algebra Essentials

import numpy as np

# Matrix operations
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Basic operations
print("Matrix multiplication:", A @ B)
print("Element-wise:", A * B)
print("Transpose:", A.T)
print("Inverse:", np.linalg.inv(A))
print("Determinant:", np.linalg.det(A))

# Eigenvalues and eigenvectors (PCA foundation; see the sketch after this block)
eigenvalues, eigenvectors = np.linalg.eig(A)
print(f"Eigenvalues: {eigenvalues}")

# Singular Value Decomposition (dimensionality reduction)
U, S, Vt = np.linalg.svd(A)
print(f"Singular values: {S}")

# Solving linear systems: Ax = b
b = np.array([5, 11])
x = np.linalg.solve(A, b)
print(f"Solution: {x}")

# Cosine similarity (NLP, recommendations)
def cosine_similarity(v1, v2):
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))

vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
print(f"Cosine similarity: {cosine_similarity(vec1, vec2):.4f}")

4. Regression Analysis

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
import statsmodels.api as sm

# Multiple Linear Regression with statsmodels
X = np.random.randn(100, 3)
y = 2*X[:, 0] + 3*X[:, 1] - X[:, 2] + np.random.randn(100)*0.5

X_with_const = sm.add_constant(X)
model = sm.OLS(y, X_with_const).fit()

print(model.summary())
print(f"R-squared: {model.rsquared:.4f}")
print(f"Coefficients: {model.params}")
print(f"P-values: {model.pvalues}")

# Regularization comparison
X_train, y_train = X[:80], y[:80]
X_test, y_test = X[80:], y[80:]

models = {
    "OLS": LinearRegression(),
    "Ridge": Ridge(alpha=1.0),
    "Lasso": Lasso(alpha=0.1)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}: R²={r2_score(y_test, y_pred):.4f}, RMSE={np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")

Tools & Technologies

| Tool | Purpose | Version (2025) |
|------|---------|----------------|
| NumPy | Numerical computing | 1.26+ |
| SciPy | Scientific computing | 1.12+ |
| pandas | Data manipulation | 2.2+ |
| statsmodels | Statistical models | 0.14+ |
| scikit-learn | ML algorithms | 1.4+ |

Troubleshooting Guide

| Issue | Symptoms | Root Cause | Fix |
|-------|----------|------------|-----|
| Low p-value, small effect | Statistically significant but practically meaningless | Large sample size | Check effect size |
| High variance | Unstable estimates | Small sample, outliers | More data, robust methods |
| Multicollinearity | Inflated coefficient standard errors | Correlated features | VIF check (see sketch below), remove features |
| Heteroscedasticity | Invalid inference | Non-constant variance | Weighted least squares |
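
For the multicollinearity row, a minimal sketch of the VIF check using statsmodels; the features x1 to x3 and the synthetic data are illustrative assumptions:

import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Synthetic design matrix with two deliberately correlated features (assumption)
rng = np.random.default_rng(42)
x1 = rng.normal(size=100)
x2 = 0.9 * x1 + rng.normal(scale=0.1, size=100)  # nearly collinear with x1
x3 = rng.normal(size=100)
X = sm.add_constant(np.column_stack([x1, x2, x3]))

# Rule of thumb: VIF above roughly 5-10 signals problematic multicollinearity
for i, name in enumerate(["x1", "x2", "x3"], start=1):  # index 0 is the constant
    print(f"{name}: VIF = {variance_inflation_factor(X, i):.2f}")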

Best Practices

# ✅ DO: Check assumptions before testing
from scipy.stats import shapiro
stat, p = shapiro(data)
if p > 0.05:
    print("No evidence against normality at alpha=0.05")

# ✅ DO: Use effect sizes, not just p-values
# ✅ DO: Correct for multiple comparisons (Bonferroni; sketch below)
# ✅ DO: Report confidence intervals (sketch below)

# ❌ DON'T: p-hack by trying many tests
# ❌ DON'T: Confuse correlation with causation
# ❌ DON'T: Ignore sample size requirements
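
Two of the DO items above in code: a Bonferroni correction via statsmodels and a t-based confidence interval. The p-values and data are illustrative:

import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Bonferroni correction across several tests (illustrative p-values)
p_values = [0.01, 0.04, 0.03, 0.20]
reject, p_adjusted, _, _ = multipletests(p_values, alpha=0.05, method="bonferroni")
print(f"Adjusted p-values: {p_adjusted}")
print(f"Reject after correction: {reject}")

# 95% t-based confidence interval for a mean
data = np.array([23, 45, 67, 32, 45, 67, 89, 12, 34, 56])
ci = stats.t.interval(0.95, df=len(data) - 1, loc=np.mean(data), scale=stats.sem(data))
print(f"95% CI for the mean: ({ci[0]:.2f}, {ci[1]:.2f})")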

Skill Certification Checklist:

  • Can calculate descriptive statistics
  • Can perform hypothesis tests (t-test, chi-square)
  • Can implement A/B testing
  • Can perform regression analysis
  • Can use matrix operations for ML