name	sql-patterns
description	Advanced SQL patterns including window functions, CTEs, recursive queries, and optimization techniques.
allowed-tools	Read, Write, Edit, Grep, Glob, Bash

SQL-Patterns

Name: sql-patterns
Author: majesticlabs-dev

Advanced SQL patterns for data engineering beyond basic SELECT/JOIN.

Common Table Expressions (CTEs)

-- Chain transformations readably
WITH
  active_users AS (
    SELECT user_id, email
    FROM users
    WHERE status = 'active'
  ),
  user_orders AS (
    SELECT u.user_id, COUNT(*) as order_count
    FROM active_users u
    JOIN orders o ON u.user_id = o.user_id
    GROUP BY u.user_id
  )
SELECT * FROM user_orders WHERE order_count > 5;

Window Functions

-- Row numbering within groups
SELECT *,
  ROW_NUMBER() OVER (PARTITION BY category ORDER BY created_at DESC) as rn
FROM products;

-- Running totals
SELECT
  date,
  revenue,
  SUM(revenue) OVER (ORDER BY date) as cumulative_revenue
FROM daily_sales;

-- Percent of total
SELECT
  category,
  sales,
  sales * 100.0 / SUM(sales) OVER () as pct_of_total
FROM category_sales;

-- Lead/Lag for time series
SELECT
  date,
  value,
  LAG(value, 1) OVER (ORDER BY date) as prev_value,
  value - LAG(value, 1) OVER (ORDER BY date) as change
FROM metrics;

-- Ranking with ties
SELECT *,
  RANK() OVER (ORDER BY score DESC) as rank,       -- 1,2,2,4
  DENSE_RANK() OVER (ORDER BY score DESC) as drank -- 1,2,2,3
FROM scores;

Recursive CTEs

-- Hierarchical data (org chart, categories)
WITH RECURSIVE org_tree AS (
  -- Base case: top-level managers
  SELECT id, name, manager_id, 1 as depth
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  -- Recursive case: subordinates
  SELECT e.id, e.name, e.manager_id, t.depth + 1
  FROM employees e
  JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree;

-- Generate date series
WITH RECURSIVE dates AS (
  SELECT DATE '2024-01-01' as dt
  UNION ALL
  SELECT dt + INTERVAL '1 day'
  FROM dates
  WHERE dt < DATE '2024-12-31'
)
SELECT * FROM dates;

CASE Expressions

-- Simple CASE
SELECT
  CASE status
    WHEN 'A' THEN 'Active'
    WHEN 'I' THEN 'Inactive'
    ELSE 'Unknown'
  END as status_label
FROM users;

-- Searched CASE for ranges
SELECT
  CASE
    WHEN age < 18 THEN 'Minor'
    WHEN age < 65 THEN 'Adult'
    ELSE 'Senior'
  END as age_group
FROM users;

-- Conditional aggregation
SELECT
  COUNT(*) as total,
  COUNT(*) FILTER (WHERE status = 'active') as active_count,  -- PostgreSQL
  SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active_count  -- ANSI
FROM users;

UPSERT Patterns

-- PostgreSQL: INSERT ON CONFLICT
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON CONFLICT (sku) DO UPDATE SET
  quantity = EXCLUDED.quantity,
  updated_at = EXCLUDED.updated_at;

-- MySQL: INSERT ON DUPLICATE KEY
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON DUPLICATE KEY UPDATE
  quantity = VALUES(quantity),
  updated_at = VALUES(updated_at);

-- SQLite: INSERT OR REPLACE
INSERT OR REPLACE INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, datetime('now'));

Efficient Pagination

-- BAD: OFFSET for large pages
SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 10000;

-- GOOD: Keyset pagination
SELECT * FROM orders
WHERE id > 10000  -- last seen id
ORDER BY id
LIMIT 20;

Batch Operations

-- Batch DELETE with limit (avoid long locks)
DELETE FROM logs
WHERE created_at < NOW() - INTERVAL '90 days'
LIMIT 10000;

-- Batch UPDATE
UPDATE orders
SET status = 'archived'
WHERE id IN (
  SELECT id FROM orders
  WHERE status = 'completed'
  AND completed_at < NOW() - INTERVAL '1 year'
  LIMIT 1000
);

Index-Friendly Queries

-- BAD: Function on indexed column
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';

-- GOOD: Store lowercase or use expression index
SELECT * FROM users WHERE email_lower = 'test@example.com';
-- Or: CREATE INDEX idx_email_lower ON users (LOWER(email));

-- BAD: Leading wildcard
SELECT * FROM products WHERE name LIKE '%widget%';

-- GOOD: Full-text search or prefix match
SELECT * FROM products WHERE name LIKE 'widget%';

NULL Handling

-- COALESCE for defaults
SELECT COALESCE(nickname, first_name, 'Anonymous') as display_name
FROM users;

-- NULLIF to convert values to NULL
SELECT NULLIF(status, '') as status  -- empty string -> NULL
FROM records;

-- IS DISTINCT FROM (NULL-safe comparison)
SELECT * FROM a
WHERE a.value IS DISTINCT FROM b.value;  -- treats NULL != NULL as false

LATERAL Joins

-- Top N per group
SELECT d.name, t.product, t.revenue
FROM departments d
CROSS JOIN LATERAL (
  SELECT product, revenue
  FROM sales
  WHERE sales.dept_id = d.id
  ORDER BY revenue DESC
  LIMIT 3
) t;

Materialized Views

-- Create for expensive aggregations
CREATE MATERIALIZED VIEW daily_stats AS
SELECT
  DATE_TRUNC('day', created_at) as date,
  COUNT(*) as total_orders,
  SUM(amount) as revenue
FROM orders
GROUP BY 1;

-- Refresh periodically
REFRESH MATERIALIZED VIEW CONCURRENTLY daily_stats;

Query Optimization Checklist

Check EXPLAIN ANALYZE - Look for sequential scans on large tables
Add missing indexes - Columns in WHERE, JOIN, ORDER BY
**Avoid SELECT *** - Fetch only needed columns
Use EXISTS over IN - For correlated subqueries
Batch large operations - Avoid long-running transactions
Partition large tables - By date or category
Use connection pooling - Avoid connection overhead

sql-patterns

Install Skill

SKILL.md

SQL-Patterns

Common Table Expressions (CTEs)

Window Functions

Recursive CTEs

CASE Expressions

UPSERT Patterns

Efficient Pagination

Batch Operations

Index-Friendly Queries

NULL Handling

LATERAL Joins

Materialized Views

Query Optimization Checklist