Claude Code Plugins

Community-maintained marketplace

Feedback

Data warehouse design mastery with star schema, dimensional modeling, fact/dimension tables, slowly changing dimensions, and enterprise best practices. Complete schema examples included.

Install Skill

1Download skill
2Enable skills in Claude

Open claude.ai/settings/capabilities and find the "Skills" section

3Upload to Claude

Click "Upload skill" and select the downloaded ZIP file

Note: Please verify skill by going through its instructions before using it.

SKILL.md

name data-warehouse
description Data warehouse design mastery with star schema, dimensional modeling, fact/dimension tables, slowly changing dimensions, and enterprise best practices. Complete schema examples included.
sasmp_version 1.3.0
bonded_agent 06-data-engineer
bond_type PRIMARY_BOND

Data Warehouse Design

Star Schema Basics

Fact Table Design

-- Star schema with sales fact table
CREATE TABLE fact_sales (
  sales_id BIGINT PRIMARY KEY,
  date_id INT NOT NULL,
  customer_id INT NOT NULL,
  product_id INT NOT NULL,
  store_id INT NOT NULL,
  quantity INT NOT NULL,
  unit_price DECIMAL(10, 2),
  sale_amount DECIMAL(12, 2),
  discount_amount DECIMAL(12, 2),
  net_sales DECIMAL(12, 2),
  tax_amount DECIMAL(12, 2),
  total_sale DECIMAL(12, 2),

  -- Foreign keys
  FOREIGN KEY (date_id) REFERENCES dim_date(date_id),
  FOREIGN KEY (customer_id) REFERENCES dim_customer(customer_id),
  FOREIGN KEY (product_id) REFERENCES dim_product(product_id),
  FOREIGN KEY (store_id) REFERENCES dim_store(store_id)
);

-- Create indexes on foreign keys for query performance
CREATE INDEX idx_fact_sales_date ON fact_sales(date_id);
CREATE INDEX idx_fact_sales_customer ON fact_sales(customer_id);
CREATE INDEX idx_fact_sales_product ON fact_sales(product_id);
CREATE INDEX idx_fact_sales_store ON fact_sales(store_id);

Dimension Table Design

-- Date dimension (conformed dimension - used across multiple facts)
CREATE TABLE dim_date (
  date_id INT PRIMARY KEY,
  full_date DATE UNIQUE,
  day_of_week INT,
  day_of_week_name VARCHAR(10),
  day_of_month INT,
  week_of_year INT,
  month_number INT,
  month_name VARCHAR(12),
  quarter INT,
  year INT,
  fiscal_quarter INT,
  fiscal_year INT,
  is_holiday BOOLEAN,
  is_weekend BOOLEAN,
  is_weekday BOOLEAN
);

-- Customer dimension
CREATE TABLE dim_customer (
  customer_id INT PRIMARY KEY,
  customer_code VARCHAR(20),
  first_name VARCHAR(50),
  last_name VARCHAR(50),
  email VARCHAR(100),
  phone VARCHAR(20),
  gender VARCHAR(10),
  birth_date DATE,
  -- Address hierarchy
  street_address VARCHAR(100),
  city VARCHAR(50),
  state_province VARCHAR(50),
  postal_code VARCHAR(10),
  country VARCHAR(50),
  region VARCHAR(50),
  -- Customer segment
  customer_segment VARCHAR(50),
  customer_lifetime_value DECIMAL(12, 2),
  -- Slowly changing dimension columns
  effective_date DATE,
  end_date DATE,
  is_current BOOLEAN,
  -- Audit columns
  created_date DATE,
  updated_date DATE
);

-- Product dimension
CREATE TABLE dim_product (
  product_id INT PRIMARY KEY,
  product_code VARCHAR(50),
  product_name VARCHAR(200),
  product_category VARCHAR(50),
  product_subcategory VARCHAR(50),
  product_line VARCHAR(50),
  supplier_id INT,
  brand VARCHAR(50),
  model VARCHAR(100),
  color VARCHAR(30),
  size VARCHAR(10),
  unit_cost DECIMAL(10, 2),
  list_price DECIMAL(10, 2),
  cost_to_list_ratio DECIMAL(5, 4),
  product_status VARCHAR(20),
  effective_date DATE,
  end_date DATE,
  is_current BOOLEAN
);

Slowly Changing Dimensions (SCD)

Type 1: Overwrite

-- Simply update the existing record
UPDATE dim_customer
SET
  email = 'new_email@example.com',
  phone = '555-9999',
  updated_date = CURRENT_DATE
WHERE customer_id = 1;

Type 2: Add New Row

-- Close old row
UPDATE dim_customer
SET
  is_current = FALSE,
  end_date = CURRENT_DATE - INTERVAL '1 day'
WHERE customer_id = 1 AND is_current = TRUE;

-- Insert new row
INSERT INTO dim_customer
VALUES (
  customer_id,
  customer_code,
  new_values...,
  CURRENT_DATE,  -- effective_date
  NULL,          -- end_date
  TRUE,          -- is_current
  CURRENT_DATE   -- created_date
);

Type 3: Add New Column

-- Add previous value columns
ALTER TABLE dim_customer ADD COLUMN previous_city VARCHAR(50);
ALTER TABLE dim_customer ADD COLUMN previous_city_start_date DATE;

-- Update previous columns when changing current
UPDATE dim_customer
SET
  previous_city = city,
  previous_city_start_date = CURRENT_DATE,
  city = 'New York'
WHERE customer_id = 1;

Conformed Dimensions

-- Single dim_date used across all fact tables
SELECT
  f.sales_id,
  f.quantity * f.unit_price as revenue,
  d.month_name,
  d.year
FROM fact_sales f
JOIN dim_date d ON f.date_id = d.date_id;

-- Reuse dim_customer in multiple facts
SELECT
  fs.sales_id,
  fc.call_id,
  c.customer_segment
FROM fact_sales fs
JOIN dim_customer c ON fs.customer_id = c.customer_id
LEFT JOIN fact_customer_calls fc ON c.customer_id = fc.customer_id;

Aggregate Tables (Materialized Views)

-- Pre-calculate common aggregations for performance
CREATE MATERIALIZED VIEW sales_summary_daily AS
SELECT
  d.full_date,
  d.month_name,
  d.year,
  p.product_category,
  c.customer_segment,
  COUNT(DISTINCT fs.sales_id) as transaction_count,
  SUM(fs.quantity) as total_quantity,
  ROUND(SUM(fs.net_sales), 2) as total_sales,
  ROUND(AVG(fs.net_sales), 2) as avg_sale,
  COUNT(DISTINCT fs.customer_id) as unique_customers
FROM fact_sales fs
JOIN dim_date d ON fs.date_id = d.date_id
JOIN dim_product p ON fs.product_id = p.product_id
JOIN dim_customer c ON fs.customer_id = c.customer_id
GROUP BY d.full_date, d.month_name, d.year, p.product_category, c.customer_segment;

-- Refresh materialized view
REFRESH MATERIALIZED VIEW sales_summary_daily;

-- Query aggregate table instead of fact table
SELECT
  month_name,
  product_category,
  SUM(total_sales) as monthly_sales
FROM sales_summary_daily
WHERE year = 2024
GROUP BY month_name, product_category;

Bridge Tables (Many-to-Many)

-- For many-to-many relationships (e.g., product to categories)
CREATE TABLE bridge_product_category (
  product_id INT,
  category_id INT,
  PRIMARY KEY (product_id, category_id),
  FOREIGN KEY (product_id) REFERENCES dim_product(product_id),
  FOREIGN KEY (category_id) REFERENCES dim_category(category_id)
);

-- Query with bridge table
SELECT
  p.product_name,
  STRING_AGG(DISTINCT c.category_name, ', ') as categories,
  COUNT(DISTINCT bc.category_id) as category_count
FROM dim_product p
LEFT JOIN bridge_product_category bc ON p.product_id = bc.product_id
LEFT JOIN dim_category c ON bc.category_id = c.category_id
GROUP BY p.product_id, p.product_name;

Data Quality Metrics

-- Monitor fact table metrics
SELECT
  COUNT(*) as total_records,
  COUNT(DISTINCT customer_id) as unique_customers,
  COUNT(DISTINCT product_id) as unique_products,
  MIN(sale_amount) as min_sale,
  MAX(sale_amount) as max_sale,
  ROUND(AVG(sale_amount), 2) as avg_sale,
  COUNT(CASE WHEN sale_amount < 0 THEN 1 END) as negative_sales,
  COUNT(CASE WHEN sale_amount IS NULL THEN 1 END) as null_sales,
  MAX(load_timestamp) as last_load_time
FROM fact_sales;

-- Dimension quality checks
SELECT
  'dim_customer' as dimension,
  COUNT(*) as total_records,
  COUNT(DISTINCT customer_id) as distinct_ids,
  COUNT(CASE WHEN first_name IS NULL THEN 1 END) as null_first_names,
  COUNT(CASE WHEN is_current = TRUE THEN 1 END) as current_records
FROM dim_customer;

Next Steps

Learn ETL/ELT pipeline design and data transformation patterns in the etl-pipelines skill.