Claude Code Plugins

Community-maintained marketplace

Feedback

data-integrity-auditor

@patricio0312rev/skillset
2
0

Detects data integrity issues including orphaned records, broken foreign key relationships, constraint violations, and provides automated fix migrations. Use for "data integrity", "orphaned records", "broken relationships", or "data quality".

Install Skill

1Download skill
2Enable skills in Claude

Open claude.ai/settings/capabilities and find the "Skills" section

3Upload to Claude

Click "Upload skill" and select the downloaded ZIP file

Note: Please verify skill by going through its instructions before using it.

SKILL.md

name data-integrity-auditor
description Detects data integrity issues including orphaned records, broken foreign key relationships, constraint violations, and provides automated fix migrations. Use for "data integrity", "orphaned records", "broken relationships", or "data quality".

Data Integrity Auditor

Detect and fix data integrity issues automatically.

Integrity Check Types

1. Orphaned Records

-- Find orphaned orders (no matching user)
SELECT o.id, o.user_id
FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL;

-- Find orphaned order items (no matching order)
SELECT oi.id, oi.order_id
FROM order_items oi
LEFT JOIN orders o ON o.id = oi.order_id
WHERE o.id IS NULL;

2. Broken Foreign Keys

// scripts/check-foreign-keys.ts
async function checkForeignKeys() {
  const issues: string[] = [];

  // Orders → Users
  const orphanedOrders = await prisma.$queryRaw<any[]>`
    SELECT o.id, o.user_id
    FROM orders o
    LEFT JOIN users u ON u.id = o.user_id
    WHERE u.id IS NULL
    LIMIT 100
  `;

  if (orphanedOrders.length > 0) {
    issues.push(
      `❌ Found ${orphanedOrders.length} orders with invalid user_id`
    );
    console.log(
      "  Sample IDs:",
      orphanedOrders.slice(0, 5).map((o) => o.id)
    );
  }

  // Order Items → Orders
  const orphanedItems = await prisma.$queryRaw<any[]>`
    SELECT oi.id, oi.order_id
    FROM order_items oi
    LEFT JOIN orders o ON o.id = oi.order_id
    WHERE o.id IS NULL
    LIMIT 100
  `;

  if (orphanedItems.length > 0) {
    issues.push(
      `❌ Found ${orphanedItems.length} order items with invalid order_id`
    );
  }

  // Products → Categories
  const orphanedProducts = await prisma.$queryRaw<any[]>`
    SELECT p.id, p.category_id
    FROM products p
    LEFT JOIN categories c ON c.id = p.category_id
    WHERE p.category_id IS NOT NULL
    AND c.id IS NULL
    LIMIT 100
  `;

  if (orphanedProducts.length > 0) {
    issues.push(
      `❌ Found ${orphanedProducts.length} products with invalid category_id`
    );
  }

  return issues;
}

3. Constraint Violations

async function checkConstraints() {
  const issues: string[] = [];

  // Check email uniqueness (should be caught by DB, but verify)
  const duplicateEmails = await prisma.$queryRaw<any[]>`
    SELECT email, COUNT(*) as count
    FROM users
    GROUP BY email
    HAVING COUNT(*) > 1
  `;

  if (duplicateEmails.length > 0) {
    issues.push(`❌ Found ${duplicateEmails.length} duplicate emails`);
  }

  // Check negative quantities
  const negativeStock = await prisma.$queryRaw<any[]>`
    SELECT id, name, stock
    FROM products
    WHERE stock < 0
  `;

  if (negativeStock.length > 0) {
    issues.push(
      `❌ Found ${negativeStock.length} products with negative stock`
    );
  }

  // Check negative prices
  const negativePrices = await prisma.$queryRaw<any[]>`
    SELECT id, name, price
    FROM products
    WHERE price < 0
  `;

  if (negativePrices.length > 0) {
    issues.push(
      `❌ Found ${negativePrices.length} products with negative prices`
    );
  }

  // Check invalid order status
  const invalidStatus = await prisma.$queryRaw<any[]>`
    SELECT id, status
    FROM orders
    WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled')
  `;

  if (invalidStatus.length > 0) {
    issues.push(`❌ Found ${invalidStatus.length} orders with invalid status`);
  }

  return issues;
}

4. Missing Required Fields

async function checkMissingFields() {
  const issues: string[] = [];

  // Users missing required fields
  const usersNoEmail = await prisma.user.count({
    where: { email: null },
  });

  if (usersNoEmail > 0) {
    issues.push(`❌ Found ${usersNoEmail} users without email`);
  }

  // Orders with NULL totals
  const ordersNoTotal = await prisma.order.count({
    where: { total: null },
  });

  if (ordersNoTotal > 0) {
    issues.push(`❌ Found ${ordersNoTotal} orders without total`);
  }

  return issues;
}

Comprehensive Audit Script

// scripts/audit-data-integrity.ts
interface IntegrityIssue {
  severity: "critical" | "warning" | "info";
  category: string;
  message: string;
  count: number;
  query?: string;
  fix?: string;
}

async function auditDataIntegrity(): Promise<IntegrityIssue[]> {
  const issues: IntegrityIssue[] = [];

  console.log("🔍 Auditing data integrity...\n");

  // 1. Check orphaned records
  const orphanedOrders = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM orders o
    LEFT JOIN users u ON u.id = o.user_id
    WHERE u.id IS NULL
  `;

  if (orphanedOrders[0].count > 0) {
    issues.push({
      severity: "critical",
      category: "orphaned-records",
      message: "Orders with invalid user references",
      count: orphanedOrders[0].count,
      query:
        "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL",
      fix: "DELETE FROM orders WHERE id IN (...)",
    });
  }

  // 2. Check duplicate unique constraints
  const duplicateEmails = await prisma.$queryRaw<any[]>`
    SELECT email, COUNT(*) as count
    FROM users
    GROUP BY email
    HAVING COUNT(*) > 1
  `;

  if (duplicateEmails.length > 0) {
    issues.push({
      severity: "critical",
      category: "constraint-violation",
      message: "Duplicate email addresses",
      count: duplicateEmails.length,
      fix: "Keep newest record, delete duplicates",
    });
  }

  // 3. Check data inconsistencies
  const invalidPrices = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM products WHERE price < 0
  `;

  if (invalidPrices[0].count > 0) {
    issues.push({
      severity: "warning",
      category: "data-quality",
      message: "Products with negative prices",
      count: invalidPrices[0].count,
      fix: "UPDATE products SET price = ABS(price) WHERE price < 0",
    });
  }

  // 4. Check referential integrity
  const brokenOrderItems = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM order_items oi
    LEFT JOIN orders o ON o.id = oi.order_id
    WHERE o.id IS NULL
  `;

  if (brokenOrderItems[0].count > 0) {
    issues.push({
      severity: "critical",
      category: "referential-integrity",
      message: "Order items referencing non-existent orders",
      count: brokenOrderItems[0].count,
      fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)",
    });
  }

  return issues;
}

async function generateReport() {
  const issues = await auditDataIntegrity();

  console.log("\n📊 Data Integrity Report\n");
  console.log(`Total issues: ${issues.length}\n`);

  const grouped = issues.reduce((acc, issue) => {
    if (!acc[issue.severity]) acc[issue.severity] = [];
    acc[issue.severity].push(issue);
    return acc;
  }, {} as Record<string, IntegrityIssue[]>);

  (["critical", "warning", "info"] as const).forEach((severity) => {
    const items = grouped[severity] || [];
    if (items.length === 0) return;

    console.log(`\n${severity.toUpperCase()} (${items.length})\n`);

    items.forEach((issue, i) => {
      console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
      console.log(`   Count: ${issue.count}`);
      if (issue.query) {
        console.log(`   Query: ${issue.query.substring(0, 80)}...`);
      }
      if (issue.fix) {
        console.log(`   Fix: ${issue.fix}`);
      }
      console.log();
    });
  });

  // Exit with error if critical issues
  process.exit(grouped.critical?.length > 0 ? 1 : 0);
}

generateReport();

Automated Fixes

// scripts/fix-integrity-issues.ts
async function fixOrphanedRecords() {
  console.log("🔧 Fixing orphaned records...\n");

  // Delete orphaned orders
  const deletedOrders = await prisma.$executeRaw`
    DELETE FROM orders
    WHERE id IN (
      SELECT o.id FROM orders o
      LEFT JOIN users u ON u.id = o.user_id
      WHERE u.id IS NULL
    )
  `;
  console.log(`✅ Deleted ${deletedOrders} orphaned orders`);

  // Delete orphaned order items
  const deletedItems = await prisma.$executeRaw`
    DELETE FROM order_items
    WHERE id IN (
      SELECT oi.id FROM order_items oi
      LEFT JOIN orders o ON o.id = oi.order_id
      WHERE o.id IS NULL
    )
  `;
  console.log(`✅ Deleted ${deletedItems} orphaned order items`);
}

async function fixDuplicates() {
  console.log("🔧 Fixing duplicate records...\n");

  // Keep newest user, delete old duplicates
  await prisma.$executeRaw`
    DELETE FROM users
    WHERE id IN (
      SELECT id FROM (
        SELECT id, 
               ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
        FROM users
      ) t
      WHERE rn > 1
    )
  `;
  console.log(`✅ Fixed duplicate emails`);
}

async function fixConstraintViolations() {
  console.log("🔧 Fixing constraint violations...\n");

  // Fix negative prices
  const fixedPrices = await prisma.$executeRaw`
    UPDATE products
    SET price = ABS(price)
    WHERE price < 0
  `;
  console.log(`✅ Fixed ${fixedPrices} negative prices`);

  // Fix negative stock
  const fixedStock = await prisma.$executeRaw`
    UPDATE products
    SET stock = 0
    WHERE stock < 0
  `;
  console.log(`✅ Fixed ${fixedStock} negative stock values`);
}

Prevention: Add Missing Constraints

-- Migration to add missing constraints

-- 1. Add foreign key constraints
ALTER TABLE orders
  ADD CONSTRAINT fk_orders_user_id
  FOREIGN KEY (user_id) REFERENCES users(id)
  ON DELETE CASCADE;

ALTER TABLE order_items
  ADD CONSTRAINT fk_order_items_order_id
  FOREIGN KEY (order_id) REFERENCES orders(id)
  ON DELETE CASCADE;

-- 2. Add check constraints
ALTER TABLE products
  ADD CONSTRAINT chk_products_price_positive
  CHECK (price >= 0);

ALTER TABLE products
  ADD CONSTRAINT chk_products_stock_non_negative
  CHECK (stock >= 0);

-- 3. Add unique constraints
CREATE UNIQUE INDEX idx_users_email_unique
  ON users(LOWER(email));

-- 4. Add NOT NULL constraints
ALTER TABLE users
  ALTER COLUMN email SET NOT NULL;

ALTER TABLE orders
  ALTER COLUMN total SET NOT NULL;

Automated Testing

// tests/data-integrity.test.ts
describe("Data Integrity", () => {
  it("should not allow orphaned orders", async () => {
    // Try to create order with non-existent user
    await expect(
      prisma.order.create({
        data: {
          userId: 99999, // Non-existent
          total: 100,
          status: "pending",
        },
      })
    ).rejects.toThrow("Foreign key constraint");
  });

  it("should not allow negative prices", async () => {
    await expect(
      prisma.product.create({
        data: {
          name: "Test",
          price: -10, // Invalid
          stock: 100,
        },
      })
    ).rejects.toThrow("Check constraint");
  });

  it("should not allow duplicate emails", async () => {
    await prisma.user.create({
      data: { email: "test@example.com", name: "Test" },
    });

    await expect(
      prisma.user.create({
        data: { email: "test@example.com", name: "Test 2" },
      })
    ).rejects.toThrow("Unique constraint");
  });
});

Monitoring Dashboard

// Monitor data quality metrics
async function getDataQualityMetrics() {
  return {
    orphanedOrders: await prisma.$queryRaw`
      SELECT COUNT(*) FROM orders o
      LEFT JOIN users u ON u.id = o.user_id
      WHERE u.id IS NULL
    `,
    duplicateEmails: await prisma.$queryRaw`
      SELECT COUNT(*) FROM (
        SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1
      ) t
    `,
    invalidPrices: await prisma.$queryRaw`
      SELECT COUNT(*) FROM products WHERE price < 0
    `,
    missingData: await prisma.$queryRaw`
      SELECT 
        SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email,
        SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total
      FROM users
      CROSS JOIN orders
    `,
  };
}

Best Practices

  1. Add constraints: Prevent issues at database level
  2. Regular audits: Weekly integrity checks
  3. Automated fixes: Safe, reversible repairs
  4. Monitor metrics: Track data quality over time
  5. Test constraints: Ensure they work
  6. Soft deletes: Easier recovery
  7. Backup before fixes: Always

Output Checklist

  • Orphaned record detection
  • Foreign key integrity checks
  • Constraint violation detection
  • Missing field checks
  • Automated audit script
  • Fix scripts (with dry-run)
  • Prevention migrations (add constraints)
  • Automated tests
  • Monitoring dashboard
  • Regular audit schedule