Database Expert
Comprehensive guide for database design, optimization, and administration.
Database Selection
| Database |
Type |
Best For |
| PostgreSQL |
Relational |
Complex queries, ACID, JSON support |
| MongoDB |
Document |
Flexible schemas, rapid iteration |
| Redis |
Key-Value |
Caching, sessions, real-time |
| SQLite |
Embedded |
Mobile, desktop, testing |
PostgreSQL
Schema Design
-- Users table with proper constraints
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email VARCHAR(255) NOT NULL UNIQUE,
password_hash VARCHAR(255) NOT NULL,
full_name VARCHAR(100) NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'active'
CHECK (status IN ('active', 'inactive', 'suspended')),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Index for common queries
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_status ON users(status) WHERE status = 'active';
-- Automatic updated_at trigger
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Posts with full-text search
CREATE TABLE posts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
author_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(255) NOT NULL,
content TEXT NOT NULL,
tags TEXT[] DEFAULT '{}',
search_vector TSVECTOR,
published_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Full-text search index
CREATE INDEX idx_posts_search ON posts USING GIN(search_vector);
Query Optimization
-- EXPLAIN ANALYZE for query analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT u.full_name, COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON p.author_id = u.id
WHERE u.status = 'active'
GROUP BY u.id
ORDER BY post_count DESC
LIMIT 10;
-- Partial index for filtered queries
CREATE INDEX idx_posts_published
ON posts(published_at)
WHERE published_at IS NOT NULL;
-- Covering index (index-only scan)
CREATE INDEX idx_posts_author_title
ON posts(author_id)
INCLUDE (title, published_at);
-- Expression index
CREATE INDEX idx_users_email_lower
ON users(LOWER(email));
Advanced Features
-- Common Table Expression (CTE)
WITH active_authors AS (
SELECT DISTINCT author_id
FROM posts
WHERE published_at > NOW() - INTERVAL '30 days'
),
author_stats AS (
SELECT
u.id,
u.full_name,
COUNT(p.id) as total_posts
FROM users u
JOIN active_authors aa ON aa.author_id = u.id
LEFT JOIN posts p ON p.author_id = u.id
GROUP BY u.id
)
SELECT * FROM author_stats
ORDER BY total_posts DESC;
-- Window functions
SELECT
id,
title,
author_id,
published_at,
ROW_NUMBER() OVER (PARTITION BY author_id ORDER BY published_at DESC) as author_rank,
LAG(published_at) OVER (PARTITION BY author_id ORDER BY published_at) as prev_post_date
FROM posts
WHERE published_at IS NOT NULL;
-- JSON operations
SELECT
id,
metadata->>'source' as source,
metadata->'stats'->>'views' as views
FROM posts
WHERE metadata @> '{"featured": true}';
-- Recursive CTE (for hierarchies)
WITH RECURSIVE category_tree AS (
SELECT id, name, parent_id, 0 as depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.name, c.parent_id, ct.depth + 1
FROM categories c
JOIN category_tree ct ON c.parent_id = ct.id
WHERE ct.depth < 10
)
SELECT * FROM category_tree ORDER BY depth;
MongoDB
Schema Design Patterns
// Embedding (for 1:few relationships)
const userSchema = {
_id: ObjectId,
email: String,
profile: {
firstName: String,
lastName: String,
avatar: String,
},
addresses: [
{
type: String,
street: String,
city: String,
}
],
};
// Referencing (for 1:many relationships)
const postSchema = {
_id: ObjectId,
authorId: ObjectId,
title: String,
content: String,
tags: [String],
};
Aggregation Pipeline
// Complex aggregation
db.orders.aggregate([
{
$match: {
status: "completed",
createdAt: { $gte: ISODate("2024-01-01") }
}
},
{
$lookup: {
from: "users",
localField: "userId",
foreignField: "_id",
as: "user"
}
},
{ $unwind: "$user" },
{
$group: {
_id: { month: { $month: "$createdAt" } },
totalRevenue: { $sum: "$total" },
orderCount: { $sum: 1 }
}
},
{ $sort: { totalRevenue: -1 } }
]);
Indexes
// Compound index
db.posts.createIndex({ authorId: 1, createdAt: -1 });
// Text index
db.posts.createIndex(
{ title: "text", content: "text" },
{ weights: { title: 10, content: 1 } }
);
// Partial index
db.orders.createIndex(
{ createdAt: 1 },
{ partialFilterExpression: { status: "pending" } }
);
// TTL index (auto-expire)
db.sessions.createIndex({ createdAt: 1 }, { expireAfterSeconds: 3600 });
Redis
Data Structures
# Strings
SET user:1:name "John Doe"
GET user:1:name
SETEX session:abc123 3600 "user_data"
INCR page:home:views
# Hashes
HSET user:1 name "John" email "john@example.com"
HGETALL user:1
# Lists (queues)
LPUSH queue:jobs '{"type":"email"}'
RPOP queue:jobs
# Sets
SADD user:1:roles admin editor
SISMEMBER user:1:roles admin
# Sorted Sets (leaderboards)
ZADD leaderboard 1000 "player1" 1500 "player2"
ZREVRANGE leaderboard 0 9 WITHSCORES
# Streams (event log)
XADD events * type "user_login" user_id "123"
XREAD COUNT 10 STREAMS events 0
Caching Patterns
// Cache-aside pattern
async function getUser(userId: string): Promise<User> {
const cacheKey = `user:${userId}`;
const cached = await redis.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
const user = await db.users.findById(userId);
await redis.setex(cacheKey, 3600, JSON.stringify(user));
return user;
}
// Rate limiting
async function checkRateLimit(userId: string, limit: number, window: number): Promise<boolean> {
const key = `ratelimit:${userId}`;
const current = await redis.incr(key);
if (current === 1) {
await redis.expire(key, window);
}
return current <= limit;
}
// Distributed lock
async function acquireLock(resource: string, ttl: number): Promise<string | null> {
const lockId = crypto.randomUUID();
const acquired = await redis.set(
`lock:${resource}`,
lockId,
'NX',
'EX',
ttl
);
return acquired ? lockId : null;
}
// Release lock with Lua script (atomic operation)
// Use EVALSHA with pre-loaded script for production
const releaseLockScript = `
if redis.call("get", KEYS[1]) == ARGV[1] then
return redis.call("del", KEYS[1])
else
return 0
end
`;
Pub/Sub
// Publisher
async function publishEvent(channel: string, event: object): Promise<void> {
await redis.publish(channel, JSON.stringify(event));
}
// Subscriber
const subscriber = redis.duplicate();
subscriber.subscribe('events');
subscriber.on('message', (channel, message) => {
const event = JSON.parse(message);
handleEvent(event);
});
Query Optimization Checklist
PostgreSQL
MongoDB
Redis