11 KiB
11 KiB
Database Setup and Configuration
Connection Strategies
PostgreSQL Connection
Node.js with pg:
const { Pool } = require('pg');
const pool = new Pool({
connectionString: process.env.DATABASE_URL,
max: 20,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
});
module.exports = pool;
Python with psycopg2:
import psycopg2
from psycopg2 import pool
connection_pool = psycopg2.pool.SimpleConnectionPool(
1, 20,
host=os.getenv('DB_HOST'),
database=os.getenv('DB_NAME'),
user=os.getenv('DB_USER'),
password=os.getenv('DB_PASSWORD')
)
Connection Pooling Best Practices
- Pool size - Set based on concurrent requests (typically 10-20)
- Idle timeout - Release idle connections (30s recommended)
- Connection timeout - Fail fast on connection issues (2s)
- Health checks - Test connections before use
Migration Strategies
Node.js Migration Tools
Using node-pg-migrate:
// migrations/1234567890_create_users.js
exports.up = pgm => {
pgm.createTable('users', {
id: 'uuid',
email: { type: 'varchar(255)', unique: true, notNull: true },
password_hash: { type: 'varchar(255)', notNull: true },
created_at: {
type: 'timestamp',
notNull: true,
default: pgm.func('current_timestamp'),
},
});
pgm.createIndex('users', 'email');
};
exports.down = pgm => {
pgm.dropTable('users');
};
Using Knex.js:
exports.up = function(knex) {
return knex.schema.createTable('users', table => {
table.uuid('id').primary().defaultTo(knex.raw('gen_random_uuid()'));
table.string('email').unique().notNull();
table.string('password_hash').notNull();
table.timestamps(true, true);
});
};
exports.down = function(knex) {
return knex.schema.dropTable('users');
};
Python Migration Tools
Using Alembic (with SQLAlchemy):
# alembic/versions/001_create_users.py
from alembic import op
import sqlalchemy as sa
def upgrade():
op.create_table(
'users',
sa.Column('id', sa.UUID(), primary_key=True),
sa.Column('email', sa.String(255), unique=True, nullable=False),
sa.Column('password_hash', sa.String(255), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
)
op.create_index('idx_users_email', 'users', ['email'])
def downgrade():
op.drop_table('users')
Schema Design Patterns
Timestamps Pattern
Always include standard timestamp fields:
created_at TIMESTAMP DEFAULT NOW() NOT NULL,
updated_at TIMESTAMP DEFAULT NOW() NOT NULL
Trigger for auto-updating updated_at:
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ language 'plpgsql';
CREATE TRIGGER update_users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
Soft Delete Pattern
ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP NULL;
CREATE INDEX idx_users_deleted_at ON users(deleted_at);
-- Query only active records
SELECT * FROM users WHERE deleted_at IS NULL;
-- Soft delete
UPDATE users SET deleted_at = NOW() WHERE id = ?;
UUID vs Auto-increment IDs
UUID advantages:
- Globally unique
- Secure (non-sequential)
- Distributed system friendly
Auto-increment advantages:
- Smaller storage
- Better index performance
- Easier debugging
PostgreSQL UUID setup:
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4()
);
Enum Types
PostgreSQL enums:
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
CREATE TABLE users (
id UUID PRIMARY KEY,
role user_role NOT NULL DEFAULT 'user'
);
Alternative: Check constraints:
CREATE TABLE users (
id UUID PRIMARY KEY,
role VARCHAR(20) NOT NULL DEFAULT 'user',
CONSTRAINT valid_role CHECK (role IN ('admin', 'user', 'guest'))
);
Indexing Strategies
Common Index Types
B-tree (default):
CREATE INDEX idx_users_email ON users(email);
Unique index:
CREATE UNIQUE INDEX idx_users_email ON users(email);
Composite index:
CREATE INDEX idx_posts_author_date ON posts(author_id, created_at DESC);
Partial index:
CREATE INDEX idx_active_users ON users(email) WHERE deleted_at IS NULL;
Full-text search:
CREATE INDEX idx_posts_search ON posts USING GIN(to_tsvector('english', title || ' ' || content));
When to Add Indexes
✅ Add indexes for:
- Foreign keys
- Columns in WHERE clauses
- Columns in ORDER BY
- Columns in JOIN conditions
- Unique constraints
❌ Avoid indexes on:
- Small tables (<1000 rows)
- Columns with low cardinality
- Columns that are frequently updated
Relationship Patterns
One-to-Many
CREATE TABLE authors (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL
);
CREATE TABLE books (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title VARCHAR(255) NOT NULL,
author_id UUID REFERENCES authors(id) ON DELETE CASCADE,
FOREIGN KEY (author_id) REFERENCES authors(id)
);
CREATE INDEX idx_books_author ON books(author_id);
Many-to-Many
CREATE TABLE students (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL
);
CREATE TABLE courses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL
);
CREATE TABLE enrollments (
student_id UUID REFERENCES students(id) ON DELETE CASCADE,
course_id UUID REFERENCES courses(id) ON DELETE CASCADE,
enrolled_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (student_id, course_id)
);
CREATE INDEX idx_enrollments_student ON enrollments(student_id);
CREATE INDEX idx_enrollments_course ON enrollments(course_id);
Self-Referencing (Tree Structure)
CREATE TABLE categories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
parent_id UUID REFERENCES categories(id) ON DELETE SET NULL
);
CREATE INDEX idx_categories_parent ON categories(parent_id);
Database Functions and Procedures
Stored Procedure Example
CREATE OR REPLACE FUNCTION create_user_with_profile(
p_email VARCHAR,
p_password VARCHAR,
p_name VARCHAR
) RETURNS UUID AS $$
DECLARE
v_user_id UUID;
BEGIN
INSERT INTO users (email, password_hash)
VALUES (p_email, crypt(p_password, gen_salt('bf')))
RETURNING id INTO v_user_id;
INSERT INTO profiles (user_id, name)
VALUES (v_user_id, p_name);
RETURN v_user_id;
END;
$$ LANGUAGE plpgsql;
Trigger Example
CREATE OR REPLACE FUNCTION log_user_changes()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO audit_log (table_name, record_id, action, changed_at)
VALUES ('users', NEW.id, TG_OP, NOW());
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER user_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION log_user_changes();
Performance Optimization
Query Optimization
Use EXPLAIN ANALYZE:
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'user@example.com';
Avoid N+1 queries:
// Bad
const users = await User.findAll();
for (const user of users) {
const posts = await Post.findAll({ where: { userId: user.id } });
}
// Good
const users = await User.findAll({
include: [{ model: Post }]
});
Connection Pool Tuning
const pool = new Pool({
max: 20, // Maximum connections
min: 5, // Minimum connections
idleTimeoutMillis: 30000, // Close idle after 30s
connectionTimeoutMillis: 2000,
statement_timeout: 10000, // Query timeout 10s
});
Caching Strategy
Application-level cache (Redis):
const cacheKey = `user:${userId}`;
let user = await redis.get(cacheKey);
if (!user) {
user = await db.query('SELECT * FROM users WHERE id = $1', [userId]);
await redis.setex(cacheKey, 3600, JSON.stringify(user));
}
Database query cache:
-- PostgreSQL: Use materialized views for expensive queries
CREATE MATERIALIZED VIEW user_stats AS
SELECT
user_id,
COUNT(*) as post_count,
MAX(created_at) as last_post_at
FROM posts
GROUP BY user_id;
-- Refresh periodically
REFRESH MATERIALIZED VIEW user_stats;
Backup and Recovery
Backup Script (PostgreSQL)
#!/bin/bash
# backup_db.sh
BACKUP_DIR="/backups"
DB_NAME="myapp_db"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/${DB_NAME}_${TIMESTAMP}.sql.gz"
# Create backup
pg_dump -U postgres -d $DB_NAME | gzip > $BACKUP_FILE
# Keep only last 7 days
find $BACKUP_DIR -name "*.sql.gz" -mtime +7 -delete
echo "Backup completed: $BACKUP_FILE"
Point-in-Time Recovery
# Enable WAL archiving in postgresql.conf
wal_level = replica
archive_mode = on
archive_command = 'cp %p /archive/%f'
# Restore to specific point
pg_restore -d myapp_db -t "2024-12-09 10:30:00" backup.sql
Multi-Database Setup
Read Replicas
const masterPool = new Pool({
connectionString: process.env.MASTER_DB_URL,
max: 20
});
const replicaPool = new Pool({
connectionString: process.env.REPLICA_DB_URL,
max: 50 // More connections for read-heavy load
});
// Write operations
async function createUser(data) {
return masterPool.query('INSERT INTO users...', [data]);
}
// Read operations
async function getUsers() {
return replicaPool.query('SELECT * FROM users...');
}
Sharding Strategy
function getShardId(userId) {
// Hash-based sharding
return parseInt(userId, 16) % NUM_SHARDS;
}
function getPool(shardId) {
return pools[shardId];
}
// Usage
const shardId = getShardId(userId);
const pool = getPool(shardId);
await pool.query('SELECT...', [userId]);
Security Considerations
SQL Injection Prevention
// ❌ NEVER DO THIS
const query = `SELECT * FROM users WHERE email = '${email}'`;
// ✅ ALWAYS USE PARAMETERIZED QUERIES
const query = 'SELECT * FROM users WHERE email = $1';
await pool.query(query, [email]);
Encryption at Rest
-- Enable pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Encrypt sensitive data
INSERT INTO users (email, ssn_encrypted)
VALUES ('user@example.com', pgp_sym_encrypt('123-45-6789', 'encryption_key'));
-- Decrypt when needed
SELECT email, pgp_sym_decrypt(ssn_encrypted, 'encryption_key')
FROM users;
Row-Level Security (PostgreSQL)
-- Enable RLS
ALTER TABLE posts ENABLE ROW LEVEL SECURITY;
-- Create policy
CREATE POLICY user_posts_policy ON posts
FOR ALL
USING (author_id = current_setting('app.current_user_id')::uuid);
-- Set user context in application
await pool.query("SET app.current_user_id = $1", [userId]);