513 lines
11 KiB
Markdown
513 lines
11 KiB
Markdown
# Database Setup and Configuration
|
|
|
|
## Connection Strategies
|
|
|
|
### PostgreSQL Connection
|
|
|
|
**Node.js with pg:**
|
|
```javascript
|
|
const { Pool } = require('pg');
|
|
|
|
const pool = new Pool({
|
|
connectionString: process.env.DATABASE_URL,
|
|
max: 20,
|
|
idleTimeoutMillis: 30000,
|
|
connectionTimeoutMillis: 2000,
|
|
});
|
|
|
|
module.exports = pool;
|
|
```
|
|
|
|
**Python with psycopg2:**
|
|
```python
|
|
import psycopg2
|
|
from psycopg2 import pool
|
|
|
|
connection_pool = psycopg2.pool.SimpleConnectionPool(
|
|
1, 20,
|
|
host=os.getenv('DB_HOST'),
|
|
database=os.getenv('DB_NAME'),
|
|
user=os.getenv('DB_USER'),
|
|
password=os.getenv('DB_PASSWORD')
|
|
)
|
|
```
|
|
|
|
### Connection Pooling Best Practices
|
|
|
|
1. **Pool size** - Set based on concurrent requests (typically 10-20)
|
|
2. **Idle timeout** - Release idle connections (30s recommended)
|
|
3. **Connection timeout** - Fail fast on connection issues (2s)
|
|
4. **Health checks** - Test connections before use
|
|
|
|
## Migration Strategies
|
|
|
|
### Node.js Migration Tools
|
|
|
|
**Using node-pg-migrate:**
|
|
```javascript
|
|
// migrations/1234567890_create_users.js
|
|
exports.up = pgm => {
|
|
pgm.createTable('users', {
|
|
id: 'uuid',
|
|
email: { type: 'varchar(255)', unique: true, notNull: true },
|
|
password_hash: { type: 'varchar(255)', notNull: true },
|
|
created_at: {
|
|
type: 'timestamp',
|
|
notNull: true,
|
|
default: pgm.func('current_timestamp'),
|
|
},
|
|
});
|
|
|
|
pgm.createIndex('users', 'email');
|
|
};
|
|
|
|
exports.down = pgm => {
|
|
pgm.dropTable('users');
|
|
};
|
|
```
|
|
|
|
**Using Knex.js:**
|
|
```javascript
|
|
exports.up = function(knex) {
|
|
return knex.schema.createTable('users', table => {
|
|
table.uuid('id').primary().defaultTo(knex.raw('gen_random_uuid()'));
|
|
table.string('email').unique().notNull();
|
|
table.string('password_hash').notNull();
|
|
table.timestamps(true, true);
|
|
});
|
|
};
|
|
|
|
exports.down = function(knex) {
|
|
return knex.schema.dropTable('users');
|
|
};
|
|
```
|
|
|
|
### Python Migration Tools
|
|
|
|
**Using Alembic (with SQLAlchemy):**
|
|
```python
|
|
# alembic/versions/001_create_users.py
|
|
from alembic import op
|
|
import sqlalchemy as sa
|
|
|
|
def upgrade():
|
|
op.create_table(
|
|
'users',
|
|
sa.Column('id', sa.UUID(), primary_key=True),
|
|
sa.Column('email', sa.String(255), unique=True, nullable=False),
|
|
sa.Column('password_hash', sa.String(255), nullable=False),
|
|
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
|
|
)
|
|
op.create_index('idx_users_email', 'users', ['email'])
|
|
|
|
def downgrade():
|
|
op.drop_table('users')
|
|
```
|
|
|
|
## Schema Design Patterns
|
|
|
|
### Timestamps Pattern
|
|
|
|
Always include standard timestamp fields:
|
|
```sql
|
|
created_at TIMESTAMP DEFAULT NOW() NOT NULL,
|
|
updated_at TIMESTAMP DEFAULT NOW() NOT NULL
|
|
```
|
|
|
|
Trigger for auto-updating `updated_at`:
|
|
```sql
|
|
CREATE OR REPLACE FUNCTION update_updated_at_column()
|
|
RETURNS TRIGGER AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$ language 'plpgsql';
|
|
|
|
CREATE TRIGGER update_users_updated_at
|
|
BEFORE UPDATE ON users
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION update_updated_at_column();
|
|
```
|
|
|
|
### Soft Delete Pattern
|
|
|
|
```sql
|
|
ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP NULL;
|
|
CREATE INDEX idx_users_deleted_at ON users(deleted_at);
|
|
|
|
-- Query only active records
|
|
SELECT * FROM users WHERE deleted_at IS NULL;
|
|
|
|
-- Soft delete
|
|
UPDATE users SET deleted_at = NOW() WHERE id = ?;
|
|
```
|
|
|
|
### UUID vs Auto-increment IDs
|
|
|
|
**UUID advantages:**
|
|
- Globally unique
|
|
- Secure (non-sequential)
|
|
- Distributed system friendly
|
|
|
|
**Auto-increment advantages:**
|
|
- Smaller storage
|
|
- Better index performance
|
|
- Easier debugging
|
|
|
|
**PostgreSQL UUID setup:**
|
|
```sql
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
|
CREATE TABLE users (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4()
|
|
);
|
|
```
|
|
|
|
### Enum Types
|
|
|
|
**PostgreSQL enums:**
|
|
```sql
|
|
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
|
|
|
|
CREATE TABLE users (
|
|
id UUID PRIMARY KEY,
|
|
role user_role NOT NULL DEFAULT 'user'
|
|
);
|
|
```
|
|
|
|
**Alternative: Check constraints:**
|
|
```sql
|
|
CREATE TABLE users (
|
|
id UUID PRIMARY KEY,
|
|
role VARCHAR(20) NOT NULL DEFAULT 'user',
|
|
CONSTRAINT valid_role CHECK (role IN ('admin', 'user', 'guest'))
|
|
);
|
|
```
|
|
|
|
## Indexing Strategies
|
|
|
|
### Common Index Types
|
|
|
|
**B-tree (default):**
|
|
```sql
|
|
CREATE INDEX idx_users_email ON users(email);
|
|
```
|
|
|
|
**Unique index:**
|
|
```sql
|
|
CREATE UNIQUE INDEX idx_users_email ON users(email);
|
|
```
|
|
|
|
**Composite index:**
|
|
```sql
|
|
CREATE INDEX idx_posts_author_date ON posts(author_id, created_at DESC);
|
|
```
|
|
|
|
**Partial index:**
|
|
```sql
|
|
CREATE INDEX idx_active_users ON users(email) WHERE deleted_at IS NULL;
|
|
```
|
|
|
|
**Full-text search:**
|
|
```sql
|
|
CREATE INDEX idx_posts_search ON posts USING GIN(to_tsvector('english', title || ' ' || content));
|
|
```
|
|
|
|
### When to Add Indexes
|
|
|
|
✅ Add indexes for:
|
|
- Foreign keys
|
|
- Columns in WHERE clauses
|
|
- Columns in ORDER BY
|
|
- Columns in JOIN conditions
|
|
- Unique constraints
|
|
|
|
❌ Avoid indexes on:
|
|
- Small tables (<1000 rows)
|
|
- Columns with low cardinality
|
|
- Columns that are frequently updated
|
|
|
|
## Relationship Patterns
|
|
|
|
### One-to-Many
|
|
|
|
```sql
|
|
CREATE TABLE authors (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name VARCHAR(255) NOT NULL
|
|
);
|
|
|
|
CREATE TABLE books (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
title VARCHAR(255) NOT NULL,
|
|
author_id UUID REFERENCES authors(id) ON DELETE CASCADE,
|
|
FOREIGN KEY (author_id) REFERENCES authors(id)
|
|
);
|
|
|
|
CREATE INDEX idx_books_author ON books(author_id);
|
|
```
|
|
|
|
### Many-to-Many
|
|
|
|
```sql
|
|
CREATE TABLE students (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name VARCHAR(255) NOT NULL
|
|
);
|
|
|
|
CREATE TABLE courses (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name VARCHAR(255) NOT NULL
|
|
);
|
|
|
|
CREATE TABLE enrollments (
|
|
student_id UUID REFERENCES students(id) ON DELETE CASCADE,
|
|
course_id UUID REFERENCES courses(id) ON DELETE CASCADE,
|
|
enrolled_at TIMESTAMP DEFAULT NOW(),
|
|
PRIMARY KEY (student_id, course_id)
|
|
);
|
|
|
|
CREATE INDEX idx_enrollments_student ON enrollments(student_id);
|
|
CREATE INDEX idx_enrollments_course ON enrollments(course_id);
|
|
```
|
|
|
|
### Self-Referencing (Tree Structure)
|
|
|
|
```sql
|
|
CREATE TABLE categories (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name VARCHAR(255) NOT NULL,
|
|
parent_id UUID REFERENCES categories(id) ON DELETE SET NULL
|
|
);
|
|
|
|
CREATE INDEX idx_categories_parent ON categories(parent_id);
|
|
```
|
|
|
|
## Database Functions and Procedures
|
|
|
|
### Stored Procedure Example
|
|
|
|
```sql
|
|
CREATE OR REPLACE FUNCTION create_user_with_profile(
|
|
p_email VARCHAR,
|
|
p_password VARCHAR,
|
|
p_name VARCHAR
|
|
) RETURNS UUID AS $$
|
|
DECLARE
|
|
v_user_id UUID;
|
|
BEGIN
|
|
INSERT INTO users (email, password_hash)
|
|
VALUES (p_email, crypt(p_password, gen_salt('bf')))
|
|
RETURNING id INTO v_user_id;
|
|
|
|
INSERT INTO profiles (user_id, name)
|
|
VALUES (v_user_id, p_name);
|
|
|
|
RETURN v_user_id;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
```
|
|
|
|
### Trigger Example
|
|
|
|
```sql
|
|
CREATE OR REPLACE FUNCTION log_user_changes()
|
|
RETURNS TRIGGER AS $$
|
|
BEGIN
|
|
INSERT INTO audit_log (table_name, record_id, action, changed_at)
|
|
VALUES ('users', NEW.id, TG_OP, NOW());
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
CREATE TRIGGER user_audit_trigger
|
|
AFTER INSERT OR UPDATE OR DELETE ON users
|
|
FOR EACH ROW EXECUTE FUNCTION log_user_changes();
|
|
```
|
|
|
|
## Performance Optimization
|
|
|
|
### Query Optimization
|
|
|
|
**Use EXPLAIN ANALYZE:**
|
|
```sql
|
|
EXPLAIN ANALYZE
|
|
SELECT * FROM users WHERE email = 'user@example.com';
|
|
```
|
|
|
|
**Avoid N+1 queries:**
|
|
```javascript
|
|
// Bad
|
|
const users = await User.findAll();
|
|
for (const user of users) {
|
|
const posts = await Post.findAll({ where: { userId: user.id } });
|
|
}
|
|
|
|
// Good
|
|
const users = await User.findAll({
|
|
include: [{ model: Post }]
|
|
});
|
|
```
|
|
|
|
### Connection Pool Tuning
|
|
|
|
```javascript
|
|
const pool = new Pool({
|
|
max: 20, // Maximum connections
|
|
min: 5, // Minimum connections
|
|
idleTimeoutMillis: 30000, // Close idle after 30s
|
|
connectionTimeoutMillis: 2000,
|
|
statement_timeout: 10000, // Query timeout 10s
|
|
});
|
|
```
|
|
|
|
### Caching Strategy
|
|
|
|
**Application-level cache (Redis):**
|
|
```javascript
|
|
const cacheKey = `user:${userId}`;
|
|
let user = await redis.get(cacheKey);
|
|
|
|
if (!user) {
|
|
user = await db.query('SELECT * FROM users WHERE id = $1', [userId]);
|
|
await redis.setex(cacheKey, 3600, JSON.stringify(user));
|
|
}
|
|
```
|
|
|
|
**Database query cache:**
|
|
```sql
|
|
-- PostgreSQL: Use materialized views for expensive queries
|
|
CREATE MATERIALIZED VIEW user_stats AS
|
|
SELECT
|
|
user_id,
|
|
COUNT(*) as post_count,
|
|
MAX(created_at) as last_post_at
|
|
FROM posts
|
|
GROUP BY user_id;
|
|
|
|
-- Refresh periodically
|
|
REFRESH MATERIALIZED VIEW user_stats;
|
|
```
|
|
|
|
## Backup and Recovery
|
|
|
|
### Backup Script (PostgreSQL)
|
|
|
|
```bash
|
|
#!/bin/bash
|
|
# backup_db.sh
|
|
|
|
BACKUP_DIR="/backups"
|
|
DB_NAME="myapp_db"
|
|
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
|
BACKUP_FILE="${BACKUP_DIR}/${DB_NAME}_${TIMESTAMP}.sql.gz"
|
|
|
|
# Create backup
|
|
pg_dump -U postgres -d $DB_NAME | gzip > $BACKUP_FILE
|
|
|
|
# Keep only last 7 days
|
|
find $BACKUP_DIR -name "*.sql.gz" -mtime +7 -delete
|
|
|
|
echo "Backup completed: $BACKUP_FILE"
|
|
```
|
|
|
|
### Point-in-Time Recovery
|
|
|
|
```bash
|
|
# Enable WAL archiving in postgresql.conf
|
|
wal_level = replica
|
|
archive_mode = on
|
|
archive_command = 'cp %p /archive/%f'
|
|
|
|
# Restore to specific point
|
|
pg_restore -d myapp_db -t "2024-12-09 10:30:00" backup.sql
|
|
```
|
|
|
|
## Multi-Database Setup
|
|
|
|
### Read Replicas
|
|
|
|
```javascript
|
|
const masterPool = new Pool({
|
|
connectionString: process.env.MASTER_DB_URL,
|
|
max: 20
|
|
});
|
|
|
|
const replicaPool = new Pool({
|
|
connectionString: process.env.REPLICA_DB_URL,
|
|
max: 50 // More connections for read-heavy load
|
|
});
|
|
|
|
// Write operations
|
|
async function createUser(data) {
|
|
return masterPool.query('INSERT INTO users...', [data]);
|
|
}
|
|
|
|
// Read operations
|
|
async function getUsers() {
|
|
return replicaPool.query('SELECT * FROM users...');
|
|
}
|
|
```
|
|
|
|
### Sharding Strategy
|
|
|
|
```javascript
|
|
function getShardId(userId) {
|
|
// Hash-based sharding
|
|
return parseInt(userId, 16) % NUM_SHARDS;
|
|
}
|
|
|
|
function getPool(shardId) {
|
|
return pools[shardId];
|
|
}
|
|
|
|
// Usage
|
|
const shardId = getShardId(userId);
|
|
const pool = getPool(shardId);
|
|
await pool.query('SELECT...', [userId]);
|
|
```
|
|
|
|
## Security Considerations
|
|
|
|
### SQL Injection Prevention
|
|
|
|
```javascript
|
|
// ❌ NEVER DO THIS
|
|
const query = `SELECT * FROM users WHERE email = '${email}'`;
|
|
|
|
// ✅ ALWAYS USE PARAMETERIZED QUERIES
|
|
const query = 'SELECT * FROM users WHERE email = $1';
|
|
await pool.query(query, [email]);
|
|
```
|
|
|
|
### Encryption at Rest
|
|
|
|
```sql
|
|
-- Enable pgcrypto extension
|
|
CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
|
|
-- Encrypt sensitive data
|
|
INSERT INTO users (email, ssn_encrypted)
|
|
VALUES ('user@example.com', pgp_sym_encrypt('123-45-6789', 'encryption_key'));
|
|
|
|
-- Decrypt when needed
|
|
SELECT email, pgp_sym_decrypt(ssn_encrypted, 'encryption_key')
|
|
FROM users;
|
|
```
|
|
|
|
### Row-Level Security (PostgreSQL)
|
|
|
|
```sql
|
|
-- Enable RLS
|
|
ALTER TABLE posts ENABLE ROW LEVEL SECURITY;
|
|
|
|
-- Create policy
|
|
CREATE POLICY user_posts_policy ON posts
|
|
FOR ALL
|
|
USING (author_id = current_setting('app.current_user_id')::uuid);
|
|
|
|
-- Set user context in application
|
|
await pool.query("SET app.current_user_id = $1", [userId]);
|
|
```
|