bookworm-smart-assistant/skills/backend-builder/references/database_setup.md

513 lines
11 KiB
Markdown
Raw Normal View History

# Database Setup and Configuration
## Connection Strategies
### PostgreSQL Connection
**Node.js with pg:**
```javascript
const { Pool } = require('pg');

// Single shared connection pool for the process; other modules import this
// instead of opening their own connections.
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20, // upper bound on open connections
  idleTimeoutMillis: 30000, // close clients that have been idle for 30s
  connectionTimeoutMillis: 2000, // give up after 2s if no connection is available
});

// Idle clients can still fail (database restart, dropped network link).
// The pool re-emits those failures as 'error'; without a listener Node treats
// the event as unhandled and terminates the process.
pool.on('error', (err) => {
  console.error('Unexpected error on idle PostgreSQL client', err);
});

module.exports = pool;
```
**Python with psycopg2:**
```python
import os

import psycopg2
from psycopg2 import pool

# Pool of 1 to 20 connections; connection settings are read from the environment.
# NOTE(review): SimpleConnectionPool is documented as not shareable across
# threads - use psycopg2.pool.ThreadedConnectionPool in a multi-threaded server.
connection_pool = psycopg2.pool.SimpleConnectionPool(
    1, 20,
    host=os.getenv('DB_HOST'),
    database=os.getenv('DB_NAME'),
    user=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD')
)
```
### Connection Pooling Best Practices
1. **Pool size** - Set based on concurrent requests (typically 10-20 per application instance); keep the total across all instances below the server's `max_connections`
2. **Idle timeout** - Release idle connections (30s recommended)
3. **Connection timeout** - Fail fast on connection issues (2s)
4. **Health checks** - Test connections before use
## Migration Strategies
### Node.js Migration Tools
**Using node-pg-migrate:**
```javascript
// migrations/1234567890_create_users.js
exports.up = pgm => {
pgm.createTable('users', {
id: 'uuid',
email: { type: 'varchar(255)', unique: true, notNull: true },
password_hash: { type: 'varchar(255)', notNull: true },
created_at: {
type: 'timestamp',
notNull: true,
default: pgm.func('current_timestamp'),
},
});
pgm.createIndex('users', 'email');
};
exports.down = pgm => {
pgm.dropTable('users');
};
```
**Using Knex.js:**
```javascript
// Apply: create the users table with a generated UUID key, a unique email,
// a password hash and created_at/updated_at timestamps.
exports.up = (knex) =>
  knex.schema.createTable('users', (users) => {
    const generatedUuid = knex.raw('gen_random_uuid()');
    users.uuid('id').primary().defaultTo(generatedUuid);
    users.string('email').unique().notNull();
    users.string('password_hash').notNull();
    users.timestamps(true, true);
  });

// Revert: drop the users table.
exports.down = (knex) => knex.schema.dropTable('users');
```
### Python Migration Tools
**Using Alembic (with SQLAlchemy):**
```python
# alembic/versions/001_create_users.py
# NOTE: the revision identifiers (revision / down_revision) that
# `alembic revision` generates are omitted here for brevity.
from alembic import op
import sqlalchemy as sa


def upgrade():
    """Create the users table."""
    op.create_table(
        'users',
        # Let the database generate ids so plain INSERTs work without an
        # application-supplied value (gen_random_uuid() is built in from
        # PostgreSQL 13; older servers need pgcrypto).
        sa.Column('id', sa.UUID(), primary_key=True,
                  server_default=sa.text('gen_random_uuid()')),
        sa.Column('email', sa.String(255), unique=True, nullable=False),
        sa.Column('password_hash', sa.String(255), nullable=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now(),
                  nullable=False),
    )
    # No explicit index on email: unique=True already creates a unique index.


def downgrade():
    """Drop the users table."""
    op.drop_table('users')
```
## Schema Design Patterns
### Timestamps Pattern
Always include standard timestamp fields:
```sql
created_at TIMESTAMP DEFAULT NOW() NOT NULL,
updated_at TIMESTAMP DEFAULT NOW() NOT NULL
```
Trigger for auto-updating `updated_at`:
```sql
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ language 'plpgsql';
CREATE TRIGGER update_users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
```
### Soft Delete Pattern
```sql
-- NULL means "not deleted"; a timestamp records when the row was soft-deleted
ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP NULL;
CREATE INDEX idx_users_deleted_at ON users(deleted_at);
-- Query only active records
SELECT * FROM users WHERE deleted_at IS NULL;
-- Soft delete ($1 is the bound id parameter; PostgreSQL uses $n, not ?)
UPDATE users SET deleted_at = NOW() WHERE id = $1;
```
### UUID vs Auto-increment IDs
**UUID advantages:**
- Globally unique
- Hard to enumerate (random v4 values are non-sequential), though not a substitute for authorization checks
- Distributed system friendly
**Auto-increment advantages:**
- Smaller storage
- Better index performance
- Easier debugging
**PostgreSQL UUID setup:**
```sql
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4()
);
```
### Enum Types
**PostgreSQL enums:**
```sql
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
CREATE TABLE users (
id UUID PRIMARY KEY,
role user_role NOT NULL DEFAULT 'user'
);
```
**Alternative: Check constraints:**
```sql
CREATE TABLE users (
id UUID PRIMARY KEY,
role VARCHAR(20) NOT NULL DEFAULT 'user',
CONSTRAINT valid_role CHECK (role IN ('admin', 'user', 'guest'))
);
```
## Indexing Strategies
### Common Index Types
**B-tree (default):**
```sql
CREATE INDEX idx_users_email ON users(email);
```
**Unique index:**
```sql
CREATE UNIQUE INDEX idx_users_email ON users(email);
```
**Composite index:**
```sql
CREATE INDEX idx_posts_author_date ON posts(author_id, created_at DESC);
```
**Partial index:**
```sql
CREATE INDEX idx_active_users ON users(email) WHERE deleted_at IS NULL;
```
**Full-text search:**
```sql
-- coalesce() keeps a row searchable when title or content is NULL
-- (NULL || text yields NULL, which would blank out the whole document).
-- Queries must use the same expression for the planner to pick this index.
CREATE INDEX idx_posts_search ON posts
  USING GIN (to_tsvector('english', coalesce(title, '') || ' ' || coalesce(content, '')));
```
### When to Add Indexes
✅ Add indexes for:
- Foreign keys
- Columns in WHERE clauses
- Columns in ORDER BY
- Columns in JOIN conditions
- Unique constraints
❌ Avoid indexes on:
- Small tables (<1000 rows)
- Columns with low cardinality
- Columns that are frequently updated
## Relationship Patterns
### One-to-Many
```sql
CREATE TABLE authors (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  name VARCHAR(255) NOT NULL
);
CREATE TABLE books (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  title VARCHAR(255) NOT NULL,
  -- One foreign key is enough; deleting an author also deletes their books
  author_id UUID REFERENCES authors(id) ON DELETE CASCADE
);
-- PostgreSQL does not index the referencing column automatically
CREATE INDEX idx_books_author ON books(author_id);
```
### Many-to-Many
```sql
CREATE TABLE students (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL
);
CREATE TABLE courses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL
);
CREATE TABLE enrollments (
student_id UUID REFERENCES students(id) ON DELETE CASCADE,
course_id UUID REFERENCES courses(id) ON DELETE CASCADE,
enrolled_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (student_id, course_id)
);
CREATE INDEX idx_enrollments_student ON enrollments(student_id);
CREATE INDEX idx_enrollments_course ON enrollments(course_id);
```
### Self-Referencing (Tree Structure)
```sql
CREATE TABLE categories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
parent_id UUID REFERENCES categories(id) ON DELETE SET NULL
);
CREATE INDEX idx_categories_parent ON categories(parent_id);
```
## Database Functions and Procedures
### Stored Procedure Example
```sql
CREATE OR REPLACE FUNCTION create_user_with_profile(
p_email VARCHAR,
p_password VARCHAR,
p_name VARCHAR
) RETURNS UUID AS $$
DECLARE
v_user_id UUID;
BEGIN
INSERT INTO users (email, password_hash)
VALUES (p_email, crypt(p_password, gen_salt('bf')))
RETURNING id INTO v_user_id;
INSERT INTO profiles (user_id, name)
VALUES (v_user_id, p_name);
RETURN v_user_id;
END;
$$ LANGUAGE plpgsql;
```
### Trigger Example
```sql
-- Writes one audit_log row for every INSERT, UPDATE or DELETE on users.
CREATE OR REPLACE FUNCTION log_user_changes()
RETURNS TRIGGER AS $$
DECLARE
  v_record_id UUID;
BEGIN
  -- NEW is not populated for DELETE, so the id must come from OLD there.
  IF TG_OP = 'DELETE' THEN
    v_record_id := OLD.id;
  ELSE
    v_record_id := NEW.id;
  END IF;

  INSERT INTO audit_log (table_name, record_id, action, changed_at)
  VALUES ('users', v_record_id, TG_OP, NOW());

  -- The return value of an AFTER row trigger is ignored.
  RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER user_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION log_user_changes();
```
## Performance Optimization
### Query Optimization
**Use EXPLAIN ANALYZE:**
```sql
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'user@example.com';
```
**Avoid N+1 queries:**
```javascript
// Bad: one query for the users, then one more query per user (N+1 round trips)
const users = await User.findAll();
for (const user of users) {
  const posts = await Post.findAll({ where: { userId: user.id } });
}

// Good: a single findAll that loads each user's posts through `include`
// (distinct variable name so both halves can live in the same scope)
const usersWithPosts = await User.findAll({
  include: [{ model: Post }],
});
```
### Connection Pool Tuning
```javascript
const pool = new Pool({
max: 20, // Maximum connections
min: 5, // Minimum connections
idleTimeoutMillis: 30000, // Close idle after 30s
connectionTimeoutMillis: 2000,
statement_timeout: 10000, // Query timeout 10s
});
```
### Caching Strategy
**Application-level cache (Redis):**
```javascript
// Cache-aside lookup: try Redis first, fall back to the database on a miss.
// `user` ends up as a plain row object on both paths (or null if not found).
const cacheKey = `user:${userId}`;
const cached = await redis.get(cacheKey);
let user;
if (cached) {
  // Redis stores strings, so decode the JSON written on the miss path below.
  user = JSON.parse(cached);
} else {
  // Assumes a pg-style result object with a `rows` array - adjust for other drivers.
  const { rows } = await db.query('SELECT * FROM users WHERE id = $1', [userId]);
  user = rows.length > 0 ? rows[0] : null;
  if (user !== null) {
    // Cache only the row itself (not the whole result object) for one hour.
    await redis.setex(cacheKey, 3600, JSON.stringify(user));
  }
}
```
**Database query cache:**
```sql
-- PostgreSQL: Use materialized views for expensive queries
CREATE MATERIALIZED VIEW user_stats AS
SELECT
  user_id,
  COUNT(*) AS post_count,
  MAX(created_at) AS last_post_at
FROM posts
GROUP BY user_id;
-- A unique index is required for REFRESH ... CONCURRENTLY
CREATE UNIQUE INDEX idx_user_stats_user_id ON user_stats(user_id);
-- Refresh periodically; CONCURRENTLY lets readers keep querying the old
-- contents while the refresh runs (a plain REFRESH blocks them)
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
```
## Backup and Recovery
### Backup Script (PostgreSQL)
```bash
#!/bin/bash
# backup_db.sh - dump one PostgreSQL database to a gzip-compressed SQL file
# and prune dumps older than seven days.

# -e: stop on the first error; -u: unset variables are errors;
# pipefail: a failing pg_dump must not be hidden by gzip exiting 0.
set -euo pipefail

BACKUP_DIR="/backups"
DB_NAME="myapp_db"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/${DB_NAME}_${TIMESTAMP}.sql.gz"

mkdir -p "$BACKUP_DIR"

# Do not leave a truncated dump behind if anything below fails
trap 'rm -f "$BACKUP_FILE"' ERR

# Create backup
pg_dump -U postgres -d "$DB_NAME" | gzip > "$BACKUP_FILE"

# Keep only last 7 days (restricted to this database's dumps)
find "$BACKUP_DIR" -name "${DB_NAME}_*.sql.gz" -mtime +7 -delete

echo "Backup completed: $BACKUP_FILE"
```
### Point-in-Time Recovery
```bash
# 1. Enable WAL archiving in postgresql.conf (requires a server restart):
#      wal_level = replica
#      archive_mode = on
#      archive_command = 'test ! -f /archive/%f && cp %p /archive/%f'

# 2. Take a base backup. Point-in-time recovery replays archived WAL on top of
#    a physical base backup; a pg_dump SQL file cannot be used for this.
pg_basebackup -U postgres -D /backups/base -Ft -z -P

# 3. Restore to a specific point: stop the server, unpack the base backup into
#    an empty data directory, then add to postgresql.conf (PostgreSQL 12+):
#      restore_command = 'cp /archive/%f %p'
#      recovery_target_time = '2024-12-09 10:30:00'
#    recovery.signal tells the server to enter targeted recovery on startup.
touch "$PGDATA/recovery.signal"
pg_ctl -D "$PGDATA" start
```
## Multi-Database Setup
### Read Replicas
```javascript
// Pool settings for the primary (writes) and the replica (reads).
const masterConfig = {
  connectionString: process.env.MASTER_DB_URL,
  max: 20,
};
const replicaConfig = {
  connectionString: process.env.REPLICA_DB_URL,
  max: 50, // larger pool: the replica serves the read-heavy load
};

const masterPool = new Pool(masterConfig);
const replicaPool = new Pool(replicaConfig);

// Writes are sent to the primary.
async function createUser(data) {
  const insertResult = await masterPool.query('INSERT INTO users...', [data]);
  return insertResult;
}

// Reads are sent to the replica.
async function getUsers() {
  const selectResult = await replicaPool.query('SELECT * FROM users...');
  return selectResult;
}
```
### Sharding Strategy
```javascript
/**
 * Map a user id to a shard index using a 32-bit FNV-1a hash of its string form.
 *
 * Hashing the whole string works for any id format. The previous
 * `parseInt(userId, 16)` approach returned NaN for non-hex ids and, for
 * 128-bit hex ids, lost the low bits to floating-point rounding so that ids
 * piled up on a single shard.
 *
 * NOTE: changing the hash function changes which shard an id maps to, so
 * existing data must be re-sharded when adopting this.
 *
 * @param {string|number} userId - Identifier to place on a shard.
 * @param {number} [numShards=NUM_SHARDS] - Number of shards; positive integer.
 * @returns {number} Shard index in the range [0, numShards).
 * @throws {RangeError} If numShards is not a positive integer.
 */
function getShardId(userId, numShards = NUM_SHARDS) {
  if (!Number.isInteger(numShards) || numShards <= 0) {
    throw new RangeError(`numShards must be a positive integer, got ${numShards}`);
  }
  const key = String(userId);
  let hash = 0x811c9dc5; // FNV-1a 32-bit offset basis
  for (let i = 0; i < key.length; i += 1) {
    hash ^= key.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // >>> 0 reinterprets the signed 32-bit result as unsigned before the modulo.
  return (hash >>> 0) % numShards;
}
// Look up the connection pool registered for the given shard index.
function getPool(shardId) {
  const { [shardId]: shardPool } = pools;
  return shardPool;
}
// Usage
const shardId = getShardId(userId);
const pool = getPool(shardId);
await pool.query('SELECT...', [userId]);
```
## Security Considerations
### SQL Injection Prevention
```javascript
// ❌ NEVER DO THIS: the input is spliced into the SQL text, so a crafted
// email value can change the statement itself
const unsafeQuery = `SELECT * FROM users WHERE email = '${email}'`;

// ✅ ALWAYS USE PARAMETERIZED QUERIES: the value is sent separately from the
// SQL text and is never parsed as SQL
const query = 'SELECT * FROM users WHERE email = $1';
await pool.query(query, [email]);
```
### Encryption at Rest
```sql
-- Enable pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Encrypt sensitive data
INSERT INTO users (email, ssn_encrypted)
VALUES ('user@example.com', pgp_sym_encrypt('123-45-6789', 'encryption_key'));
-- Decrypt when needed
SELECT email, pgp_sym_decrypt(ssn_encrypted, 'encryption_key')
FROM users;
```
### Row-Level Security (PostgreSQL)
```sql
-- Enable RLS
-- (the table owner bypasses policies unless FORCE ROW LEVEL SECURITY is also set)
ALTER TABLE posts ENABLE ROW LEVEL SECURITY;
-- Create policy: a session only sees and modifies posts whose author matches
-- the app.current_user_id setting
CREATE POLICY user_posts_policy ON posts
  FOR ALL
  USING (author_id = current_setting('app.current_user_id')::uuid);
```
```javascript
// Set user context in application.
// SET cannot take bind parameters, so use set_config() instead. Passing
// is_local = true scopes the value to the current transaction, so it cannot
// leak to another request that later reuses the same pooled connection.
const client = await pool.connect();
try {
  await client.query('BEGIN');
  await client.query("SELECT set_config('app.current_user_id', $1, true)", [userId]);
  // ... run this user's queries on the same client ...
  await client.query('COMMIT');
} catch (err) {
  await client.query('ROLLBACK');
  throw err;
} finally {
  client.release();
}
```