Database Indexing
Definition

Index Types

Creating Indexes
-- BASIC INDEX
CREATE INDEX idx_users_email ON users(email);
-- UNIQUE INDEX (also enforces uniqueness)
CREATE UNIQUE INDEX idx_users_email ON users(email);
-- COMPOSITE INDEX (multiple columns)
CREATE INDEX idx_orders_user_date ON orders(user_id, order_date);
-- Order matters! Supports:
-- WHERE user_id = ?
-- WHERE user_id = ? AND order_date = ?
-- WHERE user_id = ? AND order_date > ?
-- Does NOT support (efficiently):
-- WHERE order_date = ? (second column only)
-- PARTIAL INDEX (filtered)
CREATE INDEX idx_active_users ON users(email)
WHERE status = 'active';
-- Smaller index, only includes active users
-- COVERING INDEX (includes all needed columns)
CREATE INDEX idx_users_email_name ON users(email) INCLUDE (name);
-- Query can be answered from index alone (no table lookup)
-- EXPRESSION INDEX
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Now this query can use index:
-- SELECT * FROM users WHERE LOWER(email) = '[email protected]'
-- CONCURRENT INDEX (no locking)
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
-- Takes longer but doesn't block writes
-- DROP INDEX
DROP INDEX idx_users_email;
DROP INDEX CONCURRENTLY idx_users_email; -- Non-blocking
Composite Index Rules

Query Plan Analysis
-- EXPLAIN shows execution plan
EXPLAIN SELECT * FROM users WHERE email = '[email protected]';
-- EXPLAIN ANALYZE runs query and shows actual stats
EXPLAIN ANALYZE SELECT * FROM users WHERE email = '[email protected]';
-- PLAN TYPES (good → bad):
-- Index Scan: Uses index, fetches rows
Index Scan using idx_users_email on users
Index Cond: (email = '[email protected]')
-- Index Only Scan: Answers from index alone (best)
Index Only Scan using idx_users_email_name on users
Index Cond: (email = '[email protected]')
-- Bitmap Index Scan: Multiple indexes combined
Bitmap Heap Scan on orders
Recheck Cond: (user_id = 5)
-> Bitmap Index Scan on idx_orders_user
Index Cond: (user_id = 5)
-- Seq Scan: Full table scan (usually bad for large tables)
Seq Scan on users
Filter: (email = '[email protected]')
-- KEY METRICS:
-- rows: Estimated/actual rows
-- cost: Arbitrary unit, lower is better
-- actual time: Real execution time (ms)
-- buffers: Pages read (shared/local)
-- COMMON ISSUES:
-- "rows=1000" estimated but "actual rows=100000"
-- → Statistics outdated, run ANALYZE
-- Seq Scan on large table
-- → Missing index or query can't use existing index
Index Anti-Patterns
-- ANTI-PATTERN 1: Function on indexed column
-- BAD: Index on 'email' won't be used
SELECT * FROM users WHERE LOWER(email) = '[email protected]';
-- FIX: Create expression index or store lowercase
-- ANTI-PATTERN 2: Implicit type conversion
-- BAD: phone is varchar, comparing to int
SELECT * FROM users WHERE phone = 5551234;
-- FIX: Use correct type
SELECT * FROM users WHERE phone = '5551234';
-- ANTI-PATTERN 3: Leading wildcard
-- BAD: Can't use index
SELECT * FROM users WHERE email LIKE '%@gmail.com';
-- FIX: Use full-text search or reverse index
-- ANTI-PATTERN 4: OR conditions
-- BAD: May not use index efficiently
SELECT * FROM orders WHERE user_id = 5 OR status = 'pending';
-- FIX: Use UNION
SELECT * FROM orders WHERE user_id = 5
UNION
SELECT * FROM orders WHERE status = 'pending';
-- ANTI-PATTERN 5: Too many indexes
-- Each write must update all indexes
-- Unused indexes waste space and slow writes
-- Regular audit: pg_stat_user_indexes
-- ANTI-PATTERN 6: Wrong column order in composite index
-- Index: (date, user_id)
-- Query: WHERE user_id = ? AND date > ?
-- → Can only use date for range, not user_id for filtering
Tips & Tricks
