Database Migrations
This guide covers database migrations using Alembic with SQLModel.
tip
For comprehensive documentation, see the Alembic documentation.
Common Migration Scenarios
Editing the current schema
# 1. Edit the schema (add/edit/remove tables and fields) in `src/models/db_models.py` (Pydantics with `table=True`)
# 2. Generate migration: this will create a migration script by checking the diff between the current DB (see `.env`) and the one from `db_models.py`
uv run alembic revision --autogenerate -m "Add priority to tasks"
# 3. Check the generated migration script (upgrade and downgrade function) in `alembic/versions/`
# 4. When everything seems good and you want to update the DB, use:
uv run alembic upgrade head
Data Migration
# Migration with data transformation
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
# Add new column
op.add_column('task', sa.Column('status', sa.String(20), nullable=True))
# Migrate data
connection = op.get_bind()
connection.execute(
"UPDATE task SET status = CASE WHEN completed THEN 'completed' ELSE 'pending' END"
)
# Make column not nullable
op.alter_column('task', 'status', nullable=False)
Creating Indexes
def upgrade() -> None:
# Create composite index
op.create_index('ix_task_user_completed', 'task', ['user_id', 'completed'])
# Create partial index (PostgreSQL)
op.execute(
"CREATE INDEX ix_task_active ON task (user_id) WHERE completed = false"
)
def downgrade() -> None:
op.drop_index('ix_task_user_completed', 'task')
op.execute("DROP INDEX ix_task_active")
Migration Best Practices
1. Always Review Generated Migrations
# Generate migration
uv run alembic revision --autogenerate -m "Add task categories"
# Review the generated file before applying
# Check: alembic/versions/[hash]_add_task_categories.py
2. Use Descriptive Migration Messages
# Good migration messages
uv run alembic revision --autogenerate -m "Add task categories and tags"
uv run alembic revision --autogenerate -m "Add user preferences table"
uv run alembic revision --autogenerate -m "Fix task due_date timezone issues"
# Avoid generic messages
uv run alembic revision --autogenerate -m "Update models" # Too vague
3. Handle Large Data Migrations
# For large tables, process in batches
def upgrade() -> None:
connection = op.get_bind()
# Process in batches of 1000
batch_size = 1000
offset = 0
while True:
result = connection.execute(
f"SELECT id FROM task LIMIT {batch_size} OFFSET {offset}"
)
rows = result.fetchall()
if not rows:
break
# Update batch
task_ids = [str(row[0]) for row in rows]
connection.execute(
f"UPDATE task SET status = 'pending' WHERE id IN ({','.join(['%s'] * len(task_ids))})",
task_ids
)
offset += batch_size
Migration Checklist
Before deploying migrations to production:
- Review generated migration files
- Test migration up and down on development data
- Backup production database
- Plan for rollback if needed
- Check for breaking changes in application code
- Coordinate with team for deployment