Skip to main content

Database Migrations

This guide covers database migrations using Alembic with SQLModel.

tip

For comprehensive documentation, see the Alembic documentation.

Common Migration Scenarios

Editing the current schema

# 1. Edit the schema (add/edit/remove tables and fields) in `src/models/db_models.py` (Pydantics with `table=True`)

# 2. Generate migration: this will create a migration script by checking the diff between the current DB (see `.env`) and the one from `db_models.py`
uv run alembic revision --autogenerate -m "Add priority to tasks"

# 3. Check the generated migration script (upgrade and downgrade function) in `alembic/versions/`

# 4. When everything seems good and you want to update the DB, use:
uv run alembic upgrade head

Data Migration

# Migration with data transformation
from alembic import op
import sqlalchemy as sa

def upgrade() -> None:
# Add new column
op.add_column('task', sa.Column('status', sa.String(20), nullable=True))

# Migrate data
connection = op.get_bind()
connection.execute(
"UPDATE task SET status = CASE WHEN completed THEN 'completed' ELSE 'pending' END"
)

# Make column not nullable
op.alter_column('task', 'status', nullable=False)

Creating Indexes

def upgrade() -> None:
# Create composite index
op.create_index('ix_task_user_completed', 'task', ['user_id', 'completed'])

# Create partial index (PostgreSQL)
op.execute(
"CREATE INDEX ix_task_active ON task (user_id) WHERE completed = false"
)

def downgrade() -> None:
op.drop_index('ix_task_user_completed', 'task')
op.execute("DROP INDEX ix_task_active")

Migration Best Practices

1. Always Review Generated Migrations

# Generate migration
uv run alembic revision --autogenerate -m "Add task categories"

# Review the generated file before applying
# Check: alembic/versions/[hash]_add_task_categories.py

2. Use Descriptive Migration Messages

# Good migration messages
uv run alembic revision --autogenerate -m "Add task categories and tags"
uv run alembic revision --autogenerate -m "Add user preferences table"
uv run alembic revision --autogenerate -m "Fix task due_date timezone issues"

# Avoid generic messages
uv run alembic revision --autogenerate -m "Update models" # Too vague

3. Handle Large Data Migrations

# For large tables, process in batches
def upgrade() -> None:
connection = op.get_bind()

# Process in batches of 1000
batch_size = 1000
offset = 0

while True:
result = connection.execute(
f"SELECT id FROM task LIMIT {batch_size} OFFSET {offset}"
)
rows = result.fetchall()

if not rows:
break

# Update batch
task_ids = [str(row[0]) for row in rows]
connection.execute(
f"UPDATE task SET status = 'pending' WHERE id IN ({','.join(['%s'] * len(task_ids))})",
task_ids
)

offset += batch_size

Migration Checklist

Before deploying migrations to production:

  • Review generated migration files
  • Test migration up and down on development data
  • Backup production database
  • Plan for rollback if needed
  • Check for breaking changes in application code
  • Coordinate with team for deployment