feat: postgres prep on start

Alex
2026-04-17 20:45:22 +01:00
parent 66541e934b
commit e103799f81
11 changed files with 501 additions and 141 deletions

View File

@@ -1,3 +1,4 @@
import logging
import os
import platform
import uuid
@@ -20,6 +21,7 @@ from application.api.connector.routes import connector # noqa: E402
from application.api.v1 import v1_bp # noqa: E402
from application.celery_init import celery # noqa: E402
from application.core.settings import settings # noqa: E402
from application.storage.db.bootstrap import ensure_database_ready # noqa: E402
from application.stt.upload_limits import ( # noqa: E402
build_stt_file_size_limit_message,
should_reject_stt_request,
@@ -32,6 +34,17 @@ if platform.system() == "Windows":
pathlib.PosixPath = pathlib.WindowsPath
dotenv.load_dotenv()
# Self-bootstrap the user-data Postgres DB. Runs before any blueprint or
# repository touches the engine, so the first request can't race the
# schema being created. Gated by AUTO_CREATE_DB / AUTO_MIGRATE settings
# (default ON for dev; disable in prod if schema is managed out-of-band).
ensure_database_ready(
settings.POSTGRES_URI,
create_db=settings.AUTO_CREATE_DB,
migrate=settings.AUTO_MIGRATE,
logger=logging.getLogger("application.app"),
)
app = Flask(__name__)
app.register_blueprint(user)
app.register_blueprint(answer)

View File

@@ -30,6 +30,10 @@ class Settings(BaseSettings):
MONGO_URI: Optional[str] = None
# User-data Postgres DB.
POSTGRES_URI: Optional[str] = None
# On app startup, apply pending Alembic migrations. Default ON for dev; disable in prod if you manage schema out-of-band.
AUTO_MIGRATE: bool = True
# On app startup, create the target Postgres database if it's missing (requires CREATEDB privilege). Dev-friendly default.
AUTO_CREATE_DB: bool = True
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
DEFAULT_LLM_TOKEN_LIMIT: int = 128000 # Fallback when model not found in registry

View File

@@ -0,0 +1,320 @@
"""Self-bootstrapping database setup for the DocsGPT user-data Postgres DB.
On app startup the Flask factory (and Celery worker init) can call
:func:`ensure_database_ready` to:
1. Create the target database if it's missing (dev-friendly; requires the
configured role to have ``CREATEDB`` privilege).
2. Apply every pending Alembic migration up to ``head``.
Both steps are gated by settings that default ON for dev convenience and
can be turned off in prod (``AUTO_CREATE_DB`` / ``AUTO_MIGRATE``) where
schema is managed out-of-band by a deploy pipeline.
All heavy imports (alembic, psycopg, sqlalchemy.exc sub-symbols) are
deferred into the function bodies, so merely importing this module has
no side effects and stays cheap for test collection.
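Typical call, made once at process start (mirrors ``application/app.py``;
``settings`` is ``application.core.settings.settings``)::

    ensure_database_ready(
        settings.POSTGRES_URI,
        create_db=settings.AUTO_CREATE_DB,
        migrate=settings.AUTO_MIGRATE,
    )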
"""
from __future__ import annotations
import logging
from typing import Optional
def ensure_database_ready(
uri: Optional[str],
*,
create_db: bool,
migrate: bool,
logger: Optional[logging.Logger] = None,
) -> None:
"""Make sure the target Postgres DB exists and is migrated to ``head``.
This is idempotent, so calling it on every process start is safe. Each step is
independently gated so prod deployments that manage schema externally
can disable the migrate step while still allowing the process to boot
against an already-provisioned database.
Args:
uri: SQLAlchemy URI for the user-data Postgres database. If
``None`` or empty, the function logs and returns — the app
supports running without a configured URI for certain dev
flows that don't touch user data.
create_db: If ``True``, auto-create the database when it's
missing. Requires the configured role to have ``CREATEDB``.
migrate: If ``True``, run ``alembic upgrade head`` after the
database is reachable.
logger: Optional logger to use. Defaults to this module's logger.
Raises:
Exception: Any failure in an explicitly-enabled step is re-raised
so the app fails fast rather than booting into a broken state.
Missing-role / auth errors surface cleanly without a
mis-directed auto-create attempt.
"""
log = logger or logging.getLogger(__name__)
if not uri:
log.info(
"ensure_database_ready: POSTGRES_URI is not set; "
"skipping database bootstrap."
)
return
if create_db:
_ensure_database_exists(uri, log)
if migrate:
_run_migrations(log)
def _ensure_database_exists(uri: str, log: logging.Logger) -> None:
"""Create the target database if a connection reveals it's missing.
We probe with a lightweight ``connect().close()``. If Postgres
reports ``InvalidCatalogName`` (SQLSTATE ``3D000``), we reconnect to
the server's ``postgres`` maintenance DB and issue ``CREATE DATABASE``
in AUTOCOMMIT mode (required — CREATE DATABASE can't run in a
transaction). Any other connection failure (bad host, auth failure,
missing role) is re-raised untouched so the operator sees the true
cause instead of a mis-directed auto-create attempt.
"""
# Lazy imports keep module import side-effect free.
from sqlalchemy import create_engine
from sqlalchemy.engine import make_url
from sqlalchemy.exc import OperationalError
url = make_url(uri)
target_db = url.database
if not target_db:
raise RuntimeError(
f"POSTGRES_URI is missing a database name: {uri!r}. "
"Expected something like "
"'postgresql+psycopg://user:pass@host:5432/docsgpt'."
)
probe_engine = create_engine(uri, pool_pre_ping=False)
try:
try:
conn = probe_engine.connect()
except OperationalError as exc:
if _is_missing_database(exc):
log.info(
"ensure_database_ready: database %r is missing; "
"creating it...",
target_db,
)
_create_database(url, target_db, log)
log.info("ensure_database_ready: database %r ready.", target_db)
return
# Not a missing-DB error — surface it as-is. This is the path
# for bad host/auth/role-missing, and auto-creating would be
# actively wrong there.
log.error(
"ensure_database_ready: cannot connect to Postgres for "
"database %r: %s",
target_db,
exc,
)
raise
else:
conn.close()
log.info("ensure_database_ready: database %r ready.", target_db)
finally:
probe_engine.dispose()
def _create_database(url, target_db: str, log: logging.Logger) -> None:
"""Issue ``CREATE DATABASE`` against the server's ``postgres`` DB.
Uses AUTOCOMMIT (required by Postgres — ``CREATE DATABASE`` cannot run
inside a transaction). The database identifier is quoted via
``psycopg.sql.Identifier`` so unusual names (hyphens, reserved words)
are handled correctly.
Args:
url: Parsed SQLAlchemy URL for the target DB; we reuse
host/port/credentials and swap the database to ``postgres``.
target_db: The target database name to create.
log: Logger for INFO/ERROR breadcrumbs.
"""
from sqlalchemy import create_engine
from sqlalchemy.exc import OperationalError, ProgrammingError
# psycopg is imported lazily; its error classes are the canonical
# markers for the SQLSTATEs Postgres hands back.
import psycopg
from psycopg import sql as pg_sql
maintenance_url = url.set(database="postgres")
maintenance_engine = create_engine(
maintenance_url,
isolation_level="AUTOCOMMIT",
pool_pre_ping=False,
)
try:
with maintenance_engine.connect() as conn:
# Use psycopg's Identifier to quote the DB name safely. The
# Composed SQL renders as ``CREATE DATABASE "<name>"`` and is
# executed on the raw psycopg cursor below, since SQLAlchemy's
# Connection.execute() does not accept psycopg sql objects.
stmt = pg_sql.SQL("CREATE DATABASE {}").format(
pg_sql.Identifier(target_db)
)
raw = conn.connection.dbapi_connection # psycopg connection
with raw.cursor() as cur:
try:
cur.execute(stmt)
except psycopg.errors.DuplicateDatabase:
# Another worker won the race — benign.
log.info(
"ensure_database_ready: database %r already "
"created by a concurrent worker; continuing.",
target_db,
)
except psycopg.errors.InsufficientPrivilege as exc:
log.error(
"ensure_database_ready: role lacks CREATEDB "
"privilege to create %r. Either GRANT CREATEDB "
"to the role, create the database manually, or "
"set AUTO_CREATE_DB=False and provision it "
"out-of-band. See docs/Deploying/Postgres-"
"Migration for guidance. Underlying error: %s",
target_db,
exc,
)
raise
except (OperationalError, ProgrammingError) as exc:
log.error(
"ensure_database_ready: failed to create database %r: %s. "
"See docs/Deploying/Postgres-Migration for manual setup.",
target_db,
exc,
)
raise
finally:
maintenance_engine.dispose()
def _is_missing_database(exc: Exception) -> bool:
"""Return True if ``exc`` indicates the target database doesn't exist.
We check three signals in the cause chain:
1. ``psycopg.errors.InvalidCatalogName`` — the canonical class for
SQLSTATE ``3D000`` when raised during a query.
2. ``pgcode`` / ``diag.sqlstate`` equal to ``3D000`` — defensive, for
driver versions that surface the code on a generic class.
3. The canonical server message phrasing ``database "..." does not
exist`` — **required** for connection-time failures, because
psycopg 3's ``OperationalError`` raised by ``connect()`` does NOT
populate ``sqlstate`` (the connection never completed the protocol
handshake, so the attributes stay ``None``). The server's error
message itself is stable across Postgres versions, so this is a
reliable fallback for the only case that matters: DB missing at
boot.
"""
try:
import psycopg
invalid_catalog = psycopg.errors.InvalidCatalogName
except Exception: # noqa: BLE001 — defensive; never break on import
invalid_catalog = None
seen: set[int] = set()
cursor: Optional[BaseException] = exc
while cursor is not None and id(cursor) not in seen:
seen.add(id(cursor))
if invalid_catalog is not None and isinstance(cursor, invalid_catalog):
return True
pgcode = getattr(cursor, "pgcode", None) or getattr(
getattr(cursor, "diag", None), "sqlstate", None
)
if pgcode == "3D000":
return True
msg = str(cursor)
if 'database "' in msg and "does not exist" in msg:
return True
cursor = cursor.__cause__ or cursor.__context__
return False
def _run_migrations(log: logging.Logger) -> None:
"""Run ``alembic upgrade head`` against ``POSTGRES_URI``.
Alembic serializes concurrent workers via its ``alembic_version``
table, so no extra application-level locking is needed. Failures are
logged and re-raised so the app fails fast.
"""
from pathlib import Path
# Lazy imports — alembic pulls in a fair amount of code.
from alembic import command
from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from sqlalchemy import create_engine
# Mirror the discovery path used by scripts/db/init_postgres.py so
# both entry points resolve the same alembic.ini regardless of cwd.
alembic_ini = Path(__file__).resolve().parents[2] / "alembic.ini"
if not alembic_ini.exists():
raise RuntimeError(f"alembic.ini not found at {alembic_ini}")
cfg = Config(str(alembic_ini))
cfg.set_main_option("script_location", str(alembic_ini.parent / "alembic"))
# Cheap pre-check: if we're already at head, say so explicitly.
try:
script = ScriptDirectory.from_config(cfg)
head_rev = script.get_current_head()
url = cfg.get_main_option("sqlalchemy.url")
# env.py populates sqlalchemy.url from settings.POSTGRES_URI when
# it's imported, but our Config instance hasn't loaded env.py
# yet. Fall back to reading settings directly for the precheck.
if not url:
from application.core.settings import settings as _settings
url = _settings.POSTGRES_URI
current_rev: Optional[str] = None
if url:
precheck_engine = create_engine(url, pool_pre_ping=False)
try:
with precheck_engine.connect() as conn:
ctx = MigrationContext.configure(conn)
current_rev = ctx.get_current_revision()
finally:
precheck_engine.dispose()
if current_rev is not None and current_rev == head_rev:
log.info(
"ensure_database_ready: migrations already at head (%s); "
"nothing to do.",
head_rev,
)
return
log.info(
"ensure_database_ready: applying Alembic migrations "
"(current=%s, target=%s)...",
current_rev,
head_rev,
)
except Exception as exc: # noqa: BLE001 — precheck is best-effort
# If the precheck itself fails we still want to try the upgrade;
# alembic will give a more actionable error if something's off.
log.info(
"ensure_database_ready: revision precheck failed (%s); "
"proceeding with upgrade anyway.",
exc,
)
try:
command.upgrade(cfg, "head")
except Exception as exc: # noqa: BLE001 — surface everything
log.error(
"ensure_database_ready: alembic upgrade failed: %s. "
"Check migration logs and DB connectivity; the app will not "
"boot until this is resolved (or AUTO_MIGRATE is disabled).",
exc,
)
raise
log.info("ensure_database_ready: migrations applied.")

View File

@@ -27,8 +27,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
worker:
build: ../application
@@ -44,8 +44,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
redis:
image: redis:6-alpine
@@ -68,17 +68,5 @@ services:
timeout: 5s
retries: 10
postgres-init:
build: ../application
command: python scripts/db/init_postgres.py
env_file:
- ../.env
environment:
- POSTGRES_URI=postgresql://docsgpt:docsgpt@postgres:5432/docsgpt
depends_on:
postgres:
condition: service_healthy
restart: "no"
volumes:
postgres_data:

View File

@@ -32,8 +32,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
worker:
@@ -55,8 +55,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
redis:
image: redis:6-alpine
@@ -79,17 +79,5 @@ services:
timeout: 5s
retries: 10
postgres-init:
image: arc53/docsgpt:develop
command: python scripts/db/init_postgres.py
env_file:
- ../.env
environment:
- POSTGRES_URI=postgresql://docsgpt:docsgpt@postgres:5432/docsgpt
depends_on:
postgres:
condition: service_healthy
restart: "no"
volumes:
postgres_data:

View File

@@ -33,8 +33,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
worker:
user: root
@@ -56,8 +56,8 @@ services:
depends_on:
redis:
condition: service_started
postgres-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
redis:
image: redis:6-alpine
@@ -80,21 +80,6 @@ services:
timeout: 5s
retries: 10
# One-shot migrator: runs alembic upgrade head, then exits. The backend
# and worker services wait for it via `service_completed_successfully`,
# so they never see a partially-migrated schema.
postgres-init:
build: ../application
command: python scripts/db/init_postgres.py
env_file:
- ../.env
environment:
- POSTGRES_URI=postgresql://docsgpt:docsgpt@postgres:5432/docsgpt
depends_on:
postgres:
condition: service_healthy
restart: "no"
volumes:
postgres_data:

View File

@@ -50,6 +50,13 @@ spec:
secretKeyRef:
name: docsgpt-secrets
key: POSTGRES_URI
# Disable in-app auto-bootstrap. The `postgres-init` Job under
# deployment/k8s/jobs/ owns schema creation and Alembic migrations,
# so application pods must not race with it on rollout.
- name: AUTO_MIGRATE
value: "false"
- name: AUTO_CREATE_DB
value: "false"
---
apiVersion: apps/v1
kind: Deployment
@@ -97,6 +104,13 @@ spec:
secretKeyRef:
name: docsgpt-secrets
key: POSTGRES_URI
# Disable in-app auto-bootstrap. The `postgres-init` Job under
# deployment/k8s/jobs/ owns schema creation and Alembic migrations,
# so application pods must not race with it on rollout.
- name: AUTO_MIGRATE
value: "false"
- name: AUTO_CREATE_DB
value: "false"
---
apiVersion: apps/v1
kind: Deployment

View File

@@ -11,7 +11,7 @@ This guide will walk you through setting up a development environment for DocsGP
## 1. Spin Up Postgres and Redis
For development purposes, you can quickly start Postgres and Redis containers. Postgres is the user-data store for DocsGPT (conversations, agents, prompts, sources, attachments, workflows, logs, and token usage), and Redis is used as the cache and Celery broker. We provide a dedicated Docker Compose file, `docker-compose-dev.yaml`, located in the `deployment` directory, that includes only these essential services along with a one-shot `postgres-init` migrator that applies the Alembic schema.
For development purposes, you can quickly start Postgres and Redis containers. Postgres is the user-data store for DocsGPT (conversations, agents, prompts, sources, attachments, workflows, logs, and token usage), and Redis is used as the cache and Celery broker. We provide a dedicated Docker Compose file, `docker-compose-dev.yaml`, located in the `deployment` directory, that includes only these essential services. The backend applies the Alembic schema automatically on first boot (`AUTO_MIGRATE=true` / `AUTO_CREATE_DB=true` ship enabled), so no separate migration step is required. You can still run `python scripts/db/init_postgres.py` explicitly if you prefer.
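For reference, the bootstrap is a single call made at import time in `application/app.py` (a sketch of that call; the optional `logger` argument is omitted here):
```python
# What the backend runs on boot. Both steps are idempotent no-ops once
# the database exists and the schema is at the Alembic head.
from application.core.settings import settings
from application.storage.db.bootstrap import ensure_database_ready

ensure_database_ready(
    settings.POSTGRES_URI,
    create_db=settings.AUTO_CREATE_DB,  # AUTO_CREATE_DB, default true
    migrate=settings.AUTO_MIGRATE,      # AUTO_MIGRATE, default true
)
```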
You can find the `docker-compose-dev.yaml` file [here](https://github.com/arc53/DocsGPT/blob/main/deployment/docker-compose-dev.yaml).
@@ -26,7 +26,7 @@ You can find the `docker-compose-dev.yaml` file [here](https://github.com/arc53/
docker compose -f deployment/docker-compose-dev.yaml up -d
```
These commands will start Postgres and Redis in detached mode, running in the background. The `postgres-init` service runs once against the fresh database and then exits.
These commands will start Postgres and Redis in detached mode, running in the background. When the Flask backend boots against the fresh Postgres instance, it will automatically create the database (if missing) and apply the current Alembic schema.
<Callout type="info" emoji="">
MongoDB is no longer required for a default DocsGPT install. If you

View File

@@ -248,6 +248,8 @@ DocsGPT stores user data — conversations, agents, prompts, sources, attachment
| Setting | Description | Default |
| --- | --- | --- |
| `POSTGRES_URI` | SQLAlchemy-compatible Postgres URI. Any standard `postgresql://` form works — DocsGPT normalizes it internally to the `psycopg` v3 dialect. | — |
| `AUTO_CREATE_DB` | On startup, connect to the server's `postgres` maintenance DB and issue `CREATE DATABASE` if the target is missing. Requires `CREATEDB` or superuser. No-op when the database already exists. Disable in production. | `true` |
| `AUTO_MIGRATE` | On startup, run `alembic upgrade head` against the target database. Idempotent and serialized across workers via `alembic_version`. Disable in production in favor of an explicit migration step. | `true` |
Example:
@@ -256,13 +258,19 @@ POSTGRES_URI=postgresql://docsgpt:docsgpt@localhost:5432/docsgpt
# Append ?sslmode=require for managed providers that enforce SSL.
```
Apply the schema once (idempotent):
With the defaults, the app applies the schema automatically on first
boot. To run it explicitly instead (e.g., in CI/CD or a k8s `Job`):
```bash
python scripts/db/init_postgres.py
```
The default Docker Compose file bundles a `postgres` service plus a one-shot `postgres-init` migrator, so you don't have to run this by hand for containerized deployments.
The default Docker Compose file bundles a `postgres` service, and the
app auto-bootstraps the database on boot, so containerized deployments
need no manual migration step. See
[PostgreSQL for User Data](/Deploying/Postgres-Migration#production-hardening)
for the recommended production flow (both flags `false`, migrations
gated by CI/CD).
<Callout type="info" emoji="">
`MONGO_URI` is **opt-in**. It is only consulted when you select the

View File

@@ -1,126 +1,151 @@
---
title: PostgreSQL for User Data
description: PostgreSQL is the user-data store for DocsGPT. This page covers fresh installs and the one-shot migration from legacy MongoDB deployments.
description: PostgreSQL is the user-data store for DocsGPT. Covers auto-bootstrap, production hardening, and the one-shot migration from legacy MongoDB deployments.
---
import { Callout } from 'nextra/components'
# PostgreSQL for User Data
DocsGPT uses **PostgreSQL** as the user-data store for conversations,
agents, prompts, sources, attachments, workflows, logs, token usage,
and the rest of the application's structured state. MongoDB is no
longer required for a default install.
DocsGPT stores conversations, agents, prompts, sources, attachments,
workflows, logs, and token usage in **PostgreSQL**. MongoDB is no longer
required.
<Callout type="info" emoji="">
Vector stores are independent from user-data storage. `VECTOR_STORE`
can still be `pgvector`, `faiss`, `qdrant`, `milvus`, `elasticsearch`,
or `mongodb` (Mongo Atlas Vector Search) — your choice there does not
affect this page.
Vector stores are independent — `VECTOR_STORE` can still be `pgvector`,
`faiss`, `qdrant`, `milvus`, `elasticsearch`, or `mongodb`.
</Callout>
## Fresh install
## Quickstart
1. **Run Postgres 13+.** Native install, Docker, or managed (Neon, RDS,
Supabase, Cloud SQL…) — all work. The default Docker Compose file
ships a `postgres` service plus a one-shot `postgres-init` migrator
that applies the schema automatically.
Three common paths. Each assumes Postgres 13+ and the default env vars
`AUTO_MIGRATE=true` / `AUTO_CREATE_DB=true` (both ship enabled).
2. **Create a database and role** (skip if your managed provider gave
you these, or if you're using the bundled compose `postgres`
service):
### Docker Compose
```sql
CREATE ROLE docsgpt LOGIN PASSWORD 'docsgpt';
CREATE DATABASE docsgpt OWNER docsgpt;
```
The bundled compose file ships a `postgres` service. App boot handles the
rest — no sidecar, no init job.
3. **Set `POSTGRES_URI` in `.env`.** Any standard Postgres URI works —
DocsGPT normalizes it internally to the SQLAlchemy `psycopg` (v3)
dialect.
```bash
cd deployment && docker compose up
```
```bash
POSTGRES_URI=postgresql://docsgpt:docsgpt@localhost:5432/docsgpt
# Append ?sslmode=require for managed providers that enforce SSL.
```
### Managed Postgres (Neon, RDS, Supabase, Cloud SQL)
4. **Apply the schema** (idempotent — safe to re-run). The bundled
`postgres-init` compose service does this for you; if you're running
the backend outside compose, run it manually:
Point `POSTGRES_URI` at the provider-given URI. The app applies the
schema on first boot.
```bash
python scripts/db/init_postgres.py
# or equivalently:
alembic -c application/alembic.ini upgrade head
```
```bash
export POSTGRES_URI="postgresql://user:pass@host/docsgpt?sslmode=require"
flask --app application/app.py run --host=0.0.0.0 --port=7091
```
That's it — the backend will come up against Postgres.
### Bare-metal Postgres
## Migrating from a legacy MongoDB install
Run Postgres locally and point `POSTGRES_URI` at the default superuser.
First boot creates both the database and the schema.
If you are upgrading from an older DocsGPT deployment that stored user
data in MongoDB, a one-shot migration tool copies every collection into
Postgres. The tool is run **once**, offline, with the app stopped.
```bash
export POSTGRES_URI="postgresql://postgres@localhost/docsgpt"
flask --app application/app.py run --host=0.0.0.0 --port=7091
```
1. **Install the optional Mongo client libraries.** `pymongo` and
`dnspython` are no longer part of the default backend install;
install them directly alongside the base requirements:
Prefer a dedicated non-superuser role? Create it once as superuser — the
app never creates roles.
```bash
pip install -r application/requirements.txt
pip install 'pymongo>=4.6'
```
```sql
CREATE ROLE docsgpt LOGIN PASSWORD 'docsgpt' CREATEDB;
-- Then: POSTGRES_URI=postgresql://docsgpt:docsgpt@localhost/docsgpt
```
2. **Provision Postgres** following the [Fresh install](#fresh-install)
steps above, so `POSTGRES_URI` is set and the schema is applied.
## How auto-bootstrap works
3. **Point the backfill at both databases.** Set `MONGO_URI` in the
environment alongside `POSTGRES_URI` for the duration of the
migration:
Two env vars control startup behavior. Both default to `true` in the
app, and both steps are idempotent.
```bash
export MONGO_URI="mongodb://user:pass@host:27017/docsgpt"
export POSTGRES_URI="postgresql://docsgpt:docsgpt@localhost:5432/docsgpt"
```
| Setting | Effect | Requires |
| --- | --- | --- |
| `AUTO_CREATE_DB` | If the target database is missing, connects to the server's `postgres` maintenance DB and issues `CREATE DATABASE`. | `CREATEDB` privilege (or superuser) |
| `AUTO_MIGRATE` | Runs `alembic upgrade head` against the target database. | Table-owner or superuser on the target DB |
4. **Run the backfill.** Idempotent — re-run any time to re-sync
drifted rows. Without arguments, backfills every registered table;
pass `--tables` to limit.
Concurrent workers serialize through `alembic_version`, so rolling
restarts are safe. If the role lacks the required privilege, startup
fails fast with a clear error rather than silently skipping.
```bash
python scripts/db/backfill.py --dry-run # preview everything
python scripts/db/backfill.py # real run, everything
python scripts/db/backfill.py --tables users # only specific tables
```
<Callout type="info" emoji="">
Convenient in dev. In production, disable both and run migrations as
an explicit step — see [Production hardening](#production-hardening).
</Callout>
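Condensed, the missing-database check that gates `AUTO_CREATE_DB` looks roughly like this (an illustrative sketch assuming the `psycopg` v3 driver; the real implementation in `application/storage/db/bootstrap.py` also walks the exception cause chain and checks SQLSTATE `3D000`):
```python
from sqlalchemy import create_engine
from sqlalchemy.exc import OperationalError

def database_is_missing(uri: str) -> bool:
    engine = create_engine(uri)
    try:
        engine.connect().close()
        return False  # reachable; nothing to create
    except OperationalError as exc:
        # psycopg 3 does not populate SQLSTATE on connect() failures,
        # so match the server's stable message text. Other failures
        # (bad host, auth, missing role) are re-raised by the real code.
        msg = str(exc)
        return 'database "' in msg and "does not exist" in msg
    finally:
        engine.dispose()
```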
5. **Restart the app against Postgres only.** Unset `MONGO_URI` (or
leave it unset — it is `Optional[str] = None` in settings) and start
the backend. Nothing in the default code path consults MongoDB
anymore.
## Production hardening
Set both flags to `false` in prod and run migrations as a gated,
auditable step before rolling out the app.
```env
AUTO_MIGRATE=false
AUTO_CREATE_DB=false
```
Run migrations from your CI/CD pipeline, a Kubernetes `Job`, or an
init-container ahead of the app rollout:
```bash
python scripts/db/init_postgres.py
# equivalently:
alembic -c application/alembic.ini upgrade head
```
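If your rollout tooling is Python, the same step can be issued programmatically (a sketch; assumes you run from the repository root so the `.ini` path resolves):
```python
# Programmatic equivalent of `alembic -c application/alembic.ini upgrade head`.
from alembic import command
from alembic.config import Config

cfg = Config("application/alembic.ini")
cfg.set_main_option("script_location", "application/alembic")
command.upgrade(cfg, "head")  # concurrent runs serialize via alembic_version
```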
The reasoning: the app's runtime role shouldn't carry DDL privileges,
migrations should gate each rollout, and an explicit step is
auditable — implicit first-boot bootstrap is fine for dev but muddies
prod deploys.
<Callout type="warning" emoji="⚠️">
The backfill is a one-shot tool. There is no dual-write window and no
runtime feature flag — once you're on the current version, Postgres
is the only user-data store the backend reads from or writes to.
Migrations are not reversible by the app. Always back up production
Postgres before running `alembic upgrade head` on a new release.
</Callout>
<Callout type="info" emoji="">
Keep your MongoDB instance online until you have verified the
Postgres data is complete. You can re-run `backfill.py` at any time
to re-sync. Once you're satisfied, decommission MongoDB — unless you
also use it as your vector store (`VECTOR_STORE=mongodb`), in which
case keep it for that purpose.
## Migrating from MongoDB
One-shot, offline, app stopped. The app itself will create the
Postgres schema when it boots — you only need to run the data copy.
```bash
pip install -r application/requirements.txt
pip install 'pymongo>=4.6'
export POSTGRES_URI="postgresql://docsgpt:docsgpt@localhost:5432/docsgpt"
export MONGO_URI="mongodb://user:pass@host:27017/docsgpt"
python scripts/db/backfill.py --dry-run # preview
python scripts/db/backfill.py # real run
# or: python scripts/db/backfill.py --tables users,agents
```
Then unset `MONGO_URI` and start the backend — nothing consults Mongo
in the default path anymore. The backfill is idempotent (per-table
`ON CONFLICT` upserts, event-log tables deduped via `mongo_id`), so
re-running is safe and re-syncs any drifted rows. Keep Mongo online
until you've verified Postgres is complete; decommission afterwards
unless you still use it as a vector store.
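The upsert pattern behind that idempotency looks like this (an illustrative sketch only; the table and key column here are hypothetical, not the backfill's actual code):
```python
from sqlalchemy.dialects.postgresql import insert

def upsert_rows(conn, table, rows):
    # INSERT ... ON CONFLICT DO UPDATE keyed on the primary key makes
    # re-runs converge on the source data instead of duplicating rows.
    stmt = insert(table).values(rows)
    stmt = stmt.on_conflict_do_update(
        index_elements=[table.c.id],  # hypothetical key column
        set_={
            c.name: stmt.excluded[c.name]
            for c in table.columns
            if c.name != "id"
        },
    )
    conn.execute(stmt)
```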
<Callout type="warning" emoji="⚠️">
No dual-write window and no runtime flag — on the current version,
Postgres is the only user-data store the backend reads or writes.
</Callout>
## Troubleshooting
- **`relation "..." does not exist`** — run `python scripts/db/init_postgres.py`
(or `alembic -c application/alembic.ini upgrade head`).
- **`FATAL: role "docsgpt" does not exist`** — run the `CREATE ROLE` /
`CREATE DATABASE` statements from step 2 of the fresh install as a
Postgres superuser.
- **`relation "..." does not exist`** — schema not applied. Either let
the app bootstrap it (`AUTO_MIGRATE=true`) or run
`python scripts/db/init_postgres.py`.
- **`permission denied to create database`** — the role lacks
`CREATEDB`. As superuser: `ALTER ROLE <name> CREATEDB;`. Or create
the database manually and set `AUTO_CREATE_DB=false`.
- **`role "docsgpt" does not exist`** — roles are never auto-created.
As superuser: `CREATE ROLE docsgpt LOGIN PASSWORD '...';`.
- **SSL errors on a managed provider** — append `?sslmode=require` to
`POSTGRES_URI`.
- **`ModuleNotFoundError: pymongo` when running `backfill.py`** —
install the Mongo client directly:
`pip install 'pymongo>=4.6'`.
- **`ModuleNotFoundError: pymongo`** — `pip install 'pymongo>=4.6'`
(only needed for the one-shot Mongo backfill).

View File

@@ -22,6 +22,21 @@ corresponding route handler is migrated to a repository read.
from __future__ import annotations
import os
# Disable the app's self-bootstrap (AUTO_CREATE_DB / AUTO_MIGRATE) before
# any ``application.*`` module is imported. ``application/app.py`` runs
# ``ensure_database_ready`` at import time using whatever ``POSTGRES_URI``
# is set in the environment — which in dev is the operator's local DB, not
# the ephemeral ``pytest-postgresql`` cluster that the fixtures below spin
# up. Tests manage their own schema via the ``pg_engine`` fixture
# (subprocess ``alembic upgrade head`` against the per-test URI), so the
# import-time bootstrap would at best be redundant and at worst would
# mutate the operator's dev DB. ``setdefault`` so a test run can still
# opt back in by setting the env var explicitly.
os.environ.setdefault("AUTO_MIGRATE", "false")
os.environ.setdefault("AUTO_CREATE_DB", "false")
import subprocess
import sys
from pathlib import Path