From 441fa79e5d11963d2bb4afbdf719b0d0deec70ee Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 24 Jan 2026 14:24:05 -0600 Subject: [PATCH 1/4] docs: add production deployment how-to guide New guide covering: - database.create_tables setting (production mode) - database.schema_prefix for project isolation - Environment-based configuration patterns - Verification script example Co-Authored-By: Claude Opus 4.5 --- mkdocs.yaml | 1 + src/how-to/deploy-production.md | 307 ++++++++++++++++++++++++++ src/how-to/index.md | 1 + src/how-to/manage-pipeline-project.md | 1 + 4 files changed, 310 insertions(+) create mode 100644 src/how-to/deploy-production.md diff --git a/mkdocs.yaml b/mkdocs.yaml index 0a8d29ab..6f25d20d 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -61,6 +61,7 @@ nav: - Read Diagrams: how-to/read-diagrams.ipynb - Project Management: - Manage Pipeline Project: how-to/manage-pipeline-project.md + - Deploy to Production: how-to/deploy-production.md - Data Operations: - Insert Data: how-to/insert-data.md - Query Data: how-to/query-data.md diff --git a/src/how-to/deploy-production.md b/src/how-to/deploy-production.md new file mode 100644 index 00000000..9e7c7e95 --- /dev/null +++ b/src/how-to/deploy-production.md @@ -0,0 +1,307 @@ +# Deploy to Production + +Configure DataJoint for production environments with controlled schema changes and project isolation. + +## Overview + +Development and production environments have different requirements: + +| Concern | Development | Production | +|---------|-------------|------------| +| Schema changes | Automatic table creation | Controlled, explicit changes only | +| Naming | Ad-hoc schema names | Consistent project prefixes | +| Configuration | Local settings | Environment-based | + +DataJoint 2.0 provides settings to enforce production discipline. + +## Prevent Automatic Table Creation + +By default, DataJoint creates tables automatically when you first access them. 
This is convenient during development but dangerous in production—a typo or code bug could create unintended tables. + +### Enable Production Mode + +Set `create_tables=False` to prevent automatic table creation: + +```python +import datajoint as dj + +# Production mode: no automatic table creation +dj.config.database.create_tables = False +``` + +Or via environment variable: + +```bash +export DJ_CREATE_TABLES=false +``` + +Or in `datajoint.json`: + +```json +{ + "database": { + "create_tables": false + } +} +``` + +### What Changes + +With `create_tables=False`: + +| Action | Development (True) | Production (False) | +|--------|-------------------|-------------------| +| Access existing table | Works | Works | +| Access missing table | Creates it | **Raises error** | +| Explicit `Schema(create_tables=True)` | Creates | Creates (override) | + +### Example: Production Safety + +```python +import datajoint as dj + +dj.config.database.create_tables = False +schema = dj.Schema('myproject_ephys') + +@schema +class Recording(dj.Manual): + definition = """ + recording_id : int + --- + path : varchar(255) + """ + +# If table doesn't exist in database: +Recording() # Raises DataJointError: Table not found +``` + +### Override for Migrations + +When you need to create tables during a controlled migration: + +```python +# Explicit override for this schema only +schema = dj.Schema('myproject_ephys', create_tables=True) + +@schema +class NewTable(dj.Manual): + definition = """...""" + +NewTable() # Creates the table +``` + +## Use Schema Prefixes + +When multiple projects share a database server, use prefixes to avoid naming collisions and organize schemas. 
+ +### Configure Project Prefix + +```python +import datajoint as dj + +dj.config.database.schema_prefix = 'myproject_' +``` + +Or via environment variable: + +```bash +export DJ_SCHEMA_PREFIX=myproject_ +``` + +Or in `datajoint.json`: + +```json +{ + "database": { + "schema_prefix": "myproject_" + } +} +``` + +### Apply Prefix to Schemas + +Use the prefix when creating schemas: + +```python +import datajoint as dj + +prefix = dj.config.database.schema_prefix # 'myproject_' + +# Schema names include prefix +subject_schema = dj.Schema(prefix + 'subject') # myproject_subject +session_schema = dj.Schema(prefix + 'session') # myproject_session +ephys_schema = dj.Schema(prefix + 'ephys') # myproject_ephys +``` + +### Benefits + +- **Isolation**: Multiple projects coexist without conflicts +- **Visibility**: Easy to identify which schemas belong to which project +- **Permissions**: Grant access by prefix pattern (`myproject_*`) +- **Cleanup**: Drop all project schemas by prefix + +### Database Permissions by Prefix + +```sql +-- Grant access to all schemas with prefix +GRANT ALL PRIVILEGES ON `myproject\_%`.* TO 'developer'@'%'; + +-- Read-only access to another project +GRANT SELECT ON `otherproject\_%`.* TO 'developer'@'%'; +``` + +## Environment-Based Configuration + +Use different configurations for development, staging, and production. + +### Configuration Hierarchy + +DataJoint loads settings in priority order: + +1. **Environment variables** (highest priority) +2. **Secrets directory** (`.secrets/`) +3. **Config file** (`datajoint.json`) +4. 
**Defaults** (lowest priority) + +### Development Setup + +**datajoint.json** (committed): +```json +{ + "database": { + "host": "localhost", + "create_tables": true + } +} +``` + +**.secrets/database.user**: +``` +dev_user +``` + +### Production Setup + +Override via environment: + +```bash +# Production database +export DJ_HOST=prod-db.example.com +export DJ_USER=prod_user +export DJ_PASS=prod_password + +# Production mode +export DJ_CREATE_TABLES=false +export DJ_SCHEMA_PREFIX=myproject_ + +# Disable interactive prompts +export DJ_SAFEMODE=false +``` + +### Docker/Kubernetes Example + +```yaml +# docker-compose.yaml +services: + worker: + image: my-pipeline:latest + environment: + - DJ_HOST=db.example.com + - DJ_USER_FILE=/run/secrets/db_user + - DJ_PASS_FILE=/run/secrets/db_password + - DJ_CREATE_TABLES=false + - DJ_SCHEMA_PREFIX=prod_ + secrets: + - db_user + - db_password +``` + +## Complete Production Configuration + +### datajoint.json (committed) + +```json +{ + "database": { + "host": "localhost", + "port": 3306 + }, + "stores": { + "default": "main", + "main": { + "protocol": "s3", + "endpoint": "s3.amazonaws.com", + "bucket": "my-org-data", + "location": "myproject" + } + } +} +``` + +### Production Environment Variables + +```bash +# Database +export DJ_HOST=prod-mysql.example.com +export DJ_USER=prod_service +export DJ_PASS=<password-from-secret-manager> + +# Production behavior +export DJ_CREATE_TABLES=false +export DJ_SCHEMA_PREFIX=prod_ +export DJ_SAFEMODE=false + +# Logging +export DJ_LOG_LEVEL=WARNING +``` + +### Verification Script + +```python +#!/usr/bin/env python +"""Verify production configuration before deployment.""" +import datajoint as dj + +def verify_production_config(): + """Check that production settings are correctly applied.""" + errors = [] + + # Check create_tables is disabled + if dj.config.database.create_tables: + errors.append("create_tables should be False in production") + + # Check schema prefix is set + if not dj.config.database.schema_prefix: + 
errors.append("schema_prefix should be set in production") + + # Check not pointing to localhost + if dj.config.database.host == 'localhost': + errors.append("database.host is localhost - expected production host") + + if errors: + for e in errors: + print(f"ERROR: {e}") + return False + + print("Production configuration verified") + return True + +if __name__ == '__main__': + import sys + sys.exit(0 if verify_production_config() else 1) +``` + +## Summary + +| Setting | Development | Production | +|---------|-------------|------------| +| `database.create_tables` | `true` | `false` | +| `database.schema_prefix` | `""` or `dev_` | `prod_` | +| `safemode` | `true` | `false` (automated) | +| `loglevel` | `DEBUG` | `WARNING` | + +## See Also + +- [Manage Pipeline Project](manage-pipeline-project.md) — Project organization +- [Configuration Reference](../reference/configuration.md) — All settings +- [Manage Secrets](manage-secrets.md) — Credential management diff --git a/src/how-to/index.md b/src/how-to/index.md index d54246ab..9d42be54 100644 --- a/src/how-to/index.md +++ b/src/how-to/index.md @@ -22,6 +22,7 @@ they assume you understand the basics and focus on getting things done. ## Project Management - [Manage a Pipeline Project](manage-pipeline-project.md) — Multi-schema pipelines, team collaboration +- [Deploy to Production](deploy-production.md) — Production mode, schema prefixes, environment config ## Data Operations diff --git a/src/how-to/manage-pipeline-project.md b/src/how-to/manage-pipeline-project.md index 35177ec1..0c8701ea 100644 --- a/src/how-to/manage-pipeline-project.md +++ b/src/how-to/manage-pipeline-project.md @@ -368,6 +368,7 @@ These challenges grow with team size and pipeline complexity. 
The [DataJoint Pla ## See Also +- [Deploy to Production](deploy-production.md) — Production mode and environment configuration - [Data Pipelines](../explanation/data-pipelines.md) — Conceptual overview and architecture - [Configure Object Storage](configure-storage.md) — Storage setup - [Distributed Computing](distributed-computing.md) — Multi-worker pipelines From 3e12b0f23ce40920dde2f23a35aad748ada18acb Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sun, 25 Jan 2026 23:50:33 -0600 Subject: [PATCH 2/4] fix: add schema cleanup at start of object-storage tutorial Ensures clean slate when re-running notebook by dropping existing schema before creating tables. Co-Authored-By: Claude Opus 4.5 --- src/tutorials/basics/06-object-storage.ipynb | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/tutorials/basics/06-object-storage.ipynb b/src/tutorials/basics/06-object-storage.ipynb index 8c12bb86..a1366dad 100644 --- a/src/tutorials/basics/06-object-storage.ipynb +++ b/src/tutorials/basics/06-object-storage.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "cell-1", "metadata": { "execution": { @@ -31,21 +31,8 @@ "shell.execute_reply": "2026-01-14T07:34:00.094318Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2026-01-14 01:34:00,086][INFO]: DataJoint 2.0.0a22 connected to root@127.0.0.1:3306\n" - ] - } - ], - "source": [ - "import datajoint as dj\n", - "import numpy as np\n", - "\n", - "schema = dj.Schema('tutorial_oas')" - ] + "outputs": [], + "source": "import datajoint as dj\nimport numpy as np\n\nschema = dj.Schema('tutorial_oas')\n\n# Clean slate: drop existing schema if re-running\nschema.drop(prompt=False)\nschema = dj.Schema('tutorial_oas')" }, { "cell_type": "markdown", From c63f8c6b124c58c04b595b27e98d028cbc5dfa5f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 26 Jan 2026 07:32:32 -0600 Subject: [PATCH 3/4] 
fix: remove datajoint.admin from API docs The admin module was removed in the remove/dj-kill branch. Co-Authored-By: Claude Opus 4.5 --- scripts/gen_api_pages.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/gen_api_pages.py b/scripts/gen_api_pages.py index 4fe22b14..f297049a 100644 --- a/scripts/gen_api_pages.py +++ b/scripts/gen_api_pages.py @@ -24,7 +24,6 @@ "datajoint.blob", "datajoint.hash_registry", "datajoint.jobs", - "datajoint.admin", "datajoint.migrate", ] @@ -44,7 +43,6 @@ "datajoint.blob": ("Blob", "Binary serialization"), "datajoint.hash_registry": ("Hash Registry", "Content hashing for external storage"), "datajoint.jobs": ("Jobs", "Job queue for AutoPopulate"), - "datajoint.admin": ("Admin", "Administrative functions"), "datajoint.migrate": ("Migrate", "Schema migration utilities"), } From dd2846a6b96a0f04d10ee92736e7f729f531ae08 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 26 Jan 2026 08:04:44 -0600 Subject: [PATCH 4/4] fix: address PR review comments on deploy-production.md - Keep backslash escapes in MySQL GRANT patterns (`_` is a wildcard in database-level grants, even inside backticks) - Replace '%' host with '10.0.0.%' subnet and add security warning - Fix Docker/Kubernetes secrets example to use /run/secrets/datajoint/ directory pattern (DJ_USER_FILE and DJ_PASS_FILE don't exist) - Add Kubernetes Secret mounting example Co-Authored-By: Claude Opus 4.5 --- src/how-to/deploy-production.md | 48 ++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/src/how-to/deploy-production.md b/src/how-to/deploy-production.md index 9e7c7e95..c54ca544 100644 --- a/src/how-to/deploy-production.md +++ b/src/how-to/deploy-production.md @@ -144,12 +144,17 @@ ephys_schema = dj.Schema(prefix + 'ephys') # myproject_ephys ```sql -- Grant access to all schemas with prefix -GRANT ALL PRIVILEGES ON `myproject\_%`.* TO 'developer'@'%'; +GRANT ALL PRIVILEGES ON `myproject\_%`.* TO 'developer'@'10.0.0.%'; -- Read-only access to another project 
-GRANT SELECT ON `otherproject\_%`.* TO 'developer'@'%'; +GRANT SELECT ON `otherproject\_%`.* TO 'developer'@'10.0.0.%'; ``` +!!! warning "Restrict Host Access" + Avoid using `'%'` for the host in production GRANT statements—this allows + connections from any IP address. Use specific IP addresses or subnet patterns + like `'10.0.0.%'` to limit access to your internal network. + ## Environment-Based Configuration Use different configurations for development, staging, and production. @@ -200,6 +205,8 @@ export DJ_SAFEMODE=false ### Docker/Kubernetes Example +DataJoint automatically loads credentials from `/run/secrets/datajoint/` when that directory exists (standard Docker/Kubernetes secrets mount point). + ```yaml # docker-compose.yaml services: @@ -207,13 +214,40 @@ services: image: my-pipeline:latest environment: - DJ_HOST=db.example.com - - DJ_USER_FILE=/run/secrets/db_user - - DJ_PASS_FILE=/run/secrets/db_password - DJ_CREATE_TABLES=false - DJ_SCHEMA_PREFIX=prod_ - secrets: - - db_user - - db_password + volumes: + # Mount secrets directory + - type: bind + source: ./secrets + target: /run/secrets/datajoint + read_only: true +``` + +Create the secrets directory with credential files: + +```bash +mkdir -p secrets +echo "prod_user" > secrets/database.user +echo "prod_password" > secrets/database.password +chmod 600 secrets/* +``` + +For Kubernetes, use a Secret mounted to `/run/secrets/datajoint/`: + +```yaml +# kubernetes deployment +spec: + containers: + - name: worker + volumeMounts: + - name: dj-secrets + mountPath: /run/secrets/datajoint + readOnly: true + volumes: + - name: dj-secrets + secret: + secretName: datajoint-credentials ``` ## Complete Production Configuration